diff options
2154 files changed, 51685 insertions, 26558 deletions
@@ -953,11 +953,11 @@ S: Blacksburg, Virginia 24061 S: USA N: Randy Dunlap -E: rdunlap@xenotime.net -W: http://www.xenotime.net/linux/linux.html -W: http://www.linux-usb.org +E: rdunlap@infradead.org +W: http://www.infradead.org/~rdunlap/ D: Linux-USB subsystem, USB core/UHCI/printer/storage drivers D: x86 SMP, ACPI, bootflag hacking +D: documentation, builds S: (ask for current address) S: USA @@ -1510,6 +1510,14 @@ D: Natsemi ethernet D: Cobalt Networks (x86) support D: This-and-That +N: Mark M. Hoffman +E: mhoffman@lightlink.com +D: asb100, lm93 and smsc47b397 hardware monitoring drivers +D: hwmon subsystem core +D: hwmon subsystem maintainer +D: i2c-sis96x and i2c-stub SMBus drivers +S: USA + N: Dirk Hohndel E: hohndel@suse.de D: The XFree86[tm] Project diff --git a/Documentation/ABI/testing/sysfs-class-net-mesh b/Documentation/ABI/testing/sysfs-class-net-mesh index bc41da61608d..bdcd8b4e38f2 100644 --- a/Documentation/ABI/testing/sysfs-class-net-mesh +++ b/Documentation/ABI/testing/sysfs-class-net-mesh @@ -67,6 +67,14 @@ Description: Defines the penalty which will be applied to an originator message's tq-field on every hop. +What: /sys/class/net/<mesh_iface>/mesh/network_coding +Date: Nov 2012 +Contact: Martin Hundeboll <martin@hundeboll.net> +Description: + Controls whether Network Coding (using some magic + to send fewer wifi packets but still the same + content) is enabled or not. 
+ What: /sys/class/net/<mesh_iface>/mesh/orig_interval Date: May 2010 Contact: Marek Lindner <lindner_marek@yahoo.de> diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 7514dbf0a679..c36892c072da 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -227,7 +227,7 @@ X!Isound/sound_firmware.c <chapter id="uart16x50"> <title>16x50 UART Driver</title> !Edrivers/tty/serial/serial_core.c -!Edrivers/tty/serial/8250/8250.c +!Edrivers/tty/serial/8250/8250_core.c </chapter> <chapter id="fbdev"> diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index c379a2a6949f..aa0c1e63f050 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -60,8 +60,7 @@ own source tree. For example: "dontdiff" is a list of files which are generated by the kernel during the build process, and should be ignored in any diff(1)-generated patch. The "dontdiff" file is included in the kernel tree in -2.6.12 and later. For earlier kernel versions, you can get it -from <http://www.xenotime.net/linux/doc/dontdiff>. +2.6.12 and later. Make sure your patch does not include any extra files which do not belong in a patch submission. Make sure to review your patch -after- diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX index f5635a09c3f6..bc461b6425a7 100644 --- a/Documentation/cgroups/00-INDEX +++ b/Documentation/cgroups/00-INDEX @@ -18,6 +18,8 @@ memcg_test.txt - Memory Resource Controller; implementation details. memory.txt - Memory Resource Controller; design, accounting, interface, testing. +net_cls.txt + - Network classifier cgroups details and usages. net_prio.txt - Network priority cgroups details and usages. 
resource_counter.txt diff --git a/Documentation/cgroups/net_cls.txt b/Documentation/cgroups/net_cls.txt new file mode 100644 index 000000000000..9face6bb578a --- /dev/null +++ b/Documentation/cgroups/net_cls.txt @@ -0,0 +1,34 @@ +Network classifier cgroup +------------------------- + +The Network classifier cgroup provides an interface to +tag network packets with a class identifier (classid). + +The Traffic Controller (tc) can be used to assign +different priorities to packets from different cgroups. + +Creating a net_cls cgroups instance creates a net_cls.classid file. +This net_cls.classid value is initialized to 0. + +You can write hexadecimal values to net_cls.classid; the format for these +values is 0xAAAABBBB; AAAA is the major handle number and BBBB +is the minor handle number. +Reading net_cls.classid yields a decimal result. + +Example: +mkdir /sys/fs/cgroup/net_cls +mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls +mkdir /sys/fs/cgroup/net_cls/0 +echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid + - setting a 10:1 handle. + +cat /sys/fs/cgroup/net_cls/0/net_cls.classid +1048577 + +configuring tc: +tc qdisc add dev eth0 root handle 10: htb + +tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit + - creating traffic class 10:1 + +tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup diff --git a/Documentation/devicetree/bindings/marvell.txt b/Documentation/devicetree/bindings/marvell.txt index f1533d91953a..f7a0da6b4022 100644 --- a/Documentation/devicetree/bindings/marvell.txt +++ b/Documentation/devicetree/bindings/marvell.txt @@ -115,6 +115,9 @@ prefixed with the string "marvell,", for Marvell Technology Group Ltd. 
- compatible : "marvell,mv64360-eth-block" - reg : Offset and length of the register set for this block + Optional properties: + - clocks : Phandle to the clock control device and gate bit + Example Discovery Ethernet block node: ethernet-block@2000 { #address-cells = <1>; diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt index 13b707b7355c..c3a14e0ad0ad 100644 --- a/Documentation/devicetree/bindings/mfd/ab8500.txt +++ b/Documentation/devicetree/bindings/mfd/ab8500.txt @@ -13,9 +13,6 @@ Required parent device properties: 4 = active high level-sensitive 8 = active low level-sensitive -Optional parent device properties: -- reg : contains the PRCMU mailbox address for the AB8500 i2c port - The AB8500 consists of a large and varied group of sub-devices: Device IRQ Names Supply Names Description @@ -86,9 +83,8 @@ Non-standard child device properties: - stericsson,amic2-bias-vamic1 : Analoge Mic wishes to use a non-standard Vamic - stericsson,earpeice-cmv : Earpeice voltage (only: 950 | 1100 | 1270 | 1580) -ab8500@5 { +ab8500 { compatible = "stericsson,ab8500"; - reg = <5>; /* mailbox 5 is i2c */ interrupts = <0 40 0x4>; interrupt-controller; #interrupt-cells = <2>; diff --git a/Documentation/devicetree/bindings/net/can/atmel-can.txt b/Documentation/devicetree/bindings/net/can/atmel-can.txt new file mode 100644 index 000000000000..72cf0c5daff4 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/atmel-can.txt @@ -0,0 +1,14 @@ +* AT91 CAN * + +Required properties: + - compatible: Should be "atmel,at91sam9263-can" or "atmel,at91sam9x5-can" + - reg: Should contain CAN controller registers location and length + - interrupts: Should contain IRQ line for the CAN controller + +Example: + + can0: can@f000c000 { + compatible = "atmel,at91sam9x5-can"; + reg = <0xf000c000 0x300>; + interrupts = <40 4 5>; + }; diff --git a/Documentation/devicetree/bindings/net/cpsw.txt 
b/Documentation/devicetree/bindings/net/cpsw.txt index ecfdf756d10f..4f2ca6b4a182 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -15,16 +15,22 @@ Required properties: - mac_control : Specifies Default MAC control register content for the specific platform - slaves : Specifies number for slaves -- cpts_active_slave : Specifies the slave to use for time stamping +- active_slave : Specifies the slave to use for time stamping, + ethtool and SIOCGMIIPHY - cpts_clock_mult : Numerator to convert input clock ticks into nanoseconds - cpts_clock_shift : Denominator to convert input clock ticks into nanoseconds -- phy_id : Specifies slave phy id -- mac-address : Specifies slave MAC address Optional properties: - ti,hwmods : Must be "cpgmac0" - no_bd_ram : Must be 0 or 1 - dual_emac : Specifies Switch to act as Dual EMAC + +Slave Properties: +Required properties: +- phy_id : Specifies slave phy id +- mac-address : Specifies slave MAC address + +Optional properties: - dual_emac_res_vlan : Specifies VID to be used to segregate the ports Note: "ti,hwmods" field is used to fetch the base address and irq @@ -47,7 +53,7 @@ Examples: rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; cpsw_emac0: slave@0 { @@ -73,7 +79,7 @@ Examples: rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; cpsw_emac0: slave@0 { diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt new file mode 100644 index 000000000000..49f4f7ae3f51 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -0,0 +1,91 @@ +Marvell Distributed Switch Architecture Device Tree Bindings +------------------------------------------------------------ + +Required properties: +- 
compatible : Should be "marvell,dsa" +- #address-cells : Must be 2, first cell is the address on the MDIO bus + and second cell is the address in the switch tree. + Second cell is used only when cascading/chaining. +- #size-cells : Must be 0 +- dsa,ethernet : Should be a phandle to a valid Ethernet device node +- dsa,mii-bus : Should be a phandle to a valid MDIO bus device node + +Optional properties: +- interrupts : property with a value describing the switch + interrupt number (not supported by the driver) + +A DSA node can contain multiple switch chips which are therefore child nodes of +the parent DSA node. The maximum number of allowed child nodes is 4 +(DSA_MAX_SWITCHES). +Each of these switch child nodes should have the following required properties: + +- reg : Describes the switch address on the MII bus +- #address-cells : Must be 1 +- #size-cells : Must be 0 + +A switch may have multiple "port" children nodes + +Each port children node must have the following mandatory properties: +- reg : Describes the port address in the switch +- label : Describes the label associated with this port, special + labels are "cpu" to indicate a CPU port and "dsa" to + indicate an uplink/downlink port. + +Note that a port labelled "dsa" will imply checking for the uplink phandle +described below. + +Optional property: +- link : Should be a phandle to another switch's DSA port. + This property is only used when switches are being + chained/cascaded together. 
+ +Example: + + dsa@0 { + compatible = "marvell,dsa"; + #address-cells = <2>; + #size-cells = <0>; + + interrupts = <10>; + dsa,ethernet = <&ethernet0>; + dsa,mii-bus = <&mii_bus0>; + + switch@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <16 0>; /* MDIO address 16, switch 0 in tree */ + + port@0 { + reg = <0>; + label = "lan1"; + }; + + port@1 { + reg = <1>; + label = "lan2"; + }; + + port@5 { + reg = <5>; + label = "cpu"; + }; + + switch0uplink: port@6 { + reg = <6>; + label = "dsa"; + link = <&switch1uplink>; + }; + }; + + switch@1 { + #address-cells = <1>; + #size-cells = <0>; + reg = <17 1>; /* MDIO address 17, switch 1 in tree */ + + switch1uplink: port@0 { + reg = <0>; + label = "dsa"; + link = <&switch0uplink>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt index 34e7aafa321c..9417e54c26c0 100644 --- a/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt +++ b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt @@ -9,6 +9,10 @@ Required properties: - compatible: "marvell,orion-mdio" - reg: address and length of the SMI register +Optional properties: +- interrupts: interrupt line number for the SMI error/done interrupt +- clocks: Phandle to the clock control device and gate bit + The child nodes of the MDIO driver are the individual PHY devices connected to this MDIO bus. They must have a "reg" property given the PHY address on the MDIO bus. 
diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt index 1e1145ca4f3c..8f01cb190f25 100644 --- a/Documentation/devicetree/bindings/tty/serial/of-serial.txt +++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt @@ -11,6 +11,9 @@ Required properties: - "nvidia,tegra20-uart" - "nxp,lpc3220-uart" - "ibm,qpace-nwp-serial" + - "altr,16550-FIFO32" + - "altr,16550-FIFO64" + - "altr,16550-FIFO128" - "serial" if the port type is unknown. - reg : offset and length of the register set for the device. - interrupts : should contain uart interrupt. diff --git a/Documentation/hwmon/adm1275 b/Documentation/hwmon/adm1275 index 2cfa25667123..15b4a20d5062 100644 --- a/Documentation/hwmon/adm1275 +++ b/Documentation/hwmon/adm1275 @@ -15,7 +15,7 @@ Supported chips: Addresses scanned: - Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1276.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/adt7410 b/Documentation/hwmon/adt7410 index 96004000dc2a..58150c480e56 100644 --- a/Documentation/hwmon/adt7410 +++ b/Documentation/hwmon/adt7410 @@ -4,9 +4,14 @@ Kernel driver adt7410 Supported chips: * Analog Devices ADT7410 Prefix: 'adt7410' - Addresses scanned: I2C 0x48 - 0x4B + Addresses scanned: None Datasheet: Publicly available at the Analog Devices website http://www.analog.com/static/imported-files/data_sheets/ADT7410.pdf + * Analog Devices ADT7420 + Prefix: 'adt7420' + Addresses scanned: None + Datasheet: Publicly available at the Analog Devices website + http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf Author: Hartmut Knaack <knaack.h@gmx.de> @@ -27,6 +32,10 @@ value per second or even justget one sample on demand for power saving. Besides, it can completely power down its ADC, if power management is required. 
+The ADT7420 is register compatible, the only differences being the package, +a slightly narrower operating temperature range (-40°C to +150°C), and a +better accuracy (0.25°C instead of 0.50°C.) + Configuration Notes ------------------- diff --git a/Documentation/hwmon/jc42 b/Documentation/hwmon/jc42 index 165077121238..868d74d6b773 100644 --- a/Documentation/hwmon/jc42 +++ b/Documentation/hwmon/jc42 @@ -49,7 +49,7 @@ Supported chips: Addresses scanned: I2C 0x18 - 0x1f Author: - Guenter Roeck <guenter.roeck@ericsson.com> + Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/lineage-pem b/Documentation/hwmon/lineage-pem index 2ba5ed126858..83b2ddc160c8 100644 --- a/Documentation/hwmon/lineage-pem +++ b/Documentation/hwmon/lineage-pem @@ -8,7 +8,7 @@ Supported devices: Documentation: http://www.lineagepower.com/oem/pdf/CPLI2C.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066 index a21db81c4591..26025e419d35 100644 --- a/Documentation/hwmon/lm25066 +++ b/Documentation/hwmon/lm25066 @@ -19,7 +19,7 @@ Supported chips: Datasheet: http://www.national.com/pf/LM/LM5066.html -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/lm75 b/Documentation/hwmon/lm75 index c91a1d15fa28..69af1c7db6b7 100644 --- a/Documentation/hwmon/lm75 +++ b/Documentation/hwmon/lm75 @@ -23,7 +23,7 @@ Supported chips: Datasheet: Publicly available at the Maxim website http://www.maxim-ic.com/ * Microchip (TelCom) TCN75 - Prefix: 'lm75' + Prefix: 'tcn75' Addresses scanned: none Datasheet: Publicly available at the Microchip website http://www.microchip.com/ diff --git a/Documentation/hwmon/ltc2978 b/Documentation/hwmon/ltc2978 index c365f9beb5dd..e4d75c606c97 100644 --- a/Documentation/hwmon/ltc2978 +++ b/Documentation/hwmon/ltc2978 @@ -5,13 +5,13 @@ 
Supported chips: * Linear Technology LTC2978 Prefix: 'ltc2978' Addresses scanned: - - Datasheet: http://cds.linear.com/docs/Datasheet/2978fa.pdf + Datasheet: http://www.linear.com/product/ltc2978 * Linear Technology LTC3880 Prefix: 'ltc3880' Addresses scanned: - - Datasheet: http://cds.linear.com/docs/Datasheet/3880f.pdf + Datasheet: http://www.linear.com/product/ltc3880 -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/ltc4261 b/Documentation/hwmon/ltc4261 index eba2e2c4b94d..9378a75c6134 100644 --- a/Documentation/hwmon/ltc4261 +++ b/Documentation/hwmon/ltc4261 @@ -8,7 +8,7 @@ Supported chips: Datasheet: http://cds.linear.com/docs/Datasheet/42612fb.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/max16064 b/Documentation/hwmon/max16064 index f8b478076f6d..d59cc7829bec 100644 --- a/Documentation/hwmon/max16064 +++ b/Documentation/hwmon/max16064 @@ -7,7 +7,7 @@ Supported chips: Addresses scanned: - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/max16065 b/Documentation/hwmon/max16065 index c11f64a1f2ad..208a29e43010 100644 --- a/Documentation/hwmon/max16065 +++ b/Documentation/hwmon/max16065 @@ -24,7 +24,7 @@ Supported chips: http://datasheets.maxim-ic.com/en/ds/MAX16070-MAX16071.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/max34440 b/Documentation/hwmon/max34440 index 47651ff341ae..37cbf472a19d 100644 --- a/Documentation/hwmon/max34440 +++ b/Documentation/hwmon/max34440 @@ -27,7 +27,7 @@ Supported chips: Addresses scanned: - Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX34461.pdf -Author: Guenter Roeck 
<guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/max8688 b/Documentation/hwmon/max8688 index fe849871df32..e78078638b91 100644 --- a/Documentation/hwmon/max8688 +++ b/Documentation/hwmon/max8688 @@ -7,7 +7,7 @@ Supported chips: Addresses scanned: - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus index 3d3a0f97f966..cf756ed48ff9 100644 --- a/Documentation/hwmon/pmbus +++ b/Documentation/hwmon/pmbus @@ -34,7 +34,7 @@ Supported chips: Addresses scanned: - Datasheet: n.a. -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/smm665 b/Documentation/hwmon/smm665 index 59e316140542..a341eeedab75 100644 --- a/Documentation/hwmon/smm665 +++ b/Documentation/hwmon/smm665 @@ -29,7 +29,7 @@ Supported chips: http://www.summitmicro.com/prod_select/summary/SMM766/SMM766_2086.pdf http://www.summitmicro.com/prod_select/summary/SMM766B/SMM766B_2122.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Module Parameters diff --git a/Documentation/hwmon/ucd9000 b/Documentation/hwmon/ucd9000 index 0df5f276505b..805e33edb978 100644 --- a/Documentation/hwmon/ucd9000 +++ b/Documentation/hwmon/ucd9000 @@ -11,7 +11,7 @@ Supported chips: http://focus.ti.com/lit/ds/symlink/ucd9090.pdf http://focus.ti.com/lit/ds/symlink/ucd90910.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/ucd9200 b/Documentation/hwmon/ucd9200 index fd7d07b1908a..1e8060e631bd 100644 --- a/Documentation/hwmon/ucd9200 +++ b/Documentation/hwmon/ucd9200 @@ -15,7 +15,7 @@ Supported chips: http://focus.ti.com/lit/ds/symlink/ucd9246.pdf 
http://focus.ti.com/lit/ds/symlink/ucd9248.pdf -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/hwmon/zl6100 b/Documentation/hwmon/zl6100 index 3d924b6b59e9..756b57c6b73e 100644 --- a/Documentation/hwmon/zl6100 +++ b/Documentation/hwmon/zl6100 @@ -54,7 +54,7 @@ http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146401 http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146256 -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description diff --git a/Documentation/i2c/busses/i2c-diolan-u2c b/Documentation/i2c/busses/i2c-diolan-u2c index 30fe4bb9a069..0d6018c316c7 100644 --- a/Documentation/i2c/busses/i2c-diolan-u2c +++ b/Documentation/i2c/busses/i2c-diolan-u2c @@ -5,7 +5,7 @@ Supported adapters: Documentation: http://www.diolan.com/i2c/u2c12.html -Author: Guenter Roeck <guenter.roeck@ericsson.com> +Author: Guenter Roeck <linux@roeck-us.net> Description ----------- diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt index 3262b6e4d686..e544c7ff8cfa 100644 --- a/Documentation/input/alps.txt +++ b/Documentation/input/alps.txt @@ -3,10 +3,26 @@ ALPS Touchpad Protocol Introduction ------------ - -Currently the ALPS touchpad driver supports four protocol versions in use by -ALPS touchpads, called versions 1, 2, 3, and 4. Information about the various -protocol versions is contained in the following sections. +Currently the ALPS touchpad driver supports five protocol versions in use by +ALPS touchpads, called versions 1, 2, 3, 4 and 5. + +Since roughly mid-2010 several new ALPS touchpads have been released and +integrated into a variety of laptops and netbooks. These new touchpads +have enough behavior differences that the alps_model_data definition +table, describing the properties of the different versions, is no longer +adequate. 
The design choices were to re-define the alps_model_data +table, with the risk of regression testing existing devices, or isolate +the new devices outside of the alps_model_data table. The latter design +choice was made. The new touchpad signatures are named: "Rushmore", +"Pinnacle", and "Dolphin", which you will see in the alps.c code. +For the purposes of this document, this group of ALPS touchpads will +generically be called "new ALPS touchpads". + +We experimented with probing the ACPI interface _HID (Hardware ID)/_CID +(Compatibility ID) definition as a way to uniquely identify the +different ALPS variants but there did not appear to be a 1:1 mapping. +In fact, it appeared to be an m:n mapping between the _HID and actual +hardware type. Detection --------- @@ -20,9 +36,13 @@ If the E6 report is successful, the touchpad model is identified using the "E7 report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is matched against known models in the alps_model_data_array. -With protocol versions 3 and 4, the E7 report model signature is always -73-02-64. To differentiate between these versions, the response from the -"Enter Command Mode" sequence must be inspected as described below. +For older touchpads supporting protocol versions 3 and 4, the E7 report +model signature is always 73-02-64. To differentiate between these +versions, the response from the "Enter Command Mode" sequence must be +inspected as described below. + +The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but +seem to be better differentiated by the EC Command Mode response. Command Mode ------------ @@ -47,6 +67,14 @@ address of the register being read, and the third contains the value of the register. Registers are written by writing the value one nibble at a time using the same encoding used for addresses. +For the new ALPS touchpads, the EC command is used to enter command +mode. 
The response in the new ALPS touchpads is significantly different, +and more important in determining the behavior. This code has been +separated from the original alps_model_data table and put in the +alps_identify function. For example, there seem to be two hardware init +sequences for the "Dolphin" touchpads as determined by the second byte +of the EC response. + Packet Format ------------- @@ -187,3 +215,28 @@ There are several things worth noting here. well. So far no v4 devices with tracksticks have been encountered. + +ALPS Absolute Mode - Protocol Version 5 +--------------------------------------- +This is basically Protocol Version 3 but with different logic for packet +decode. It uses the same alps_process_touchpad_packet_v3 call with a +specialized decode_fields function pointer to correctly interpret the +packets. This appears to only be used by the Dolphin devices. + +For single-touch, the 6-byte packet format is: + + byte 0: 1 1 0 0 1 0 0 0 + byte 1: 0 x6 x5 x4 x3 x2 x1 x0 + byte 2: 0 y6 y5 y4 y3 y2 y1 y0 + byte 3: 0 M R L 1 m r l + byte 4: y10 y9 y8 y7 x10 x9 x8 x7 + byte 5: 0 z6 z5 z4 z3 z2 z1 z0 + +For mt, the format is: + + byte 0: 1 1 1 n3 1 n2 n1 x24 + byte 1: 1 y7 y6 y5 y4 y3 y2 y1 + byte 2: ? x2 x1 y12 y11 y10 y9 y8 + byte 3: 0 x23 x22 x21 x20 x19 x18 x17 + byte 4: 0 x9 x8 x7 x6 x5 x4 x3 + byte 5: 0 x16 x15 x14 x13 x12 x11 x10 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..8ccbf27aead4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -596,9 +596,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. is selected automatically. Check Documentation/kdump/kdump.txt for further details. - crashkernel_low=size[KMG] - [KNL, x86] parts under 4G. - crashkernel=range1:size1[,range2:size2,...][@offset] [KNL] Same as above, but depends on the memory in the running system. 
The syntax of range is @@ -606,6 +603,26 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. + crashkernel=size[KMG],high + [KNL, x86_64] range could be above 4G. Allow kernel + to allocate physical memory region from top, so could + be above 4G if system have more than 4G ram installed. + Otherwise memory region will be allocated below 4G, if + available. + It will be ignored if crashkernel=X is specified. + crashkernel=size[KMG],low + [KNL, x86_64] range under 4G. When crashkernel=X,high + is passed, kernel could allocate physical memory region + above 4G, that cause second kernel crash on system + that require some amount of low memory, e.g. swiotlb + requires at least 64M+32K low memory. Kernel would + try to allocate 72M below 4G automatically. + This one let user to specify own low range under 4G + for second kernel instead. + 0: to disable low allocation. + It will be ignored when crashkernel=X,high is not used + or memory reserved is below 4G. + cs89x0_dma= [HW,NET] Format: <dma> @@ -788,6 +805,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. edd= [EDD] Format: {"off" | "on" | "skip[mbr]"} + efi_no_storage_paranoia [EFI; X86] + Using this parameter you can use more than 50% of + your efi variable storage. Use this parameter only if + you are really sure that your UEFI does sane gc and + fulfills the spec otherwise your board may brick. + eisa_irq_edge= [PARISC,HW] See header of drivers/parisc/eisa.c. diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt index 703cf4370c79..67a9cb259d40 100644 --- a/Documentation/networking/ieee802154.txt +++ b/Documentation/networking/ieee802154.txt @@ -71,8 +71,9 @@ submits skb to qdisc), so if you need something from that cb later, you should store info in the skb->data on your own. 
To hook the MLME interface you have to populate the ml_priv field of your -net_device with a pointer to struct ieee802154_mlme_ops instance. All fields are -required. +net_device with a pointer to struct ieee802154_mlme_ops instance. The fields +assoc_req, assoc_resp, disassoc_req, start_req, and scan_req are optional. +All other fields are required. We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index dc2dc87d2557..f98ca633b528 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -29,7 +29,7 @@ route/max_size - INTEGER neigh/default/gc_thresh1 - INTEGER Minimum number of entries to keep. Garbage collector will not purge entries if there are fewer than this number. - Default: 256 + Default: 128 neigh/default/gc_thresh3 - INTEGER Maximum number of neighbor entries allowed. Increase this @@ -175,14 +175,6 @@ tcp_congestion_control - STRING is inherited. [see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ] -tcp_cookie_size - INTEGER - Default size of TCP Cookie Transactions (TCPCT) option, that may be - overridden on a per socket basis by the TCPCT socket option. - Values greater than the maximum (16) are interpreted as the maximum. - Values greater than zero and less than the minimum (8) are interpreted - as the minimum. Odd values are interpreted as the next even value. - Default: 0 (off). - tcp_dsack - BOOLEAN Allows TCP to send "duplicate" SACKs. @@ -190,7 +182,9 @@ tcp_early_retrans - INTEGER Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold for triggering fast retransmit when the amount of outstanding data is small and when no previously unsent data can be transmitted (such - that limited transmit could be used). + that limited transmit could be used). 
Also controls the use of + Tail loss probe (TLP) that converts RTOs occurring due to tail + losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01). Possible values: 0 disables ER 1 enables ER @@ -198,7 +192,9 @@ tcp_early_retrans - INTEGER by a fourth of RTT. This mitigates connection falsely recovers when network has a small degree of reordering (less than 3 packets). - Default: 2 + 3 enables delayed ER and TLP. + 4 enables TLP only. + Default: 3 tcp_ecn - INTEGER Control use of Explicit Congestion Notification (ECN) by TCP. @@ -229,36 +225,13 @@ tcp_fin_timeout - INTEGER Default: 60 seconds tcp_frto - INTEGER - Enables Forward RTO-Recovery (F-RTO) defined in RFC4138. + Enables Forward RTO-Recovery (F-RTO) defined in RFC5682. F-RTO is an enhanced recovery algorithm for TCP retransmission - timeouts. It is particularly beneficial in wireless environments - where packet loss is typically due to random radio interference - rather than intermediate router congestion. F-RTO is sender-side - only modification. Therefore it does not require any support from - the peer. - - If set to 1, basic version is enabled. 2 enables SACK enhanced - F-RTO if flow uses SACK. The basic version can be used also when - SACK is in use though scenario(s) with it exists where F-RTO - interacts badly with the packet counting of the SACK enabled TCP - flow. - -tcp_frto_response - INTEGER - When F-RTO has detected that a TCP retransmission timeout was - spurious (i.e, the timeout would have been avoided had TCP set a - longer retransmission timeout), TCP has several options what to do - next. 
Possible values are: - 0 Rate halving based; a smooth and conservative response, - results in halved cwnd and ssthresh after one RTT - 1 Very conservative response; not recommended because even - though being valid, it interacts poorly with the rest of - Linux TCP, halves cwnd and ssthresh immediately - 2 Aggressive response; undoes congestion control measures - that are now known to be unnecessary (ignoring the - possibility of a lost retransmission that would require - TCP to be more cautious), cwnd and ssthresh are restored - to the values prior timeout - Default: 0 (rate halving based) + timeouts. It is particularly beneficial in networks where the + RTT fluctuates (e.g., wireless). F-RTO is sender-side only + modification. It does not require any support from the peer. + + By default it's enabled with a non-zero value. 0 disables F-RTO. tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index f2a2488f1bf3..9573d0c48c6e 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -15,6 +15,13 @@ amemthresh - INTEGER enabled and the variable is automatically set to 2, otherwise the strategy is disabled and the variable is set to 1. +backup_only - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + If set, disable the director function while the server is + in backup mode to avoid packet loops for DR/TUN methods. + conntrack - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt new file mode 100644 index 000000000000..1c2dab409625 --- /dev/null +++ b/Documentation/networking/netlink_mmap.txt @@ -0,0 +1,339 @@ +This file documents how to use memory mapped I/O with netlink. 
+ +Author: Patrick McHardy <kaber@trash.net> + +Overview +-------- + +Memory mapped netlink I/O can be used to increase throughput and decrease +overhead of unicast receive and transmit operations. Some netlink subsystems +require high throughput, these are mainly the netfilter subsystems +nfnetlink_queue and nfnetlink_log, but it can also help speed up large +dump operations of f.i. the routing database. + +Memory mapped netlink I/O uses two circular ring buffers for RX and TX which +are mapped into the process's address space. + +The RX ring is used by the kernel to directly construct netlink messages into +user-space memory without copying them as done with regular socket I/O, +additionally as long as the ring contains messages no recvmsg() or poll() +syscalls have to be issued by user-space to get more messages. + +The TX ring is used to process messages directly from user-space memory, the +kernel processes all messages contained in the ring using a single sendmsg() +call. + +Usage overview +-------------- + +In order to use memory mapped netlink I/O, user-space needs three main changes: + +- ring setup +- conversion of the RX path to get messages from the ring instead of recvmsg() +- conversion of the TX path to construct messages into the ring + +Ring setup is done using setsockopt() to provide the ring parameters to the +kernel, then a call to mmap() to map the ring into the process's address space: + +- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params)); +- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params)); +- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) + +Usage of either ring is optional, but even if only the RX ring is used the +mapping still needs to be writable in order to update the frame status after +processing.
+ +Conversion of the reception path involves calling poll() on the file +descriptor, once the socket is readable the frames from the ring are +processed in order until no more messages are available, as indicated by +a status word in the frame header. + +On kernel side, in order to make use of memory mapped I/O on receive, the +originating netlink subsystem needs to support memory mapped I/O, otherwise +it will use an allocated socket buffer as usual and the contents will be + copied to the ring on transmission, nullifying most of the performance gains. +Dumps of kernel databases automatically support memory mapped I/O. + +Conversion of the transmit path involves changing message construction to +use memory from the TX ring instead of (usually) a buffer declared on the +stack and setting up the frame header appropriately. Optionally poll() can +be used to wait for free frames in the TX ring. + +Structures and definitions for using memory mapped I/O are contained in +<linux/netlink.h>. + +RX and TX rings +---------------- + +Each ring contains a number of contiguous memory blocks, containing frames of +fixed size dependent on the parameters used for ring setup. + +Ring: [ block 0 ] + [ frame 0 ] + [ frame 1 ] + [ block 1 ] + [ frame 2 ] + [ frame 3 ] + ... + [ block n ] + [ frame 2 * n ] + [ frame 2 * n + 1 ] + +The blocks are only visible to the kernel, from the point of view of user-space +the ring just contains the frames in a contiguous memory zone. + +The ring parameters used for setting up the ring are defined as follows: + +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +Frames are grouped into blocks, where each block is a contiguous region of memory +and holds nm_block_size / nm_frame_size frames. The total number of frames in +the ring is nm_frame_nr.
The following invariants hold: + +- frames_per_block = nm_block_size / nm_frame_size + +- nm_frame_nr = frames_per_block * nm_block_nr + +Some parameters are constrained, specifically: + +- nm_block_size must be a multiple of the architecture's memory page size. + The getpagesize() function can be used to get the page size. + +- nm_frame_size must be equal to or larger than NL_MMAP_HDRLEN, IOW a frame must be + able to hold at least the frame header + +- nm_frame_size must be smaller than or equal to nm_block_size + +- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT + +- nm_frame_nr must equal the actual number of frames as specified above. + +When the kernel can't allocate physically contiguous memory for a ring block, +it will fall back to use physically discontiguous memory. This might affect +performance negatively, in order to avoid this the nm_frame_size parameter +should be chosen to be as small as possible for the required frame size and +the number of blocks should be increased instead. + +Ring frames +------------ + +Each frame contains a frame header, consisting of a synchronization word and some +meta-data, and the message itself. + +Frame: [ header message ] + +The frame header is defined as follows: + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +- nm_status is used for synchronizing processing between the kernel and user- + space and specifies ownership of the frame as well as the operation to perform + +- nm_len contains the length of the message contained in the data area + +- nm_group specifies the destination multicast group of the message + +- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending + process. These values correspond to the data available using SOCK_PASSCRED in + the SCM_CREDENTIALS cmsg.
+ +The possible values in the status word are: + +- NL_MMAP_STATUS_UNUSED: + RX ring: frame belongs to the kernel and contains no message + for user-space. Appropriate action is to invoke poll() + to wait for new messages. + + TX ring: frame belongs to user-space and can be used for + message construction. + +- NL_MMAP_STATUS_RESERVED: + RX ring only: frame is currently used by the kernel for message + construction and contains no valid message yet. + Appropriate action is to invoke poll() to wait for + new messages. + +- NL_MMAP_STATUS_VALID: + RX ring: frame contains a valid message. Appropriate action is + to process the message and release the frame back to + the kernel by setting the status to + NL_MMAP_STATUS_UNUSED or queue the frame by setting the + status to NL_MMAP_STATUS_SKIP. + + TX ring: the frame contains a valid message from user-space to + be processed by the kernel. After completing processing + the kernel will release the frame back to user-space by + setting the status to NL_MMAP_STATUS_UNUSED. + +- NL_MMAP_STATUS_COPY: + RX ring only: a message is ready to be processed but could not be + stored in the ring, either because it exceeded the + frame size or because the originating subsystem does + not support memory mapped I/O. Appropriate action is + to invoke recvmsg() to receive the message and release + the frame back to the kernel by setting the status to + NL_MMAP_STATUS_UNUSED. + +- NL_MMAP_STATUS_SKIP: + RX ring only: user-space queued the message for later processing, but + processed some messages following it in the ring. The + kernel should skip this frame when looking for unused + frames. + +The data area of a frame begins at an offset of NL_MMAP_HDRLEN relative to the +frame header. + +TX limitations +-------------- + +Kernel processing usually involves validation of the message received from +user-space, then processing its contents.
The kernel must assure that +userspace is not able to modify the message contents after they have been +validated. In order to do so, the message is copied from the ring frame +to an allocated buffer if either of these conditions is false: + +- only a single mapping of the ring exists +- the file descriptor is not shared between processes + +This means that for threaded programs, the kernel will fall back to copying. + +Example +------- + +Ring setup: + + unsigned int block_size = 16 * getpagesize(); + struct nl_mmap_req req = { + .nm_block_size = block_size, + .nm_block_nr = 64, + .nm_frame_size = 16384, + .nm_frame_nr = 64 * block_size / 16384, + }; + unsigned int ring_size; + void *rx_ring, *tx_ring; + + /* Configure ring parameters */ + if (setsockopt(fd, NETLINK_RX_RING, &req, sizeof(req)) < 0) + exit(1); + if (setsockopt(fd, NETLINK_TX_RING, &req, sizeof(req)) < 0) + exit(1) + + /* Calculate size of each invididual ring */ + ring_size = req.nm_block_nr * req.nm_block_size; + + /* Map RX/TX rings. The TX ring is located after the RX ring */ + rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if ((long)rx_ring == -1L) + exit(1); + tx_ring = rx_ring + ring_size: + +Message reception: + +This example assumes some ring parameters of the ring setup are available. + + unsigned int frame_offset = 0; + struct nl_mmap_hdr *hdr; + struct nlmsghdr *nlh; + unsigned char buf[16384]; + ssize_t len; + + while (1) { + struct pollfd pfds[1]; + + pfds[0].fd = fd; + pfds[0].events = POLLIN | POLLERR; + pfds[0].revents = 0; + + if (poll(pfds, 1, -1) < 0 && errno != -EINTR) + exit(1); + + /* Check for errors. 
Error handling omitted */ + if (pfds[0].revents & POLLERR) + <handle error> + + /* If no new messages, poll again */ + if (!(pfds[0].revents & POLLIN)) + continue; + + /* Process all frames */ + while (1) { + /* Get next frame header */ + hdr = rx_ring + frame_offset; + + if (hdr->nm_status == NL_MMAP_STATUS_VALID) + /* Regular memory mapped frame */ + nlh = (void *hdr) + NL_MMAP_HDRLEN; + len = hdr->nm_len; + + /* Release empty message immediately. May happen + * on error during message construction. + */ + if (len == 0) + goto release; + } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { + /* Frame queued to socket receive queue */ + len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (len <= 0) + break; + nlh = buf; + } else + /* No more messages to process, continue polling */ + break; + + process_msg(nlh); +release: + /* Release frame back to the kernel */ + hdr->nm_status = NL_MMAP_STATUS_UNUSED; + + /* Advance frame offset to next frame */ + frame_offset = (frame_offset + frame_size) % ring_size; + } + } + +Message transmission: + +This example assumes some ring parameters of the ring setup are available. +A single message is constructed and transmitted, to send multiple messages +at once they would be constructed in consecutive frames before a final call +to sendto(). + + unsigned int frame_offset = 0; + struct nl_mmap_hdr *hdr; + struct nlmsghdr *nlh; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + + hdr = tx_ring + frame_offset; + if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) + /* No frame available. Use poll() to avoid. 
*/ + exit(1); + + nlh = (void *)hdr + NL_MMAP_HDRLEN; + + /* Build message */ + build_message(nlh); + + /* Fill frame header: length and status need to be set */ + hdr->nm_len = nlh->nlmsg_len; + hdr->nm_status = NL_MMAP_STATUS_VALID; + + if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) + exit(1); + + /* Advance frame offset to next frame */ + frame_offset = (frame_offset + frame_size) % ring_size; diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 94444b152fbc..65efb85e49de 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -685,6 +685,333 @@ int main(int argc, char **argp) } ------------------------------------------------------------------------------- ++ AF_PACKET TPACKET_V3 example +------------------------------------------------------------------------------- + +AF_PACKET's TPACKET_V3 ring buffer can be configured to use non-static frame +sizes by doing it's own memory management. It is based on blocks where polling +works on a per block basis instead of per ring as in TPACKET_V2 and predecessor. + +It is said that TPACKET_V3 brings the following benefits: + *) ~15 - 20% reduction in CPU-usage + *) ~20% increase in packet capture rate + *) ~2x increase in packet density + *) Port aggregation analysis + *) Non static frame size to capture entire packet payload + +So it seems to be a good candidate to be used with packet fanout. 
+ +Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile +it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.): + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> +#include <net/if.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <poll.h> +#include <unistd.h> +#include <signal.h> +#include <inttypes.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <linux/if_packet.h> +#include <linux/if_ether.h> +#include <linux/ip.h> + +#define BLOCK_SIZE (1 << 22) +#define FRAME_SIZE 2048 + +#define NUM_BLOCKS 64 +#define NUM_FRAMES ((BLOCK_SIZE * NUM_BLOCKS) / FRAME_SIZE) + +#define BLOCK_RETIRE_TOV_IN_MS 64 +#define BLOCK_PRIV_AREA_SZ 13 + +#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1)) + +#define BLOCK_STATUS(x) ((x)->h1.block_status) +#define BLOCK_NUM_PKTS(x) ((x)->h1.num_pkts) +#define BLOCK_O2FP(x) ((x)->h1.offset_to_first_pkt) +#define BLOCK_LEN(x) ((x)->h1.blk_len) +#define BLOCK_SNUM(x) ((x)->h1.seq_num) +#define BLOCK_O2PRIV(x) ((x)->offset_to_priv) +#define BLOCK_PRIV(x) ((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x))) +#define BLOCK_HDR_LEN (ALIGN_8(sizeof(struct block_desc))) +#define BLOCK_PLUS_PRIV(sz_pri) (BLOCK_HDR_LEN + ALIGN_8((sz_pri))) + +#ifndef likely +# define likely(x) __builtin_expect(!!(x), 1) +#endif +#ifndef unlikely +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +struct block_desc { + uint32_t version; + uint32_t offset_to_priv; + struct tpacket_hdr_v1 h1; +}; + +struct ring { + struct iovec *rd; + uint8_t *map; + struct tpacket_req3 req; +}; + +static unsigned long packets_total = 0, bytes_total = 0; +static sig_atomic_t sigint = 0; + +void sighandler(int num) +{ + sigint = 1; +} + +static int setup_socket(struct ring *ring, char *netdev) +{ + int err, i, fd, v = TPACKET_V3; + struct sockaddr_ll ll; + + fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + if (fd < 0) { + perror("socket"); + exit(1); + } + + err = 
setsockopt(fd, SOL_PACKET, PACKET_VERSION, &v, sizeof(v)); + if (err < 0) { + perror("setsockopt"); + exit(1); + } + + memset(&ring->req, 0, sizeof(ring->req)); + ring->req.tp_block_size = BLOCK_SIZE; + ring->req.tp_frame_size = FRAME_SIZE; + ring->req.tp_block_nr = NUM_BLOCKS; + ring->req.tp_frame_nr = NUM_FRAMES; + ring->req.tp_retire_blk_tov = BLOCK_RETIRE_TOV_IN_MS; + ring->req.tp_sizeof_priv = BLOCK_PRIV_AREA_SZ; + ring->req.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH; + + err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req, + sizeof(ring->req)); + if (err < 0) { + perror("setsockopt"); + exit(1); + } + + ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, + fd, 0); + if (ring->map == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + ring->rd = malloc(ring->req.tp_block_nr * sizeof(*ring->rd)); + assert(ring->rd); + for (i = 0; i < ring->req.tp_block_nr; ++i) { + ring->rd[i].iov_base = ring->map + (i * ring->req.tp_block_size); + ring->rd[i].iov_len = ring->req.tp_block_size; + } + + memset(&ll, 0, sizeof(ll)); + ll.sll_family = PF_PACKET; + ll.sll_protocol = htons(ETH_P_ALL); + ll.sll_ifindex = if_nametoindex(netdev); + ll.sll_hatype = 0; + ll.sll_pkttype = 0; + ll.sll_halen = 0; + + err = bind(fd, (struct sockaddr *) &ll, sizeof(ll)); + if (err < 0) { + perror("bind"); + exit(1); + } + + return fd; +} + +#ifdef __checked +static uint64_t prev_block_seq_num = 0; + +void assert_block_seq_num(struct block_desc *pbd) +{ + if (unlikely(prev_block_seq_num + 1 != BLOCK_SNUM(pbd))) { + printf("prev_block_seq_num:%"PRIu64", expected seq:%"PRIu64" != " + "actual seq:%"PRIu64"\n", prev_block_seq_num, + prev_block_seq_num + 1, (uint64_t) BLOCK_SNUM(pbd)); + exit(1); + } + + prev_block_seq_num = BLOCK_SNUM(pbd); +} + +static void assert_block_len(struct block_desc *pbd, uint32_t bytes, int block_num) +{ + if (BLOCK_NUM_PKTS(pbd)) { + if (unlikely(bytes != BLOCK_LEN(pbd))) { + 
printf("block:%u with %upackets, expected len:%u != actual len:%u\n", + block_num, BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd)); + exit(1); + } + } else { + if (unlikely(BLOCK_LEN(pbd) != BLOCK_PLUS_PRIV(BLOCK_PRIV_AREA_SZ))) { + printf("block:%u, expected len:%lu != actual len:%u\n", + block_num, BLOCK_HDR_LEN, BLOCK_LEN(pbd)); + exit(1); + } + } +} + +static void assert_block_header(struct block_desc *pbd, const int block_num) +{ + uint32_t block_status = BLOCK_STATUS(pbd); + + if (unlikely((block_status & TP_STATUS_USER) == 0)) { + printf("block:%u, not in TP_STATUS_USER\n", block_num); + exit(1); + } + + assert_block_seq_num(pbd); +} +#else +static inline void assert_block_header(struct block_desc *pbd, const int block_num) +{ +} +static void assert_block_len(struct block_desc *pbd, uint32_t bytes, int block_num) +{ +} +#endif + +static void display(struct tpacket3_hdr *ppd) +{ + struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac); + struct iphdr *ip = (struct iphdr *) ((uint8_t *) eth + ETH_HLEN); + + if (eth->h_proto == htons(ETH_P_IP)) { + struct sockaddr_in ss, sd; + char sbuff[NI_MAXHOST], dbuff[NI_MAXHOST]; + + memset(&ss, 0, sizeof(ss)); + ss.sin_family = PF_INET; + ss.sin_addr.s_addr = ip->saddr; + getnameinfo((struct sockaddr *) &ss, sizeof(ss), + sbuff, sizeof(sbuff), NULL, 0, NI_NUMERICHOST); + + memset(&sd, 0, sizeof(sd)); + sd.sin_family = PF_INET; + sd.sin_addr.s_addr = ip->daddr; + getnameinfo((struct sockaddr *) &sd, sizeof(sd), + dbuff, sizeof(dbuff), NULL, 0, NI_NUMERICHOST); + + printf("%s -> %s, ", sbuff, dbuff); + } + + printf("rxhash: 0x%x\n", ppd->hv1.tp_rxhash); +} + +static void walk_block(struct block_desc *pbd, const int block_num) +{ + int num_pkts = BLOCK_NUM_PKTS(pbd), i; + unsigned long bytes = 0; + unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(BLOCK_PRIV_AREA_SZ); + struct tpacket3_hdr *ppd; + + assert_block_header(pbd, block_num); + + ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd)); + for 
(i = 0; i < num_pkts; ++i) { + bytes += ppd->tp_snaplen; + if (ppd->tp_next_offset) + bytes_with_padding += ppd->tp_next_offset; + else + bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac); + + display(ppd); + + ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset); + __sync_synchronize(); + } + + assert_block_len(pbd, bytes_with_padding, block_num); + + packets_total += num_pkts; + bytes_total += bytes; +} + +void flush_block(struct block_desc *pbd) +{ + BLOCK_STATUS(pbd) = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static void teardown_socket(struct ring *ring, int fd) +{ + munmap(ring->map, ring->req.tp_block_size * ring->req.tp_block_nr); + free(ring->rd); + close(fd); +} + +int main(int argc, char **argp) +{ + int fd, err; + socklen_t len; + struct ring ring; + struct pollfd pfd; + unsigned int block_num = 0; + struct block_desc *pbd; + struct tpacket_stats_v3 stats; + + if (argc != 2) { + fprintf(stderr, "Usage: %s INTERFACE\n", argp[0]); + return EXIT_FAILURE; + } + + signal(SIGINT, sighandler); + + memset(&ring, 0, sizeof(ring)); + fd = setup_socket(&ring, argp[argc - 1]); + assert(fd > 0); + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = fd; + pfd.events = POLLIN | POLLERR; + pfd.revents = 0; + + while (likely(!sigint)) { + pbd = (struct block_desc *) ring.rd[block_num].iov_base; +retry_block: + if ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0) { + poll(&pfd, 1, -1); + goto retry_block; + } + + walk_block(pbd, block_num); + flush_block(pbd); + block_num = (block_num + 1) % NUM_BLOCKS; + } + + len = sizeof(stats); + err = getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len); + if (err < 0) { + perror("getsockopt"); + exit(1); + } + + fflush(stdout); + printf("\nReceived %u packets, %lu bytes, %u dropped, freeze_q_cnt: %u\n", + stats.tp_packets, bytes_total, stats.tp_drops, + stats.tp_freeze_q_cnt); + + teardown_socket(&ring, fd); + return 0; +} + +------------------------------------------------------------------------------- 
+ PACKET_TIMESTAMP ------------------------------------------------------------------------------- diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index f9fa6db40a52..654d2e55c8cb 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -1,6 +1,6 @@ STMicroelectronics 10/100/1000 Synopsys Ethernet driver -Copyright (C) 2007-2010 STMicroelectronics Ltd +Copyright (C) 2007-2013 STMicroelectronics Ltd Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers @@ -10,7 +10,7 @@ Currently this network device driver is for all STM embedded MAC/GMAC (i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XLINX XC2V3000 FF1152AMT0221 D1215994A VIRTEX FPGA board. -DWC Ether MAC 10/100/1000 Universal version 3.60a (and older) and DWC Ether +DWC Ether MAC 10/100/1000 Universal version 3.70a (and older) and DWC Ether MAC 10/100 Universal version 4.0 have been used for developing this driver. This driver supports both the platform bus and PCI. @@ -32,6 +32,8 @@ The kernel configuration option is STMMAC_ETH: watchdog: transmit timeout (in milliseconds); flow_ctrl: Flow control ability [on/off]; pause: Flow Control Pause Time; + eee_timer: tx EEE timer; + chain_mode: select chain mode instead of ring. 3) Command line options Driver parameters can be also passed in command line by using: @@ -164,12 +166,12 @@ Where: o bus_setup: perform HW setup of the bus. For example, on some ST platforms this field is used to configure the AMBA bridge to generate more efficient STBus traffic. - o init/exit: callbacks used for calling a custom initialisation; + o init/exit: callbacks used for calling a custom initialization; this is sometime necessary on some platforms (e.g. ST boxes) where the HW needs to have set some PIO lines or system cfg registers. 
o custom_cfg/custom_data: this is a custom configuration that can be passed - while initialising the resources. + while initializing the resources. o bsp_priv: another private poiter. For MDIO bus The we have: @@ -273,6 +275,8 @@ reset procedure etc). o norm_desc.c: functions for handling normal descriptors; o chain_mode.c/ring_mode.c:: functions to manage RING/CHAINED modes; o mmc_core.c/mmc.h: Management MAC Counters; + o stmmac_hwtstamp.c: HW timestamp support for PTP + o stmmac_ptp.c: PTP 1588 clock 5) Debug Information @@ -326,6 +330,35 @@ To enter in Tx LPI mode the driver needs to have a software timer that enable and disable the LPI mode when there is nothing to be transmitted. -7) TODO: +7) Extended descriptors +The extended descriptors give us information about the receive Ethernet payload +when it is carrying PTP packets or TCP/UDP/ICMP over IP. +These are not available on GMAC Synopsys chips older than the 3.50. +At probe time the driver will decide if these can be actually used. +This support also is mandatory for PTPv2 because the extra descriptors 6 and 7 +are used for saving the hardware timestamps. + +8) Precision Time Protocol (PTP) +The driver supports the IEEE 1588-2002, Precision Time Protocol (PTP), +which enables precise synchronization of clocks in measurement and +control systems implemented with technologies such as network +communication. + +In addition to the basic timestamp features mentioned in IEEE 1588-2002 +Timestamps, new GMAC cores support the advanced timestamp features. +IEEE 1588-2008 that can be enabled when configure the Kernel. + +9) SGMII/RGMII supports +New GMAC devices provide own way to manage RGMII/SGMII. +This information is available at run-time by looking at the +HW capability register. This means that the stmmac can manage +auto-negotiation and link status w/o using the PHYLIB stuff +In fact, the HW provides a subset of extended registers to +restart the ANE, verify Full/Half duplex mode and Speed. 
+Also thanks to these registers it is possible to look at the +Auto-negotiated Link Parter Ability. + +10) TODO: o XGMAC is not supported. - o Add the PTP - precision time protocol + o Complete the TBI & RTBI support. + o extened VLAN support for 3.70a SYNP GMAC. diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt index 3035d00757ad..425c51d56aef 100644 --- a/Documentation/power/opp.txt +++ b/Documentation/power/opp.txt @@ -1,6 +1,5 @@ -*=============* -* OPP Library * -*=============* +Operating Performance Points (OPP) Library +========================================== (C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated @@ -16,15 +15,31 @@ Contents 1. Introduction =============== +1.1 What is an Operating Performance Point (OPP)? + Complex SoCs of today consists of a multiple sub-modules working in conjunction. In an operational system executing varied use cases, not all modules in the SoC need to function at their highest performing frequency all the time. To facilitate this, sub-modules in a SoC are grouped into domains, allowing some -domains to run at lower voltage and frequency while other domains are loaded -more. The set of discrete tuples consisting of frequency and voltage pairs that +domains to run at lower voltage and frequency while other domains run at +voltage/frequency pairs that are higher. + +The set of discrete tuples consisting of frequency and voltage pairs that the device will support per domain are called Operating Performance Points or OPPs. +As an example: +Let us consider an MPU device which supports the following: +{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V}, +{1GHz at minimum voltage of 1.3V} + +We can represent these as three OPPs as the following {Hz, uV} tuples: +{300000000, 1000000} +{800000000, 1200000} +{1000000000, 1300000} + +1.2 Operating Performance Points Library + OPP library provides a set of helper functions to organize and query the OPP information. 
The library is located in drivers/base/power/opp.c and the header is located in include/linux/opp.h. OPP library can be enabled by enabling diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index e8a6aa473bab..6e953564de03 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -170,5 +170,5 @@ Reminder: sizeof() result is of type size_t. Thank you for your cooperation and attention. -By Randy Dunlap <rdunlap@xenotime.net> and +By Randy Dunlap <rdunlap@infradead.org> and Andrew Murray <amurray@mpc-data.co.uk> diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx index 27a91cf43d6d..5020b7b5a244 100644 --- a/Documentation/scsi/LICENSE.qla2xxx +++ b/Documentation/scsi/LICENSE.qla2xxx @@ -1,4 +1,4 @@ -Copyright (c) 2003-2012 QLogic Corporation +Copyright (c) 2003-2013 QLogic Corporation QLogic Linux FC-FCoE Driver This program includes a device driver for Linux 3.x. diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index ce6581c8ca26..95731a08f257 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -890,9 +890,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) power_save - Automatic power-saving timeout (in second, 0 = disable) - power_save_controller - Support runtime D3 of HD-audio controller - (-1 = on for supported chip (default), false = off, - true = force to on even for unsupported hardware) + power_save_controller - Reset HD-audio controller in power-saving mode + (default = on) align_buffer_size - Force rounding of buffer/period sizes to multiples of 128 bytes. This is more efficient in terms of memory access but isn't required by the HDA spec and prevents @@ -912,7 +911,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. 
models depending on the codec chip. The list of available models is found in HD-Audio-Models.txt - The model name "genric" is treated as a special case. When this + The model name "generic" is treated as a special case. When this model is given, the driver uses the generic codec parser without "codec-patch". It's sometimes good for testing and debugging. diff --git a/Documentation/sound/alsa/seq_oss.html b/Documentation/sound/alsa/seq_oss.html index d9776cf60c07..9663b45f6fde 100644 --- a/Documentation/sound/alsa/seq_oss.html +++ b/Documentation/sound/alsa/seq_oss.html @@ -285,7 +285,7 @@ sample data. <H4> 7.2.4 Close Callback</H4> The <TT>close</TT> callback is called when this device is closed by the -applicaion. If any private data was allocated in open callback, it must +application. If any private data was allocated in open callback, it must be released in the close callback. The deletion of ALSA port should be done here, too. This callback must not be NULL. <H4> diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 53d6a3c51d87..a372304aef10 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1873,7 +1873,7 @@ feature: status\input | 0 | 1 | else | --------------+------------+------------+------------+ - not allocated |(do nothing)| alloc+swap | EINVAL | + not allocated |(do nothing)| alloc+swap |(do nothing)| --------------+------------+------------+------------+ allocated | free | swap | clear | --------------+------------+------------+------------+ diff --git a/MAINTAINERS b/MAINTAINERS index 685949bb8838..cae1f8edef29 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1338,12 +1338,6 @@ S: Maintained F: drivers/platform/x86/asus*.c F: drivers/platform/x86/eeepc*.c -ASUS ASB100 HARDWARE MONITOR DRIVER -M: "Mark M. 
Hoffman" <mhoffman@lightlink.com> -L: lm-sensors@lm-sensors.org -S: Maintained -F: drivers/hwmon/asb100.c - ASYNCHRONOUS TRANSFERS/TRANSFORMS (IOAT) API M: Dan Williams <djbw@fb.com> W: http://sourceforge.net/projects/xscaleiop @@ -1467,6 +1461,12 @@ F: drivers/dma/at_hdmac.c F: drivers/dma/at_hdmac_regs.h F: include/linux/platform_data/dma-atmel.h +ATMEL I2C DRIVER +M: Ludovic Desroches <ludovic.desroches@atmel.com> +L: linux-i2c@vger.kernel.org +S: Supported +F: drivers/i2c/busses/i2c-at91.c + ATMEL ISI DRIVER M: Josh Wu <josh.wu@atmel.com> L: linux-media@vger.kernel.org @@ -1764,7 +1764,7 @@ F: arch/arm/configs/bcm2835_defconfig F: drivers/*/*bcm2835* BROADCOM TG3 GIGABIT ETHERNET DRIVER -M: Matt Carlson <mcarlson@broadcom.com> +M: Nithin Nayak Sujir <nsujir@broadcom.com> M: Michael Chan <mchan@broadcom.com> L: netdev@vger.kernel.org S: Supported @@ -1886,7 +1886,7 @@ F: Documentation/video4linux/cafe_ccic F: drivers/media/platform/marvell-ccic/ CAIF NETWORK LAYER -M: Sjur Braendeland <sjur.brandeland@stericsson.com> +M: Dmitry Tarnyagin <dmitry.tarnyagin@lockless.no> L: netdev@vger.kernel.org S: Supported F: Documentation/networking/caif/ @@ -2629,7 +2629,7 @@ F: include/uapi/drm/ INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) M: Daniel Vetter <daniel.vetter@ffwll.ch> -L: intel-gfx@lists.freedesktop.org (subscribers-only) +L: intel-gfx@lists.freedesktop.org L: dri-devel@lists.freedesktop.org T: git git://people.freedesktop.org/~danvet/drm-intel S: Supported @@ -3242,6 +3242,12 @@ F: Documentation/firmware_class/ F: drivers/base/firmware*.c F: include/linux/firmware.h +FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card) +M: Joshua Morris <josh.h.morris@us.ibm.com> +M: Philip Kelleher <pjk1939@linux.vnet.ibm.com> +S: Maintained +F: drivers/block/rsxx/ + FLOPPY DRIVER M: Jiri Kosina <jkosina@suse.cz> T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/floppy.git @@ -3851,7 +3857,7 @@ F: drivers/i2c/busses/i2c-ismt.c F: 
Documentation/i2c/busses/i2c-ismt I2C/SMBUS STUB DRIVER -M: "Mark M. Hoffman" <mhoffman@lightlink.com> +M: Jean Delvare <khali@linux-fr.org> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/i2c-stub.c @@ -4005,6 +4011,22 @@ M: Stanislaw Gruszka <stf_xl@wp.pl> S: Maintained F: drivers/usb/atm/ueagle-atm.c +INA209 HARDWARE MONITOR DRIVER +M: Guenter Roeck <linux@roeck-us.net> +L: lm-sensors@lm-sensors.org +S: Maintained +F: Documentation/hwmon/ina209 +F: Documentation/devicetree/bindings/i2c/ina209.txt +F: drivers/hwmon/ina209.c + +INA2XX HARDWARE MONITOR DRIVER +M: Guenter Roeck <linux@roeck-us.net> +L: lm-sensors@lm-sensors.org +S: Maintained +F: Documentation/hwmon/ina2xx +F: drivers/hwmon/ina2xx.c +F: include/linux/platform_data/ina2xx.h + INDUSTRY PACK SUBSYSTEM (IPACK) M: Samuel Iglesias Gonsalvez <siglesias@igalia.com> M: Jens Taprogge <jens.taprogge@taprogge.org> @@ -4919,6 +4941,12 @@ W: logfs.org S: Maintained F: fs/logfs/ +LPC32XX MACHINE SUPPORT +M: Roland Stigge <stigge@antcom.de> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/mach-lpc32xx/ + LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) M: Nagalakshmi Nandigama <Nagalakshmi.Nandigama@lsi.com> M: Sreekanth Reddy <Sreekanth.Reddy@lsi.com> @@ -5097,6 +5125,15 @@ S: Maintained F: Documentation/hwmon/max6650 F: drivers/hwmon/max6650.c +MAX6697 HARDWARE MONITOR DRIVER +M: Guenter Roeck <linux@roeck-us.net> +L: lm-sensors@lm-sensors.org +S: Maintained +F: Documentation/hwmon/max6697 +F: Documentation/devicetree/bindings/i2c/max6697.txt +F: drivers/hwmon/max6697.c +F: include/linux/platform_data/max6697.h + MAXIRADIO FM RADIO RECEIVER DRIVER M: Hans Verkuil <hverkuil@xs4all.nl> L: linux-media@vger.kernel.org @@ -5537,6 +5574,7 @@ F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h NETXEN (1/10) GbE SUPPORT +M: Manish Chopra <manish.chopra@qlogic.com> M: Sony Chacko <sony.chacko@qlogic.com> M: Rajesh Borundia <rajesh.borundia@qlogic.com> L: 
netdev@vger.kernel.org @@ -5621,6 +5659,14 @@ S: Maintained F: drivers/video/riva/ F: drivers/video/nvidia/ +NVM EXPRESS DRIVER +M: Matthew Wilcox <willy@linux.intel.com> +L: linux-nvme@lists.infradead.org +T: git git://git.infradead.org/users/willy/linux-nvme.git +S: Supported +F: drivers/block/nvme.c +F: include/linux/nvme.h + OMAP SUPPORT M: Tony Lindgren <tony@atomide.com> L: linux-omap@vger.kernel.org @@ -5649,7 +5695,7 @@ S: Maintained F: arch/arm/*omap*/*clock* OMAP POWER MANAGEMENT SUPPORT -M: Kevin Hilman <khilman@ti.com> +M: Kevin Hilman <khilman@deeprootsystems.com> L: linux-omap@vger.kernel.org S: Maintained F: arch/arm/*omap*/*pm* @@ -5743,7 +5789,7 @@ F: arch/arm/*omap*/usb* OMAP GPIO DRIVER M: Santosh Shilimkar <santosh.shilimkar@ti.com> -M: Kevin Hilman <khilman@ti.com> +M: Kevin Hilman <khilman@deeprootsystems.com> L: linux-omap@vger.kernel.org S: Maintained F: drivers/gpio/gpio-omap.c @@ -6175,7 +6221,7 @@ F: include/linux/power_supply.h F: drivers/power/ PNP SUPPORT -M: Adam Belay <abelay@mit.edu> +M: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> M: Bjorn Helgaas <bhelgaas@google.com> S: Maintained F: drivers/pnp/ @@ -6290,6 +6336,7 @@ F: drivers/acpi/apei/erst.c PTP HARDWARE CLOCK SUPPORT M: Richard Cochran <richardcochran@gmail.com> +L: netdev@vger.kernel.org S: Maintained W: http://linuxptp.sourceforge.net/ F: Documentation/ABI/testing/sysfs-ptp @@ -6411,6 +6458,8 @@ F: Documentation/networking/LICENSE.qla3xxx F: drivers/net/ethernet/qlogic/qla3xxx.* QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER +M: Rajesh Borundia <rajesh.borundia@qlogic.com> +M: Shahed Shaikh <shahed.shaikh@qlogic.com> M: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com> M: Sony Chacko <sony.chacko@qlogic.com> M: linux-driver@qlogic.com @@ -6419,6 +6468,7 @@ S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER +M: Shahed Shaikh <shahed.shaikh@qlogic.com> M: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com> M: Ron Mercer <ron.mercer@qlogic.com> M: linux-driver@qlogic.com @@ -6515,12 +6565,6 @@ S: Maintained F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c -RAMSAM DRIVER (IBM RamSan 70/80 PCI SSD Flash Card) -M: Joshua Morris <josh.h.morris@us.ibm.com> -M: Philip Kelleher <pjk1939@linux.vnet.ibm.com> -S: Maintained -F: drivers/block/rsxx/ - RANDOM NUMBER DRIVER M: Theodore Ts'o" <tytso@mit.edu> S: Maintained @@ -6589,7 +6633,7 @@ S: Supported F: fs/reiserfs/ REGISTER MAP ABSTRACTION -M: Mark Brown <broonie@opensource.wolfsonmicro.com> +M: Mark Brown <broonie@kernel.org> T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git S: Supported F: drivers/base/regmap/ @@ -6915,7 +6959,6 @@ F: drivers/scsi/st* SCTP PROTOCOL M: Vlad Yasevich <vyasevich@gmail.com> -M: Sridhar Samudrala <sri@us.ibm.com> M: Neil Horman <nhorman@tuxdriver.com> L: linux-sctp@vger.kernel.org W: http://lksctp.sourceforge.net @@ -7137,7 +7180,7 @@ F: arch/arm/mach-s3c2410/bast-irq.c TI DAVINCI MACHINE SUPPORT M: Sekhar Nori <nsekhar@ti.com> -M: Kevin Hilman <khilman@ti.com> +M: Kevin 
Hilman <khilman@deeprootsystems.com> L: davinci-linux-open-source@linux.davincidsp.com (moderated for non-subscribers) T: git git://gitorious.org/linux-davinci/linux-davinci.git Q: http://patchwork.kernel.org/project/linux-davinci/list/ @@ -7170,13 +7213,6 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/sis/sis900.* -SIS 96X I2C/SMBUS DRIVER -M: "Mark M. Hoffman" <mhoffman@lightlink.com> -L: linux-i2c@vger.kernel.org -S: Maintained -F: Documentation/i2c/busses/i2c-sis96x -F: drivers/i2c/busses/i2c-sis96x.c - SIS FRAMEBUFFER DRIVER M: Thomas Winischhofer <thomas@winischhofer.net> W: http://www.winischhofer.net/linuxsisvga.shtml @@ -7254,7 +7290,7 @@ F: Documentation/hwmon/sch5627 F: drivers/hwmon/sch5627.c SMSC47B397 HARDWARE MONITOR DRIVER -M: "Mark M. Hoffman" <mhoffman@lightlink.com> +M: Jean Delvare <khali@linux-fr.org> L: lm-sensors@lm-sensors.org S: Maintained F: Documentation/hwmon/smsc47b397 @@ -7345,7 +7381,7 @@ F: sound/ SOUND - SOC LAYER / DYNAMIC AUDIO POWER MANAGEMENT (ASoC) M: Liam Girdwood <lgirdwood@gmail.com> -M: Mark Brown <broonie@opensource.wolfsonmicro.com> +M: Mark Brown <broonie@kernel.org> T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git L: alsa-devel@alsa-project.org (moderated for non-subscribers) W: http://alsa-project.org/main/index.php/ASoC @@ -7434,7 +7470,7 @@ F: drivers/clk/spear/ SPI SUBSYSTEM M: Grant Likely <grant.likely@secretlab.ca> -M: Mark Brown <broonie@opensource.wolfsonmicro.com> +M: Mark Brown <broonie@kernel.org> L: spi-devel-general@lists.sourceforge.net Q: http://patchwork.kernel.org/project/spi-devel-general/list/ T: git git://git.secretlab.ca/git/linux-2.6.git @@ -7677,9 +7713,10 @@ F: include/linux/swiotlb.h SYNOPSYS ARC ARCHITECTURE M: Vineet Gupta <vgupta@synopsys.com> -L: linux-snps-arc@vger.kernel.org S: Supported F: arch/arc/ +F: Documentation/devicetree/bindings/arc/ +F: drivers/tty/serial/arc-uart.c SYSV FILESYSTEM M: Christoph Hellwig <hch@infradead.org> @@ -8678,7 
+8715,7 @@ F: drivers/scsi/vmw_pvscsi.h VOLTAGE AND CURRENT REGULATOR FRAMEWORK M: Liam Girdwood <lrg@ti.com> -M: Mark Brown <broonie@opensource.wolfsonmicro.com> +M: Mark Brown <broonie@kernel.org> W: http://opensource.wolfsonmicro.com/node/15 W: http://www.slimlogic.co.uk/?p=48 T: git git://git.kernel.org/pub/scm/linux/kernel/git/lrg/regulator.git @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc8 NAME = Unicycling Gorilla # *DOCUMENTATION* @@ -513,7 +513,8 @@ ifeq ($(KBUILD_EXTMOD),) # Carefully list dependencies so we do not try to build scripts twice # in parallel PHONY += scripts -scripts: scripts_basic include/config/auto.conf include/config/tristate.conf +scripts: scripts_basic include/config/auto.conf include/config/tristate.conf \ + asm-generic $(Q)$(MAKE) $(build)=$(@) # Objects we will link into vmlinux / subdirs we need to visit diff --git a/arch/Kconfig b/arch/Kconfig index 5a1779c93940..1455579791ec 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -319,13 +319,6 @@ config ARCH_WANT_OLD_COMPAT_IPC select ARCH_WANT_COMPAT_IPC_PARSE_VERSION bool -config HAVE_VIRT_TO_BUS - bool - help - An architecture should select this if it implements the - deprecated interface virt_to_bus(). All new architectures - should probably not select this. 
- config HAVE_ARCH_SECCOMP_FILTER bool help diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 5833aa441481..8a33ba01301f 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -9,7 +9,7 @@ config ALPHA select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS select HAVE_GENERIC_HARDIRQS - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select GENERIC_IRQ_PROBE select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile index 4759fe751aa1..2cc3cc519c54 100644 --- a/arch/alpha/Makefile +++ b/arch/alpha/Makefile @@ -12,7 +12,7 @@ NM := $(NM) -B LDFLAGS_vmlinux := -static -N #-relax CHECKFLAGS += -D__alpha__ -m64 -cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data +cflags-y := -pipe -mno-fp-regs -ffixed-8 cflags-y += $(call cc-option, -fno-jump-tables) cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4 diff --git a/arch/alpha/boot/head.S b/arch/alpha/boot/head.S index b06812bcac83..8efb26686d47 100644 --- a/arch/alpha/boot/head.S +++ b/arch/alpha/boot/head.S @@ -4,6 +4,7 @@ * initial bootloader stuff.. 
*/ +#include <asm/pal.h> .set noreorder .globl __start diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h index 46cefbd50e73..bae97eb19d26 100644 --- a/arch/alpha/include/asm/floppy.h +++ b/arch/alpha/include/asm/floppy.h @@ -26,7 +26,7 @@ #define fd_disable_irq() disable_irq(FLOPPY_IRQ) #define fd_cacheflush(addr,size) /* nothing */ #define fd_request_irq() request_irq(FLOPPY_IRQ, floppy_interrupt,\ - IRQF_DISABLED, "floppy", NULL) + 0, "floppy", NULL) #define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) #ifdef CONFIG_PCI diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index c5195524d1ef..eee6ea76bdaf 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 2872accd2215..7b2be251c30f 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -117,13 +117,6 @@ handle_irq(int irq) return; } - /* - * From here we must proceed with IPL_MAX. Note that we do not - * explicitly enable interrupts afterwards - some MILO PALcode - * (namely LX164 one) seems to have severe problems with RTI - * at IPL 0. - */ - local_irq_disable(); irq_enter(); generic_handle_irq_desc(irq, desc); irq_exit(); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 772ddfdb71a8..f433fc11877a 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -45,6 +45,14 @@ do_entInt(unsigned long type, unsigned long vector, unsigned long la_ptr, struct pt_regs *regs) { struct pt_regs *old_regs; + + /* + * Disable interrupts during IRQ handling. + * Note that there is no matching local_irq_enable() due to + * severe problems with RTI at IPL0 and some MILO PALcode + * (namely LX164). 
+ */ + local_irq_disable(); switch (type) { case 0: #ifdef CONFIG_SMP @@ -62,7 +70,6 @@ do_entInt(unsigned long type, unsigned long vector, { long cpu; - local_irq_disable(); smp_percpu_timer_interrupt(regs); cpu = smp_processor_id(); if (cpu != boot_cpuid) { @@ -222,7 +229,6 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, .name = "timer", }; diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 4d4c046f708d..1383f8601a93 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -188,6 +188,10 @@ nautilus_machine_check(unsigned long vector, unsigned long la_ptr) extern void free_reserved_mem(void *, void *); extern void pcibios_claim_one_bus(struct pci_bus *); +static struct resource irongate_io = { + .name = "Irongate PCI IO", + .flags = IORESOURCE_IO, +}; static struct resource irongate_mem = { .name = "Irongate PCI MEM", .flags = IORESOURCE_MEM, @@ -209,6 +213,7 @@ nautilus_init_pci(void) irongate = pci_get_bus_and_slot(0, 0); bus->self = irongate; + bus->resource[0] = &irongate_io; bus->resource[1] = &irongate_mem; pci_bus_size_bridges(bus); diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 5cf4a481b8c5..a53cf03f49d5 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -280,15 +280,15 @@ titan_late_init(void) * all reported to the kernel as machine checks, so the handler * is a nop so it can be called to count the individual events. 
*/ - titan_request_irq(63+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(63+16, titan_intr_nop, 0, "CChip Error", NULL); - titan_request_irq(62+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(62+16, titan_intr_nop, 0, "PChip 0 H_Error", NULL); - titan_request_irq(61+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(61+16, titan_intr_nop, 0, "PChip 1 H_Error", NULL); - titan_request_irq(60+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(60+16, titan_intr_nop, 0, "PChip 0 C_Error", NULL); - titan_request_irq(59+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(59+16, titan_intr_nop, 0, "PChip 1 C_Error", NULL); /* @@ -348,9 +348,9 @@ privateer_init_pci(void) * Hook a couple of extra err interrupts that the * common titan code won't. */ - titan_request_irq(53+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(53+16, titan_intr_nop, 0, "NMI", NULL); - titan_request_irq(50+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(50+16, titan_intr_nop, 0, "Temperature Warning", NULL); /* diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h index 31f77aec0823..45b8e0cea176 100644 --- a/arch/arc/include/asm/dma-mapping.h +++ b/arch/arc/include/asm/dma-mapping.h @@ -126,7 +126,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - sg->dma_address = dma_map_page(dev, sg_page(s), s->offset, + s->dma_address = dma_map_page(dev, sg_page(s), s->offset, s->length, dir); return nents; diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index f4c8d36ebecb..a26282857683 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -72,7 +72,4 @@ extern int elf_check_arch(const struct elf32_hdr *); */ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 23daa326fc9b..eb2ae53187d9 100644 
--- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -415,7 +415,7 @@ *-------------------------------------------------------------*/ .macro SAVE_ALL_EXCEPTION marker - st \marker, [sp, 8] + st \marker, [sp, 8] /* orig_r8 */ st r0, [sp, 4] /* orig_r0, needed only for sys calls */ /* Restore r9 used to code the early prologue */ diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index ccd84806b62f..eac071668201 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -39,7 +39,7 @@ static inline long arch_local_irq_save(void) " flag.nz %0 \n" : "=r"(temp), "=r"(flags) : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) - : "cc"); + : "memory", "cc"); return flags; } @@ -53,7 +53,8 @@ static inline void arch_local_irq_restore(unsigned long flags) __asm__ __volatile__( " flag %0 \n" : - : "r"(flags)); + : "r"(flags) + : "memory"); } /* @@ -73,7 +74,8 @@ static inline void arch_local_irq_disable(void) " and %0, %0, %1 \n" " flag %0 \n" : "=&r"(temp) - : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))); + : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK)) + : "memory"); } /* @@ -85,7 +87,9 @@ static inline long arch_local_save_flags(void) __asm__ __volatile__( " lr %0, [status32] \n" - : "=&r"(temp)); + : "=&r"(temp) + : + : "memory"); return temp; } diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h index f3c4934f0ca9..4930957ca3d3 100644 --- a/arch/arc/include/asm/kgdb.h +++ b/arch/arc/include/asm/kgdb.h @@ -13,7 +13,7 @@ #ifdef CONFIG_KGDB -#include <asm/user.h> +#include <asm/ptrace.h> /* to ensure compatibility with Linux 2.6.35, we don't implement the get/set * register API yet */ @@ -53,9 +53,7 @@ enum arc700_linux_regnums { }; #else -static inline void kgdb_trap(struct pt_regs *regs, int param) -{ -} +#define kgdb_trap(regs, param) #endif #endif /* __ARC_KGDB_H__ */ diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 8ae783d20a81..6179de7e07c2 100644 --- 
a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -123,7 +123,7 @@ static inline long regs_return_value(struct pt_regs *regs) #define orig_r8_IS_SCALL 0x0001 #define orig_r8_IS_SCALL_RESTARTED 0x0002 #define orig_r8_IS_BRKPT 0x0004 -#define orig_r8_IS_EXCPN 0x0004 +#define orig_r8_IS_EXCPN 0x0008 #define orig_r8_IS_IRQ1 0x0010 #define orig_r8_IS_IRQ2 0x0020 diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index e53a5340ba4f..dd785befe7fd 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -16,8 +16,6 @@ #include <linux/types.h> int sys_clone_wrapper(int, int, int, int, int); -int sys_fork_wrapper(void); -int sys_vfork_wrapper(void); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 6afa4f702075..30333cec0fef 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -28,14 +28,14 @@ */ struct user_regs_struct { - struct scratch { + struct { long pad; long bta, lp_start, lp_end, lp_count; long status32, ret, blink, fp, gp; long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; long sp; } scratch; - struct callee { + struct { long pad; long r25, r24, r23, r22, r21, r20; long r19, r18, r17, r16, r15, r14, r13; diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index ef6800ba2f03..91eeab81f52d 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -452,7 +452,7 @@ tracesys: ; using ERET won't work since next-PC has already committed lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 - st r12, [r11, THREAD_FAULT_ADDR] + st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address ; PRE Sys Call Ptrace hook mov r0, sp ; pt_regs needed @@ -792,31 +792,6 @@ ARC_EXIT ret_from_fork ;################### Special Sys Call Wrappers ########################## -; TBD: call do_fork directly 
from here -ARC_ENTRY sys_fork_wrapper - SAVE_CALLEE_SAVED_USER - bl @sys_fork - DISCARD_CALLEE_SAVED_USER - - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys_exit - - b ret_from_system_call -ARC_EXIT sys_fork_wrapper - -ARC_ENTRY sys_vfork_wrapper - SAVE_CALLEE_SAVED_USER - bl @sys_vfork - DISCARD_CALLEE_SAVED_USER - - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys_exit - - b ret_from_system_call -ARC_EXIT sys_vfork_wrapper - ARC_ENTRY sys_clone_wrapper SAVE_CALLEE_SAVED_USER bl @sys_clone diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index 2888ba5be47e..52bdc83c1495 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -9,6 +9,7 @@ */ #include <linux/kgdb.h> +#include <linux/sched.h> #include <asm/disasm.h> #include <asm/cacheflush.h> diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index dc0f968dae0a..2d95ac07df7b 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -232,10 +232,8 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "\n"); -#ifdef _ASM_GENERIC_UNISTD_H n += scnprintf(buf + n, len - n, - "OS ABI [v2]\t: asm-generic/{unistd,stat,fcntl}\n"); -#endif + "OS ABI [v3]\t: no-legacy-syscalls\n"); return buf; } diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c index f6bdd07583f3..9d6c1ca26af6 100644 --- a/arch/arc/kernel/sys.c +++ b/arch/arc/kernel/sys.c @@ -6,8 +6,6 @@ #include <asm/syscalls.h> #define sys_clone sys_clone_wrapper -#define sys_fork sys_fork_wrapper -#define sys_vfork sys_vfork_wrapper #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5b714695b01b..1cacda426a0e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -49,7 +49,6 @@ config ARM select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 - select HAVE_VIRT_TO_BUS select KTIME_SCALAR select PERF_USE_VMALLOC select RTC_LIB @@ 
-556,7 +555,6 @@ config ARCH_IXP4XX config ARCH_DOVE bool "Marvell Dove" select ARCH_REQUIRE_GPIOLIB - select COMMON_CLK_DOVE select CPU_V7 select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI @@ -744,6 +742,7 @@ config ARCH_RPC select NEED_MACH_IO_H select NEED_MACH_MEMORY_H select NO_IOPORT + select VIRT_TO_BUS help On the Acorn Risc-PC, Linux can support the internal IDE disk and CD-ROM interface, serial and parallel port, and the floppy drive. @@ -879,6 +878,7 @@ config ARCH_SHARK select ISA_DMA select NEED_MACH_MEMORY_H select PCI + select VIRT_TO_BUS select ZONE_DMA help Support for the StrongARM based Digital DNARD machine, also known @@ -1006,12 +1006,12 @@ config ARCH_MULTI_V4_V5 bool config ARCH_MULTI_V6 - bool "ARMv6 based platforms (ARM11, Scorpion, ...)" + bool "ARMv6 based platforms (ARM11)" select ARCH_MULTI_V6_V7 select CPU_V6 config ARCH_MULTI_V7 - bool "ARMv7 based platforms (Cortex-A, PJ4, Krait)" + bool "ARMv7 based platforms (Cortex-A, PJ4, Scorpion, Krait)" default y select ARCH_MULTI_V6_V7 select ARCH_VEXPRESS @@ -1183,9 +1183,9 @@ config ARM_NR_BANKS default 8 config IWMMXT - bool "Enable iWMMXt support" + bool "Enable iWMMXt support" if !CPU_PJ4 depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 - default y if PXA27x || PXA3xx || ARCH_MMP + default y if PXA27x || PXA3xx || ARCH_MMP || CPU_PJ4 help Enable support for iWMMXt context switching at run time if running on a CPU that supports it. @@ -1439,6 +1439,16 @@ config ARM_ERRATA_775420 to deadlock. This workaround puts DSB before executing ISB if an abort may occur on cache maintenance. +config ARM_ERRATA_798181 + bool "ARM errata: TLBI/DSB failure on Cortex-A15" + depends on CPU_V7 && SMP + help + On Cortex-A15 (r0p0..r3p2) the TLBI*IS/DSB operations are not + adequately shooting down all use of the old entries. This + option enables the Linux kernel workaround for this erratum + which sends an IPI to the CPUs that are running the same ASID + as the one being invalidated. 
+ endmenu source "arch/arm/common/Kconfig" @@ -1462,10 +1472,6 @@ config ISA_DMA bool select ISA_DMA_API -config ARCH_NO_VIRT_TO_BUS - def_bool y - depends on !ARCH_RPC && !ARCH_NETWINDER && !ARCH_SHARK - # Select ISA DMA interface config ISA_DMA_API bool @@ -1657,13 +1663,16 @@ config LOCAL_TIMERS accounting to be spread across the timer interval, preventing a "thundering herd" at every timer tick. +# The GPIO number here must be sorted by descending number. In case of +# a multiplatform kernel, we just want the highest value required by the +# selected platforms. config ARCH_NR_GPIO int default 1024 if ARCH_SHMOBILE || ARCH_TEGRA - default 355 if ARCH_U8500 - default 264 if MACH_H4700 default 512 if SOC_OMAP5 + default 355 if ARCH_U8500 default 288 if ARCH_VT8500 || ARCH_SUNXI + default 264 if MACH_H4700 default 0 help Maximum number of GPIOs in the system. @@ -1887,8 +1896,9 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM (EXPERIMENTAL)" - depends on ARM && OF + depends on ARM && AEABI && OF depends on CPU_V7 && !CPU_V6 + depends on !GENERIC_ATOMIC64 help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index acddddac7ee4..9b31f4311ea2 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -492,9 +492,10 @@ config DEBUG_IMX_UART_PORT DEBUG_IMX31_UART || \ DEBUG_IMX35_UART || \ DEBUG_IMX51_UART || \ - DEBUG_IMX50_IMX53_UART || \ + DEBUG_IMX53_UART || \ DEBUG_IMX6Q_UART default 1 + depends on ARCH_MXC help Choose UART port on which kernel low-level debug messages should be output. 
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 71768b8a1ab9..84aa2caf07ed 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -115,4 +115,4 @@ i: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" -subdir- := bootp compressed +subdir- := bootp compressed dts diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 5cad8a6dadb0..afed28e37ea5 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -120,7 +120,7 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) endif -ccflags-y := -fpic -fno-builtin -I$(obj) +ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) asflags-y := -Wa,-march=all -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 0957645b73af..91fe4f148f80 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -349,7 +349,7 @@ rx_descs = <64>; mac_control = <0x20>; slaves = <2>; - cpts_active_slave = <0>; + active_slave = <0>; cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; reg = <0x4a100000 0x800 diff --git a/arch/arm/boot/dts/armada-370-mirabox.dts b/arch/arm/boot/dts/armada-370-mirabox.dts index dd0c57dd9f30..3234875824dc 100644 --- a/arch/arm/boot/dts/armada-370-mirabox.dts +++ b/arch/arm/boot/dts/armada-370-mirabox.dts @@ -54,7 +54,7 @@ }; mvsdio@d00d4000 { - pinctrl-0 = <&sdio_pins2>; + pinctrl-0 = <&sdio_pins3>; pinctrl-names = "default"; status = "okay"; /* diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts index f8e4855bc9a5..070bba4f2585 100644 --- a/arch/arm/boot/dts/armada-370-rd.dts +++ b/arch/arm/boot/dts/armada-370-rd.dts @@ -64,5 +64,13 @@ status = "okay"; /* No CD or WP GPIOs */ }; + + usb@d0050000 { + status = "okay"; + }; + + usb@d0051000 { + status = "okay"; + }; }; }; 
diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 6f1acc75e155..5b708208b607 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -31,7 +31,6 @@ mpic: interrupt-controller@d0020000 { compatible = "marvell,mpic"; #interrupt-cells = <1>; - #address-cells = <1>; #size-cells = <1>; interrupt-controller; }; @@ -54,7 +53,7 @@ reg = <0xd0012000 0x100>; reg-shift = <2>; interrupts = <41>; - reg-io-width = <4>; + reg-io-width = <1>; status = "disabled"; }; serial@d0012100 { @@ -62,7 +61,7 @@ reg = <0xd0012100 0x100>; reg-shift = <2>; interrupts = <42>; - reg-io-width = <4>; + reg-io-width = <1>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 8188d138020e..a195debb67d3 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -59,6 +59,12 @@ "mpp50", "mpp51", "mpp52"; marvell,function = "sd0"; }; + + sdio_pins3: sdio-pins3 { + marvell,pins = "mpp48", "mpp49", "mpp50", + "mpp51", "mpp52", "mpp53"; + marvell,function = "sd0"; + }; }; gpio0: gpio@d0018100 { diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 1443949c165e..ca00d8326c87 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -46,7 +46,7 @@ reg = <0xd0012200 0x100>; reg-shift = <2>; interrupts = <43>; - reg-io-width = <4>; + reg-io-width = <1>; status = "disabled"; }; serial@d0012300 { @@ -54,7 +54,7 @@ reg = <0xd0012300 0x100>; reg-shift = <2>; interrupts = <44>; - reg-io-width = <4>; + reg-io-width = <1>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index aa98e641931f..a98c0d50fbbe 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -238,8 +238,32 @@ nand { pinctrl_nand: nand-0 { atmel,pins = - <3 4 0x0 0x1 /* PD5 gpio RDY pin pull_up */ - 3 5 0x0 0x1>; /* PD4 gpio 
enable pin pull_up */ + <3 0 0x1 0x0 /* PD0 periph A Read Enable */ + 3 1 0x1 0x0 /* PD1 periph A Write Enable */ + 3 2 0x1 0x0 /* PD2 periph A Address Latch Enable */ + 3 3 0x1 0x0 /* PD3 periph A Command Latch Enable */ + 3 4 0x0 0x1 /* PD4 gpio Chip Enable pin pull_up */ + 3 5 0x0 0x1 /* PD5 gpio RDY/BUSY pin pull_up */ + 3 6 0x1 0x0 /* PD6 periph A Data bit 0 */ + 3 7 0x1 0x0 /* PD7 periph A Data bit 1 */ + 3 8 0x1 0x0 /* PD8 periph A Data bit 2 */ + 3 9 0x1 0x0 /* PD9 periph A Data bit 3 */ + 3 10 0x1 0x0 /* PD10 periph A Data bit 4 */ + 3 11 0x1 0x0 /* PD11 periph A Data bit 5 */ + 3 12 0x1 0x0 /* PD12 periph A Data bit 6 */ + 3 13 0x1 0x0>; /* PD13 periph A Data bit 7 */ + }; + + pinctrl_nand_16bits: nand_16bits-0 { + atmel,pins = + <3 14 0x1 0x0 /* PD14 periph A Data bit 8 */ + 3 15 0x1 0x0 /* PD15 periph A Data bit 9 */ + 3 16 0x1 0x0 /* PD16 periph A Data bit 10 */ + 3 17 0x1 0x0 /* PD17 periph A Data bit 11 */ + 3 18 0x1 0x0 /* PD18 periph A Data bit 12 */ + 3 19 0x1 0x0 /* PD19 periph A Data bit 13 */ + 3 20 0x1 0x0 /* PD20 periph A Data bit 14 */ + 3 21 0x1 0x0>; /* PD21 periph A Data bit 15 */ }; }; diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi index 4bf2a8774aa7..7e0481e2441a 100644 --- a/arch/arm/boot/dts/bcm2835.dtsi +++ b/arch/arm/boot/dts/bcm2835.dtsi @@ -105,7 +105,7 @@ compatible = "fixed-clock"; reg = <1>; #clock-cells = <0>; - clock-frequency = <150000000>; + clock-frequency = <250000000>; }; }; }; diff --git a/arch/arm/boot/dts/dbx5x0.dtsi b/arch/arm/boot/dts/dbx5x0.dtsi index 69140ba99f46..aaa63d0a8096 100644 --- a/arch/arm/boot/dts/dbx5x0.dtsi +++ b/arch/arm/boot/dts/dbx5x0.dtsi @@ -191,8 +191,8 @@ prcmu: prcmu@80157000 { compatible = "stericsson,db8500-prcmu"; - reg = <0x80157000 0x1000>; - reg-names = "prcmu"; + reg = <0x80157000 0x1000>, <0x801b0000 0x8000>, <0x801b8000 0x1000>; + reg-names = "prcmu", "prcmu-tcpm", "prcmu-tcdm"; interrupts = <0 47 0x4>; #address-cells = <1>; #size-cells = <1>; @@ -319,9 
+319,8 @@ }; }; - ab8500@5 { + ab8500 { compatible = "stericsson,ab8500"; - reg = <5>; /* mailbox 5 is i2c */ interrupt-parent = <&intc>; interrupts = <0 40 0x4>; interrupt-controller; diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi index 67dbe20868a2..f7509cafc377 100644 --- a/arch/arm/boot/dts/dove.dtsi +++ b/arch/arm/boot/dts/dove.dtsi @@ -197,6 +197,11 @@ status = "disabled"; }; + rtc@d8500 { + compatible = "marvell,orion-rtc"; + reg = <0xd8500 0x20>; + }; + crypto: crypto@30000 { compatible = "marvell,orion-crypto"; reg = <0x30000 0x10000>, diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index e1347fceb5bc..1a62bcf18aa3 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -275,18 +275,27 @@ compatible = "arm,pl330", "arm,primecell"; reg = <0x12680000 0x1000>; interrupts = <0 35 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; pdma1: pdma@12690000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x12690000 0x1000>; interrupts = <0 36 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; mdma1: mdma@12850000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x12850000 0x1000>; interrupts = <0 34 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <1>; }; }; }; diff --git a/arch/arm/boot/dts/exynos5440.dtsi b/arch/arm/boot/dts/exynos5440.dtsi index 5f3562ad6746..9a99755920c0 100644 --- a/arch/arm/boot/dts/exynos5440.dtsi +++ b/arch/arm/boot/dts/exynos5440.dtsi @@ -142,12 +142,18 @@ compatible = "arm,pl330", "arm,primecell"; reg = <0x120000 0x1000>; interrupts = <0 34 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; pdma1: pdma@121B0000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x121000 0x1000>; interrupts = <0 35 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; }; diff --git a/arch/arm/boot/dts/href.dtsi b/arch/arm/boot/dts/href.dtsi index 
592fb9dc35bd..379128eb9d98 100644 --- a/arch/arm/boot/dts/href.dtsi +++ b/arch/arm/boot/dts/href.dtsi @@ -221,7 +221,7 @@ }; }; - ab8500@5 { + ab8500 { ab8500-regulators { ab8500_ldo_aux1_reg: ab8500_ldo_aux1 { regulator-name = "V-DISPLAY"; diff --git a/arch/arm/boot/dts/hrefv60plus.dts b/arch/arm/boot/dts/hrefv60plus.dts index 55f4191a626e..2b587a74b813 100644 --- a/arch/arm/boot/dts/hrefv60plus.dts +++ b/arch/arm/boot/dts/hrefv60plus.dts @@ -158,7 +158,7 @@ }; }; - ab8500@5 { + ab8500 { ab8500-regulators { ab8500_ldo_aux1_reg: ab8500_ldo_aux1 { regulator-name = "V-DISPLAY"; diff --git a/arch/arm/boot/dts/imx28-m28evk.dts b/arch/arm/boot/dts/imx28-m28evk.dts index 6ce3d17c3a29..fd36e1cca104 100644 --- a/arch/arm/boot/dts/imx28-m28evk.dts +++ b/arch/arm/boot/dts/imx28-m28evk.dts @@ -152,7 +152,6 @@ i2c0: i2c@80058000 { pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; - clock-frequency = <400000>; status = "okay"; sgtl5000: codec@0a { diff --git a/arch/arm/boot/dts/imx28-sps1.dts b/arch/arm/boot/dts/imx28-sps1.dts index e6cde8aa7fff..6c6a5442800a 100644 --- a/arch/arm/boot/dts/imx28-sps1.dts +++ b/arch/arm/boot/dts/imx28-sps1.dts @@ -70,7 +70,6 @@ i2c0: i2c@80058000 { pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; - clock-frequency = <400000>; status = "okay"; rtc: rtc@51 { diff --git a/arch/arm/boot/dts/imx53-mba53.dts b/arch/arm/boot/dts/imx53-mba53.dts index e54fffd48369..468c0a1d48d9 100644 --- a/arch/arm/boot/dts/imx53-mba53.dts +++ b/arch/arm/boot/dts/imx53-mba53.dts @@ -42,10 +42,9 @@ fsl,pins = <689 0x10000 /* DISP1_DRDY */ 482 0x10000 /* DISP1_HSYNC */ 489 0x10000 /* DISP1_VSYNC */ - 684 0x10000 /* DISP1_DAT_0 */ 515 0x10000 /* DISP1_DAT_22 */ 523 0x10000 /* DISP1_DAT_23 */ - 543 0x10000 /* DISP1_DAT_21 */ + 545 0x10000 /* DISP1_DAT_21 */ 553 0x10000 /* DISP1_DAT_20 */ 558 0x10000 /* DISP1_DAT_19 */ 564 0x10000 /* DISP1_DAT_18 */ diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 06ec460b4581..281a223591ff 
100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -91,6 +91,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x00a00600 0x20>; interrupts = <1 13 0xf01>; + clocks = <&clks 15>; }; L2: l2-cache@00a02000 { diff --git a/arch/arm/boot/dts/kirkwood-dns320.dts b/arch/arm/boot/dts/kirkwood-dns320.dts index 5bb0bf39d3b8..c9c44b2f62d7 100644 --- a/arch/arm/boot/dts/kirkwood-dns320.dts +++ b/arch/arm/boot/dts/kirkwood-dns320.dts @@ -42,12 +42,10 @@ ocp@f1000000 { serial@12000 { - clock-frequency = <166666667>; status = "okay"; }; serial@12100 { - clock-frequency = <166666667>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/kirkwood-dns325.dts b/arch/arm/boot/dts/kirkwood-dns325.dts index d430713ea9b9..e4e4930dc5cf 100644 --- a/arch/arm/boot/dts/kirkwood-dns325.dts +++ b/arch/arm/boot/dts/kirkwood-dns325.dts @@ -50,7 +50,6 @@ }; }; serial@12000 { - clock-frequency = <200000000>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/kirkwood-dockstar.dts b/arch/arm/boot/dts/kirkwood-dockstar.dts index 2e3dd34e21a5..0196cf6b0ef2 100644 --- a/arch/arm/boot/dts/kirkwood-dockstar.dts +++ b/arch/arm/boot/dts/kirkwood-dockstar.dts @@ -37,7 +37,6 @@ }; }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood-dreamplug.dts b/arch/arm/boot/dts/kirkwood-dreamplug.dts index ef2d8c705709..289e51d86372 100644 --- a/arch/arm/boot/dts/kirkwood-dreamplug.dts +++ b/arch/arm/boot/dts/kirkwood-dreamplug.dts @@ -38,7 +38,6 @@ }; }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood-goflexnet.dts b/arch/arm/boot/dts/kirkwood-goflexnet.dts index 1b133e0c566e..c3573be7b92c 100644 --- a/arch/arm/boot/dts/kirkwood-goflexnet.dts +++ b/arch/arm/boot/dts/kirkwood-goflexnet.dts @@ -73,11 +73,11 @@ }; }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; nand@3000000 { + chip-delay = <40>; status = "okay"; partition@0 { diff --git 
a/arch/arm/boot/dts/kirkwood-ib62x0.dts b/arch/arm/boot/dts/kirkwood-ib62x0.dts index 71902da33d63..5335b1aa8601 100644 --- a/arch/arm/boot/dts/kirkwood-ib62x0.dts +++ b/arch/arm/boot/dts/kirkwood-ib62x0.dts @@ -51,7 +51,6 @@ }; }; serial@12000 { - clock-frequency = <200000000>; status = "okay"; }; diff --git a/arch/arm/boot/dts/kirkwood-iconnect.dts b/arch/arm/boot/dts/kirkwood-iconnect.dts index 504f16be8b54..12ccf74ac3c4 100644 --- a/arch/arm/boot/dts/kirkwood-iconnect.dts +++ b/arch/arm/boot/dts/kirkwood-iconnect.dts @@ -78,7 +78,6 @@ }; }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts index 6cae4599c4b3..3694e94f6e99 100644 --- a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts +++ b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts @@ -96,11 +96,11 @@ marvell,function = "gpio"; }; pmx_led_rebuild_brt_ctrl_1: pmx-led-rebuild-brt-ctrl-1 { - marvell,pins = "mpp44"; + marvell,pins = "mpp46"; marvell,function = "gpio"; }; pmx_led_rebuild_brt_ctrl_2: pmx-led-rebuild-brt-ctrl-2 { - marvell,pins = "mpp45"; + marvell,pins = "mpp47"; marvell,function = "gpio"; }; @@ -115,7 +115,6 @@ }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; @@ -158,14 +157,14 @@ gpios = <&gpio0 16 0>; linux,default-trigger = "default-on"; }; - health_led1 { + rebuild_led { + label = "status:white:rebuild_led"; + gpios = <&gpio1 4 0>; + }; + health_led { label = "status:red:health_led"; gpios = <&gpio1 5 0>; }; - health_led2 { - label = "status:white:health_led"; - gpios = <&gpio1 4 0>; - }; backup_led { label = "status:blue:backup_led"; gpios = <&gpio0 15 0>; diff --git a/arch/arm/boot/dts/kirkwood-km_kirkwood.dts b/arch/arm/boot/dts/kirkwood-km_kirkwood.dts index 8db3123ac80f..5bbd0542cdd3 100644 --- a/arch/arm/boot/dts/kirkwood-km_kirkwood.dts +++ b/arch/arm/boot/dts/kirkwood-km_kirkwood.dts @@ -34,7 +34,6 @@ }; serial@12000 { - clock-frequency = 
<200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood-lschlv2.dts b/arch/arm/boot/dts/kirkwood-lschlv2.dts index 9510c9ea666c..9f55d95f35f5 100644 --- a/arch/arm/boot/dts/kirkwood-lschlv2.dts +++ b/arch/arm/boot/dts/kirkwood-lschlv2.dts @@ -13,7 +13,6 @@ ocp@f1000000 { serial@12000 { - clock-frequency = <166666667>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/kirkwood-lsxhl.dts b/arch/arm/boot/dts/kirkwood-lsxhl.dts index 739019c4cba9..5c84c118ed8d 100644 --- a/arch/arm/boot/dts/kirkwood-lsxhl.dts +++ b/arch/arm/boot/dts/kirkwood-lsxhl.dts @@ -13,7 +13,6 @@ ocp@f1000000 { serial@12000 { - clock-frequency = <200000000>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/kirkwood-mplcec4.dts b/arch/arm/boot/dts/kirkwood-mplcec4.dts index 662dfd81b1ce..758824118a9a 100644 --- a/arch/arm/boot/dts/kirkwood-mplcec4.dts +++ b/arch/arm/boot/dts/kirkwood-mplcec4.dts @@ -90,7 +90,6 @@ }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood-ns2-common.dtsi b/arch/arm/boot/dts/kirkwood-ns2-common.dtsi index e8e7ecef1650..6affd924fe11 100644 --- a/arch/arm/boot/dts/kirkwood-ns2-common.dtsi +++ b/arch/arm/boot/dts/kirkwood-ns2-common.dtsi @@ -23,7 +23,6 @@ }; serial@12000 { - clock-frequency = <166666667>; status = "okay"; }; diff --git a/arch/arm/boot/dts/kirkwood-nsa310.dts b/arch/arm/boot/dts/kirkwood-nsa310.dts index 3a178cf708d7..a7412b937a8a 100644 --- a/arch/arm/boot/dts/kirkwood-nsa310.dts +++ b/arch/arm/boot/dts/kirkwood-nsa310.dts @@ -117,7 +117,6 @@ }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a6.dts b/arch/arm/boot/dts/kirkwood-openblocks_a6.dts index ede7fe0d7a87..d27f7245f8e7 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a6.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a6.dts @@ -18,12 +18,10 @@ ocp@f1000000 { serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; serial@12100 { - 
clock-frequency = <200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood-topkick.dts b/arch/arm/boot/dts/kirkwood-topkick.dts index 842ff95d60df..66eb45b00b25 100644 --- a/arch/arm/boot/dts/kirkwood-topkick.dts +++ b/arch/arm/boot/dts/kirkwood-topkick.dts @@ -108,7 +108,6 @@ }; serial@12000 { - clock-frequency = <200000000>; status = "ok"; }; diff --git a/arch/arm/boot/dts/kirkwood.dtsi b/arch/arm/boot/dts/kirkwood.dtsi index 2c738d9dc82a..fada7e6d24d8 100644 --- a/arch/arm/boot/dts/kirkwood.dtsi +++ b/arch/arm/boot/dts/kirkwood.dtsi @@ -38,6 +38,7 @@ interrupt-controller; #interrupt-cells = <2>; interrupts = <35>, <36>, <37>, <38>; + clocks = <&gate_clk 7>; }; gpio1: gpio@10140 { @@ -49,6 +50,7 @@ interrupt-controller; #interrupt-cells = <2>; interrupts = <39>, <40>, <41>; + clocks = <&gate_clk 7>; }; serial@12000 { @@ -57,7 +59,6 @@ reg-shift = <2>; interrupts = <33>; clocks = <&gate_clk 7>; - /* set clock-frequency in board dts */ status = "disabled"; }; @@ -67,7 +68,6 @@ reg-shift = <2>; interrupts = <34>; clocks = <&gate_clk 7>; - /* set clock-frequency in board dts */ status = "disabled"; }; @@ -75,6 +75,7 @@ compatible = "marvell,kirkwood-rtc", "marvell,orion-rtc"; reg = <0x10300 0x20>; interrupts = <53>; + clocks = <&gate_clk 7>; }; spi@10600 { diff --git a/arch/arm/boot/dts/orion5x-lacie-ethernet-disk-mini-v2.dts b/arch/arm/boot/dts/orion5x-lacie-ethernet-disk-mini-v2.dts index 5a3a58b7e18f..0077fc8510b7 100644 --- a/arch/arm/boot/dts/orion5x-lacie-ethernet-disk-mini-v2.dts +++ b/arch/arm/boot/dts/orion5x-lacie-ethernet-disk-mini-v2.dts @@ -11,7 +11,7 @@ / { model = "LaCie Ethernet Disk mini V2"; - compatible = "lacie,ethernet-disk-mini-v2", "marvell-orion5x-88f5182", "marvell,orion5x"; + compatible = "lacie,ethernet-disk-mini-v2", "marvell,orion5x-88f5182", "marvell,orion5x"; memory { reg = <0x00000000 0x4000000>; /* 64 MB */ diff --git a/arch/arm/boot/dts/orion5x.dtsi b/arch/arm/boot/dts/orion5x.dtsi index 8aad00f81ed9..f7bec3b1ba32 
100644 --- a/arch/arm/boot/dts/orion5x.dtsi +++ b/arch/arm/boot/dts/orion5x.dtsi @@ -13,6 +13,9 @@ compatible = "marvell,orion5x"; interrupt-parent = <&intc>; + aliases { + gpio0 = &gpio0; + }; intc: interrupt-controller { compatible = "marvell,orion-intc", "marvell,intc"; interrupt-controller; @@ -32,7 +35,9 @@ #gpio-cells = <2>; gpio-controller; reg = <0x10100 0x40>; - ngpio = <32>; + ngpios = <32>; + interrupt-controller; + #interrupt-cells = <2>; interrupts = <6>, <7>, <8>, <9>; }; @@ -91,7 +96,7 @@ reg = <0x90000 0x10000>, <0xf2200000 0x800>; reg-names = "regs", "sram"; - interrupts = <22>; + interrupts = <28>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/snowball.dts b/arch/arm/boot/dts/snowball.dts index 27f31a5fa494..d3ec32f6b790 100644 --- a/arch/arm/boot/dts/snowball.dts +++ b/arch/arm/boot/dts/snowball.dts @@ -298,7 +298,7 @@ }; }; - ab8500@5 { + ab8500 { ab8500-regulators { ab8500_ldo_aux1_reg: ab8500_ldo_aux1 { regulator-name = "V-DISPLAY"; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 936d2306e7e1..7e8769bd5977 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -75,6 +75,9 @@ compatible = "arm,pl330", "arm,primecell"; reg = <0xffe01000 0x1000>; interrupts = <0 180 4>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; }; diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 9a428931d042..3d3f64d2111a 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -118,6 +118,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x50040600 0x20>; interrupts = <1 13 0x304>; + clocks = <&tegra_car 132>; }; intc: interrupt-controller { @@ -384,7 +385,7 @@ spi@7000d800 { compatible = "nvidia,tegra20-slink"; - reg = <0x7000d480 0x200>; + reg = <0x7000d800 0x200>; interrupts = <0 83 0x04>; nvidia,dma-request-selector = <&apbdma 17>; #address-cells = <1>; diff --git a/arch/arm/boot/dts/tegra30.dtsi 
b/arch/arm/boot/dts/tegra30.dtsi index 767803e1fd55..dbf46c272562 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -119,6 +119,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x50040600 0x20>; interrupts = <1 13 0xf04>; + clocks = <&tegra_car 214>; }; intc: interrupt-controller { @@ -371,7 +372,7 @@ spi@7000d800 { compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; - reg = <0x7000d480 0x200>; + reg = <0x7000d800 0x200>; interrupts = <0 83 0x04>; nvidia,dma-request-selector = <&apbdma 17>; #address-cells = <1>; diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index fbbc5bb022d5..6a99e30f81d2 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -116,6 +116,7 @@ CONFIG_SND_SOC=y CONFIG_SND_MXS_SOC=y CONFIG_SND_SOC_MXS_SGTL5000=y CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_STORAGE=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index b16bae2c9a60..bd07864f14a0 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -126,6 +126,8 @@ CONFIG_INPUT_MISC=y CONFIG_INPUT_TWL4030_PWRBUTTON=y CONFIG_VT_HW_CONSOLE_BINDING=y # CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index 720799fd3a81..dff714d886d5 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -24,7 +24,7 @@ extern struct arm_delay_ops { void (*delay)(unsigned long); void (*const_udelay)(unsigned long); void (*udelay)(unsigned long); - bool const_clock; + unsigned long ticks_per_jiffy; } arm_delay_ops; #define __delay(n) arm_delay_ops.delay(n) diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index 
cca9f15704ed..ea289e1435e7 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -19,14 +19,6 @@ #undef _CACHE #undef MULTI_CACHE -#if defined(CONFIG_CPU_CACHE_V3) -# ifdef _CACHE -# define MULTI_CACHE 1 -# else -# define _CACHE v3 -# endif -#endif - #if defined(CONFIG_CPU_CACHE_V4) # ifdef _CACHE # define MULTI_CACHE 1 diff --git a/arch/arm/include/asm/hardware/iop3xx.h b/arch/arm/include/asm/hardware/iop3xx.h index 02fe2fbe2477..ed94b1a366ae 100644 --- a/arch/arm/include/asm/hardware/iop3xx.h +++ b/arch/arm/include/asm/hardware/iop3xx.h @@ -37,7 +37,7 @@ extern int iop3xx_get_init_atu(void); * IOP3XX processor registers */ #define IOP3XX_PERIPHERAL_PHYS_BASE 0xffffe000 -#define IOP3XX_PERIPHERAL_VIRT_BASE 0xfeffe000 +#define IOP3XX_PERIPHERAL_VIRT_BASE 0xfedfe000 #define IOP3XX_PERIPHERAL_SIZE 0x00002000 #define IOP3XX_PERIPHERAL_UPPER_PA (IOP3XX_PERIPHERAL_PHYS_BASE +\ IOP3XX_PERIPHERAL_SIZE - 1) diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 8c5e828f484d..91b99abe7a95 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -41,6 +41,13 @@ extern void kunmap_high(struct page *page); #endif #endif +/* + * Needed to be able to broadcast the TLB invalidation for kmap. 
+ */ +#ifdef CONFIG_ARM_ERRATA_798181 +#undef ARCH_NEEDS_KMAP_HIGH_GET +#endif + #ifdef ARCH_NEEDS_KMAP_HIGH_GET extern void *kmap_high_get(struct page *page); #else diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 9f77e7804f3b..e3d55547e755 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -5,15 +5,15 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID - u64 id; + atomic64_t id; #endif - unsigned int vmalloc_seq; + unsigned int vmalloc_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID #define ASID_BITS 8 #define ASID_MASK ((~0ULL) << ASID_BITS) -#define ASID(mm) ((mm)->context.id & ~ASID_MASK) +#define ASID(mm) ((mm)->context.id.counter & ~ASID_MASK) #else #define ASID(mm) (0) #endif @@ -26,7 +26,7 @@ typedef struct { * modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com> */ typedef struct { - unsigned long end_brk; + unsigned long end_brk; } mm_context_t; #endif diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index e1f644bc7cc5..a7b85e0d0cc1 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -25,7 +25,9 @@ void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); -#define init_new_context(tsk,mm) ({ mm->context.id = 0; }) +#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) + +DECLARE_PER_CPU(atomic64_t, active_asids); #else /* !CONFIG_CPU_HAS_ASID */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 6ef8afd1b64c..86b8fe398b95 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -111,7 +111,7 @@ #define L_PTE_S2_MT_WRITETHROUGH (_AT(pteval_t, 0xa) << 2) /* MemAttr[3:0] */ #define L_PTE_S2_MT_WRITEBACK (_AT(pteval_t, 0xf) << 2) /* MemAttr[3:0] */ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ -#define L_PTE_S2_RDWR 
(_AT(pteval_t, 2) << 6) /* HAP[2:1] */ +#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ /* * Hyp-mode PL2 PTE definitions for LPAE. diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 6e924d3a77eb..ab865e65a84c 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -14,7 +14,6 @@ #include <asm/glue.h> -#define TLB_V3_PAGE (1 << 0) #define TLB_V4_U_PAGE (1 << 1) #define TLB_V4_D_PAGE (1 << 2) #define TLB_V4_I_PAGE (1 << 3) @@ -22,7 +21,6 @@ #define TLB_V6_D_PAGE (1 << 5) #define TLB_V6_I_PAGE (1 << 6) -#define TLB_V3_FULL (1 << 8) #define TLB_V4_U_FULL (1 << 9) #define TLB_V4_D_FULL (1 << 10) #define TLB_V4_I_FULL (1 << 11) @@ -34,10 +32,13 @@ #define TLB_V6_D_ASID (1 << 17) #define TLB_V6_I_ASID (1 << 18) +#define TLB_V6_BP (1 << 19) + /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */ -#define TLB_V7_UIS_PAGE (1 << 19) -#define TLB_V7_UIS_FULL (1 << 20) -#define TLB_V7_UIS_ASID (1 << 21) +#define TLB_V7_UIS_PAGE (1 << 20) +#define TLB_V7_UIS_FULL (1 << 21) +#define TLB_V7_UIS_ASID (1 << 22) +#define TLB_V7_UIS_BP (1 << 23) #define TLB_BARRIER (1 << 28) #define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ @@ -49,7 +50,6 @@ * ============= * * We have the following to choose from: - * v3 - ARMv3 * v4 - ARMv4 without write buffer * v4wb - ARMv4 with write buffer without I TLB flush entry instruction * v4wbi - ARMv4 with write buffer with I TLB flush entry instruction @@ -150,7 +150,8 @@ #define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \ TLB_V6_I_FULL | TLB_V6_D_FULL | \ TLB_V6_I_PAGE | TLB_V6_D_PAGE | \ - TLB_V6_I_ASID | TLB_V6_D_ASID) + TLB_V6_I_ASID | TLB_V6_D_ASID | \ + TLB_V6_BP) #ifdef CONFIG_CPU_TLB_V6 # define v6wbi_possible_flags v6wbi_tlb_flags @@ -166,9 +167,11 @@ #endif #define v7wbi_tlb_flags_smp (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \ - TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) + TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | \ + TLB_V7_UIS_ASID | 
TLB_V7_UIS_BP) #define v7wbi_tlb_flags_up (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \ - TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID) + TLB_V6_U_FULL | TLB_V6_U_PAGE | \ + TLB_V6_U_ASID | TLB_V6_BP) #ifdef CONFIG_CPU_TLB_V7 @@ -324,7 +327,6 @@ static inline void local_flush_tlb_all(void) if (tlb_flag(TLB_WB)) dsb(); - tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero); @@ -345,9 +347,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) if (tlb_flag(TLB_WB)) dsb(); - if (possible_tlb_flags & (TLB_V3_FULL|TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { + if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { - tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero); @@ -379,9 +380,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) if (tlb_flag(TLB_WB)) dsb(); - if (possible_tlb_flags & (TLB_V3_PAGE|TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && + if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { - tlb_op(TLB_V3_PAGE, "c6, c0, 0", uaddr); tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr); tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", uaddr); tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", uaddr); @@ -412,7 +412,6 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) if (tlb_flag(TLB_WB)) dsb(); - tlb_op(TLB_V3_PAGE, "c6, c0, 0", kaddr); tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr); tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr); tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr); @@ -430,6 +429,35 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) } } +static inline void local_flush_bp_all(void) +{ + const 
int zero = 0; + const unsigned int __tlb_flag = __cpu_tlb_flags; + + if (tlb_flag(TLB_V7_UIS_BP)) + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero)); + else if (tlb_flag(TLB_V6_BP)) + asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero)); + + if (tlb_flag(TLB_BARRIER)) + isb(); +} + +#ifdef CONFIG_ARM_ERRATA_798181 +static inline void dummy_flush_tlb_a15_erratum(void) +{ + /* + * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0. + */ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(); +} +#else +static inline void dummy_flush_tlb_a15_erratum(void) +{ +} +#endif + /* * flush_pmd_entry * @@ -480,6 +508,7 @@ static inline void clean_pmd_entry(void *pmd) #define flush_tlb_kernel_page local_flush_tlb_kernel_page #define flush_tlb_range local_flush_tlb_range #define flush_tlb_kernel_range local_flush_tlb_kernel_range +#define flush_bp_all local_flush_bp_all #else extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); @@ -487,6 +516,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr); extern void flush_tlb_kernel_page(unsigned long kaddr); extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_bp_all(void); #endif /* diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h index 5c27696de14f..8b1f37bfeeec 100644 --- a/arch/arm/include/asm/xen/events.h +++ b/arch/arm/include/asm/xen/events.h @@ -2,6 +2,7 @@ #define _ASM_ARM_XEN_EVENTS_H #include <asm/ptrace.h> +#include <asm/atomic.h> enum ipi_vector { XEN_PLACEHOLDER_VECTOR, @@ -15,26 +16,8 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->ARM_cpsr); } -/* - * We cannot use xchg because it does not support 8-byte - * values. However it is safe to use {ldr,dtd}exd directly because all - * platforms which Xen can run on support those instructions. 
- */ -static inline xen_ulong_t xchg_xen_ulong(xen_ulong_t *ptr, xen_ulong_t val) -{ - xen_ulong_t oldval; - unsigned int tmp; - - wmb(); - asm volatile("@ xchg_xen_ulong\n" - "1: ldrexd %0, %H0, [%3]\n" - " strexd %1, %2, %H2, [%3]\n" - " teq %1, #0\n" - " bne 1b" - : "=&r" (oldval), "=&r" (tmp) - : "r" (val), "r" (ptr) - : "memory", "cc"); - return oldval; -} +#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \ + atomic64_t, \ + counter), (val)) #endif /* _ASM_ARM_XEN_EVENTS_H */ diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 4da7cde70b5d..af33b44990ed 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -404,7 +404,7 @@ #define __NR_setns (__NR_SYSCALL_BASE+375) #define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) - /* 378 for kcmp */ +#define __NR_kcmp (__NR_SYSCALL_BASE+378) #define __NR_finit_module (__NR_SYSCALL_BASE+379) /* diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 5ce738b43508..923eec7105cf 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -110,7 +110,7 @@ int main(void) BLANK(); #endif #ifdef CONFIG_CPU_HAS_ASID - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); #endif DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 0cc57611fc4f..c6ca7e376773 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -387,7 +387,7 @@ /* 375 */ CALL(sys_setns) CALL(sys_process_vm_readv) CALL(sys_process_vm_writev) - CALL(sys_ni_syscall) /* reserved for sys_kcmp */ + CALL(sys_kcmp) CALL(sys_finit_module) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S 
index 3248cde504ed..fefd7f971437 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -276,7 +276,13 @@ ENDPROC(ftrace_graph_caller_old) */ .macro mcount_enter +/* + * This pad compensates for the push {lr} at the call site. Note that we are + * unable to unwind through a function which does not otherwise save its lr. + */ + UNWIND(.pad #4) stmdb sp!, {r0-r3, lr} + UNWIND(.save {r0-r3, lr}) .endm .macro mcount_get_lr reg @@ -289,6 +295,7 @@ ENDPROC(ftrace_graph_caller_old) .endm ENTRY(__gnu_mcount_nc) +UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE mov ip, lr ldmia sp!, {lr} @@ -296,17 +303,22 @@ ENTRY(__gnu_mcount_nc) #else __mcount #endif +UNWIND(.fnend) ENDPROC(__gnu_mcount_nc) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller) +UNWIND(.fnstart) __ftrace_caller +UNWIND(.fnend) ENDPROC(ftrace_caller) #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) +UNWIND(.fnstart) __ftrace_graph_caller +UNWIND(.fnend) ENDPROC(ftrace_graph_caller) #endif diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 486a15ae9011..8bac553fe213 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -184,13 +184,22 @@ __create_page_tables: orr r3, r3, #3 @ PGD block type mov r6, #4 @ PTRS_PER_PGD mov r7, #1 << (55 - 32) @ L_PGD_SWAPPER -1: str r3, [r0], #4 @ set bottom PGD entry bits +1: +#ifdef CONFIG_CPU_ENDIAN_BE8 str r7, [r0], #4 @ set top PGD entry bits + str r3, [r0], #4 @ set bottom PGD entry bits +#else + str r3, [r0], #4 @ set bottom PGD entry bits + str r7, [r0], #4 @ set top PGD entry bits +#endif add r3, r3, #0x1000 @ next PMD table subs r6, r6, #1 bne 1b add r4, r4, #0x1000 @ point to the PMD tables +#ifdef CONFIG_CPU_ENDIAN_BE8 + add r4, r4, #4 @ we only write the bottom word +#endif #endif ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags @@ -258,6 +267,11 @@ __create_page_tables: addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] +#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) + sub 
r4, r4, #4 @ Fixup page table pointer + @ for 64-bit descriptors +#endif + #ifdef CONFIG_DEBUG_LL #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) /* @@ -276,13 +290,17 @@ __create_page_tables: orr r3, r7, r3, lsl #SECTION_SHIFT #ifdef CONFIG_ARM_LPAE mov r7, #1 << (54 - 32) @ XN +#ifdef CONFIG_CPU_ENDIAN_BE8 + str r7, [r0], #4 + str r3, [r0], #4 #else - orr r3, r3, #PMD_SECT_XN -#endif str r3, [r0], #4 -#ifdef CONFIG_ARM_LPAE str r7, [r0], #4 #endif +#else + orr r3, r3, #PMD_SECT_XN + str r3, [r0], #4 +#endif #else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */ /* we don't need any serial debugging mappings */ diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 5eae53e7a2e1..1fd749ee4a1b 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -966,7 +966,7 @@ static void reset_ctrl_regs(void *unused) } if (err) { - pr_warning("CPU %d debug is powered down!\n", cpu); + pr_warn_once("CPU %d debug is powered down!\n", cpu); cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); return; } @@ -987,7 +987,7 @@ clear_vcr: isb(); if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { - pr_warning("CPU %d failed to disable vector catch\n", cpu); + pr_warn_once("CPU %d failed to disable vector catch\n", cpu); return; } @@ -1007,7 +1007,7 @@ clear_vcr: } if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { - pr_warning("CPU %d failed to clear debug register pairs\n", cpu); + pr_warn_once("CPU %d failed to clear debug register pairs\n", cpu); return; } @@ -1023,7 +1023,7 @@ out_mdbgen: static int __cpuinit dbg_reset_notify(struct notifier_block *self, unsigned long action, void *cpu) { - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); return NOTIFY_OK; @@ -1043,7 +1043,7 @@ static int dbg_cpu_pm_notify(struct notifier_block *self, unsigned long action, return NOTIFY_OK; } 
-static struct notifier_block __cpuinitdata dbg_cpu_pm_nb = { +static struct notifier_block dbg_cpu_pm_nb = { .notifier_call = dbg_cpu_pm_notify, }; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 31e0eb353cd8..8c3094d0f7b7 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -253,7 +253,10 @@ validate_event(struct pmu_hw_events *hw_events, struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct pmu *leader_pmu = event->group_leader->pmu; - if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; return armpmu->get_event_idx(hw_events, event) >= 0; @@ -400,7 +403,7 @@ __hw_perf_event_init(struct perf_event *event) } if (event->group_leader != event) { - if (validate_group(event) != 0); + if (validate_group(event) != 0) return -EINVAL; } @@ -484,7 +487,7 @@ const struct dev_pm_ops armpmu_dev_pm_ops = { SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) }; -static void __init armpmu_init(struct arm_pmu *armpmu) +static void armpmu_init(struct arm_pmu *armpmu) { atomic_set(&armpmu->active_events, 0); mutex_init(&armpmu->reserve_mutex); diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8c79a9e70b83..039cffb053a7 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -774,7 +774,7 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] /* * PMXEVTYPER: Event selection reg */ -#define ARMV7_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ +#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ #define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ /* diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c index bd6f56b9ec21..59d2adb764a9 100644 --- a/arch/arm/kernel/sched_clock.c +++ 
b/arch/arm/kernel/sched_clock.c @@ -45,12 +45,12 @@ static u32 notrace jiffy_sched_clock_read(void) static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; -static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) +static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { return (cyc * mult) >> shift; } -static unsigned long long cyc_to_sched_clock(u32 cyc, u32 mask) +static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) { u64 epoch_ns; u32 epoch_cyc; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3f6cbb2e3eda..234e339196c0 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -56,7 +56,6 @@ #include <asm/virt.h> #include "atags.h" -#include "tcm.h" #if defined(CONFIG_FPE_NWFPE) || defined(CONFIG_FPE_FASTFPE) @@ -353,6 +352,23 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +static void __init cpuid_init_hwcaps(void) +{ + unsigned int divide_instrs; + + if (cpu_architecture() < CPU_ARCH_ARMv7) + return; + + divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; + + switch (divide_instrs) { + case 2: + elf_hwcap |= HWCAP_IDIVA; + case 1: + elf_hwcap |= HWCAP_IDIVT; + } +} + static void __init feat_v6_fixup(void) { int id = read_cpuid_id(); @@ -483,8 +499,11 @@ static void __init setup_processor(void) snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c", list->elf_name, ENDIANNESS); elf_hwcap = list->elf_hwcap; + + cpuid_init_hwcaps(); + #ifndef CONFIG_ARM_THUMB - elf_hwcap &= ~HWCAP_THUMB; + elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); #endif feat_v6_fixup(); @@ -524,7 +543,7 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) size -= start & ~PAGE_MASK; bank->start = PAGE_ALIGN(start); -#ifndef CONFIG_LPAE +#ifndef CONFIG_ARM_LPAE if (bank->start + size < bank->start) { printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in " "32-bit physical address space\n", (long long)start); @@ -778,8 +797,6 @@ void __init setup_arch(char 
**cmdline_p) reserve_crashkernel(); - tcm_init(); - #ifdef CONFIG_MULTI_IRQ_HANDLER handle_arch_irq = mdesc->handle_irq; #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 1bdfd87c8e41..1f2ccccaf009 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -285,6 +285,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) * switch away from it before attempting any exclusive accesses. */ cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); enter_lazy_tlb(mm, current); local_flush_tlb_all(); @@ -479,7 +480,7 @@ static void __cpuinit broadcast_timer_setup(struct clock_event_device *evt) evt->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_DUMMY; - evt->rating = 400; + evt->rating = 100; evt->mult = 1; evt->set_mode = broadcast_timer_set_mode; @@ -672,9 +673,6 @@ static int cpufreq_callback(struct notifier_block *nb, if (freq->flags & CPUFREQ_CONST_LOOPS) return NOTIFY_OK; - if (arm_delay_ops.const_clock) - return NOTIFY_OK; - if (!per_cpu(l_p_j_ref, cpu)) { per_cpu(l_p_j_ref, cpu) = per_cpu(cpu_data, cpu).loops_per_jiffy; diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 02c5d2ce23bf..e82e1d248772 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -12,6 +12,7 @@ #include <asm/smp_plat.h> #include <asm/tlbflush.h> +#include <asm/mmu_context.h> /**********************************************************************/ @@ -64,12 +65,77 @@ static inline void ipi_flush_tlb_kernel_range(void *arg) local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); } +static inline void ipi_flush_bp_all(void *ignored) +{ + local_flush_bp_all(); +} + +#ifdef CONFIG_ARM_ERRATA_798181 +static int erratum_a15_798181(void) +{ + unsigned int midr = read_cpuid_id(); + + /* Cortex-A15 r0p0..r3p2 affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) + return 0; + return 1; +} +#else +static int erratum_a15_798181(void) +{ + return 0; +} +#endif + +static void 
ipi_flush_tlb_a15_erratum(void *arg) +{ + dmb(); +} + +static void broadcast_tlb_a15_erratum(void) +{ + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + smp_call_function_many(cpu_online_mask, ipi_flush_tlb_a15_erratum, + NULL, 1); +} + +static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) +{ + int cpu; + cpumask_t mask = { CPU_BITS_NONE }; + + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + /* + * We only need to send an IPI if the other CPUs are running + * the same ASID as the one being invalidated. There is no + * need for locking around the active_asids check since the + * switch_mm() function has at least one dmb() (as required by + * this workaround) in case a context switch happens on + * another CPU after the condition below. + */ + if (atomic64_read(&mm->context.id) == + atomic64_read(&per_cpu(active_asids, cpu))) + cpumask_set_cpu(cpu, &mask); + } + smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); +} + void flush_tlb_all(void) { if (tlb_ops_need_broadcast()) on_each_cpu(ipi_flush_tlb_all, NULL, 1); else local_flush_tlb_all(); + broadcast_tlb_a15_erratum(); } void flush_tlb_mm(struct mm_struct *mm) @@ -78,6 +144,7 @@ void flush_tlb_mm(struct mm_struct *mm) on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1); else local_flush_tlb_mm(mm); + broadcast_tlb_mm_a15_erratum(mm); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) @@ -90,6 +157,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) &ta, 1); } else local_flush_tlb_page(vma, uaddr); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_page(unsigned long kaddr) @@ -100,6 +168,7 @@ void flush_tlb_kernel_page(unsigned long kaddr) on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); } else local_flush_tlb_kernel_page(kaddr); + broadcast_tlb_a15_erratum(); } void flush_tlb_range(struct vm_area_struct 
*vma, @@ -114,6 +183,7 @@ void flush_tlb_range(struct vm_area_struct *vma, &ta, 1); } else local_flush_tlb_range(vma, start, end); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -125,5 +195,13 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); } else local_flush_tlb_kernel_range(start, end); + broadcast_tlb_a15_erratum(); } +void flush_bp_all(void) +{ + if (tlb_ops_need_broadcast()) + on_each_cpu(ipi_flush_bp_all, NULL, 1); + else + local_flush_bp_all(); +} diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index c092115d903a..3f2565037480 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -22,6 +22,7 @@ #include <linux/of_irq.h> #include <linux/of_address.h> +#include <asm/smp_plat.h> #include <asm/smp_twd.h> #include <asm/localtimer.h> @@ -373,6 +374,9 @@ void __init twd_local_timer_of_register(void) struct device_node *np; int err; + if (!is_smp() || !setup_max_cpus) + return; + np = of_find_matching_node(NULL, twd_of_match); if (!np) return; diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 358bca3a995e..c59c97ea8268 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -68,6 +68,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ret = __cpu_suspend(arg, fn); if (ret == 0) { cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); local_flush_tlb_all(); } diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 30ae6bb4a310..f50f19e5c138 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,7 +17,6 @@ #include <asm/mach/map.h> #include <asm/memory.h> #include <asm/system_info.h> -#include "tcm.h" static struct gen_pool *tcm_pool; static bool dtcm_present; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5a936988eb24..c1fe498983ac 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -201,6 
+201,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; case KVM_CAP_ARM_SET_DEVICE_ADDR: r = 1; + break; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 4ea9a982269c..7bed7556077a 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -79,11 +79,11 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, u32 val; int cpu; - cpu = get_cpu(); - if (!p->is_write) return read_from_write_only(vcpu, p); + cpu = get_cpu(); + cpumask_setall(&vcpu->arch.require_dcache_flush); cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index c9a17316e9fe..0e4cfe123b38 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -883,8 +883,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) lr, irq, vgic_cpu->vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; - - goto out; + return true; } /* Try to use another LR for this interrupt */ @@ -898,7 +897,6 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); -out: if (!vgic_irq_is_edge(vcpu, irq)) vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; @@ -1018,21 +1016,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); - /* - * We do not need to take the distributor lock here, since the only - * action we perform is clearing the irq_active_bit for an EOIed - * level interrupt. There is a potential race with - * the queuing of an interrupt in __kvm_vgic_flush_hwstate(), where we - * check if the interrupt is already active. 
Two possibilities: - * - * - The queuing is occurring on the same vcpu: cannot happen, - * as we're already in the context of this vcpu, and - * executing the handler - * - The interrupt has been migrated to another vcpu, and we - * ignore this interrupt for this run. Big deal. It is still - * pending though, and will get considered when this vcpu - * exits. - */ if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { /* * Some level interrupts have been EOIed. Clear their @@ -1054,6 +1037,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } else { vgic_cpu_irq_clear(vcpu, irq); } + + /* + * Despite being EOIed, the LR may not have + * been marked as empty. + */ + set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } @@ -1064,9 +1054,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } /* - * Sync back the VGIC state after a guest run. We do not really touch - * the distributor here (the irq_pending_on_cpu bit is safe to set), - * so there is no need for taking its lock. + * Sync back the VGIC state after a guest run. The distributor lock is + * needed so we don't get preempted in the middle of the state processing. 
*/ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { @@ -1112,10 +1101,14 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + if (!irqchip_in_kernel(vcpu->kvm)) return; + spin_lock(&dist->lock); __kvm_vgic_sync_hwstate(vcpu); + spin_unlock(&dist->lock); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index 6b93f6a1a3c7..64dbfa57204a 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -58,7 +58,7 @@ static void __timer_delay(unsigned long cycles) static void __timer_const_udelay(unsigned long xloops) { unsigned long long loops = xloops; - loops *= loops_per_jiffy; + loops *= arm_delay_ops.ticks_per_jiffy; __timer_delay(loops >> UDELAY_SHIFT); } @@ -73,11 +73,13 @@ void __init register_current_timer_delay(const struct delay_timer *timer) pr_info("Switching to timer-based delay loop\n"); delay_timer = timer; lpj_fine = timer->freq / HZ; - loops_per_jiffy = lpj_fine; + + /* cpufreq may scale loops_per_jiffy, so keep a private copy */ + arm_delay_ops.ticks_per_jiffy = lpj_fine; arm_delay_ops.delay = __timer_delay; arm_delay_ops.const_udelay = __timer_const_udelay; arm_delay_ops.udelay = __timer_udelay; - arm_delay_ops.const_clock = true; + delay_calibrated = true; } else { pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 650d5923ab83..94b0650ea98f 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -14,27 +14,15 @@ .text .align 5 - .word 0 - -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memset again. 
- */ ENTRY(memset) ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 /* - * we know that the pointer in r0 is aligned to a word boundary. + * we know that the pointer in ip is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 +1: orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 mov r3, r1 cmp r2, #16 @@ -43,29 +31,28 @@ ENTRY(memset) #if ! CALGN(1)+0 /* - * We need an extra register for this loop - save the return address and - * use the LR + * We need 2 extra registers for this loop - use r8 and the LR */ - str lr, [sp, #-4]! - mov ip, r1 + stmfd sp!, {r8, lr} + mov r8, r1 mov lr, r1 2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} + stmneia ip!, {r1, r3, r8, lr} + stmneia ip!, {r1, r3, r8, lr} tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + stmneia ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} #else @@ -74,54 +61,63 @@ ENTRY(memset) * whole cache lines at once. 
*/ - stmfd sp!, {r4-r7, lr} + stmfd sp!, {r4-r8, lr} mov r4, r1 mov r5, r1 mov r6, r1 mov r7, r1 - mov ip, r1 + mov r8, r1 mov lr, r1 cmp r2, #96 - tstgt r0, #31 + tstgt ip, #31 ble 3f - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmcsia ip!, {r4, r5, r6, r7} + stmmiia ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia ip!, {r1, r3-r8, lr} + stmgeia ip!, {r1, r3-r8, lr} bgt 3b - ldmeqfd sp!, {r4-r7, pc} + ldmeqfd sp!, {r4-r8, pc} tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} + stmneia ip!, {r1, r3-r8, lr} tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + stmneia ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} #endif 4: tst r2, #8 - stmneia r0!, {r1, r3} + stmneia ip!, {r1, r3} tst r2, #4 - strne r1, [r0], #4 + strne r1, [ip], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ 5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 + strneb r1, [ip], #1 tst r2, #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 mov pc, lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? 
+ cmp r3, #2 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b ENDPROC(memset) diff --git a/arch/arm/mach-at91/board-foxg20.c b/arch/arm/mach-at91/board-foxg20.c index 2ea7059b840b..c20a870ea9c9 100644 --- a/arch/arm/mach-at91/board-foxg20.c +++ b/arch/arm/mach-at91/board-foxg20.c @@ -176,6 +176,7 @@ static struct w1_gpio_platform_data w1_gpio_pdata = { /* If you choose to use a pin other than PB16 it needs to be 3.3V */ .pin = AT91_PIN_PB16, .is_open_drain = 1, + .ext_pullup_enable_pin = -EINVAL, }; static struct platform_device w1_device = { diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c index a033b8df9fb2..869cbecf00b7 100644 --- a/arch/arm/mach-at91/board-stamp9g20.c +++ b/arch/arm/mach-at91/board-stamp9g20.c @@ -188,6 +188,7 @@ static struct spi_board_info portuxg20_spi_devices[] = { static struct w1_gpio_platform_data w1_gpio_pdata = { .pin = AT91_PIN_PA29, .is_open_drain = 1, + .ext_pullup_enable_pin = -EINVAL, }; static struct platform_device w1_device = { diff --git a/arch/arm/mach-at91/include/mach/gpio.h b/arch/arm/mach-at91/include/mach/gpio.h index eed465ab0dd7..5fc23771c154 100644 --- a/arch/arm/mach-at91/include/mach/gpio.h +++ b/arch/arm/mach-at91/include/mach/gpio.h @@ -209,6 +209,14 @@ extern int at91_get_gpio_value(unsigned pin); extern void at91_gpio_suspend(void); extern void at91_gpio_resume(void); +#ifdef CONFIG_PINCTRL_AT91 +extern void at91_pinctrl_gpio_suspend(void); +extern void at91_pinctrl_gpio_resume(void); +#else +static inline void at91_pinctrl_gpio_suspend(void) {} +static inline void at91_pinctrl_gpio_resume(void) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/mach-at91/irq.c b/arch/arm/mach-at91/irq.c index 8e210262aeee..e0ca59171022 100644 --- a/arch/arm/mach-at91/irq.c +++ b/arch/arm/mach-at91/irq.c @@ -92,23 +92,21 @@ static int at91_aic_set_wake(struct irq_data *d, unsigned value) void 
at91_irq_suspend(void) { - int i = 0, bit; + int bit = -1; if (has_aic5()) { /* disable enabled irqs */ - while ((bit = find_next_bit(backups, n_irqs, i)) < n_irqs) { + while ((bit = find_next_bit(backups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IDCR, 1); - i = bit; } /* enable wakeup irqs */ - i = 0; - while ((bit = find_next_bit(wakeups, n_irqs, i)) < n_irqs) { + bit = -1; + while ((bit = find_next_bit(wakeups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IECR, 1); - i = bit; } } else { at91_aic_write(AT91_AIC_IDCR, *backups); @@ -118,23 +116,21 @@ void at91_irq_suspend(void) void at91_irq_resume(void) { - int i = 0, bit; + int bit = -1; if (has_aic5()) { /* disable wakeup irqs */ - while ((bit = find_next_bit(wakeups, n_irqs, i)) < n_irqs) { + while ((bit = find_next_bit(wakeups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IDCR, 1); - i = bit; } /* enable irqs disabled for suspend */ - i = 0; - while ((bit = find_next_bit(backups, n_irqs, i)) < n_irqs) { + bit = -1; + while ((bit = find_next_bit(backups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IECR, 1); - i = bit; } } else { at91_aic_write(AT91_AIC_IDCR, *wakeups); diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index adb6db888a1f..73f1f250403a 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -201,7 +201,10 @@ extern u32 at91_slow_clock_sz; static int at91_pm_enter(suspend_state_t state) { - at91_gpio_suspend(); + if (of_have_populated_dt()) + at91_pinctrl_gpio_suspend(); + else + at91_gpio_suspend(); at91_irq_suspend(); pr_debug("AT91: PM - wake mask %08x, pm state %d\n", @@ -286,7 +289,10 @@ static int at91_pm_enter(suspend_state_t state) error: target_state = PM_SUSPEND_ON; at91_irq_resume(); 
- at91_gpio_resume(); + if (of_have_populated_dt()) + at91_pinctrl_gpio_resume(); + else + at91_gpio_resume(); return 0; } diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index e698f26cc0cb..52e4bb5cf12d 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -22,19 +22,9 @@ static struct map_desc cns3xxx_io_desc[] __initdata = { { - .virtual = CNS3XXX_TC11MP_TWD_BASE_VIRT, - .pfn = __phys_to_pfn(CNS3XXX_TC11MP_TWD_BASE), - .length = SZ_4K, - .type = MT_DEVICE, - }, { - .virtual = CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT, - .pfn = __phys_to_pfn(CNS3XXX_TC11MP_GIC_CPU_BASE), - .length = SZ_4K, - .type = MT_DEVICE, - }, { - .virtual = CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT, - .pfn = __phys_to_pfn(CNS3XXX_TC11MP_GIC_DIST_BASE), - .length = SZ_4K, + .virtual = CNS3XXX_TC11MP_SCU_BASE_VIRT, + .pfn = __phys_to_pfn(CNS3XXX_TC11MP_SCU_BASE), + .length = SZ_8K, .type = MT_DEVICE, }, { .virtual = CNS3XXX_TIMER1_2_3_BASE_VIRT, diff --git a/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h b/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h index 191c8e57f289..b1021aafa481 100644 --- a/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h +++ b/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h @@ -94,10 +94,10 @@ #define RTC_INTR_STS_OFFSET 0x34 #define CNS3XXX_MISC_BASE 0x76000000 /* Misc Control */ -#define CNS3XXX_MISC_BASE_VIRT 0xFFF07000 /* Misc Control */ +#define CNS3XXX_MISC_BASE_VIRT 0xFB000000 /* Misc Control */ #define CNS3XXX_PM_BASE 0x77000000 /* Power Management Control */ -#define CNS3XXX_PM_BASE_VIRT 0xFFF08000 +#define CNS3XXX_PM_BASE_VIRT 0xFB001000 #define PM_CLK_GATE_OFFSET 0x00 #define PM_SOFT_RST_OFFSET 0x04 @@ -109,7 +109,7 @@ #define PM_PLL_HM_PD_OFFSET 0x1C #define CNS3XXX_UART0_BASE 0x78000000 /* UART 0 */ -#define CNS3XXX_UART0_BASE_VIRT 0xFFF09000 +#define CNS3XXX_UART0_BASE_VIRT 0xFB002000 #define CNS3XXX_UART1_BASE 0x78400000 /* UART 1 */ #define CNS3XXX_UART1_BASE_VIRT 0xFFF0A000 @@ -130,7 +130,7 @@ #define CNS3XXX_I2S_BASE_VIRT 
0xFFF10000 #define CNS3XXX_TIMER1_2_3_BASE 0x7C800000 /* Timer */ -#define CNS3XXX_TIMER1_2_3_BASE_VIRT 0xFFF10800 +#define CNS3XXX_TIMER1_2_3_BASE_VIRT 0xFB003000 #define TIMER1_COUNTER_OFFSET 0x00 #define TIMER1_AUTO_RELOAD_OFFSET 0x04 @@ -227,16 +227,16 @@ * Testchip peripheral and fpga gic regions */ #define CNS3XXX_TC11MP_SCU_BASE 0x90000000 /* IRQ, Test chip */ -#define CNS3XXX_TC11MP_SCU_BASE_VIRT 0xFF000000 +#define CNS3XXX_TC11MP_SCU_BASE_VIRT 0xFB004000 #define CNS3XXX_TC11MP_GIC_CPU_BASE 0x90000100 /* Test chip interrupt controller CPU interface */ -#define CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT 0xFF000100 +#define CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT (CNS3XXX_TC11MP_SCU_BASE_VIRT + 0x100) #define CNS3XXX_TC11MP_TWD_BASE 0x90000600 -#define CNS3XXX_TC11MP_TWD_BASE_VIRT 0xFF000600 +#define CNS3XXX_TC11MP_TWD_BASE_VIRT (CNS3XXX_TC11MP_SCU_BASE_VIRT + 0x600) #define CNS3XXX_TC11MP_GIC_DIST_BASE 0x90001000 /* Test chip interrupt controller distributor */ -#define CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT 0xFF001000 +#define CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT (CNS3XXX_TC11MP_SCU_BASE_VIRT + 0x1000) #define CNS3XXX_TC11MP_L220_BASE 0x92002000 /* L220 registers */ #define CNS3XXX_TC11MP_L220_BASE_VIRT 0xFF002000 diff --git a/arch/arm/mach-davinci/dma.c b/arch/arm/mach-davinci/dma.c index a685e9706b7b..45b7c71d9cc1 100644 --- a/arch/arm/mach-davinci/dma.c +++ b/arch/arm/mach-davinci/dma.c @@ -743,6 +743,9 @@ EXPORT_SYMBOL(edma_free_channel); */ int edma_alloc_slot(unsigned ctlr, int slot) { + if (!edma_cc[ctlr]) + return -EINVAL; + if (slot >= 0) slot = EDMA_CHAN_SLOT(slot); diff --git a/arch/arm/mach-ep93xx/include/mach/uncompress.h b/arch/arm/mach-ep93xx/include/mach/uncompress.h index d2afb4dd82ab..b5cc77d2380b 100644 --- a/arch/arm/mach-ep93xx/include/mach/uncompress.h +++ b/arch/arm/mach-ep93xx/include/mach/uncompress.h @@ -47,9 +47,13 @@ static void __raw_writel(unsigned int value, unsigned int ptr) static inline void putc(int c) { - /* Transmit fifo not full? 
*/ - while (__raw_readb(PHYS_UART_FLAG) & UART_FLAG_TXFF) - ; + int i; + + for (i = 0; i < 10000; i++) { + /* Transmit fifo not full? */ + if (!(__raw_readb(PHYS_UART_FLAG) & UART_FLAG_TXFF)) + break; + } __raw_writeb(c, PHYS_UART_DATA); } diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index abda5a18a664..0f2111a11315 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -67,6 +67,7 @@ config ARCH_NETWINDER select ISA select ISA_DMA select PCI + select VIRT_TO_BUS help Say Y here if you intend to run this kernel on the Rebel.COM NetWinder. Information about this machine can be found at: diff --git a/arch/arm/mach-highbank/hotplug.c b/arch/arm/mach-highbank/hotplug.c index f30c52843396..890cae23c12a 100644 --- a/arch/arm/mach-highbank/hotplug.c +++ b/arch/arm/mach-highbank/hotplug.c @@ -28,13 +28,11 @@ extern void secondary_startup(void); */ void __ref highbank_cpu_die(unsigned int cpu) { - flush_cache_all(); - highbank_set_cpu_jump(cpu, phys_to_virt(0)); - highbank_set_core_pwr(); - cpu_do_idle(); + flush_cache_louis(); + highbank_set_core_pwr(); - /* We should never return from idle */ - panic("highbank: cpu %d unexpectedly exit from shutdown\n", cpu); + while (1) + cpu_do_idle(); } diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c index 74e3a34d78b8..2193c834f55c 100644 --- a/arch/arm/mach-imx/clk-imx35.c +++ b/arch/arm/mach-imx/clk-imx35.c @@ -257,6 +257,7 @@ int __init mx35_clocks_init(void) clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0"); clk_register_clkdev(clk[nfc_div], NULL, "imx25-nand.0"); clk_register_clkdev(clk[csi_gate], NULL, "mx3-camera.0"); + clk_register_clkdev(clk[admux_gate], "audmux", NULL); clk_prepare_enable(clk[spba_gate]); clk_prepare_enable(clk[gpio1_gate]); @@ -264,6 +265,8 @@ int __init mx35_clocks_init(void) clk_prepare_enable(clk[gpio3_gate]); clk_prepare_enable(clk[iim_gate]); clk_prepare_enable(clk[emi_gate]); + 
clk_prepare_enable(clk[max_gate]); + clk_prepare_enable(clk[iomuxc_gate]); /* * SCC is needed to boot via mmc after a watchdog reset. The clock code diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 7b025ee528a5..d38e54f5b6d7 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -115,7 +115,7 @@ static const char *gpu2d_core_sels[] = { "axi", "pll3_usb_otg", "pll2_pfd0_352m" static const char *gpu3d_core_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd2_396m", }; static const char *gpu3d_shader_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd9_720m", }; static const char *ipu_sels[] = { "mmdc_ch0_axi", "pll2_pfd2_396m", "pll3_120m", "pll3_pfd1_540m", }; -static const char *ldb_di_sels[] = { "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "mmdc_ch1_axi", "pll3_pfd1_540m", }; +static const char *ldb_di_sels[] = { "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "mmdc_ch1_axi", "pll3_usb_otg", }; static const char *ipu_di_pre_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "pll3_pfd1_540m", }; static const char *ipu1_di0_sels[] = { "ipu1_di0_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; static const char *ipu1_di1_sels[] = { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; @@ -172,7 +172,7 @@ static struct clk *clk[clk_max]; static struct clk_onecell_data clk_data; static enum mx6q_clks const clks_init_on[] __initconst = { - mmdc_ch0_axi, rom, + mmdc_ch0_axi, rom, pll1_sys, }; static struct clk_div_table clk_enet_ref_table[] = { @@ -443,7 +443,6 @@ int __init mx6q_clocks_init(void) clk_register_clkdev(clk[gpt_ipg], "ipg", "imx-gpt.0"); clk_register_clkdev(clk[gpt_ipg_per], "per", "imx-gpt.0"); - clk_register_clkdev(clk[twd], NULL, "smp_twd"); clk_register_clkdev(clk[cko1_sel], "cko1_sel", NULL); clk_register_clkdev(clk[ahb], "ahb", NULL); clk_register_clkdev(clk[cko1], "cko1", NULL); diff --git 
a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index 5a800bfcec5b..5bf4a97ab241 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -110,6 +110,8 @@ void tzic_handle_irq(struct pt_regs *); extern void imx_enable_cpu(int cpu, bool enable); extern void imx_set_cpu_jump(int cpu, void *jump_addr); +extern u32 imx_get_cpu_arg(int cpu); +extern void imx_set_cpu_arg(int cpu, u32 arg); extern void v7_cpu_resume(void); extern u32 *pl310_get_save_ptr(void); #ifdef CONFIG_SMP diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S index 921fc1555854..a58c8b0527cc 100644 --- a/arch/arm/mach-imx/headsmp.S +++ b/arch/arm/mach-imx/headsmp.S @@ -26,16 +26,16 @@ ENDPROC(v7_secondary_startup) #ifdef CONFIG_PM /* - * The following code is located into the .data section. This is to - * allow phys_l2x0_saved_regs to be accessed with a relative load - * as we are running on physical address here. + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. */ - .data - .align #ifdef CONFIG_CACHE_L2X0 .macro pl310_resume - ldr r2, phys_l2x0_saved_regs + adr r0, l2x0_saved_regs_offset + ldr r2, [r0] + add r2, r2, r0 ldr r0, [r2, #L2X0_R_PHY_BASE] @ get physical base of l2x0 ldr r1, [r2, #L2X0_R_AUX_CTRL] @ get aux_ctrl value str r1, [r0, #L2X0_AUX_CTRL] @ restore aux_ctrl @@ -43,9 +43,9 @@ ENDPROC(v7_secondary_startup) str r1, [r0, #L2X0_CTRL] @ re-enable L2 .endm - .globl phys_l2x0_saved_regs -phys_l2x0_saved_regs: - .long 0 +l2x0_saved_regs_offset: + .word l2x0_saved_regs - . 
+ #else .macro pl310_resume .endm diff --git a/arch/arm/mach-imx/hotplug.c b/arch/arm/mach-imx/hotplug.c index 7bc5fe15dda2..361a253e2b63 100644 --- a/arch/arm/mach-imx/hotplug.c +++ b/arch/arm/mach-imx/hotplug.c @@ -46,11 +46,23 @@ static inline void cpu_enter_lowpower(void) void imx_cpu_die(unsigned int cpu) { cpu_enter_lowpower(); + /* + * We use the cpu jumping argument register to sync with + * imx_cpu_kill() which is running on cpu0 and waiting for + * the register being cleared to kill the cpu. + */ + imx_set_cpu_arg(cpu, ~0); cpu_do_idle(); } int imx_cpu_kill(unsigned int cpu) { + unsigned long timeout = jiffies + msecs_to_jiffies(50); + + while (imx_get_cpu_arg(cpu) == 0) + if (time_after(jiffies, timeout)) + return 0; imx_enable_cpu(cpu, false); + imx_set_cpu_arg(cpu, 0); return 1; } diff --git a/arch/arm/mach-imx/imx25-dt.c b/arch/arm/mach-imx/imx25-dt.c index 03b65e5ea541..82348391582a 100644 --- a/arch/arm/mach-imx/imx25-dt.c +++ b/arch/arm/mach-imx/imx25-dt.c @@ -27,6 +27,11 @@ static const char * const imx25_dt_board_compat[] __initconst = { NULL }; +static void __init imx25_timer_init(void) +{ + mx25_clocks_init_dt(); +} + DT_MACHINE_START(IMX25_DT, "Freescale i.MX25 (Device Tree Support)") .map_io = mx25_map_io, .init_early = imx25_init_early, diff --git a/arch/arm/mach-imx/pm-imx6q.c b/arch/arm/mach-imx/pm-imx6q.c index ee42d20cba19..5faba7a3c95f 100644 --- a/arch/arm/mach-imx/pm-imx6q.c +++ b/arch/arm/mach-imx/pm-imx6q.c @@ -22,8 +22,6 @@ #include "common.h" #include "hardware.h" -extern unsigned long phys_l2x0_saved_regs; - static int imx6q_suspend_finish(unsigned long val) { cpu_do_idle(); @@ -57,18 +55,5 @@ static const struct platform_suspend_ops imx6q_pm_ops = { void __init imx6q_pm_init(void) { - /* - * The l2x0 core code provides an infrastucture to save and restore - * l2x0 registers across suspend/resume cycle. 
But because imx6q - * retains L2 content during suspend and needs to resume L2 before - * MMU is enabled, it can only utilize register saving support and - * have to take care of restoring on its own. So we save physical - * address of the data structure used by l2x0 core to save registers, - * and later restore the necessary ones in imx6q resume entry. - */ -#ifdef CONFIG_CACHE_L2X0 - phys_l2x0_saved_regs = __pa(&l2x0_saved_regs); -#endif - suspend_set_ops(&imx6q_pm_ops); } diff --git a/arch/arm/mach-imx/src.c b/arch/arm/mach-imx/src.c index e15f1555c59b..09a742f8c7ab 100644 --- a/arch/arm/mach-imx/src.c +++ b/arch/arm/mach-imx/src.c @@ -43,6 +43,18 @@ void imx_set_cpu_jump(int cpu, void *jump_addr) src_base + SRC_GPR1 + cpu * 8); } +u32 imx_get_cpu_arg(int cpu) +{ + cpu = cpu_logical_map(cpu); + return readl_relaxed(src_base + SRC_GPR1 + cpu * 8 + 4); +} + +void imx_set_cpu_arg(int cpu, u32 arg) +{ + cpu = cpu_logical_map(cpu); + writel_relaxed(arg, src_base + SRC_GPR1 + cpu * 8 + 4); +} + void imx_src_prepare_restart(void) { u32 val; diff --git a/arch/arm/mach-ixp4xx/vulcan-setup.c b/arch/arm/mach-ixp4xx/vulcan-setup.c index d42730a1d4ab..d599e354ca57 100644 --- a/arch/arm/mach-ixp4xx/vulcan-setup.c +++ b/arch/arm/mach-ixp4xx/vulcan-setup.c @@ -163,6 +163,7 @@ static struct platform_device vulcan_max6369 = { static struct w1_gpio_platform_data vulcan_w1_gpio_pdata = { .pin = 14, + .ext_pullup_enable_pin = -EINVAL, }; static struct platform_device vulcan_w1_gpio = { diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c index 2e73e9d53f70..d367aa6b47bb 100644 --- a/arch/arm/mach-kirkwood/board-dt.c +++ b/arch/arm/mach-kirkwood/board-dt.c @@ -41,16 +41,12 @@ static void __init kirkwood_legacy_clk_init(void) struct device_node *np = of_find_compatible_node( NULL, NULL, "marvell,kirkwood-gating-clock"); - struct of_phandle_args clkspec; + struct clk *clk; clkspec.np = np; clkspec.args_count = 1; - clkspec.args[0] = CGC_BIT_GE0; - 
orion_clkdev_add(NULL, "mv643xx_eth_port.0", - of_clk_get_from_provider(&clkspec)); - clkspec.args[0] = CGC_BIT_PEX0; orion_clkdev_add("0", "pcie", of_clk_get_from_provider(&clkspec)); @@ -59,9 +55,24 @@ static void __init kirkwood_legacy_clk_init(void) orion_clkdev_add("1", "pcie", of_clk_get_from_provider(&clkspec)); - clkspec.args[0] = CGC_BIT_GE1; - orion_clkdev_add(NULL, "mv643xx_eth_port.1", + clkspec.args[0] = CGC_BIT_SDIO; + orion_clkdev_add(NULL, "mvsdio", of_clk_get_from_provider(&clkspec)); + + /* + * The ethernet interfaces forget the MAC address assigned by + * u-boot if the clocks are turned off. Until proper DT support + * is available we always enable them for now. + */ + clkspec.args[0] = CGC_BIT_GE0; + clk = of_clk_get_from_provider(&clkspec); + orion_clkdev_add(NULL, "mv643xx_eth_port.0", clk); + clk_prepare_enable(clk); + + clkspec.args[0] = CGC_BIT_GE1; + clk = of_clk_get_from_provider(&clkspec); + orion_clkdev_add(NULL, "mv643xx_eth_port.1", clk); + clk_prepare_enable(clk); } static void __init kirkwood_of_clk_init(void) diff --git a/arch/arm/mach-kirkwood/board-iomega_ix2_200.c b/arch/arm/mach-kirkwood/board-iomega_ix2_200.c index f655b2637b0e..e5f70415905a 100644 --- a/arch/arm/mach-kirkwood/board-iomega_ix2_200.c +++ b/arch/arm/mach-kirkwood/board-iomega_ix2_200.c @@ -20,10 +20,15 @@ static struct mv643xx_eth_platform_data iomega_ix2_200_ge00_data = { .duplex = DUPLEX_FULL, }; +static struct mv643xx_eth_platform_data iomega_ix2_200_ge01_data = { + .phy_addr = MV643XX_ETH_PHY_ADDR(11), +}; + void __init iomega_ix2_200_init(void) { /* * Basic setup. Needs to be called early. 
*/ - kirkwood_ge01_init(&iomega_ix2_200_ge00_data); + kirkwood_ge00_init(&iomega_ix2_200_ge00_data); + kirkwood_ge01_init(&iomega_ix2_200_ge01_data); } diff --git a/arch/arm/mach-kirkwood/guruplug-setup.c b/arch/arm/mach-kirkwood/guruplug-setup.c index 1c6e736cbbf8..08dd739aa709 100644 --- a/arch/arm/mach-kirkwood/guruplug-setup.c +++ b/arch/arm/mach-kirkwood/guruplug-setup.c @@ -53,6 +53,8 @@ static struct mv_sata_platform_data guruplug_sata_data = { static struct mvsdio_platform_data guruplug_mvsdio_data = { /* unfortunately the CD signal has not been connected */ + .gpio_card_detect = -1, + .gpio_write_protect = -1, }; static struct gpio_led guruplug_led_pins[] = { diff --git a/arch/arm/mach-kirkwood/openrd-setup.c b/arch/arm/mach-kirkwood/openrd-setup.c index 8ddd69fdc937..6a6eb548307d 100644 --- a/arch/arm/mach-kirkwood/openrd-setup.c +++ b/arch/arm/mach-kirkwood/openrd-setup.c @@ -55,6 +55,7 @@ static struct mv_sata_platform_data openrd_sata_data = { static struct mvsdio_platform_data openrd_mvsdio_data = { .gpio_card_detect = 29, /* MPP29 used as SD card detect */ + .gpio_write_protect = -1, }; static unsigned int openrd_mpp_config[] __initdata = { diff --git a/arch/arm/mach-kirkwood/rd88f6281-setup.c b/arch/arm/mach-kirkwood/rd88f6281-setup.c index c7d93b48926b..d24223166e06 100644 --- a/arch/arm/mach-kirkwood/rd88f6281-setup.c +++ b/arch/arm/mach-kirkwood/rd88f6281-setup.c @@ -69,6 +69,7 @@ static struct mv_sata_platform_data rd88f6281_sata_data = { static struct mvsdio_platform_data rd88f6281_mvsdio_data = { .gpio_card_detect = 28, + .gpio_write_protect = -1, }; static unsigned int rd88f6281_mpp_config[] __initdata = { diff --git a/arch/arm/mach-mmp/gplugd.c b/arch/arm/mach-mmp/gplugd.c index d1e2d595e79c..f62b68d926f4 100644 --- a/arch/arm/mach-mmp/gplugd.c +++ b/arch/arm/mach-mmp/gplugd.c @@ -9,6 +9,7 @@ */ #include <linux/init.h> +#include <linux/platform_device.h> #include <linux/gpio.h> #include <asm/mach/arch.h> diff --git 
a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 2969027f02fa..f9fd77e8f1f5 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -62,7 +62,10 @@ static int msm_timer_set_next_event(unsigned long cycles, { u32 ctrl = readl_relaxed(event_base + TIMER_ENABLE); - writel_relaxed(0, event_base + TIMER_CLEAR); + ctrl &= ~TIMER_ENABLE_EN; + writel_relaxed(ctrl, event_base + TIMER_ENABLE); + + writel_relaxed(ctrl, event_base + TIMER_CLEAR); writel_relaxed(cycles, event_base + TIMER_MATCH_VAL); writel_relaxed(ctrl | TIMER_ENABLE_EN, event_base + TIMER_ENABLE); return 0; diff --git a/arch/arm/mach-mvebu/irq-armada-370-xp.c b/arch/arm/mach-mvebu/irq-armada-370-xp.c index 274ff58271de..d5970f5a1e8d 100644 --- a/arch/arm/mach-mvebu/irq-armada-370-xp.c +++ b/arch/arm/mach-mvebu/irq-armada-370-xp.c @@ -44,6 +44,8 @@ #define ARMADA_370_XP_MAX_PER_CPU_IRQS (28) +#define ARMADA_370_XP_TIMER0_PER_CPU_IRQ (5) + #define ACTIVE_DOORBELLS (8) static DEFINE_RAW_SPINLOCK(irq_controller_lock); @@ -59,36 +61,26 @@ static struct irq_domain *armada_370_xp_mpic_domain; */ static void armada_370_xp_irq_mask(struct irq_data *d) { -#ifdef CONFIG_SMP irq_hw_number_t hwirq = irqd_to_hwirq(d); - if (hwirq > ARMADA_370_XP_MAX_PER_CPU_IRQS) + if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) writel(hwirq, main_int_base + ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS); else writel(hwirq, per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS); -#else - writel(irqd_to_hwirq(d), - per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS); -#endif } static void armada_370_xp_irq_unmask(struct irq_data *d) { -#ifdef CONFIG_SMP irq_hw_number_t hwirq = irqd_to_hwirq(d); - if (hwirq > ARMADA_370_XP_MAX_PER_CPU_IRQS) + if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) writel(hwirq, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); else writel(hwirq, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); -#else - writel(irqd_to_hwirq(d), - per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); -#endif } 
#ifdef CONFIG_SMP @@ -144,10 +136,14 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { armada_370_xp_irq_mask(irq_get_irq_data(virq)); - writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); + if (hw != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) + writel(hw, per_cpu_int_base + + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); + else + writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); irq_set_status_flags(virq, IRQ_LEVEL); - if (hw < ARMADA_370_XP_MAX_PER_CPU_IRQS) { + if (hw == ARMADA_370_XP_TIMER0_PER_CPU_IRQ) { irq_set_percpu_devid(virq); irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip, handle_percpu_devid_irq); diff --git a/arch/arm/mach-mxs/icoll.c b/arch/arm/mach-mxs/icoll.c index 8fb23af154b3..e26eeba46598 100644 --- a/arch/arm/mach-mxs/icoll.c +++ b/arch/arm/mach-mxs/icoll.c @@ -100,7 +100,7 @@ static struct irq_domain_ops icoll_irq_domain_ops = { .xlate = irq_domain_xlate_onecell, }; -void __init icoll_of_init(struct device_node *np, +static void __init icoll_of_init(struct device_node *np, struct device_node *interrupt_parent) { /* diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 052186713347..e7b781d3788f 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -41,8 +41,6 @@ static struct fb_videomode mx23evk_video_modes[] = { .lower_margin = 4, .hsync_len = 1, .vsync_len = 1, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, }, }; @@ -59,8 +57,6 @@ static struct fb_videomode mx28evk_video_modes[] = { .lower_margin = 10, .hsync_len = 10, .vsync_len = 10, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, }, }; @@ -77,7 +73,6 @@ static struct fb_videomode m28evk_video_modes[] = { .lower_margin = 45, .hsync_len = 1, .vsync_len = 1, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT, }, }; @@ -94,9 +89,7 @@ static struct fb_videomode apx4devkit_video_modes[] = { .lower_margin = 13, .hsync_len = 48, .vsync_len = 3, - 
.sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT | - FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, }, }; @@ -113,9 +106,7 @@ static struct fb_videomode apf28dev_video_modes[] = { .lower_margin = 0x15, .hsync_len = 64, .vsync_len = 4, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT | - FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, }, }; @@ -132,7 +123,6 @@ static struct fb_videomode cfa10049_video_modes[] = { .lower_margin = 2, .hsync_len = 15, .vsync_len = 15, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT }, }; @@ -259,6 +249,8 @@ static void __init imx23_evk_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(mx23evk_video_modes); mxsfb_pdata.default_bpp = 32; mxsfb_pdata.ld_intf_width = STMLCDIF_24BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; } static inline void enable_clk_enet_out(void) @@ -278,6 +270,8 @@ static void __init imx28_evk_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(mx28evk_video_modes); mxsfb_pdata.default_bpp = 32; mxsfb_pdata.ld_intf_width = STMLCDIF_24BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; mxs_saif_clkmux_select(MXS_DIGCTL_SAIF_CLKMUX_EXTMSTR0); } @@ -297,6 +291,7 @@ static void __init m28evk_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(m28evk_video_modes); mxsfb_pdata.default_bpp = 16; mxsfb_pdata.ld_intf_width = STMLCDIF_18BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT; } static void __init sc_sps1_init(void) @@ -322,6 +317,8 @@ static void __init apx4devkit_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(apx4devkit_video_modes); mxsfb_pdata.default_bpp = 32; mxsfb_pdata.ld_intf_width = STMLCDIF_24BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; } #define ENET0_MDC__GPIO_4_0 MXS_GPIO_NR(4, 0) @@ -402,17 +399,18 @@ static void __init 
cfa10049_init(void) { enable_clk_enet_out(); update_fec_mac_prop(OUI_CRYSTALFONTZ); + + mxsfb_pdata.mode_list = cfa10049_video_modes; + mxsfb_pdata.mode_count = ARRAY_SIZE(cfa10049_video_modes); + mxsfb_pdata.default_bpp = 32; + mxsfb_pdata.ld_intf_width = STMLCDIF_18BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT; } static void __init cfa10037_init(void) { enable_clk_enet_out(); update_fec_mac_prop(OUI_CRYSTALFONTZ); - - mxsfb_pdata.mode_list = cfa10049_video_modes; - mxsfb_pdata.mode_count = ARRAY_SIZE(cfa10049_video_modes); - mxsfb_pdata.default_bpp = 32; - mxsfb_pdata.ld_intf_width = STMLCDIF_18BIT; } static void __init apf28_init(void) @@ -423,6 +421,8 @@ static void __init apf28_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(apf28dev_video_modes); mxsfb_pdata.default_bpp = 16; mxsfb_pdata.ld_intf_width = STMLCDIF_16BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; } static void __init mxs_machine_init(void) diff --git a/arch/arm/mach-mxs/mm.c b/arch/arm/mach-mxs/mm.c index a4294aa9f301..e63b7d87acbd 100644 --- a/arch/arm/mach-mxs/mm.c +++ b/arch/arm/mach-mxs/mm.c @@ -18,6 +18,7 @@ #include <mach/mx23.h> #include <mach/mx28.h> +#include <mach/common.h> /* * Define the MX23 memory map. 
diff --git a/arch/arm/mach-mxs/ocotp.c b/arch/arm/mach-mxs/ocotp.c index 54add60f94c9..1dff46703753 100644 --- a/arch/arm/mach-mxs/ocotp.c +++ b/arch/arm/mach-mxs/ocotp.c @@ -19,6 +19,7 @@ #include <asm/processor.h> /* for cpu_relax() */ #include <mach/mxs.h> +#include <mach/common.h> #define OCOTP_WORD_OFFSET 0x20 #define OCOTP_WORD_COUNT 0x20 diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c index 27c2cb7ab813..1504b68f4c66 100644 --- a/arch/arm/mach-netx/generic.c +++ b/arch/arm/mach-netx/generic.c @@ -168,7 +168,7 @@ void __init netx_init_irq(void) { int irq; - vic_init(io_p2v(NETX_PA_VIC), 0, ~0, 0); + vic_init(io_p2v(NETX_PA_VIC), NETX_IRQ_VIC_START, ~0, 0); for (irq = NETX_IRQ_HIF_CHAINED(0); irq <= NETX_IRQ_HIF_LAST; irq++) { irq_set_chip_and_handler(irq, &netx_hif_chip, diff --git a/arch/arm/mach-netx/include/mach/irqs.h b/arch/arm/mach-netx/include/mach/irqs.h index 6ce914d54a30..8f74a844a775 100644 --- a/arch/arm/mach-netx/include/mach/irqs.h +++ b/arch/arm/mach-netx/include/mach/irqs.h @@ -17,42 +17,42 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define NETX_IRQ_VIC_START 0 -#define NETX_IRQ_SOFTINT 0 -#define NETX_IRQ_TIMER0 1 -#define NETX_IRQ_TIMER1 2 -#define NETX_IRQ_TIMER2 3 -#define NETX_IRQ_SYSTIME_NS 4 -#define NETX_IRQ_SYSTIME_S 5 -#define NETX_IRQ_GPIO_15 6 -#define NETX_IRQ_WATCHDOG 7 -#define NETX_IRQ_UART0 8 -#define NETX_IRQ_UART1 9 -#define NETX_IRQ_UART2 10 -#define NETX_IRQ_USB 11 -#define NETX_IRQ_SPI 12 -#define NETX_IRQ_I2C 13 -#define NETX_IRQ_LCD 14 -#define NETX_IRQ_HIF 15 -#define NETX_IRQ_GPIO_0_14 16 -#define NETX_IRQ_XPEC0 17 -#define NETX_IRQ_XPEC1 18 -#define NETX_IRQ_XPEC2 19 -#define NETX_IRQ_XPEC3 20 -#define NETX_IRQ_XPEC(no) (17 + (no)) -#define NETX_IRQ_MSYNC0 21 -#define NETX_IRQ_MSYNC1 22 -#define NETX_IRQ_MSYNC2 23 -#define NETX_IRQ_MSYNC3 24 -#define NETX_IRQ_IRQ_PHY 25 -#define NETX_IRQ_ISO_AREA 26 +#define NETX_IRQ_VIC_START 64 +#define NETX_IRQ_SOFTINT 
(NETX_IRQ_VIC_START + 0) +#define NETX_IRQ_TIMER0 (NETX_IRQ_VIC_START + 1) +#define NETX_IRQ_TIMER1 (NETX_IRQ_VIC_START + 2) +#define NETX_IRQ_TIMER2 (NETX_IRQ_VIC_START + 3) +#define NETX_IRQ_SYSTIME_NS (NETX_IRQ_VIC_START + 4) +#define NETX_IRQ_SYSTIME_S (NETX_IRQ_VIC_START + 5) +#define NETX_IRQ_GPIO_15 (NETX_IRQ_VIC_START + 6) +#define NETX_IRQ_WATCHDOG (NETX_IRQ_VIC_START + 7) +#define NETX_IRQ_UART0 (NETX_IRQ_VIC_START + 8) +#define NETX_IRQ_UART1 (NETX_IRQ_VIC_START + 9) +#define NETX_IRQ_UART2 (NETX_IRQ_VIC_START + 10) +#define NETX_IRQ_USB (NETX_IRQ_VIC_START + 11) +#define NETX_IRQ_SPI (NETX_IRQ_VIC_START + 12) +#define NETX_IRQ_I2C (NETX_IRQ_VIC_START + 13) +#define NETX_IRQ_LCD (NETX_IRQ_VIC_START + 14) +#define NETX_IRQ_HIF (NETX_IRQ_VIC_START + 15) +#define NETX_IRQ_GPIO_0_14 (NETX_IRQ_VIC_START + 16) +#define NETX_IRQ_XPEC0 (NETX_IRQ_VIC_START + 17) +#define NETX_IRQ_XPEC1 (NETX_IRQ_VIC_START + 18) +#define NETX_IRQ_XPEC2 (NETX_IRQ_VIC_START + 19) +#define NETX_IRQ_XPEC3 (NETX_IRQ_VIC_START + 20) +#define NETX_IRQ_XPEC(no) (NETX_IRQ_VIC_START + 17 + (no)) +#define NETX_IRQ_MSYNC0 (NETX_IRQ_VIC_START + 21) +#define NETX_IRQ_MSYNC1 (NETX_IRQ_VIC_START + 22) +#define NETX_IRQ_MSYNC2 (NETX_IRQ_VIC_START + 23) +#define NETX_IRQ_MSYNC3 (NETX_IRQ_VIC_START + 24) +#define NETX_IRQ_IRQ_PHY (NETX_IRQ_VIC_START + 25) +#define NETX_IRQ_ISO_AREA (NETX_IRQ_VIC_START + 26) /* int 27 is reserved */ /* int 28 is reserved */ -#define NETX_IRQ_TIMER3 29 -#define NETX_IRQ_TIMER4 30 +#define NETX_IRQ_TIMER3 (NETX_IRQ_VIC_START + 29) +#define NETX_IRQ_TIMER4 (NETX_IRQ_VIC_START + 30) /* int 31 is reserved */ -#define NETX_IRQS 32 +#define NETX_IRQS (NETX_IRQ_VIC_START + 32) /* for multiplexed irqs on gpio 0..14 */ #define NETX_IRQ_GPIO(x) (NETX_IRQS + (x)) diff --git a/arch/arm/mach-omap1/clock_data.c b/arch/arm/mach-omap1/clock_data.c index cb7c6ae2e3fc..6c4f766365a2 100644 --- a/arch/arm/mach-omap1/clock_data.c +++ b/arch/arm/mach-omap1/clock_data.c @@ -543,15 +543,6 @@ 
static struct clk usb_dc_ck = { /* Direct from ULPD, no parent */ .rate = 48000000, .enable_reg = OMAP1_IO_ADDRESS(SOFT_REQ_REG), - .enable_bit = USB_REQ_EN_SHIFT, -}; - -static struct clk usb_dc_ck7xx = { - .name = "usb_dc_ck", - .ops = &clkops_generic, - /* Direct from ULPD, no parent */ - .rate = 48000000, - .enable_reg = OMAP1_IO_ADDRESS(SOFT_REQ_REG), .enable_bit = SOFT_USB_OTG_DPLL_REQ_SHIFT, }; @@ -727,8 +718,7 @@ static struct omap_clk omap_clks[] = { CLK(NULL, "usb_clko", &usb_clko, CK_16XX | CK_1510 | CK_310), CLK(NULL, "usb_hhc_ck", &usb_hhc_ck1510, CK_1510 | CK_310), CLK(NULL, "usb_hhc_ck", &usb_hhc_ck16xx, CK_16XX), - CLK(NULL, "usb_dc_ck", &usb_dc_ck, CK_16XX), - CLK(NULL, "usb_dc_ck", &usb_dc_ck7xx, CK_7XX), + CLK(NULL, "usb_dc_ck", &usb_dc_ck, CK_16XX | CK_7XX), CLK(NULL, "mclk", &mclk_1510, CK_1510 | CK_310), CLK(NULL, "mclk", &mclk_16xx, CK_16XX), CLK(NULL, "bclk", &bclk_1510, CK_1510 | CK_310), diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h index fb18831e88aa..14f7e9920479 100644 --- a/arch/arm/mach-omap1/common.h +++ b/arch/arm/mach-omap1/common.h @@ -31,6 +31,8 @@ #include <plat/i2c.h> +#include <mach/irqs.h> + #if defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP850) void omap7xx_map_io(void); #else diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 49ac3dfebef9..8111cd9ff3e5 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -311,9 +311,6 @@ config MACH_OMAP_ZOOM2 default y select OMAP_PACKAGE_CBB select REGULATOR_FIXED_VOLTAGE if REGULATOR - select SERIAL_8250 - select SERIAL_8250_CONSOLE - select SERIAL_CORE_CONSOLE config MACH_OMAP_ZOOM3 bool "OMAP3630 Zoom3 board" @@ -321,9 +318,6 @@ config MACH_OMAP_ZOOM3 default y select OMAP_PACKAGE_CBP select REGULATOR_FIXED_VOLTAGE if REGULATOR - select SERIAL_8250 - select SERIAL_8250_CONSOLE - select SERIAL_CORE_CONSOLE config MACH_CM_T35 bool "CompuLab CM-T35/CM-T3730 modules" diff --git 
a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 0274ff7a2a2b..e54a48060198 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -102,6 +102,7 @@ DT_MACHINE_START(OMAP3_DT, "Generic OMAP3 (Flattened Device Tree)") .init_irq = omap_intc_of_init, .handle_irq = omap3_intc_handle_irq, .init_machine = omap_generic_init, + .init_late = omap3_init_late, .init_time = omap3_sync32k_timer_init, .dt_compat = omap3_boards_compat, .restart = omap3xxx_restart, @@ -119,6 +120,7 @@ DT_MACHINE_START(OMAP3_GP_DT, "Generic OMAP3-GP (Flattened Device Tree)") .init_irq = omap_intc_of_init, .handle_irq = omap3_intc_handle_irq, .init_machine = omap_generic_init, + .init_late = omap3_init_late, .init_time = omap3_secure_sync32k_timer_init, .dt_compat = omap3_gp_boards_compat, .restart = omap3xxx_restart, diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c index f7c4616cbb60..d2ea68ea678a 100644 --- a/arch/arm/mach-omap2/board-rx51.c +++ b/arch/arm/mach-omap2/board-rx51.c @@ -17,6 +17,7 @@ #include <linux/io.h> #include <linux/gpio.h> #include <linux/leds.h> +#include <linux/usb/phy.h> #include <linux/usb/musb.h> #include <linux/platform_data/spi-omap2-mcspi.h> @@ -98,6 +99,7 @@ static void __init rx51_init(void) sdrc_params = nokia_get_sdram_timings(); omap_sdrc_init(sdrc_params, sdrc_params); + usb_bind_phy("musb-hdrc.0.auto", 0, "twl4030_usb"); usb_musb_init(&musb_board_data); rx51_peripherals_init(); diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c index 3d58f335f173..0c6834ae1fc4 100644 --- a/arch/arm/mach-omap2/cclock44xx_data.c +++ b/arch/arm/mach-omap2/cclock44xx_data.c @@ -52,6 +52,13 @@ */ #define OMAP4_DPLL_ABE_DEFFREQ 98304000 +/* + * OMAP4 USB DPLL default frequency. In OMAP4430 TRM version V, section + * "3.6.3.9.5 DPLL_USB Preferred Settings" shows that the preferred + * locked frequency for the USB DPLL is 960MHz. 
+ */ +#define OMAP4_DPLL_USB_DEFFREQ 960000000 + /* Root clocks */ DEFINE_CLK_FIXED_RATE(extalt_clkin_ck, CLK_IS_ROOT, 59000000, 0x0); @@ -1011,6 +1018,10 @@ DEFINE_CLK_OMAP_MUX(hsmmc2_fclk, "l3_init_clkdm", hsmmc1_fclk_sel, OMAP4430_CM_L3INIT_MMC2_CLKCTRL, OMAP4430_CLKSEL_MASK, hsmmc1_fclk_parents, func_dmic_abe_gfclk_ops); +DEFINE_CLK_GATE(ocp2scp_usb_phy_phy_48m, "func_48m_fclk", &func_48m_fclk, 0x0, + OMAP4430_CM_L3INIT_USBPHYOCP2SCP_CLKCTRL, + OMAP4430_OPTFCLKEN_PHY_48M_SHIFT, 0x0, NULL); + DEFINE_CLK_GATE(sha2md5_fck, "l3_div_ck", &l3_div_ck, 0x0, OMAP4430_CM_L4SEC_SHA2MD51_CLKCTRL, OMAP4430_MODULEMODE_SWCTRL_SHIFT, 0x0, NULL); @@ -1538,6 +1549,7 @@ static struct omap_clk omap44xx_clks[] = { CLK(NULL, "per_mcbsp4_gfclk", &per_mcbsp4_gfclk, CK_443X), CLK(NULL, "hsmmc1_fclk", &hsmmc1_fclk, CK_443X), CLK(NULL, "hsmmc2_fclk", &hsmmc2_fclk, CK_443X), + CLK(NULL, "ocp2scp_usb_phy_phy_48m", &ocp2scp_usb_phy_phy_48m, CK_443X), CLK(NULL, "sha2md5_fck", &sha2md5_fck, CK_443X), CLK(NULL, "slimbus1_fclk_1", &slimbus1_fclk_1, CK_443X), CLK(NULL, "slimbus1_fclk_0", &slimbus1_fclk_0, CK_443X), @@ -1705,5 +1717,13 @@ int __init omap4xxx_clk_init(void) if (rc) pr_err("%s: failed to configure ABE DPLL!\n", __func__); + /* + * Lock USB DPLL on OMAP4 devices so that the L3INIT power + * domain can transition to retention state when not in use. 
+ */ + rc = clk_set_rate(&dpll_usb_ck, OMAP4_DPLL_USB_DEFFREQ); + if (rc) + pr_err("%s: failed to configure USB DPLL!\n", __func__); + return 0; } diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 0a6b9c7a63da..d6ba13e1c540 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -108,7 +108,6 @@ void omap35xx_init_late(void); void omap3630_init_late(void); void am35xx_init_late(void); void ti81xx_init_late(void); -void omap4430_init_late(void); int omap2_common_pm_late_init(void); #if defined(CONFIG_SOC_OMAP2420) || defined(CONFIG_SOC_OMAP2430) @@ -294,5 +293,8 @@ extern void omap_reserve(void); struct omap_hwmod; extern int omap_dss_reset(struct omap_hwmod *); +/* SoC specific clock initializer */ +extern int (*omap_clk_init)(void); + #endif /* __ASSEMBLER__ */ #endif /* __ARCH_ARM_MACH_OMAP2PLUS_COMMON_H */ diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index e4b16c8efe8b..410e1bac7815 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -1122,9 +1122,6 @@ int gpmc_calc_timings(struct gpmc_timings *gpmc_t, /* TODO: remove, see function definition */ gpmc_convert_ps_to_ns(gpmc_t); - /* Now the GPMC is initialised, unreserve the chip-selects */ - gpmc_cs_map = 0; - return 0; } @@ -1383,6 +1380,9 @@ static int gpmc_probe(struct platform_device *pdev) if (IS_ERR_VALUE(gpmc_setup_irq())) dev_warn(gpmc_dev, "gpmc_setup_irq failed\n"); + /* Now the GPMC is initialised, unreserve the chip-selects */ + gpmc_cs_map = 0; + rc = gpmc_probe_dt(pdev); if (rc < 0) { clk_disable_unprepare(gpmc_l3_clk); diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 2c3fdd65387b..5c445ca1e271 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -55,6 +55,12 @@ #include "prm44xx.h" /* + * omap_clk_init: points to a function that does the SoC-specific + * clock initializations + */ +int (*omap_clk_init)(void); + +/* * The machine specific code may provide the 
extra mapping besides the * default mapping provided here. */ @@ -397,7 +403,7 @@ void __init omap2420_init_early(void) omap242x_clockdomains_init(); omap2420_hwmod_init(); omap_hwmod_init_postsetup(); - omap2420_clk_init(); + omap_clk_init = omap2420_clk_init; } void __init omap2420_init_late(void) @@ -427,7 +433,7 @@ void __init omap2430_init_early(void) omap243x_clockdomains_init(); omap2430_hwmod_init(); omap_hwmod_init_postsetup(); - omap2430_clk_init(); + omap_clk_init = omap2430_clk_init; } void __init omap2430_init_late(void) @@ -462,7 +468,7 @@ void __init omap3_init_early(void) omap3xxx_clockdomains_init(); omap3xxx_hwmod_init(); omap_hwmod_init_postsetup(); - omap3xxx_clk_init(); + omap_clk_init = omap3xxx_clk_init; } void __init omap3430_init_early(void) @@ -500,7 +506,7 @@ void __init ti81xx_init_early(void) omap3xxx_clockdomains_init(); omap3xxx_hwmod_init(); omap_hwmod_init_postsetup(); - omap3xxx_clk_init(); + omap_clk_init = omap3xxx_clk_init; } void __init omap3_init_late(void) @@ -568,7 +574,7 @@ void __init am33xx_init_early(void) am33xx_clockdomains_init(); am33xx_hwmod_init(); omap_hwmod_init_postsetup(); - am33xx_clk_init(); + omap_clk_init = am33xx_clk_init; } #endif @@ -593,7 +599,7 @@ void __init omap4430_init_early(void) omap44xx_clockdomains_init(); omap44xx_hwmod_init(); omap_hwmod_init_postsetup(); - omap4xxx_clk_init(); + omap_clk_init = omap4xxx_clk_init; } void __init omap4430_init_late(void) diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index 6a217c98db54..f82cf878d6af 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -211,8 +211,6 @@ static int __init _omap_mux_get_by_name(struct omap_mux_partition *partition, return -EINVAL; } - pr_err("%s: Could not find signal %s\n", __func__, muxname); - return -ENODEV; } @@ -234,6 +232,8 @@ int __init omap_mux_get_by_name(const char *muxname, return mux_mode; } + pr_err("%s: Could not find signal %s\n", __func__, muxname); + return -ENODEV; } @@ 
-739,8 +739,9 @@ static void __init omap_mux_dbg_create_entry( list_for_each_entry(e, &partition->muxmodes, node) { struct omap_mux *m = &e->mux; - (void)debugfs_create_file(m->muxnames[0], S_IWUSR, mux_dbg_dir, - m, &omap_mux_dbg_signal_fops); + (void)debugfs_create_file(m->muxnames[0], S_IWUSR | S_IRUGO, + mux_dbg_dir, m, + &omap_mux_dbg_signal_fops); } } diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index c2c798c08c2b..a202a4785104 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1368,7 +1368,9 @@ static void _enable_sysc(struct omap_hwmod *oh) } if (sf & SYSC_HAS_MIDLEMODE) { - if (oh->flags & HWMOD_SWSUP_MSTANDBY) { + if (oh->flags & HWMOD_FORCE_MSTANDBY) { + idlemode = HWMOD_IDLEMODE_FORCE; + } else if (oh->flags & HWMOD_SWSUP_MSTANDBY) { idlemode = HWMOD_IDLEMODE_NO; } else { if (sf & SYSC_HAS_ENAWAKEUP) @@ -1440,7 +1442,8 @@ static void _idle_sysc(struct omap_hwmod *oh) } if (sf & SYSC_HAS_MIDLEMODE) { - if (oh->flags & HWMOD_SWSUP_MSTANDBY) { + if ((oh->flags & HWMOD_SWSUP_MSTANDBY) || + (oh->flags & HWMOD_FORCE_MSTANDBY)) { idlemode = HWMOD_IDLEMODE_FORCE; } else { if (sf & SYSC_HAS_ENAWAKEUP) diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index d43d9b608eda..d5dc935f6060 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -427,8 +427,8 @@ struct omap_hwmod_omap4_prcm { * * HWMOD_SWSUP_SIDLE: omap_hwmod code should manually bring module in and out * of idle, rather than relying on module smart-idle - * HWMOD_SWSUP_MSTDBY: omap_hwmod code should manually bring module in and out - * of standby, rather than relying on module smart-standby + * HWMOD_SWSUP_MSTANDBY: omap_hwmod code should manually bring module in and + * out of standby, rather than relying on module smart-standby * HWMOD_INIT_NO_RESET: don't reset this module at boot - important for * SDRAM controller, etc. 
XXX probably belongs outside the main hwmod file * XXX Should be HWMOD_SETUP_NO_RESET @@ -459,6 +459,10 @@ struct omap_hwmod_omap4_prcm { * correctly, or this is being abused to deal with some PM latency * issues -- but we're currently suffering from a shortage of * folks who are able to track these issues down properly. + * HWMOD_FORCE_MSTANDBY: Always keep MIDLEMODE bits cleared so that device + * is kept in force-standby mode. Failing to do so causes PM problems + * with musb on OMAP3630 at least. Note that musb has a dedicated register + * to control MSTANDBY signal when MIDLEMODE is set to force-standby. */ #define HWMOD_SWSUP_SIDLE (1 << 0) #define HWMOD_SWSUP_MSTANDBY (1 << 1) @@ -471,6 +475,7 @@ struct omap_hwmod_omap4_prcm { #define HWMOD_16BIT_REG (1 << 8) #define HWMOD_EXT_OPT_MAIN_CLK (1 << 9) #define HWMOD_BLOCK_WFI (1 << 10) +#define HWMOD_FORCE_MSTANDBY (1 << 11) /* * omap_hwmod._int_flags definitions diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index ac7e03ec952f..5112d04e7b79 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -1707,9 +1707,14 @@ static struct omap_hwmod omap3xxx_usbhsotg_hwmod = { * Erratum ID: i479 idle_req / idle_ack mechanism potentially * broken when autoidle is enabled * workaround is to disable the autoidle bit at module level. + * + * Enabling the device in any other MIDLEMODE setting but force-idle + * causes core_pwrdm not enter idle states at least on OMAP3630. + * Note that musb has OTG_FORCESTDBY register that controls MSTANDBY + * signal when MIDLEMODE is set to force-idle. 
*/ .flags = HWMOD_NO_OCP_AUTOIDLE | HWMOD_SWSUP_SIDLE - | HWMOD_SWSUP_MSTANDBY, + | HWMOD_FORCE_MSTANDBY, }; /* usb_otg_hs */ diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index 0e47d2e1687c..9e0576569e07 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -2714,6 +2714,10 @@ static struct omap_ocp2scp_dev ocp2scp_dev_attr[] = { { } }; +static struct omap_hwmod_opt_clk ocp2scp_usb_phy_opt_clks[] = { + { .role = "48mhz", .clk = "ocp2scp_usb_phy_phy_48m" }, +}; + /* ocp2scp_usb_phy */ static struct omap_hwmod omap44xx_ocp2scp_usb_phy_hwmod = { .name = "ocp2scp_usb_phy", @@ -2728,6 +2732,8 @@ static struct omap_hwmod omap44xx_ocp2scp_usb_phy_hwmod = { }, }, .dev_attr = ocp2scp_dev_attr, + .opt_clks = ocp2scp_usb_phy_opt_clks, + .opt_clks_cnt = ARRAY_SIZE(ocp2scp_usb_phy_opt_clks), }; /* diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 2bdd4cf17a8f..f62b509ed08d 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -547,6 +547,8 @@ static inline void __init realtime_counter_init(void) clksrc_nr, clksrc_src) \ void __init omap##name##_gptimer_timer_init(void) \ { \ + if (omap_clk_init) \ + omap_clk_init(); \ omap_dmtimer_init(); \ omap2_gp_clockevent_init((clkev_nr), clkev_src, clkev_prop); \ omap2_gptimer_clocksource_init((clksrc_nr), clksrc_src); \ @@ -556,6 +558,8 @@ void __init omap##name##_gptimer_timer_init(void) \ clksrc_nr, clksrc_src) \ void __init omap##name##_sync32k_timer_init(void) \ { \ + if (omap_clk_init) \ + omap_clk_init(); \ omap_dmtimer_init(); \ omap2_gp_clockevent_init((clkev_nr), clkev_src, clkev_prop); \ /* Enable the use of clocksource="gp_timer" kernel parameter */ \ diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index af41888acbd6..969b0ba7fa70 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -505,6 +505,7 @@ static struct 
w1_gpio_platform_data w1_gpio_platform_data = { .pin = GPIO_ONE_WIRE, .is_open_drain = 0, .enable_external_pullup = w1_enable_external_pullup, + .ext_pullup_enable_pin = -EINVAL, }; struct platform_device raumfeld_w1_gpio_device = { diff --git a/arch/arm/mach-s3c24xx/include/mach/irqs.h b/arch/arm/mach-s3c24xx/include/mach/irqs.h index b7a9f4d469e8..1e73f5fa8659 100644 --- a/arch/arm/mach-s3c24xx/include/mach/irqs.h +++ b/arch/arm/mach-s3c24xx/include/mach/irqs.h @@ -188,10 +188,8 @@ #if defined(CONFIG_CPU_S3C2416) #define NR_IRQS (IRQ_S3C2416_I2S1 + 1) -#elif defined(CONFIG_CPU_S3C2443) -#define NR_IRQS (IRQ_S3C2443_AC97+1) #else -#define NR_IRQS (IRQ_S3C2440_AC97+1) +#define NR_IRQS (IRQ_S3C2443_AC97 + 1) #endif /* compatibility define. */ diff --git a/arch/arm/mach-s3c24xx/irq.c b/arch/arm/mach-s3c24xx/irq.c index cb9f5e011e73..d8ba9bee4c7e 100644 --- a/arch/arm/mach-s3c24xx/irq.c +++ b/arch/arm/mach-s3c24xx/irq.c @@ -500,7 +500,7 @@ struct s3c_irq_intc *s3c24xx_init_intc(struct device_node *np, base = (void *)0xfd000000; intc->reg_mask = base + 0xa4; - intc->reg_pending = base + 0x08; + intc->reg_pending = base + 0xa8; irq_num = 20; irq_start = S3C2410_IRQ(32); irq_offset = 4; diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c index fcdf52dbcc49..f051f53e35b7 100644 --- a/arch/arm/mach-s5pv210/clock.c +++ b/arch/arm/mach-s5pv210/clock.c @@ -214,11 +214,6 @@ static struct clk clk_pcmcdclk2 = { .name = "pcmcdclk", }; -static struct clk dummy_apb_pclk = { - .name = "apb_pclk", - .id = -1, -}; - static struct clk *clkset_vpllsrc_list[] = { [0] = &clk_fin_vpll, [1] = &clk_sclk_hdmi27m, @@ -305,18 +300,6 @@ static struct clk_ops clk_fout_apll_ops = { static struct clk init_clocks_off[] = { { - .name = "dma", - .devname = "dma-pl330.0", - .parent = &clk_hclk_psys.clk, - .enable = s5pv210_clk_ip0_ctrl, - .ctrlbit = (1 << 3), - }, { - .name = "dma", - .devname = "dma-pl330.1", - .parent = &clk_hclk_psys.clk, - .enable = s5pv210_clk_ip0_ctrl, - 
.ctrlbit = (1 << 4), - }, { .name = "rot", .parent = &clk_hclk_dsys.clk, .enable = s5pv210_clk_ip0_ctrl, @@ -573,6 +556,20 @@ static struct clk clk_hsmmc3 = { .ctrlbit = (1<<19), }; +static struct clk clk_pdma0 = { + .name = "pdma0", + .parent = &clk_hclk_psys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 3), +}; + +static struct clk clk_pdma1 = { + .name = "pdma1", + .parent = &clk_hclk_psys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 4), +}; + static struct clk *clkset_uart_list[] = { [6] = &clk_mout_mpll.clk, [7] = &clk_mout_epll.clk, @@ -1075,6 +1072,8 @@ static struct clk *clk_cdev[] = { &clk_hsmmc1, &clk_hsmmc2, &clk_hsmmc3, + &clk_pdma0, + &clk_pdma1, }; /* Clock initialisation code */ @@ -1333,6 +1332,8 @@ static struct clk_lookup s5pv210_clk_lookup[] = { CLKDEV_INIT(NULL, "spi_busclk0", &clk_p), CLKDEV_INIT("s5pv210-spi.0", "spi_busclk1", &clk_sclk_spi0.clk), CLKDEV_INIT("s5pv210-spi.1", "spi_busclk1", &clk_sclk_spi1.clk), + CLKDEV_INIT("dma-pl330.0", "apb_pclk", &clk_pdma0), + CLKDEV_INIT("dma-pl330.1", "apb_pclk", &clk_pdma1), }; void __init s5pv210_register_clocks(void) @@ -1361,6 +1362,5 @@ void __init s5pv210_register_clocks(void) for (ptr = 0; ptr < ARRAY_SIZE(clk_cdev); ptr++) s3c_disable_clocks(clk_cdev[ptr], 1); - s3c24xx_register_clock(&dummy_apb_pclk); s3c_pwmclk_init(); } diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c index 3a38f7b34b94..e373de44a8b6 100644 --- a/arch/arm/mach-s5pv210/mach-goni.c +++ b/arch/arm/mach-s5pv210/mach-goni.c @@ -845,7 +845,7 @@ static struct fimc_source_info goni_camera_sensors[] = { .mux_id = 0, .flags = V4L2_MBUS_PCLK_SAMPLE_FALLING | V4L2_MBUS_VSYNC_ACTIVE_LOW, - .bus_type = FIMC_BUS_TYPE_ITU_601, + .fimc_bus_type = FIMC_BUS_TYPE_ITU_601, .board_info = &noon010pc30_board_info, .i2c_bus_num = 0, .clk_frequency = 16000000UL, diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index cdcb799e802f..fec49ebc359a 100644 
--- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -32,6 +32,7 @@ #include <linux/smsc911x.h> #include <linux/spi/spi.h> #include <linux/spi/sh_hspi.h> +#include <linux/mmc/host.h> #include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mfd/tmio.h> #include <linux/usb/otg.h> diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c index f9d754f90c59..d2b3937c4014 100644 --- a/arch/arm/mach-spear3xx/spear3xx.c +++ b/arch/arm/mach-spear3xx/spear3xx.c @@ -14,7 +14,7 @@ #define pr_fmt(fmt) "SPEAr3xx: " fmt #include <linux/amba/pl022.h> -#include <linux/amba/pl08x.h> +#include <linux/amba/pl080.h> #include <linux/io.h> #include <plat/pl080.h> #include <mach/generic.h> diff --git a/arch/arm/mach-ux500/board-mop500-sdi.c b/arch/arm/mach-ux500/board-mop500-sdi.c index 051b62c27102..7f2cb6c5e2c1 100644 --- a/arch/arm/mach-ux500/board-mop500-sdi.c +++ b/arch/arm/mach-ux500/board-mop500-sdi.c @@ -81,7 +81,6 @@ static struct stedma40_chan_cfg mop500_sdi0_dma_cfg_tx = { #endif struct mmci_platform_data mop500_sdi0_data = { - .ios_handler = mop500_sdi0_ios_handler, .ocr_mask = MMC_VDD_29_30, .f_max = 50000000, .capabilities = MMC_CAP_4_BIT_DATA | diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c index b03457881c4b..87d2d7b38ce9 100644 --- a/arch/arm/mach-ux500/board-mop500.c +++ b/arch/arm/mach-ux500/board-mop500.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/platform_device.h> +#include <linux/clk.h> #include <linux/io.h> #include <linux/i2c.h> #include <linux/platform_data/i2c-nomadik.h> @@ -439,6 +440,15 @@ static void mop500_prox_deactivate(struct device *dev) regulator_put(prox_regulator); } +void mop500_snowball_ethernet_clock_enable(void) +{ + struct clk *clk; + + clk = clk_get_sys("fsmc", NULL); + if (!IS_ERR(clk)) + clk_prepare_enable(clk); +} + static struct cryp_platform_data u8500_cryp1_platform_data = { .mem_to_engine = { .dir 
= STEDMA40_MEM_TO_PERIPH, @@ -683,6 +693,8 @@ static void __init snowball_init_machine(void) mop500_audio_init(parent); mop500_uart_init(parent); + mop500_snowball_ethernet_clock_enable(); + /* This board has full regulator constraints */ regulator_has_full_constraints(); } diff --git a/arch/arm/mach-ux500/board-mop500.h b/arch/arm/mach-ux500/board-mop500.h index eaa605f5d90d..d38951be70df 100644 --- a/arch/arm/mach-ux500/board-mop500.h +++ b/arch/arm/mach-ux500/board-mop500.h @@ -104,6 +104,7 @@ void __init mop500_pinmaps_init(void); void __init snowball_pinmaps_init(void); void __init hrefv60_pinmaps_init(void); void mop500_audio_init(struct device *parent); +void mop500_snowball_ethernet_clock_enable(void); int __init mop500_uib_init(void); void mop500_uib_i2c_add(int busnum, struct i2c_board_info *info, diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 19235cf7bbe3..f1a581844372 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -312,9 +312,10 @@ static void __init u8500_init_machine(void) /* Pinmaps must be in place before devices register */ if (of_machine_is_compatible("st-ericsson,mop500")) mop500_pinmaps_init(); - else if (of_machine_is_compatible("calaosystems,snowball-a9500")) + else if (of_machine_is_compatible("calaosystems,snowball-a9500")) { snowball_pinmaps_init(); - else if (of_machine_is_compatible("st-ericsson,hrefv60+")) + mop500_snowball_ethernet_clock_enable(); + } else if (of_machine_is_compatible("st-ericsson,hrefv60+")) hrefv60_pinmaps_init(); else if (of_machine_is_compatible("st-ericsson,ccu9540")) {} /* TODO: Add pinmaps for ccu9540 board. 
*/ diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 025d17328730..4045c4931a30 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -43,7 +43,7 @@ config CPU_ARM740T depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T - select CPU_CACHE_V3 # although the core is v4t + select CPU_CACHE_V4 select CPU_CP15_MPU select CPU_PABRT_LEGACY help @@ -469,9 +469,6 @@ config CPU_PABRT_V7 bool # The cache model -config CPU_CACHE_V3 - bool - config CPU_CACHE_V4 bool diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 4e333fa2756f..9e51be96f635 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o -obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index dd3d59122cc3..48bc3c0a87ce 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -343,6 +343,7 @@ void __init feroceon_l2_init(int __l2_wt_override) outer_cache.inv_range = feroceon_l2_inv_range; outer_cache.clean_range = feroceon_l2_clean_range; outer_cache.flush_range = feroceon_l2_flush_range; + outer_cache.inv_all = l2_inv_all; enable_l2(); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c2f37390308a..c465faca51b0 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -299,7 +299,7 @@ static void l2x0_unlock(u32 cache_id) int lockregs; int i; - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: lockregs = 8; break; @@ -333,15 +333,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) if (cache_id_part_number_from_dt) cache_id = cache_id_part_number_from_dt; else - cache_id = readl_relaxed(l2x0_base + 
L2X0_CACHE_ID) - & L2X0_CACHE_ID_PART_MASK; + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; /* Determine the number of ways */ - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: if (aux & (1 << 16)) ways = 16; @@ -725,7 +724,6 @@ static const struct l2x0_of_data pl310_data = { .flush_all = l2x0_flush_all, .inv_all = l2x0_inv_all, .disable = l2x0_disable, - .set_debug = pl310_set_debug, }, }; @@ -814,9 +812,8 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data->save(); of_init = true; - l2x0_init(l2x0_base, aux_val, aux_mask); - memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); + l2x0_init(l2x0_base, aux_val, aux_mask); return 0; } diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S deleted file mode 100644 index 8a3fadece8d3..000000000000 --- a/arch/arm/mm/cache-v3.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * linux/arch/arm/mm/cache-v3.S - * - * Copyright (C) 1997-2002 Russell king - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/page.h> -#include "proc-macros.S" - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v3_flush_icache_all) - mov pc, lr -ENDPROC(v3_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - * - * - mm - mm_struct describing address space - */ -ENTRY(v3_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
- */ -ENTRY(v3_flush_kern_cache_all) - /* FALLTHROUGH */ - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v3_flush_user_cache_range) - mov ip, #0 - mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_user_range) - mov r0, #0 - mov pc, lr - -/* - * flush_kern_dcache_area(void *page, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v3_flush_kern_dcache_area) - /* FALLTHROUGH */ - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_flush_range) - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_unmap_area) - teq r2, #DMA_TO_DEVICE - bne v3_dma_flush_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_map_area) - mov pc, lr -ENDPROC(v3_dma_unmap_area) -ENDPROC(v3_dma_map_area) - - .globl v3_flush_kern_cache_louis - .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions v3 diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 43e5d77be677..a7ba68f59f0c 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -58,7 +58,7 @@ ENTRY(v4_flush_kern_cache_all) ENTRY(v4_flush_user_cache_range) #ifdef CONFIG_CPU_CP15 mov ip, #0 - mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache + mcr p15, 0, ip, c7, c7, 0 @ flush ID cache mov pc, lr #else /* FALLTHROUGH */ diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 7a0511191f6b..2ac37372ef52 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -48,7 +48,7 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -static DEFINE_PER_CPU(atomic64_t, active_asids); +DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; @@ -152,9 +152,9 @@ static int is_reserved_asid(u64 asid) return 0; } -static void new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm, unsigned int cpu) { - u64 asid = 
mm->context.id; + u64 asid = atomic64_read(&mm->context.id); u64 generation = atomic64_read(&asid_generation); if (asid != 0 && is_reserved_asid(asid)) { @@ -181,13 +181,14 @@ static void new_context(struct mm_struct *mm, unsigned int cpu) cpumask_clear(mm_cpumask(mm)); } - mm->context.id = asid; + return asid; } void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { unsigned long flags; unsigned int cpu = smp_processor_id(); + u64 asid; if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) __check_vmalloc_seq(mm); @@ -198,20 +199,27 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) */ cpu_set_reserved_ttbr0(); - if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) - && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) + asid = atomic64_read(&mm->context.id); + if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), asid)) goto switch_mm_fastpath; raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. 
*/ - if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) - new_context(mm, cpu); - - atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); - cpumask_set_cpu(cpu, mm_cpumask(mm)); + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); + } - if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { + local_flush_bp_all(); local_flush_tlb_all(); + dummy_flush_tlb_a15_erratum(); + } + + atomic64_set(&per_cpu(active_asids, cpu), asid); + cpumask_set_cpu(cpu, mm_cpumask(mm)); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c7e3759f16d3..e9db6b4bf65a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -342,6 +342,7 @@ static int __init atomic_pool_init(void) { struct dma_pool *pool = &atomic_pool; pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); + gfp_t gfp = GFP_KERNEL | GFP_DMA; unsigned long nr_pages = pool->size >> PAGE_SHIFT; unsigned long *bitmap; struct page *page; @@ -361,8 +362,8 @@ static int __init atomic_pool_init(void) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, atomic_pool_init); else - ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot, - &page, atomic_pool_init); + ptr = __alloc_remap_buffer(NULL, pool->size, gfp, prot, &page, + atomic_pool_init); if (ptr) { int i; diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 2dffc010cc41..5ee505c937d1 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -141,6 +141,7 @@ void setup_mm_for_reboot(void) { /* Switch to the identity mapping. 
*/ cpu_switch_mm(idmap_pgd, &init_mm); + local_flush_bp_all(); #ifdef CONFIG_CPU_HAS_ASID /* diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e95a996ab78f..a84ff763ac39 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -34,6 +34,7 @@ #include <asm/mach/pci.h> #include "mm.h" +#include "tcm.h" /* * empty_zero_page is a special page that is used for @@ -598,39 +599,60 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_section(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - const struct mem_type *type) +static void __init map_init_section(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { - pmd_t *pmd = pmd_offset(pud, addr); - +#ifndef CONFIG_ARM_LPAE /* - * Try a section mapping - end, addr and phys must all be aligned - * to a section boundary. Note that PMDs refer to the individual - * L1 entries, whereas PGDs refer to a group of L1 entries making - * up one logical pointer to an L2 table. + * In classic MMU format, puds and pmds are folded in to + * the pgds. pmd_offset gives the PGD entry. PGDs refer to a + * group of L1 entries making up one logical pointer to + * an L2 table (2MB), where as PMDs refer to the individual + * L1 entries (1MB). Hence increment to get the correct + * offset for odd 1MB sections. 
+ * (See arch/arm/include/asm/pgtable-2level.h) */ - if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { - pmd_t *p = pmd; - -#ifndef CONFIG_ARM_LPAE - if (addr & SECTION_SIZE) - pmd++; + if (addr & SECTION_SIZE) + pmd++; #endif + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); - do { - *pmd = __pmd(phys | type->prot_sect); - phys += SECTION_SIZE; - } while (pmd++, addr += SECTION_SIZE, addr != end); + flush_pmd_entry(pmd); +} - flush_pmd_entry(p); - } else { +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + + do { /* - * No need to loop; pte's aren't interested in the - * individual L1 entries. + * With LPAE, we must loop over to map + * all the pmds for the given range. */ - alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type); - } + next = pmd_addr_end(addr, end); + + /* + * Try a section mapping - addr, next and phys must all be + * aligned to a section boundary. 
+ */ + if (type->prot_sect && + ((addr | next | phys) & ~SECTION_MASK) == 0) { + map_init_section(pmd, addr, next, phys, type); + } else { + alloc_init_pte(pmd, addr, next, + __phys_to_pfn(phys), type); + } + + phys += next - addr; + + } while (pmd++, addr = next, addr != end); } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, @@ -641,7 +663,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, do { next = pud_addr_end(addr, end); - alloc_init_section(pud, addr, next, phys, type); + alloc_init_pmd(pud, addr, next, phys, type); phys += next - addr; } while (pud++, addr = next, addr != end); } @@ -1256,6 +1278,7 @@ void __init paging_init(struct machine_desc *mdesc) dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); + tcm_init(); top_pmd = pmd_off_k(0xffff0000); diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index dc5de5d53f20..fde2d2a794cf 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -77,24 +77,27 @@ __arm740_setup: mcr p15, 0, r0, c6, c0 @ set area 0, default ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit mcr p15, 0, r0, c6, c1 @ set area 1, RAM ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + cmp r3, #0 + moveq r0, #0 
+ beq 2f + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH +2: mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH mov r0, #0x06 mcr p15, 0, r0, c2, c0 @ Region 1&2 cacheable @@ -137,13 +140,14 @@ __arm740_proc_info: .long 0x41807400 .long 0xfffffff0 .long 0 + .long 0 b __arm740_setup .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT .long cpu_arm740_name .long arm740_processor_functions .long 0 .long 0 - .long v3_cache_fns @ cache model + .long v4_cache_fns @ cache model .size __arm740_proc_info, . - __arm740_proc_info diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 2c3b9421ab5e..2556cf1c2da1 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -387,7 +387,7 @@ ENTRY(cpu_arm920_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm920_suspend_size .equ cpu_arm920_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_arm920_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index f1803f7e2972..344c8a548cc0 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -402,7 +402,7 @@ ENTRY(cpu_arm926_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm926_suspend_size .equ cpu_arm926_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_arm926_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 82f9cdc751d6..0b60dd3d742a 100644 --- 
a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -350,7 +350,7 @@ ENTRY(cpu_mohawk_set_pte_ext) .globl cpu_mohawk_suspend_size .equ cpu_mohawk_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_mohawk_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 3aa0da11fd84..d92dfd081429 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -172,7 +172,7 @@ ENTRY(cpu_sa1100_set_pte_ext) .globl cpu_sa1100_suspend_size .equ cpu_sa1100_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_sa1100_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c3, c0, 0 @ domain ID diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c index 3e6210b4d6d4..054b491ff764 100644 --- a/arch/arm/mm/proc-syms.c +++ b/arch/arm/mm/proc-syms.c @@ -17,7 +17,9 @@ #ifndef MULTI_CPU EXPORT_SYMBOL(cpu_dcache_clean_area); +#ifdef CONFIG_MMU EXPORT_SYMBOL(cpu_set_pte_ext); +#endif #else EXPORT_SYMBOL(processor); #endif diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index bcaaa8de9325..5c07ee4fe3eb 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -138,7 +138,7 @@ ENTRY(cpu_v6_set_pte_ext) /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ .globl cpu_v6_suspend_size .equ cpu_v6_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 50bf1dafc9ea..6ffd78c0f9ab 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -48,7 +48,7 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r1, r1 @ get mm->context.id - and r3, r1, #0xff + asid r3, r1 mov r3, r3, lsl #(48 - 32) @ ASID mcrr p15, 0, r0, r3, c2 @ set TTB 0 isb diff --git 
a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3a3c015f8d5c..f584d3f5b37c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -420,7 +420,7 @@ __v7_pj4b_proc_info: __v7_ca7mp_proc_info: .long 0x410fc070 .long 0xff0ffff0 - __v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca7mp_setup .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info /* @@ -430,10 +430,25 @@ __v7_ca7mp_proc_info: __v7_ca15mp_proc_info: .long 0x410fc0f0 .long 0xff0ffff0 - __v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca15mp_setup .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* + * Qualcomm Inc. Krait processors. + */ + .type __krait_proc_info, #object +__krait_proc_info: + .long 0x510f0400 @ Required ID value + .long 0xff0ffc00 @ Mask for ID + /* + * Some Krait processors don't indicate support for SDIV and UDIV + * instructions in the ARM instruction set, even though they actually + * do support them. + */ + __v7_proc __v7_setup, hwcaps = HWCAP_IDIV + .size __krait_proc_info, . - __krait_proc_info + + /* * Match any ARMv7 processor core. 
*/ .type __v7_proc_info, #object diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index eb93d6487f35..e8efd83b6f25 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -413,7 +413,7 @@ ENTRY(cpu_xsc3_set_pte_ext) .globl cpu_xsc3_suspend_size .equ cpu_xsc3_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_xsc3_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 25510361aa18..e766f889bfd6 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -528,7 +528,7 @@ ENTRY(cpu_xscale_set_pte_ext) .globl cpu_xscale_suspend_size .equ cpu_xscale_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_xscale_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/kernel/tcm.h b/arch/arm/mm/tcm.h index 8015ad434a40..8015ad434a40 100644 --- a/arch/arm/kernel/tcm.h +++ b/arch/arm/mm/tcm.h diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6828ef6ce80e..1a643ee8e082 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -576,7 +576,7 @@ load_ind: /* x = ((*(frame + k)) & 0xf) << 2; */ ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; /* the interpreter should deal with the negative K */ - if (k < 0) + if ((int)k < 0) return -1; /* offset in r1: we might have to take the slow path */ emit_mov_i(r_off, k, ctx); @@ -918,9 +918,8 @@ void bpf_jit_compile(struct sk_filter *fp) #endif if (bpf_jit_enable > 1) - print_hex_dump(KERN_INFO, "BPF JIT code: ", - DUMP_PREFIX_ADDRESS, 16, 4, ctx.target, - alloc_size, false); + /* there are 2 passes here */ + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; out: diff --git a/arch/arm/plat-orion/addr-map.c b/arch/arm/plat-orion/addr-map.c index febe3862873c..807ac8e5cbc0 100644 --- 
a/arch/arm/plat-orion/addr-map.c +++ b/arch/arm/plat-orion/addr-map.c @@ -157,9 +157,12 @@ void __init orion_setup_cpu_mbus_target(const struct orion_addr_map_cfg *cfg, u32 size = readl(ddr_window_cpu_base + DDR_SIZE_CS_OFF(i)); /* - * Chip select enabled? + * We only take care of entries for which the chip + * select is enabled, and that don't have high base + * address bits set (devices can only access the first + * 32 bits of the memory). */ - if (size & 1) { + if ((size & 1) && !(base & 0xF)) { struct mbus_dram_window *w; w = &orion_mbus_dram_info.cs[cs++]; diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 2d4b6414609f..251f827271e9 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -238,6 +238,7 @@ static __init void ge_complete( struct mv643xx_eth_shared_platform_data *orion_ge_shared_data, struct resource *orion_ge_resource, unsigned long irq, struct platform_device *orion_ge_shared, + struct platform_device *orion_ge_mvmdio, struct mv643xx_eth_platform_data *eth_data, struct platform_device *orion_ge) { @@ -247,6 +248,8 @@ static __init void ge_complete( orion_ge->dev.platform_data = eth_data; platform_device_register(orion_ge_shared); + if (orion_ge_mvmdio) + platform_device_register(orion_ge_mvmdio); platform_device_register(orion_ge); } @@ -258,8 +261,6 @@ struct mv643xx_eth_shared_platform_data orion_ge00_shared_data; static struct resource orion_ge00_shared_resources[] = { { .name = "ge00 base", - }, { - .name = "ge00 err irq", }, }; @@ -271,6 +272,19 @@ static struct platform_device orion_ge00_shared = { }, }; +static struct resource orion_ge_mvmdio_resources[] = { + { + .name = "ge00 mvmdio base", + }, { + .name = "ge00 mvmdio err irq", + }, +}; + +static struct platform_device orion_ge_mvmdio = { + .name = "orion-mdio", + .id = -1, +}; + static struct resource orion_ge00_resources[] = { { .name = "ge00 irq", @@ -295,26 +309,25 @@ void __init orion_ge00_init(struct mv643xx_eth_platform_data 
*eth_data, unsigned int tx_csum_limit) { fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); + fill_resources(&orion_ge_mvmdio, orion_ge_mvmdio_resources, + mapbase + 0x2004, 0x84 - 1, irq_err); orion_ge00_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge00_shared_data, orion_ge00_resources, irq, &orion_ge00_shared, + &orion_ge_mvmdio, eth_data, &orion_ge00); } /***************************************************************************** * GE01 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge01_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge01_shared_data; static struct resource orion_ge01_shared_resources[] = { { .name = "ge01 base", - }, { - .name = "ge01 err irq", - }, + } }; static struct platform_device orion_ge01_shared = { @@ -349,26 +362,23 @@ void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, unsigned int tx_csum_limit) { fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); orion_ge01_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge01_shared_data, orion_ge01_resources, irq, &orion_ge01_shared, + NULL, eth_data, &orion_ge01); } /***************************************************************************** * GE10 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge10_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge10_shared_data; static struct resource orion_ge10_shared_resources[] = { { .name = "ge10 base", - }, { - .name = "ge10 err irq", - }, + } }; static struct platform_device orion_ge10_shared = { @@ -402,24 +412,21 @@ void __init 
orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, unsigned long irq_err) { fill_resources(&orion_ge10_shared, orion_ge10_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); ge_complete(&orion_ge10_shared_data, orion_ge10_resources, irq, &orion_ge10_shared, + NULL, eth_data, &orion_ge10); } /***************************************************************************** * GE11 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data orion_ge11_shared_data = { - .shared_smi = &orion_ge00_shared, -}; +struct mv643xx_eth_shared_platform_data orion_ge11_shared_data; static struct resource orion_ge11_shared_resources[] = { { .name = "ge11 base", - }, { - .name = "ge11 err irq", }, }; @@ -454,9 +461,10 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, unsigned long irq_err) { fill_resources(&orion_ge11_shared, orion_ge11_shared_resources, - mapbase + 0x2000, SZ_16K - 1, irq_err); + mapbase + 0x2000, SZ_16K - 1, NO_IRQ); ge_complete(&orion_ge11_shared_data, orion_ge11_resources, irq, &orion_ge11_shared, + NULL, eth_data, &orion_ge11); } diff --git a/arch/arm/plat-spear/Kconfig b/arch/arm/plat-spear/Kconfig index 739d016eb273..8a08c31b5e20 100644 --- a/arch/arm/plat-spear/Kconfig +++ b/arch/arm/plat-spear/Kconfig @@ -10,7 +10,7 @@ choice config ARCH_SPEAR13XX bool "ST SPEAr13xx with Device Tree" - select ARCH_HAVE_CPUFREQ + select ARCH_HAS_CPUFREQ select ARM_GIC select CPU_V7 select GPIO_SPEAR_SPICS diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fd70a68387eb..9b6d19f74078 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -9,7 +9,6 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS - select GENERIC_HARDIRQS_NO_DEPRECATED select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 
51493430f142..1a6bfe954d49 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,17 +6,6 @@ config FRAME_POINTER bool default y -config DEBUG_ERRORS - bool "Verbose kernel error messages" - depends on DEBUG_KERNEL - help - This option controls verbose debugging information which can be - printed when the kernel detects an internal error. This debugging - information is useful to kernel hackers when tracking down problems, - but mostly meaningless to other people. It's safe to say Y unless - you are concerned with the code size or don't want to see these - messages. - config DEBUG_STACK_USAGE bool "Enable stack utilization instrumentation" depends on DEBUG_KERNEL diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 9212c7880da7..09bef29f3a09 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -82,4 +82,3 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y # CONFIG_FTRACE is not set CONFIG_ATOMIC64_SELFTEST=y -CONFIG_DEBUG_ERRORS=y diff --git a/arch/arm64/include/asm/ucontext.h b/arch/arm64/include/asm/ucontext.h index bde960720892..42e04c877428 100644 --- a/arch/arm64/include/asm/ucontext.h +++ b/arch/arm64/include/asm/ucontext.h @@ -22,7 +22,7 @@ struct ucontext { stack_t uc_stack; sigset_t uc_sigmask; /* glibc uses a 1024-bit sigset_t */ - __u8 __unused[(1024 - sizeof(sigset_t)) / 8]; + __u8 __unused[1024 / 8 - sizeof(sigset_t)]; /* last for future expansion */ struct sigcontext uc_mcontext; }; diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index cef3925eaf60..aa3e948f7885 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -40,7 +40,9 @@ EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); /* bitops */ +#ifdef CONFIG_SMP EXPORT_SYMBOL(__atomic_hash); +#endif /* physical memory */ EXPORT_SYMBOL(memstart_addr); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 7f4f3673f2bc..e393174fe859 100644 --- 
a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -549,7 +549,6 @@ int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct compat_rt_sigframe __user *frame; - compat_stack_t stack; int err = 0; frame = compat_get_sigframe(ka, regs, sizeof(*frame)); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 224b44ab534e..70b8cd4021c4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -261,7 +261,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt) { unsigned long size, mask; - bool page64k = IS_ENABLED(ARM64_64K_PAGES); + bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES); pgd_t *pgd; pud_t *pud; pmd_t *pmd; diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 9b89257b2cfd..c1a868d398bd 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -7,7 +7,7 @@ config AVR32 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_GENERIC_HARDIRQS - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_ATOMIC64 select HARDIRQS_SW_RESEND diff --git a/arch/avr32/include/asm/io.h b/arch/avr32/include/asm/io.h index cf60d0a9f176..fc6483f83ccc 100644 --- a/arch/avr32/include/asm/io.h +++ b/arch/avr32/include/asm/io.h @@ -165,6 +165,10 @@ BUILDIO_IOPORT(l, u32) #define readw_be __raw_readw #define readl_be __raw_readl +#define writeb_relaxed writeb +#define writew_relaxed writew +#define writel_relaxed writel + #define writeb_be __raw_writeb #define writew_be __raw_writew #define writel_be __raw_writel diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 51c6401582ea..37401f535126 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git 
a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 600494c70e96..c3f2e0bc644a 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -33,7 +33,7 @@ config BLACKFIN select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_UID16 - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 diff --git a/arch/c6x/include/asm/irqflags.h b/arch/c6x/include/asm/irqflags.h index cf78e09e18c3..2c71d5634ec2 100644 --- a/arch/c6x/include/asm/irqflags.h +++ b/arch/c6x/include/asm/irqflags.h @@ -27,7 +27,7 @@ static inline unsigned long arch_local_save_flags(void) /* set interrupt enabled status */ static inline void arch_local_irq_restore(unsigned long flags) { - asm volatile (" mvc .s2 %0,CSR\n" : : "b"(flags)); + asm volatile (" mvc .s2 %0,CSR\n" : : "b"(flags) : "memory"); } /* unconditionally enable interrupts */ diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index bb0ac66cf533..06dd026533e3 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -43,7 +43,7 @@ config CRIS select GENERIC_ATOMIC64 select HAVE_GENERIC_HARDIRQS select HAVE_UID16 - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_IRQ_SHOW select GENERIC_IOMAP diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 50692b738c75..ba409c9947bc 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -74,6 +74,8 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 12369b194c7b..2ce731f9aa4d 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -6,7 +6,7 @@ config FRV select HAVE_PERF_EVENTS select HAVE_UID16 select HAVE_GENERIC_HARDIRQS - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select GENERIC_IRQ_SHOW select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git 
a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 595391f0f98c..31dbb5d8e13d 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -72,5 +72,7 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index ae8551eb3736..79250de1b12a 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -5,7 +5,7 @@ config H8300 select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 select HAVE_UID16 - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index 43e32621da7d..5d1c6d0870e6 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 33f3fdc0b214..9a02f71c6b1f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -26,7 +26,7 @@ config IA64 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index c567adc8bea5..6b4329f18b29 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -81,4 +81,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 77597e5ea60a..79521d5499f9 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -849,17 +849,6 @@ static palinfo_entry_t palinfo_entries[]={ #define NR_PALINFO_ENTRIES (int) 
ARRAY_SIZE(palinfo_entries) -/* - * this array is used to keep track of the proc entries we create. This is - * required in the module mode when we need to remove all entries. The procfs code - * does not do recursion of deletion - * - * Notes: - * - +1 accounts for the cpuN directory entry in /proc/pal - */ -#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)) - -static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; static struct proc_dir_entry *palinfo_dir; /* @@ -971,60 +960,32 @@ palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, voi static void __cpuinit create_palinfo_proc_entries(unsigned int cpu) { -# define CPUSTR "cpu%d" - pal_func_cpu_u_t f; - struct proc_dir_entry **pdir; struct proc_dir_entry *cpu_dir; int j; - char cpustr[sizeof(CPUSTR)]; - - - /* - * we keep track of created entries in a depth-first order for - * cleanup purposes. Each entry is stored into palinfo_proc_entries - */ - sprintf(cpustr,CPUSTR, cpu); + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", cpu); cpu_dir = proc_mkdir(cpustr, palinfo_dir); + if (!cpu_dir) + return; f.req_cpu = cpu; - /* - * Compute the location to store per cpu entries - * We dont store the top level entry in this list, but - * remove it finally after removing all cpu entries. 
- */ - pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)]; - *pdir++ = cpu_dir; for (j=0; j < NR_PALINFO_ENTRIES; j++) { f.func_id = j; - *pdir = create_proc_read_entry( - palinfo_entries[j].name, 0, cpu_dir, - palinfo_read_entry, (void *)f.value); - pdir++; + create_proc_read_entry( + palinfo_entries[j].name, 0, cpu_dir, + palinfo_read_entry, (void *)f.value); } } static void remove_palinfo_proc_entries(unsigned int hcpu) { - int j; - struct proc_dir_entry *cpu_dir, **pdir; - - pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)]; - cpu_dir = *pdir; - *pdir++=NULL; - for (j=0; j < (NR_PALINFO_ENTRIES); j++) { - if ((*pdir)) { - remove_proc_entry ((*pdir)->name, cpu_dir); - *pdir ++= NULL; - } - } - - if (cpu_dir) { - remove_proc_entry(cpu_dir->name, palinfo_dir); - } + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", hcpu); + remove_proc_subtree(cpustr, palinfo_dir); } static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, @@ -1058,6 +1019,8 @@ palinfo_init(void) printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); palinfo_dir = proc_mkdir("pal", NULL); + if (!palinfo_dir) + return -ENOMEM; /* Create palinfo dirs in /proc for all online cpus */ for_each_online_cpu(i) { @@ -1073,22 +1036,8 @@ palinfo_init(void) static void __exit palinfo_exit(void) { - int i = 0; - - /* remove all nodes: depth first pass. 
Could optimize this */ - for_each_online_cpu(i) { - remove_palinfo_proc_entries(i); - } - - /* - * Remove the top level entry finally - */ - remove_proc_entry(palinfo_dir->name, NULL); - - /* - * Unregister from cpu notifier callbacks - */ unregister_hotcpu_notifier(&palinfo_cpu_notifier); + remove_proc_subtree("pal", NULL); } module_init(palinfo_init); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 433f5e8a2cd1..2eda28414abb 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -619,6 +619,7 @@ static struct file_system_type pfm_fs_type = { .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("pfmfs"); DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index e34f565f595a..6f7dc8b7b35c 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -291,7 +291,6 @@ cpu_idle (void) } if (!need_resched()) { - void (*idle)(void); #ifdef CONFIG_SMP min_xtp(); #endif @@ -299,9 +298,7 @@ cpu_idle (void) if (mark_idle) (*mark_idle)(1); - if (!idle) - idle = default_idle; - (*idle)(); + default_idle(); if (mark_idle) (*mark_idle)(0); #ifdef CONFIG_SMP diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 92623818a1fe..bcd17b206571 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -10,7 +10,7 @@ config M32R select ARCH_WANT_IPC_PARSE_VERSION select HAVE_DEBUG_BUGVERBOSE select HAVE_GENERIC_HARDIRQS - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 519afa2755db..2a3b59e0e171 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_M32R_SOCKET_H */ diff --git 
a/arch/m32r/include/uapi/asm/stat.h b/arch/m32r/include/uapi/asm/stat.h index da4518f82d6d..98470fe483b6 100644 --- a/arch/m32r/include/uapi/asm/stat.h +++ b/arch/m32r/include/uapi/asm/stat.h @@ -63,10 +63,10 @@ struct stat64 { long long st_size; unsigned long st_blksize; -#if defined(__BIG_ENDIAN) +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) unsigned long __pad4; /* future possible st_blocks high bits */ unsigned long st_blocks; /* Number 512-byte blocks allocated. */ -#elif defined(__LITTLE_ENDIAN) +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) unsigned long st_blocks; /* Number 512-byte blocks allocated. */ unsigned long __pad4; /* future possible st_blocks high bits */ #else diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 0e708c78e01c..6de813370b8c 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -8,7 +8,7 @@ config M68K select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 select HAVE_UID16 - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS select GENERIC_CPU_DEVICES select GENERIC_STRNCPY_FROM_USER if MMU diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 7cdf6b010381..7240584d3439 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -310,7 +310,6 @@ config COBRA5282 config SOM5282EM bool "EMAC.Inc SOM5282EM board support" depends on M528x - select EMAC_INC help Support for the EMAC.Inc SOM5282EM module. 
diff --git a/arch/m68k/include/asm/MC68328.h b/arch/m68k/include/asm/MC68328.h index a337e56d09bf..4ebf098b8a1f 100644 --- a/arch/m68k/include/asm/MC68328.h +++ b/arch/m68k/include/asm/MC68328.h @@ -293,7 +293,7 @@ /* * Here go the bitmasks themselves */ -#define IMR_MSPIM (1 << SPIM _IRQ_NUM) /* Mask SPI Master interrupt */ +#define IMR_MSPIM (1 << SPIM_IRQ_NUM) /* Mask SPI Master interrupt */ #define IMR_MTMR2 (1 << TMR2_IRQ_NUM) /* Mask Timer 2 interrupt */ #define IMR_MUART (1 << UART_IRQ_NUM) /* Mask UART interrupt */ #define IMR_MWDT (1 << WDT_IRQ_NUM) /* Mask Watchdog Timer interrupt */ @@ -327,7 +327,7 @@ #define IWR_ADDR 0xfffff308 #define IWR LONG_REF(IWR_ADDR) -#define IWR_SPIM (1 << SPIM _IRQ_NUM) /* SPI Master interrupt */ +#define IWR_SPIM (1 << SPIM_IRQ_NUM) /* SPI Master interrupt */ #define IWR_TMR2 (1 << TMR2_IRQ_NUM) /* Timer 2 interrupt */ #define IWR_UART (1 << UART_IRQ_NUM) /* UART interrupt */ #define IWR_WDT (1 << WDT_IRQ_NUM) /* Watchdog Timer interrupt */ @@ -357,7 +357,7 @@ #define ISR_ADDR 0xfffff30c #define ISR LONG_REF(ISR_ADDR) -#define ISR_SPIM (1 << SPIM _IRQ_NUM) /* SPI Master interrupt */ +#define ISR_SPIM (1 << SPIM_IRQ_NUM) /* SPI Master interrupt */ #define ISR_TMR2 (1 << TMR2_IRQ_NUM) /* Timer 2 interrupt */ #define ISR_UART (1 << UART_IRQ_NUM) /* UART interrupt */ #define ISR_WDT (1 << WDT_IRQ_NUM) /* Watchdog Timer interrupt */ @@ -391,7 +391,7 @@ #define IPR_ADDR 0xfffff310 #define IPR LONG_REF(IPR_ADDR) -#define IPR_SPIM (1 << SPIM _IRQ_NUM) /* SPI Master interrupt */ +#define IPR_SPIM (1 << SPIM_IRQ_NUM) /* SPI Master interrupt */ #define IPR_TMR2 (1 << TMR2_IRQ_NUM) /* Timer 2 interrupt */ #define IPR_UART (1 << UART_IRQ_NUM) /* UART interrupt */ #define IPR_WDT (1 << WDT_IRQ_NUM) /* Watchdog Timer interrupt */ @@ -757,7 +757,7 @@ /* 'EZ328-compatible definitions */ #define TCN_ADDR TCN1_ADDR -#define TCN TCN +#define TCN TCN1 /* * Timer Unit 1 and 2 Status Registers diff --git a/arch/m68k/include/asm/gpio.h 
b/arch/m68k/include/asm/gpio.h index 4395ffc51fdb..8cc83431805b 100644 --- a/arch/m68k/include/asm/gpio.h +++ b/arch/m68k/include/asm/gpio.h @@ -86,4 +86,24 @@ static inline int gpio_cansleep(unsigned gpio) return gpio < MCFGPIO_PIN_MAX ? 0 : __gpio_cansleep(gpio); } +static inline int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) +{ + int err; + + err = gpio_request(gpio, label); + if (err) + return err; + + if (flags & GPIOF_DIR_IN) + err = gpio_direction_input(gpio); + else + err = gpio_direction_output(gpio, + (flags & GPIOF_INIT_HIGH) ? 1 : 0); + + if (err) + gpio_free(gpio); + + return err; +} + #endif diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 71fb29938dba..911ba472e6c4 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -57,6 +57,9 @@ void (*mach_reset)(void); void (*mach_halt)(void); void (*mach_power_off)(void); +#ifdef CONFIG_M68000 +#define CPU_NAME "MC68000" +#endif #ifdef CONFIG_M68328 #define CPU_NAME "MC68328" #endif diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index afd8106fd83b..519aad8fa812 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -188,7 +188,7 @@ void __init mem_init(void) } } -#if !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) +#if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) /* insert pointer tables allocated so far into the tablelist */ init_pointer_table((unsigned long)kernel_pg_dir); for (i = 0; i < PTRS_PER_PGD; i++) { diff --git a/arch/m68k/platform/coldfire/m528x.c b/arch/m68k/platform/coldfire/m528x.c index 83b7dad7a84e..b03a9d271837 100644 --- a/arch/m68k/platform/coldfire/m528x.c +++ b/arch/m68k/platform/coldfire/m528x.c @@ -69,7 +69,7 @@ static void __init m528x_uarts_init(void) u8 port; /* make sure PUAPAR is set for UART0 and UART1 */ - port = readb(MCF5282_GPIO_PUAPAR); + port = readb(MCFGPIO_PUAPAR); port |= 0x03 | (0x03 << 2); writeb(port, MCFGPIO_PUAPAR); } diff --git 
a/arch/metag/include/asm/elf.h b/arch/metag/include/asm/elf.h index d63b9d0e57dd..d2baf6961794 100644 --- a/arch/metag/include/asm/elf.h +++ b/arch/metag/include/asm/elf.h @@ -100,9 +100,6 @@ typedef unsigned long elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #define STACK_RND_MASK (0) #ifdef CONFIG_METAG_USER_TCM diff --git a/arch/metag/mm/Kconfig b/arch/metag/mm/Kconfig index cd7f2f2ad416..975f2f4e3ecf 100644 --- a/arch/metag/mm/Kconfig +++ b/arch/metag/mm/Kconfig @@ -40,6 +40,7 @@ endchoice config NUMA bool "Non Uniform Memory Access (NUMA) Support" + select ARCH_WANT_NUMA_VARIABLE_LOCALITY help Some Meta systems have MMU-mappable on-chip memories with lower latencies than main memory. This enables support for diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 7843d11156e6..1323fa2530eb 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -19,7 +19,7 @@ config MICROBLAZE select HAVE_DEBUG_KMEMLEAK select IRQ_DOMAIN select HAVE_GENERIC_HARDIRQS - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ae9c716c46bb..51244bf97271 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -18,7 +18,7 @@ config MIPS select HAVE_KRETPROBES select HAVE_DEBUG_KMEMLEAK select ARCH_BINFMT_ELF_RANDOMIZE_PIE - select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT select RTC_LIB if !MACH_LOONGSON select GENERIC_ATOMIC64 if !64BIT select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -38,7 +38,7 @@ config MIPS select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE select HAVE_MOD_ARCH_SPECIFIC - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select MODULES_USE_ELF_REL if MODULES select MODULES_USE_ELF_RELA if MODULES && 64BIT select CLONE_BACKWARDS @@ -657,7 +657,7 @@ config SNI_RM 
bool "SNI RM200/300/400" select FW_ARC if CPU_LITTLE_ENDIAN select FW_ARC32 if CPU_LITTLE_ENDIAN - select SNIPROM if CPU_BIG_ENDIAN + select FW_SNIPROM if CPU_BIG_ENDIAN select ARCH_MAY_HAVE_PC_FDC select BOOT_ELF32 select CEVT_R4K @@ -1144,7 +1144,7 @@ config DEFAULT_SGI_PARTITION config FW_ARC32 bool -config SNIPROM +config FW_SNIPROM bool config BOOT_ELF32 @@ -1493,7 +1493,6 @@ config CPU_XLP select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM - select CPU_HAS_LLSC select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select CPU_HAS_PREFETCH diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index ed1949c29508..9aa7d44898ed 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -745,10 +745,7 @@ void __init board_prom_init(void) strcpy(cfe_version, "unknown"); printk(KERN_INFO PFX "CFE version: %s\n", cfe_version); - if (bcm63xx_nvram_init(boot_addr + BCM963XX_NVRAM_OFFSET)) { - printk(KERN_ERR PFX "invalid nvram checksum\n"); - return; - } + bcm63xx_nvram_init(boot_addr + BCM963XX_NVRAM_OFFSET); board_name = bcm63xx_nvram_get_name(); /* find board by name */ diff --git a/arch/mips/bcm63xx/nvram.c b/arch/mips/bcm63xx/nvram.c index 620611680839..a4b8864f9307 100644 --- a/arch/mips/bcm63xx/nvram.c +++ b/arch/mips/bcm63xx/nvram.c @@ -38,7 +38,7 @@ struct bcm963xx_nvram { static struct bcm963xx_nvram nvram; static int mac_addr_used; -int __init bcm63xx_nvram_init(void *addr) +void __init bcm63xx_nvram_init(void *addr) { unsigned int check_len; u32 crc, expected_crc; @@ -60,9 +60,8 @@ int __init bcm63xx_nvram_init(void *addr) crc = crc32_le(~0, (u8 *)&nvram, check_len); if (crc != expected_crc) - return -EINVAL; - - return 0; + pr_warn("nvram checksum failed, contents may be invalid (expected %08x, got %08x)\n", + expected_crc, crc); } u8 *bcm63xx_nvram_get_name(void) diff --git a/arch/mips/bcm63xx/setup.c 
b/arch/mips/bcm63xx/setup.c index 314231be788c..35e18e98beb9 100644 --- a/arch/mips/bcm63xx/setup.c +++ b/arch/mips/bcm63xx/setup.c @@ -157,4 +157,4 @@ int __init bcm63xx_register_devices(void) return board_register_devices(); } -device_initcall(bcm63xx_register_devices); +arch_initcall(bcm63xx_register_devices); diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index c594a3d4f743..b0baa299f899 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -174,7 +174,10 @@ static int octeon_kexec_prepare(struct kimage *image) static void octeon_generic_shutdown(void) { - int cpu, i; + int i; +#ifdef CONFIG_SMP + int cpu; +#endif struct cvmx_bootmem_desc *bootmem_desc; void *named_block_array_ptr; diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h index 62d6a3b4d3b7..4e0b6bc1165e 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h @@ -9,10 +9,8 @@ * * Initialized the local nvram copy from the target address and checks * its checksum. - * - * Returns 0 on success. */ -int __init bcm63xx_nvram_init(void *nvram); +void bcm63xx_nvram_init(void *nvram); /** * bcm63xx_nvram_get_name() - returns the board name according to nvram diff --git a/arch/mips/include/asm/mach-sead3/cpu-feature-overrides.h b/arch/mips/include/asm/mach-sead3/cpu-feature-overrides.h index d9c828419037..193c0912d38e 100644 --- a/arch/mips/include/asm/mach-sead3/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-sead3/cpu-feature-overrides.h @@ -28,11 +28,7 @@ /* #define cpu_has_prefetch ? */ #define cpu_has_mcheck 1 /* #define cpu_has_ejtag ? */ -#ifdef CONFIG_CPU_HAS_LLSC #define cpu_has_llsc 1 -#else -#define cpu_has_llsc 0 -#endif /* #define cpu_has_vtag_icache ? */ /* #define cpu_has_dc_aliases ? */ /* #define cpu_has_ic_fills_f_dc ? 
*/ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 12b70c25906a..0da44d422f5b 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1166,7 +1166,10 @@ do { \ unsigned int __dspctl; \ \ __asm__ __volatile__( \ + " .set push \n" \ + " .set dsp \n" \ " rddsp %0, %x1 \n" \ + " .set pop \n" \ : "=r" (__dspctl) \ : "i" (mask)); \ __dspctl; \ @@ -1175,30 +1178,198 @@ do { \ #define wrdsp(val, mask) \ do { \ __asm__ __volatile__( \ + " .set push \n" \ + " .set dsp \n" \ " wrdsp %0, %x1 \n" \ + " .set pop \n" \ : \ : "r" (val), "i" (mask)); \ } while (0) -#define mflo0() ({ long mflo0; __asm__("mflo %0, $ac0" : "=r" (mflo0)); mflo0;}) -#define mflo1() ({ long mflo1; __asm__("mflo %0, $ac1" : "=r" (mflo1)); mflo1;}) -#define mflo2() ({ long mflo2; __asm__("mflo %0, $ac2" : "=r" (mflo2)); mflo2;}) -#define mflo3() ({ long mflo3; __asm__("mflo %0, $ac3" : "=r" (mflo3)); mflo3;}) - -#define mfhi0() ({ long mfhi0; __asm__("mfhi %0, $ac0" : "=r" (mfhi0)); mfhi0;}) -#define mfhi1() ({ long mfhi1; __asm__("mfhi %0, $ac1" : "=r" (mfhi1)); mfhi1;}) -#define mfhi2() ({ long mfhi2; __asm__("mfhi %0, $ac2" : "=r" (mfhi2)); mfhi2;}) -#define mfhi3() ({ long mfhi3; __asm__("mfhi %0, $ac3" : "=r" (mfhi3)); mfhi3;}) - -#define mtlo0(x) __asm__("mtlo %0, $ac0" ::"r" (x)) -#define mtlo1(x) __asm__("mtlo %0, $ac1" ::"r" (x)) -#define mtlo2(x) __asm__("mtlo %0, $ac2" ::"r" (x)) -#define mtlo3(x) __asm__("mtlo %0, $ac3" ::"r" (x)) - -#define mthi0(x) __asm__("mthi %0, $ac0" ::"r" (x)) -#define mthi1(x) __asm__("mthi %0, $ac1" ::"r" (x)) -#define mthi2(x) __asm__("mthi %0, $ac2" ::"r" (x)) -#define mthi3(x) __asm__("mthi %0, $ac3" ::"r" (x)) +#define mflo0() \ +({ \ + long mflo0; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mflo %0, $ac0 \n" \ + " .set pop \n" \ + : "=r" (mflo0)); \ + mflo0; \ +}) + +#define mflo1() \ +({ \ + long mflo1; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mflo %0, $ac1 
\n" \ + " .set pop \n" \ + : "=r" (mflo1)); \ + mflo1; \ +}) + +#define mflo2() \ +({ \ + long mflo2; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mflo %0, $ac2 \n" \ + " .set pop \n" \ + : "=r" (mflo2)); \ + mflo2; \ +}) + +#define mflo3() \ +({ \ + long mflo3; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mflo %0, $ac3 \n" \ + " .set pop \n" \ + : "=r" (mflo3)); \ + mflo3; \ +}) + +#define mfhi0() \ +({ \ + long mfhi0; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mfhi %0, $ac0 \n" \ + " .set pop \n" \ + : "=r" (mfhi0)); \ + mfhi0; \ +}) + +#define mfhi1() \ +({ \ + long mfhi1; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mfhi %0, $ac1 \n" \ + " .set pop \n" \ + : "=r" (mfhi1)); \ + mfhi1; \ +}) + +#define mfhi2() \ +({ \ + long mfhi2; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mfhi %0, $ac2 \n" \ + " .set pop \n" \ + : "=r" (mfhi2)); \ + mfhi2; \ +}) + +#define mfhi3() \ +({ \ + long mfhi3; \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mfhi %0, $ac3 \n" \ + " .set pop \n" \ + : "=r" (mfhi3)); \ + mfhi3; \ +}) + + +#define mtlo0(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mtlo %0, $ac0 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ +}) + +#define mtlo1(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mtlo %0, $ac1 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ +}) + +#define mtlo2(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mtlo %0, $ac2 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ +}) + +#define mtlo3(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mtlo %0, $ac3 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ +}) + +#define mthi0(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mthi %0, $ac0 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ +}) + +#define mthi1(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mthi %0, $ac1 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ 
+}) + +#define mthi2(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mthi %0, $ac2 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ +}) + +#define mthi3(x) \ +({ \ + __asm__( \ + " .set push \n" \ + " .set dsp \n" \ + " mthi %0, $ac3 \n" \ + " .set pop \n" \ + : \ + : "r" (x)); \ +}) #else diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h index 197f6367c201..8efe5a9e2c3e 100644 --- a/arch/mips/include/asm/signal.h +++ b/arch/mips/include/asm/signal.h @@ -21,6 +21,6 @@ #include <asm/sigcontext.h> #include <asm/siginfo.h> -#define __ARCH_HAS_ODD_SIGACTION +#define __ARCH_HAS_IRIX_SIGACTION #endif /* _ASM_SIGNAL_H */ diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h index d6b18b4d0f3a..addb9f556b71 100644 --- a/arch/mips/include/uapi/asm/signal.h +++ b/arch/mips/include/uapi/asm/signal.h @@ -72,6 +72,12 @@ typedef unsigned long old_sigset_t; /* at least 32 bits */ * * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single * Unix names RESETHAND and NODEFER respectively. + * + * SA_RESTORER used to be defined as 0x04000000 but only the O32 ABI ever + * supported its use and no libc was using it, so the entire sa-restorer + * functionality was removed with lmo commit 39bffc12c3580ab for 2.5.48 + * retaining only the SA_RESTORER definition as a reminder to avoid + * accidental reuse of the mask bit. 
*/ #define SA_ONSTACK 0x08000000 #define SA_RESETHAND 0x80000000 @@ -84,8 +90,6 @@ typedef unsigned long old_sigset_t; /* at least 32 bits */ #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -#define SA_RESTORER 0x04000000 /* Only for o32 */ - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 47132f44c955..3b211507be7f 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -90,4 +90,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index f81d98f6184c..de75fb50562b 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -100,29 +100,16 @@ obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_mipsxx.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o # -# DSP ASE supported for MIPS32 or MIPS64 Release 2 cores only. It is safe -# to enable DSP assembler support here even if the MIPS Release 2 CPU we -# are targetting does not support DSP because all code-paths making use of -# it properly check that the running CPU *actually does* support these -# instructions. +# DSP ASE supported for MIPS32 or MIPS64 Release 2 cores only. It is not +# safe to unconditionnaly use the assembler -mdsp / -mdspr2 switches +# here because the compiler may use DSP ASE instructions (such as lwx) in +# code paths where we cannot check that the CPU we are running on supports it. +# Proper abstraction using HAVE_AS_DSP and macros is done in +# arch/mips/include/asm/mipsregs.h. 
# ifeq ($(CONFIG_CPU_MIPSR2), y) CFLAGS_DSP = -DHAVE_AS_DSP -# -# Check if assembler supports DSP ASE -# -ifeq ($(call cc-option-yn,-mdsp), y) -CFLAGS_DSP += -mdsp -endif - -# -# Check if assembler supports DSP ASE Rev2 -# -ifeq ($(call cc-option-yn,-mdspr2), y) -CFLAGS_DSP += -mdspr2 -endif - CFLAGS_signal.o = $(CFLAGS_DSP) CFLAGS_signal32.o = $(CFLAGS_DSP) CFLAGS_process.o = $(CFLAGS_DSP) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 6bfccc227a95..5fe66a0c3224 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -580,6 +580,9 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->tlbsize = 48; break; case PRID_IMP_VR41XX: + set_isa(c, MIPS_CPU_ISA_III); + c->options = R4K_OPTS; + c->tlbsize = 32; switch (c->processor_id & 0xf0) { case PRID_REV_VR4111: c->cputype = CPU_VR4111; @@ -604,6 +607,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "NEC VR4131"; } else { c->cputype = CPU_VR4133; + c->options |= MIPS_CPU_LLSC; __cpu_name[cpu] = "NEC VR4133"; } break; @@ -613,9 +617,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "NEC Vr41xx"; break; } - set_isa(c, MIPS_CPU_ISA_III); - c->options = R4K_OPTS; - c->tlbsize = 32; break; case PRID_IMP_R4300: c->cputype = CPU_R4300; @@ -1226,10 +1227,8 @@ __cpuinit void cpu_probe(void) if (c->options & MIPS_CPU_FPU) { c->fpu_id = cpu_get_fpu_id(); - if (c->isa_level == MIPS_CPU_ISA_M32R1 || - c->isa_level == MIPS_CPU_ISA_M32R2 || - c->isa_level == MIPS_CPU_ISA_M64R1 || - c->isa_level == MIPS_CPU_ISA_M64R2) { + if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | + MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)) { if (c->fpu_id & MIPS_FPIR_3D) c->ases |= MIPS_ASE_MIPS3D; } diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 8eeee1c860c0..db9655f08892 100644 --- a/arch/mips/kernel/linux32.c +++ 
b/arch/mips/kernel/linux32.c @@ -171,7 +171,7 @@ SYSCALL_DEFINE6(32_ipc, u32, call, long, first, long, second, long, third, err = compat_sys_shmctl(first, second, compat_ptr(ptr)); break; default: - err = -EINVAL; + err = -ENOSYS; break; } diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index 165867673357..33d067148e61 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -46,10 +46,9 @@ PTR_L a5, PT_R9(sp) PTR_L a6, PT_R10(sp) PTR_L a7, PT_R11(sp) -#else - PTR_ADDIU sp, PT_SIZE #endif -.endm + PTR_ADDIU sp, PT_SIZE + .endm .macro RETURN_BACK jr ra @@ -68,7 +67,11 @@ NESTED(ftrace_caller, PT_SIZE, ra) .globl _mcount _mcount: b ftrace_stub - addiu sp,sp,8 +#ifdef CONFIG_32BIT + addiu sp,sp,8 +#else + nop +#endif /* When tracing is activated, it calls ftrace_caller+8 (aka here) */ lw t1, function_trace_stop diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 135c4aadccbe..7a54f74b7818 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -67,7 +67,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_mips_r) { seq_printf(m, "isa\t\t\t:"); if (cpu_has_mips_1) - seq_printf(m, "%s", "mips1"); + seq_printf(m, "%s", " mips1"); if (cpu_has_mips_2) seq_printf(m, "%s", " mips2"); if (cpu_has_mips_3) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index a200b5bdbb87..c3abb88170fc 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1571,7 +1571,7 @@ void __cpuinit per_cpu_trap_init(bool is_boot_cpu) #ifdef CONFIG_64BIT status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX; #endif - if (current_cpu_data.isa_level == MIPS_CPU_ISA_IV) + if (current_cpu_data.isa_level & MIPS_CPU_ISA_IV) status_set |= ST0_XX; if (cpu_has_dsp) status_set |= ST0_MX; diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c index 81f1dcfdcab8..a64daee740ee 100644 --- a/arch/mips/lib/bitops.c +++ b/arch/mips/lib/bitops.c @@ -90,12 +90,12 @@ int __mips_test_and_set_bit(unsigned long nr, 
unsigned bit = nr & SZLONG_MASK; unsigned long mask; unsigned long flags; - unsigned long res; + int res; a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); - res = (mask & *a); + res = (mask & *a) != 0; *a |= mask; raw_local_irq_restore(flags); return res; @@ -116,12 +116,12 @@ int __mips_test_and_set_bit_lock(unsigned long nr, unsigned bit = nr & SZLONG_MASK; unsigned long mask; unsigned long flags; - unsigned long res; + int res; a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); - res = (mask & *a); + res = (mask & *a) != 0; *a |= mask; raw_local_irq_restore(flags); return res; @@ -141,12 +141,12 @@ int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) unsigned bit = nr & SZLONG_MASK; unsigned long mask; unsigned long flags; - unsigned long res; + int res; a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); - res = (mask & *a); + res = (mask & *a) != 0; *a &= ~mask; raw_local_irq_restore(flags); return res; @@ -166,12 +166,12 @@ int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr) unsigned bit = nr & SZLONG_MASK; unsigned long mask; unsigned long flags; - unsigned long res; + int res; a += nr >> SZLONG_LOG; mask = 1UL << bit; raw_local_irq_save(flags); - res = (mask & *a); + res = (mask & *a) != 0; *a ^= mask; raw_local_irq_restore(flags); return res; diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 507147aebd41..a6adffbb4e5f 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -270,7 +270,7 @@ LEAF(csum_partial) #endif /* odd buffer alignment? 
*/ -#ifdef CPU_MIPSR2 +#ifdef CONFIG_CPU_MIPSR2 wsbh v1, sum movn sum, v1, t7 #else @@ -670,7 +670,7 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc) addu sum, v1 #endif -#ifdef CPU_MIPSR2 +#ifdef CONFIG_CPU_MIPSR2 wsbh v1, sum movn sum, v1, odd #else diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index ecca559b8d7b..2078915eacb9 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1247,10 +1247,8 @@ static void __cpuinit setup_scache(void) return; default: - if (c->isa_level == MIPS_CPU_ISA_M32R1 || - c->isa_level == MIPS_CPU_ISA_M32R2 || - c->isa_level == MIPS_CPU_ISA_M64R1 || - c->isa_level == MIPS_CPU_ISA_M64R2) { + if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | + MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)) { #ifdef CONFIG_MIPS_CPU_SCACHE if (mips_sc_init ()) { scache_size = c->scache.ways * c->scache.sets * c->scache.linesz; diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 93d937b4b1ba..df96da7e939b 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -98,10 +98,8 @@ static inline int __init mips_sc_probe(void) c->scache.flags |= MIPS_CACHE_NOT_PRESENT; /* Ignore anything but MIPSxx processors */ - if (c->isa_level != MIPS_CPU_ISA_M32R1 && - c->isa_level != MIPS_CPU_ISA_M32R2 && - c->isa_level != MIPS_CPU_ISA_M64R1 && - c->isa_level != MIPS_CPU_ISA_M64R2) + if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | + MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2))) return 0; /* Does this MIPS32/MIPS64 CPU have a config2 register? */ diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index 38a80c83fd67..d1faece21b6a 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -19,7 +19,7 @@ #include <asm/mach-au1x00/au1000.h> #include <asm/tlbmisc.h> -#ifdef CONFIG_DEBUG_PCI +#ifdef CONFIG_PCI_DEBUG #define DBG(x...) printk(KERN_DEBUG x) #else #define DBG(x...) 
do {} while (0) @@ -162,7 +162,7 @@ static int config_access(unsigned char access_type, struct pci_bus *bus, if (status & (1 << 29)) { *data = 0xffffffff; error = -1; - DBG("alchemy-pci: master abort on cfg access %d bus %d dev %d", + DBG("alchemy-pci: master abort on cfg access %d bus %d dev %d\n", access_type, bus->number, device); } else if ((status >> 28) & 0xf) { DBG("alchemy-pci: PCI ERR detected: dev %d, status %lx\n", diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index b06c7360b1c6..428da175d073 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -8,7 +8,7 @@ config MN10300 select HAVE_ARCH_KGDB select GENERIC_ATOMIC64 select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 5c7c7c988544..b4ce844c9391 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -72,4 +72,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 014a6482ed4c..9ab3bf2eca8d 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -9,10 +9,9 @@ config OPENRISC select OF_EARLY_FLATTREE select IRQ_DOMAIN select HAVE_MEMBLOCK - select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_REQUIRE_GPIOLIB select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_HARDIRQS - select HAVE_VIRT_TO_BUS select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a9ff712a2864..0339181bf3ac 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -21,7 +21,7 @@ config PARISC select GENERIC_STRNCPY_FROM_USER select SYSCTL_ARCH_UNALIGN_ALLOW select HAVE_MOD_ARCH_SPECIFIC - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select MODULES_USE_ELF_RELA select 
CLONE_BACKWARDS select TTY # Needed for pdc_cons.c diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 526e4b9aece0..70c512a386f7 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -71,6 +71,8 @@ #define SO_LOCK_FILTER 0x4025 +#define SO_SELECT_ERR_QUEUE 0x4026 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b89d7eb730a2..ea5bb045983a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -90,6 +90,7 @@ config GENERIC_GPIO config PPC bool default y + select BINFMT_ELF select OF select OF_EARLY_FLATTREE select HAVE_FTRACE_MCOUNT_RECORD @@ -98,7 +99,7 @@ config PPC select HAVE_FUNCTION_GRAPH_TRACER select SYSCTL_EXCEPTION_TRACE select ARCH_WANT_OPTIONAL_GPIOLIB - select HAVE_VIRT_TO_BUS if !PPC64 + select VIRT_TO_BUS if !PPC64 select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 2fdb47a19efd..b59e06f507ea 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -343,17 +343,16 @@ extern void slb_set_size(u16 size); /* * VSID allocation (256MB segment) * - * We first generate a 38-bit "proto-VSID". For kernel addresses this - * is equal to the ESID | 1 << 37, for user addresses it is: - * (context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1) + * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated + * from mmu context id and effective segment id of the address. 
* - * This splits the proto-VSID into the below range - * 0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range - * 2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range - * - * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1 - * That is, we assign half of the space to user processes and half - * to the kernel. + * For user processes max context id is limited to ((1ul << 19) - 5) + * for kernel space, we use the top 4 context ids to map address as below + * NOTE: each context only support 64TB now. + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] * * The proto-VSIDs are then scrambled into real VSIDs with the * multiplicative hash: @@ -363,41 +362,49 @@ extern void slb_set_size(u16 size); * VSID_MULTIPLIER is prime, so in particular it is * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. * Because the modulus is 2^n-1 we can compute it efficiently without - * a divide or extra multiply (see below). - * - * This scheme has several advantages over older methods: - * - * - We have VSIDs allocated for every kernel address - * (i.e. everything above 0xC000000000000000), except the very top - * segment, which simplifies several things. + * a divide or extra multiply (see below). The scramble function gives + * robust scattering in the hash table (at least based on some initial + * results). * - * - We allow for USER_ESID_BITS significant bits of ESID and - * CONTEXT_BITS bits of context for user addresses. - * i.e. 64T (46 bits) of address space for up to half a million contexts. + * We also consider VSID 0 special. We use VSID 0 for slb entries mapping + * bad address. This enables us to consolidate bad address handling in + * hash_page. 
* - * - The scramble function gives robust scattering in the hash - * table (at least based on some initial results). The previous - * method was more susceptible to pathological cases giving excessive - * hash collisions. + * We also need to avoid the last segment of the last context, because that + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 + * because of the modulo operation in vsid scramble. But the vmemmap + * (which is what uses region 0xf) will never be close to 64TB in size + * (it's 56 bytes per page of system memory). */ +#define CONTEXT_BITS 19 +#define ESID_BITS 18 +#define ESID_BITS_1T 6 + +/* + * 256MB segment + * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments + * available for user + kernel mapping. The top 4 contexts are used for + * kernel mapping. Each segment contains 2^28 bytes. Each + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts + * (19 == 37 + 28 - 46). + */ +#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) + /* * This should be computed such that protovosid * vsid_mulitplier * doesn't overflow 64 bits. 
It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M 38 +#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS) #define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1) #define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_1T 26 +#define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T) #define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1) -#define CONTEXT_BITS 19 -#define USER_ESID_BITS 18 -#define USER_ESID_BITS_1T 6 -#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT)) +#define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT)) /* * This macro generates asm code to compute the VSID scramble @@ -421,7 +428,8 @@ extern void slb_set_size(u16 size); srdi rx,rt,VSID_BITS_##size; \ clrldi rt,rt,(64-VSID_BITS_##size); \ add rt,rt,rx; /* add high and low bits */ \ - /* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ + /* NOTE: explanation based on VSID_BITS_##size = 36 \ + * Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ * 2^36-1+2^28-1. That in particular means that if r3 >= \ * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ * the bit clear, r3 already has the answer we want, if it \ @@ -513,34 +521,6 @@ typedef struct { }) #endif /* 1 */ -/* - * This is only valid for addresses >= PAGE_OFFSET - * The proto-VSID space is divided into two class - * User: 0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1 - * kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1 - * - * With KERNEL_START at 0xc000000000000000, the proto vsid for - * the kernel ends up with 0xc00000000 (36 bits). With 64TB - * support we need to have kernel proto-VSID in the - * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS. 
- */ -static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) -{ - unsigned long proto_vsid; - /* - * We need to make sure proto_vsid for the kernel is - * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T]) - */ - if (ssize == MMU_SEGSIZE_256M) { - proto_vsid = ea >> SID_SHIFT; - proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS)); - return vsid_scramble(proto_vsid, 256M); - } - proto_vsid = ea >> SID_SHIFT_1T; - proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T)); - return vsid_scramble(proto_vsid, 1T); -} - /* Returns the segment size indicator for a user address */ static inline int user_segment_size(unsigned long addr) { @@ -550,17 +530,41 @@ static inline int user_segment_size(unsigned long addr) return MMU_SEGSIZE_256M; } -/* This is only valid for user addresses (which are below 2^44) */ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, int ssize) { + /* + * Bad address. We return VSID 0 for that + */ + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + return 0; + if (ssize == MMU_SEGSIZE_256M) - return vsid_scramble((context << USER_ESID_BITS) + return vsid_scramble((context << ESID_BITS) | (ea >> SID_SHIFT), 256M); - return vsid_scramble((context << USER_ESID_BITS_1T) + return vsid_scramble((context << ESID_BITS_1T) | (ea >> SID_SHIFT_1T), 1T); } +/* + * This is only valid for addresses >= PAGE_OFFSET + * + * For kernel space, we use the top 4 context ids to map address as below + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] + */ +static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) +{ + unsigned long context; + + /* + * kernel take the top 4 context from the available range + */ + context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; + return get_vsid(context, ea, ssize); +} #endif /* __ASSEMBLY__ */ #endif /* 
_ASM_POWERPC_MMU_HASH64_H_ */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index a26dcaece509..a36daf3c6f9a 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 75a3d71b895d..19599ef352bc 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -275,7 +275,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 256c5bf0adb7..04d69c4a5ac2 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -304,7 +304,7 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything else left to do? */ - SET_DEFAULT_THREAD_PPR(r3, r9) /* Set thread.ppr = 3 */ + SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) beq .ret_from_except_lite @@ -657,7 +657,7 @@ resume_kernel: /* Clear _TIF_EMULATE_STACK_STORE flag */ lis r11,_TIF_EMULATE_STACK_STORE@h addi r5,r9,TI_FLAGS - ldarx r4,0,r5 +0: ldarx r4,0,r5 andc r4,r4,r11 stdcx. 
r4,0,r5 bne- 0b diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index f3eab8594d9f..d44a571e45a7 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -23,8 +23,10 @@ #include <asm/code-patching.h> #include <asm/machdep.h> +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) extern void epapr_ev_idle(void); extern u32 epapr_ev_idle_start[]; +#endif bool epapr_paravirt_enabled; @@ -47,11 +49,15 @@ static int __init epapr_paravirt_init(void) for (i = 0; i < (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) patch_instruction(epapr_ev_idle_start + i, insts[i]); +#endif } +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) if (of_get_property(hyper_node, "has-idle", NULL)) ppc_md.power_save = epapr_ev_idle; +#endif epapr_paravirt_enabled = true; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 87ef8f5ee5bc..56bd92362ce1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1066,78 +1066,6 @@ unrecov_user_slb: #endif /* __DISABLED__ */ -/* - * r13 points to the PACA, r9 contains the saved CR, - * r12 contain the saved SRR1, SRR0 is still ready for return - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(slb_miss_realmode) - mflr r10 -#ifdef CONFIG_RELOCATABLE - mtctr r11 -#endif - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate_realmode - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - - mtlr r10 - - andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- 2f - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - - RESTORE_PPR_PACA(PACA_EXSLB, r9) - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -2: mfspr r11,SPRN_SRR0 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10,unrecov_slb) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - rfid - b . - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - - -#ifdef CONFIG_PPC_970_NAP -power4_fixup_nap: - andc r9,r9,r10 - std r9,TI_LOCAL_FLAGS(r11) - ld r10,_LINK(r1) /* make idle task do the */ - std r10,_NIP(r1) /* equivalent of a blr */ - blr -#endif - .align 7 .globl alignment_common alignment_common: @@ -1336,6 +1264,78 @@ _GLOBAL(opal_mc_secondary_handler) /* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 +#ifdef CONFIG_RELOCATABLE + mtctr r11 +#endif + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + + mtlr r10 + + andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- 2f + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +2: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecov_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + + +#ifdef CONFIG_PPC_970_NAP +power4_fixup_nap: + andc r9,r9,r10 + std r9,TI_LOCAL_FLAGS(r11) + ld r10,_LINK(r1) /* make idle task do the */ + std r10,_NIP(r1) /* equivalent of a blr */ + blr +#endif + +/* * Hash table stuff */ .align 7 @@ -1452,20 +1452,36 @@ do_ste_alloc: _GLOBAL(do_stab_bolted) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + mfspr r11,SPRN_DAR /* ea */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + rldicr. r9,r11,4,(63 - 46 - 4) + li r9,0 /* VSID = 0 for bad address */ + bne- 0f + + /* + * Calculate VSID: + * This is the kernel vsid, we take the top for context from + * the range. 
context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * Here we know that (ea >> 60) == 0xc + */ + lis r9,(MAX_USER_CONTEXT + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT + 1)@l + + srdi r10,r11,SID_SHIFT + rldimi r10,r9,ESID_BITS,0 /* proto vsid */ + ASM_VSID_SCRAMBLE(r10, r9, 256M) + rldic r9,r10,12,16 /* r9 = vsid << 12 */ + +0: /* Hash to the primary group */ ld r10,PACASTABVIRT(r13) - mfspr r11,SPRN_DAR - srdi r11,r11,28 + srdi r11,r11,SID_SHIFT rldimi r10,r11,7,52 /* r10 = first ste of the group */ - /* Calculate VSID */ - /* This is a kernel address, so protovsid = ESID | 1 << 37 */ - li r9,0x1 - rldimi r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0 - ASM_VSID_SCRAMBLE(r11, r9, 256M) - rldic r9,r11,12,16 /* r9 = vsid << 12 */ - /* Search the primary group for a free entry */ 1: ld r11,0(r10) /* Test valid bit of the current ste */ andi. r11,r11,0x80 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 59dd545fdde1..16e77a81ab4f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -555,10 +555,12 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) new->thread.regs->msr |= (MSR_FP | new->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(&new->thread); new->thread.regs->msr |= MSR_VEC; } +#endif /* We may as well turn on VSX too since all the state is restored now */ if (msr & MSR_VSX) new->thread.regs->msr |= MSR_VSX; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 7f7fb7fd991b..13f8d168b3f1 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2832,11 +2832,13 @@ static void unreloc_toc(void) { } #else -static void __reloc_toc(void *tocstart, unsigned long offset, - unsigned long nr_entries) +static void __reloc_toc(unsigned long offset, unsigned long nr_entries) { unsigned long i; - unsigned long *toc_entry = (unsigned long *)tocstart; + unsigned long *toc_entry; + + /* Get the 
start of the TOC by using r2 directly. */ + asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry)); for (i = 0; i < nr_entries; i++) { *toc_entry = *toc_entry + offset; @@ -2850,8 +2852,7 @@ static void reloc_toc(void) unsigned long nr_entries = (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - /* Need to add offset to get at __prom_init_toc_start */ - __reloc_toc(__prom_init_toc_start + offset, offset, nr_entries); + __reloc_toc(offset, nr_entries); mb(); } @@ -2864,8 +2865,7 @@ static void unreloc_toc(void) mb(); - /* __prom_init_toc_start has been relocated, no need to add offset */ - __reloc_toc(__prom_init_toc_start, -offset, nr_entries); + __reloc_toc(-offset, nr_entries); } #endif #endif diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 245c1b6a0858..f9b30c68ba47 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1428,6 +1428,7 @@ static long ppc_set_hwdebug(struct task_struct *child, brk.address = bp_info->addr & ~7UL; brk.type = HW_BRK_TYPE_TRANSLATE; + brk.len = 8; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3acb28e245b4..95068bf569ad 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -866,10 +866,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, do_load_up_transact_fpu(&current->thread); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(&current->thread); regs->msr |= MSR_VEC; } +#endif return 0; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 995f8543cb57..c1794286098c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -522,10 +522,12 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, 
do_load_up_transact_fpu(&current->thread); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(&current->thread); regs->msr |= MSR_VEC; } +#endif return err; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 84dbace657ce..2da67e7a16d5 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -309,6 +309,7 @@ _GLOBAL(tm_recheckpoint) or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */ mtmsr r5 +#ifdef CONFIG_ALTIVEC /* FP and VEC registers: These are recheckpointed from thread.fpr[] * and thread.vr[] respectively. The thread.transact_fpr[] version * is more modern, and will be loaded subsequently by any FPUnavailable @@ -323,6 +324,7 @@ _GLOBAL(tm_recheckpoint) REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */ ld r5, THREAD_VRSAVE(r3) mtspr SPRN_VRSAVE, r5 +#endif dont_restore_vec: andi. r0, r4, MSR_FP diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index ead58e317294..5d7d29a313eb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -326,8 +326,8 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) vcpu3s->context_id[0] = err; vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) - << USER_ESID_BITS) - 1; - vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; + << ESID_BITS) - 1; + vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS; vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 41cefd43655f..33db48a8ce24 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -26,17 +26,20 @@ #define E500_PID_NUM 3 #define E500_TLB_NUM 2 -#define E500_TLB_VALID 1 -#define E500_TLB_BITMAP 2 +/* entry is mapped somewhere in host TLB */ +#define E500_TLB_VALID (1 << 0) +/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */ +#define E500_TLB_BITMAP (1 << 1) +/* TLB1 
entry is mapped by host TLB0 */ #define E500_TLB_TLB0 (1 << 2) struct tlbe_ref { - pfn_t pfn; - unsigned int flags; /* E500_TLB_* */ + pfn_t pfn; /* valid only for TLB0, except briefly */ + unsigned int flags; /* E500_TLB_* */ }; struct tlbe_priv { - struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ + struct tlbe_ref ref; }; #ifdef CONFIG_KVM_E500V2 @@ -63,17 +66,6 @@ struct kvmppc_vcpu_e500 { unsigned int gtlb_nv[E500_TLB_NUM]; - /* - * information associated with each host TLB entry -- - * TLB1 only for now. If/when guest TLB1 entries can be - * mapped with host TLB0, this will be used for that too. - * - * We don't want to use this for guest TLB0 because then we'd - * have the overhead of doing the translation again even if - * the entry is still in the guest TLB (e.g. we swapped out - * and back, and our host TLB entries got evicted). - */ - struct tlbe_ref *tlb_refs[E500_TLB_NUM]; unsigned int host_tlb1_nv; u32 svr; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index a222edfb9a9b..1c6a9d729df4 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -193,8 +193,11 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; /* Don't bother with unmapped entries */ - if (!(ref->flags & E500_TLB_VALID)) - return; + if (!(ref->flags & E500_TLB_VALID)) { + WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), + "%s: flags %x\n", __func__, ref->flags); + WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]); + } if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; @@ -248,7 +251,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, pfn_t pfn) { ref->pfn = pfn; - ref->flags = E500_TLB_VALID; + ref->flags |= E500_TLB_VALID; if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); @@ -257,6 +260,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, static 
inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { + /* FIXME: don't log bogus pfn for TLB1 */ trace_kvm_booke206_ref_release(ref->pfn, ref->flags); ref->flags = 0; } @@ -274,36 +278,23 @@ static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) { - int tlbsel = 0; - int i; - - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); - } -} - -static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int stlbsel = 1; + int tlbsel; int i; - kvmppc_e500_tlbil_all(vcpu_e500); - - for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->tlb_refs[stlbsel][i]; - kvmppc_e500_ref_release(ref); + for (tlbsel = 0; tlbsel <= 1; tlbsel++) { + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->gtlb_priv[tlbsel][i].ref; + kvmppc_e500_ref_release(ref); + } } - - clear_tlb_privs(vcpu_e500); } void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - clear_tlb_refs(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); + clear_tlb_privs(vcpu_e500); clear_tlb1_bitmap(vcpu_e500); } @@ -458,8 +449,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); } - /* Drop old ref and setup new one. 
*/ - kvmppc_e500_ref_release(ref); kvmppc_e500_ref_setup(ref, gtlbe, pfn); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, @@ -507,14 +496,15 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) vcpu_e500->host_tlb1_nv = 0; - vcpu_e500->tlb_refs[1][sesel] = *ref; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; if (vcpu_e500->h2g_tlb1_rmap[sesel]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1; vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; + vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; + WARN_ON(!(ref->flags & E500_TLB_VALID)); return sesel; } @@ -526,13 +516,12 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref ref; + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref; int sesel; int r; - ref.flags = 0; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - &ref); + ref); if (r) return r; @@ -544,7 +533,7 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, } /* Otherwise map into TLB1 */ - sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel); write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; @@ -565,7 +554,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, case 0: priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - /* Triggers after clear_tlb_refs or on initial mapping */ + /* Triggers after clear_tlb_privs or on initial mapping */ if (!(priv->ref.flags & E500_TLB_VALID)) { kvmppc_e500_tlb0_map(vcpu_e500, 
esel, &stlbe); } else { @@ -665,35 +654,16 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) host_tlb_params[0].entries / host_tlb_params[0].ways; host_tlb_params[1].sets = 1; - vcpu_e500->tlb_refs[0] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[0]) - goto err; - - vcpu_e500->tlb_refs[1] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[1]) - goto err; - vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * host_tlb_params[1].entries, GFP_KERNEL); if (!vcpu_e500->h2g_tlb1_rmap) - goto err; + return -EINVAL; return 0; - -err: - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); - return -EINVAL; } void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { kfree(vcpu_e500->h2g_tlb1_rmap); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); } diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 1f89d26e65fb..2f4baa074b2e 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -108,6 +108,8 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -136,8 +138,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GDEAR, vcpu->arch.shared->dar); mtspr(SPRN_GESR, vcpu->arch.shared->esr); - if (vcpu->arch.oldpir != mfspr(SPRN_PIR)) + if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || + __get_cpu_var(last_vcpu_on_cpu) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); + __get_cpu_var(last_vcpu_on_cpu) = vcpu; + } kvmppc_load_guest_fp(vcpu); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1b6e1271719f..f410c3e12c1e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -195,6 +195,11 @@ int 
htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); unsigned long tprot = prot; + /* + * If we hit a bad address return error. + */ + if (!vsid) + return -1; /* Make kernel text executable */ if (overlaps_kernel_text(vaddr, vaddr + step)) tprot &= ~HPTE_R_N; @@ -759,6 +764,8 @@ void __init early_init_mmu(void) /* Initialize stab / SLB management */ if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); + else + stab_initialize(get_paca()->stab_real); } #ifdef CONFIG_SMP @@ -922,11 +929,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) { - DBG_LOW(" out of pgtable range !\n"); - return 1; - } - /* Get region & vsid */ switch (REGION_ID(ea)) { case USER_REGION_ID: @@ -957,6 +959,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); + /* Bad address. 
*/ + if (!vsid) { + DBG_LOW("Bad address!\n"); + return 1; + } /* Get pgdir */ pgdir = mm->pgd; if (pgdir == NULL) @@ -1126,6 +1133,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Get VSID */ ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + if (!vsid) + return; /* Hash doesn't like irqs */ local_irq_save(flags); @@ -1233,6 +1242,9 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + /* Don't create HPTE entries for bad address */ + if (!vsid) + return; ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr), mode, HPTE_V_BOLTED, mmu_linear_psize, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 40bc5b0ace54..d1d1b92c5b99 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -29,15 +29,6 @@ static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); -/* - * 256MB segment - * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments - * available for user mappings. Each segment contains 2^28 bytes. Each - * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts - * (19 == 37 + 28 - 46). 
- */ -#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1) - int __init_new_context(void) { int index; @@ -56,7 +47,7 @@ again: else if (err) return err; - if (index > MAX_CONTEXT) { + if (index > MAX_USER_CONTEXT) { spin_lock(&mmu_context_lock); ida_remove(&mmu_context_ida, index); spin_unlock(&mmu_context_lock); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e212a271c7a4..654258f165ae 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -61,7 +61,7 @@ #endif #ifdef CONFIG_PPC_STD_MMU_64 -#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) #error TASK_SIZE_USER64 exceeds user VSID range #endif #endif diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 1a16ca227757..17aa6dfceb34 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -31,10 +31,15 @@ * No other registers are examined or changed. */ _GLOBAL(slb_allocate_realmode) - /* r3 = faulting address */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + rldicr. r9,r3,4,(63 - 46 - 4) + bne- 8f srdi r9,r3,60 /* get region */ - srdi r10,r3,28 /* get esid */ + srdi r10,r3,SID_SHIFT /* get esid */ cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ @@ -56,12 +61,14 @@ _GLOBAL(slb_allocate_realmode) */ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 - li r9,0x1 /* - * for 1T we shift 12 bits more. slb_finish_load_1T will do - * the necessary adjustment + * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * r9 = region id. 
*/ - rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 + addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l + + BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) @@ -91,24 +98,19 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) _GLOBAL(slb_miss_kernel_load_io) li r11,0 6: - li r9,0x1 /* - * for 1T we shift 12 bits more. slb_finish_load_1T will do - * the necessary adjustment + * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * r9 = region id. */ - rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 + addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l + BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T -0: /* user address: proto-VSID = context << 15 | ESID. First check - * if the address is within the boundaries of the user region - */ - srdi. r9,r10,USER_ESID_BITS - bne- 8f /* invalid ea bits set */ - - +0: /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs * array. 
@@ -164,15 +166,13 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) ld r9,PACACONTEXTID(r13) BEGIN_FTR_SECTION cmpldi r10,0x1000 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - rldimi r10,r9,USER_ESID_BITS,0 -BEGIN_FTR_SECTION bge slb_finish_load_1T END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) b slb_finish_load 8: /* invalid EA */ li r10,0 /* BAD_VSID */ + li r9,0 /* BAD_VSID */ li r11,SLB_VSID_USER /* flags don't much matter */ b slb_finish_load @@ -221,8 +221,6 @@ _GLOBAL(slb_allocate_user) /* get context to calculate proto-VSID */ ld r9,PACACONTEXTID(r13) - rldimi r10,r9,USER_ESID_BITS,0 - /* fall through slb_finish_load */ #endif /* __DISABLED__ */ @@ -231,9 +229,10 @@ _GLOBAL(slb_allocate_user) /* * Finish loading of an SLB entry and return * - * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET + * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET */ slb_finish_load: + rldimi r10,r9,ESID_BITS,0 ASM_VSID_SCRAMBLE(r10,r9,256M) /* * bits above VSID_BITS_256M need to be ignored from r10 @@ -298,10 +297,11 @@ _GLOBAL(slb_compare_rr_to_size) /* * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. 
* - * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9 + * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 */ slb_finish_load_1T: - srdi r10,r10,40-28 /* get 1T ESID */ + srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ + rldimi r10,r9,ESID_BITS_1T,0 ASM_VSID_SCRAMBLE(r10,r9,1T) /* * bits above VSID_BITS_1T need to be ignored from r10 diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 0d82ef50dc3f..023ec8a13f38 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -82,11 +82,11 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, if (!is_kernel_addr(addr)) { ssize = user_segment_size(addr); vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); } else { vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } + WARN_ON(vsid == 0); vpn = hpt_vpn(addr, vsid, ssize); rpte = __real_pte(__pte(pte), ptep); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index e834f1ec23c8..c427ae36374a 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -671,16 +671,12 @@ void bpf_jit_compile(struct sk_filter *fp) } if (bpf_jit_enable > 1) - pr_info("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + /* Note that we output the base address of the code_base + * rather than image, since opcodes are in code_base. 
+ */ + bpf_jit_dump(flen, proglen, pass, code_base); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", - DUMP_PREFIX_ADDRESS, - 16, 1, code_base, - proglen, false); - bpf_flush_icache(code_base, code_base + (proglen/4)); /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index b554879bd31e..3c475d6267c7 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -420,7 +420,20 @@ static struct attribute_group power7_pmu_events_group = { .attrs = power7_events_attr, }; +PMU_FORMAT_ATTR(event, "config:0-19"); + +static struct attribute *power7_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +struct attribute_group power7_pmu_format_group = { + .name = "format", + .attrs = power7_pmu_format_attr, +}; + static const struct attribute_group *power7_pmu_attr_groups[] = { + &power7_pmu_format_group, &power7_pmu_events_group, NULL, }; diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 611e92f291c4..7179726ba5c5 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -69,7 +69,7 @@ static irqreturn_t gpio_halt_irq(int irq, void *__data) return IRQ_HANDLED; }; -static int __devinit gpio_halt_probe(struct platform_device *pdev) +static int gpio_halt_probe(struct platform_device *pdev) { enum of_gpio_flags flags; struct device_node *node = pdev->dev.of_node; @@ -128,7 +128,7 @@ static int __devinit gpio_halt_probe(struct platform_device *pdev) return 0; } -static int __devexit gpio_halt_remove(struct platform_device *pdev) +static int gpio_halt_remove(struct platform_device *pdev) { if (halt_node) { int gpio = of_get_gpio(halt_node, 0); @@ -165,7 +165,7 @@ static struct platform_driver gpio_halt_driver = { .of_match_table = gpio_halt_match, }, .probe = gpio_halt_probe, - .remove = 
__devexit_p(gpio_halt_remove), + .remove = gpio_halt_remove, }; module_platform_driver(gpio_halt_driver); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index cea2f09c4241..18e3b76c78d7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -124,9 +124,8 @@ config 6xx select PPC_HAVE_PMU_SUPPORT config POWER3 - bool depends on PPC64 && PPC_BOOK3S - default y if !POWER4_ONLY + def_bool y config POWER4 depends on PPC64 && PPC_BOOK3S @@ -145,8 +144,7 @@ config TUNE_CELL but somewhat slower on other machines. This option only changes the scheduling of instructions, not the selection of instructions itself, so the resulting kernel will keep running on all other - machines. When building a kernel that is supposed to run only - on Cell, you should also select the POWER4_ONLY option. + machines. # this is temp to handle compat with arch=ppc config 8xx diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 863184b182f4..3f3bb4cdbbec 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -749,6 +749,7 @@ static struct file_system_type spufs_type = { .mount = spufs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("spufs"); static int __init spufs_init(void) { diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c index 039fc8e82199..2b4dc6abde6c 100644 --- a/arch/powerpc/platforms/chrp/pegasos_eth.c +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -47,6 +47,25 @@ static struct platform_device mv643xx_eth_shared_device = { .resource = mv643xx_eth_shared_resources, }; +/* + * The orion mdio driver only covers shared + 0x4 up to shared + 0x84 - 1 + */ +static struct resource mv643xx_eth_mvmdio_resources[] = { + [0] = { + .name = "ethernet mdio base", + .start = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x4, + .end = 0xf1000000 + 
MV643XX_ETH_SHARED_REGS + 0x83, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device mv643xx_eth_mvmdio_device = { + .name = "orion-mdio", + .id = -1, + .num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources), + .resource = mv643xx_eth_shared_resources, +}; + static struct resource mv643xx_eth_port1_resources[] = { [0] = { .name = "eth port1 irq", @@ -82,6 +101,7 @@ static struct platform_device eth_port1_device = { static struct platform_device *mv643xx_eth_pd_devs[] __initdata = { &mv643xx_eth_shared_device, + &mv643xx_eth_mvmdio_device, &eth_port1_device, }; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0da39fed355a..299731e9036b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -186,7 +186,13 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) (0x1UL << 4), &dummy1, &dummy2); if (lpar_rc == H_SUCCESS) return i; - BUG_ON(lpar_rc != H_NOT_FOUND); + + /* + * The test for adjunct partition is performed before the + * ANDCOND test. H_RESOURCE may be returned, so we need to + * check for that as well. 
+ */ + BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); slot_offset++; slot_offset &= 0x7; diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index 0f6af41ebb44..4a25c26f0bf4 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -214,15 +214,27 @@ static struct platform_device * __init mv64x60_eth_register_shared_pdev( struct device_node *np, int id) { struct platform_device *pdev; - struct resource r[1]; + struct resource r[2]; int err; err = of_address_to_resource(np, 0, &r[0]); if (err) return ERR_PTR(err); + /* register an orion mdio bus driver */ + r[1].start = r[0].start + 0x4; + r[1].end = r[0].start + 0x84 - 1; + r[1].flags = IORESOURCE_MEM; + + if (id == 0) { + pdev = platform_device_register_simple("orion-mdio", -1, &r[1], 1); + if (!pdev) + return pdev; + } + pdev = platform_device_register_simple(MV643XX_ETH_SHARED_NAME, id, - r, 1); + &r[0], 1); + return pdev; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4b505370a1d5..eb8fb629f00b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -134,7 +134,7 @@ config S390 select HAVE_SYSCALL_WRAPPERS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select INIT_ALL_POSSIBLE select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 8538015ed4a0..5f7d7ba2874c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,6 +456,7 @@ static struct file_system_type hypfs_type = { .mount = hypfs_mount, .kill_sb = hypfs_kill_super }; +MODULE_ALIAS_FS("s390_hypfs"); static const struct super_operations hypfs_s_ops = { .statfs = simple_statfs, diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index f1eddd150dd7..c879fad404c8 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -12,6 +12,7 @@ #ifndef _ASM_S390_CPU_MF_H #define 
_ASM_S390_CPU_MF_H +#include <linux/errno.h> #include <asm/facility.h> #define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index 8d4847191ecc..dc9200ca32ed 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -34,6 +34,8 @@ struct arsb { u32 reserved[4]; } __packed; +#define EQC_WR_PROHIBIT 22 + struct msb { u8 fmt:4; u8 oc:4; @@ -96,11 +98,13 @@ struct scm_device { #define OP_STATE_TEMP_ERR 2 #define OP_STATE_PERM_ERR 3 +enum scm_event {SCM_CHANGE, SCM_AVAIL}; + struct scm_driver { struct device_driver drv; int (*probe) (struct scm_device *scmdev); int (*remove) (struct scm_device *scmdev); - void (*notify) (struct scm_device *scmdev); + void (*notify) (struct scm_device *scmdev, enum scm_event event); void (*handler) (struct scm_device *scmdev, void *data, int error); }; diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 27cb32185ce1..379d96e2105e 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -50,10 +50,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr); #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache -/* TODO: s390 cannot support io_remap_pfn_range... */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { return (void __iomem *) offset; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a2930844d43..3cb47cf02530 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -57,6 +57,10 @@ extern unsigned long zero_page_mask; (((unsigned long)(vaddr)) &zero_page_mask)))) #define __HAVE_COLOR_ZERO_PAGE +/* TODO: s390 cannot support io_remap_pfn_range... 
*/ +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + #endif /* !__ASSEMBLY__ */ /* @@ -344,6 +348,7 @@ extern unsigned long MODULES_END; #define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ #define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ @@ -1531,7 +1536,8 @@ extern int s390_enable_sie(void); /* * No page table caches to initialise */ -#define pgtable_cache_init() do { } while (0) +static inline void pgtable_cache_init(void) { } +static inline void check_pgt_cache(void) { } #include <asm-generic/pgtable.h> diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 1d8fe2b17ef6..6b32af30878c 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -74,8 +74,6 @@ static inline void __tlb_flush_idte(unsigned long asce) static inline void __tlb_flush_mm(struct mm_struct * mm) { - if (unlikely(cpumask_empty(mm_cpumask(mm)))) - return; /* * If the machine has IDTE we prefer to do a per mm flush * on all cpus instead of doing a local flush if the mm diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index f99eea7fff0f..2dacb306835c 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -78,4 +78,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 550228523267..94feff7d6132 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -636,7 +636,8 @@ ENTRY(mcck_int_handler) UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER mcck_skip: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT - mvc 
__PT_R0(64,%r11),__LC_GPREGS_SAVE_AREA + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32 stm %r8,%r9,__PT_PSW(%r11) xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) l %r1,BASED(.Ldo_machine_check) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9c837c101297..2e6d60c55f90 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -678,8 +678,9 @@ ENTRY(mcck_int_handler) UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER LAST_BREAK %r14 mcck_skip: - lghi %r14,__LC_GPREGS_SAVE_AREA - mvc __PT_R0(128,%r11),0(%r14) + lghi %r14,__LC_GPREGS_SAVE_AREA+64 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a5360de85ec7..29268859d8ee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -571,6 +571,8 @@ static void __init setup_memory_end(void) /* Split remaining virtual space between 1:1 mapping & vmemmap array */ tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + tmp = SECTION_ALIGN_UP(tmp); tmp = VMALLOC_START - tmp * sizeof(struct page); tmp &= ~((vmax >> 11) - 1); /* align to page table level */ tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index dff631d34b45..466fb3383960 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -77,42 +77,69 @@ static size_t copy_in_kernel(size_t count, void __user *to, * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address * contains the (negative) exception code. 
*/ -static __always_inline unsigned long follow_table(struct mm_struct *mm, - unsigned long addr, int write) +#ifdef CONFIG_64BIT +static unsigned long follow_table(struct mm_struct *mm, + unsigned long address, int write) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep; + unsigned long *table = (unsigned long *)__pa(mm->pgd); + + switch (mm->context.asce_bits & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + table = table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -0x39UL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + case _ASCE_TYPE_REGION2: + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -0x3aUL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + case _ASCE_TYPE_REGION3: + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -0x3bUL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + case _ASCE_TYPE_SEGMENT: + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) + return -0x10UL; + if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) { + if (write && (*table & _SEGMENT_ENTRY_RO)) + return -0x04UL; + return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) + + (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE); + } + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + } + table = table + ((address >> 12) & 0xff); + if (unlikely(*table & _PAGE_INVALID)) + return -0x11UL; + if (write && (*table & _PAGE_RO)) + return -0x04UL; + return (*table & PAGE_MASK) + (address & ~PAGE_MASK); +} - pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return -0x3aUL; +#else /* CONFIG_64BIT */ - pud = pud_offset(pgd, addr); - if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return -0x3bUL; +static unsigned long follow_table(struct mm_struct *mm, + unsigned long address, int write) +{ + unsigned long *table = (unsigned long *)__pa(mm->pgd); - pmd = pmd_offset(pud, 
addr); - if (pmd_none(*pmd)) + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) return -0x10UL; - if (pmd_large(*pmd)) { - if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO)) - return -0x04UL; - return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK); - } - if (unlikely(pmd_bad(*pmd))) - return -0x10UL; - - ptep = pte_offset_map(pmd, addr); - if (!pte_present(*ptep)) + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (!pte_write(*ptep) || !pte_dirty(*ptep))) + if (write && (*table & _PAGE_RO)) return -0x04UL; - - return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); + return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } +#endif /* CONFIG_64BIT */ + static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, size_t n, int write_user) { @@ -197,7 +224,7 @@ size_t copy_to_user_pt(size_t n, void __user *to, const void *from) static size_t clear_user_pt(size_t n, void __user *to) { - void *zpage = &empty_zero_page; + void *zpage = (void *) empty_zero_page; long done, size, ret; done = 0; diff --git a/arch/score/Kconfig b/arch/score/Kconfig index e569aa1fd2ba..c8def8bc9020 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -12,7 +12,7 @@ config SCORE select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select MODULES_USE_ELF_REL select CLONE_BACKWARDS diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 289127d5241c..3d361f236308 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -84,12 +84,6 @@ config ARCH_DEFCONFIG default "arch/sparc/configs/sparc32_defconfig" if SPARC32 default "arch/sparc/configs/sparc64_defconfig" if SPARC64 -# CONFIG_BITS can be used at source level to get 32/64 bits -config BITS - int - default 32 if SPARC32 - default 64 if SPARC64 - config IOMMU_HELPER bool 
default y if SPARC64 @@ -197,7 +191,7 @@ config RWSEM_XCHGADD_ALGORITHM config GENERIC_HWEIGHT bool - default y if !ULTRA_HAS_POPULATION_COUNT + default y config GENERIC_CALIBRATE_DELAY bool diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index e26d430ce2fd..ff18e3cfb6b1 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -2,11 +2,16 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += div64.h +generic-y += emergency-restart.h generic-y += exec.h generic-y += local64.h +generic-y += mutex.h generic-y += irq_regs.h generic-y += local.h generic-y += module.h +generic-y += serial.h generic-y += trace_clock.h +generic-y += types.h generic-y += word-at-a-time.h diff --git a/arch/sparc/include/asm/cputime.h b/arch/sparc/include/asm/cputime.h deleted file mode 100644 index 1a642b81e019..000000000000 --- a/arch/sparc/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_CPUTIME_H -#define __SPARC_CPUTIME_H - -#include <asm-generic/cputime.h> - -#endif /* __SPARC_CPUTIME_H */ diff --git a/arch/sparc/include/asm/emergency-restart.h b/arch/sparc/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/sparc/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/sparc/include/asm/mutex.h b/arch/sparc/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/sparc/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. 
(see asm-generic/mutex-xchg.h for details) - */ - -#include <asm-generic/mutex-dec.h> diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 08fcce90316b..7619f2f792af 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -915,6 +915,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot); } +#include <asm/tlbflush.h> #include <asm-generic/pgtable.h> /* We provide our own get_unmapped_area to cope with VA holes and diff --git a/arch/sparc/include/asm/serial.h b/arch/sparc/include/asm/serial.h deleted file mode 100644 index f90d61c28059..000000000000 --- a/arch/sparc/include/asm/serial.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_SERIAL_H -#define __SPARC_SERIAL_H - -#define BASE_BAUD ( 1843200 / 16 ) - -#endif /* __SPARC_SERIAL_H */ diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index b73da3c5f10a..3c8917f054de 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -36,7 +36,6 @@ typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); void cpu_panic(void); -extern void smp4m_irq_rotate(int cpu); /* * General functions that each host system must provide. 
@@ -46,7 +45,6 @@ void sun4m_init_smp(void); void sun4d_init_smp(void); void smp_callin(void); -void smp_boot_cpus(void); void smp_store_cpu_info(int); void smp_resched_interrupt(void); @@ -107,9 +105,6 @@ extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) -#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier -#define prof_counter(__cpu) cpu_data(__cpu).counter - void smp_setup_cpu_possible_map(void); #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index d06a26601753..6b67e50fb9b4 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -45,6 +45,7 @@ #define SUN4V_CHIP_NIAGARA3 0x03 #define SUN4V_CHIP_NIAGARA4 0x04 #define SUN4V_CHIP_NIAGARA5 0x05 +#define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_UNKNOWN 0xff #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h index cad36f56fa03..c7de3323819c 100644 --- a/arch/sparc/include/asm/switch_to_64.h +++ b/arch/sparc/include/asm/switch_to_64.h @@ -18,8 +18,7 @@ do { \ * and 2 stores in this critical code path. -DaveM */ #define switch_to(prev, next, last) \ -do { flush_tlb_pending(); \ - save_and_clear_fpu(); \ +do { save_and_clear_fpu(); \ /* If you are tempted to conditionalize the following */ \ /* so that ASI is only written if it changes, think again. 
*/ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index 2ef463494153..f0d6a9700f4c 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ b/arch/sparc/include/asm/tlbflush_64.h @@ -11,24 +11,40 @@ struct tlb_batch { struct mm_struct *mm; unsigned long tlb_nr; + unsigned long active; unsigned long vaddrs[TLB_BATCH_NR]; }; extern void flush_tsb_kernel_range(unsigned long start, unsigned long end); extern void flush_tsb_user(struct tlb_batch *tb); +extern void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); /* TLB flush operations. */ -extern void flush_tlb_pending(void); +static inline void flush_tlb_mm(struct mm_struct *mm) +{ +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define flush_tlb_range(vma,start,end) \ - do { (void)(start); flush_tlb_pending(); } while (0) -#define flush_tlb_page(vma,addr) flush_tlb_pending() -#define flush_tlb_mm(mm) flush_tlb_pending() +extern void flush_tlb_pending(void); +extern void arch_enter_lazy_mmu_mode(void); +extern void arch_leave_lazy_mmu_mode(void); +#define arch_flush_lazy_mmu_mode() do {} while (0) /* Local cpu only. 
*/ extern void __flush_tlb_all(void); - +extern void __flush_tlb_page(unsigned long context, unsigned long vaddr); extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP @@ -38,15 +54,24 @@ do { flush_tsb_kernel_range(start,end); \ __flush_tlb_kernel_range(start,end); \ } while (0) +static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + __flush_tlb_page(CTX_HWBITS(mm->context), vaddr); +} + #else /* CONFIG_SMP */ extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr); #define flush_tlb_kernel_range(start, end) \ do { flush_tsb_kernel_range(start,end); \ smp_flush_tlb_kernel_range(start, end); \ } while (0) +#define global_flush_tlb_page(mm, vaddr) \ + smp_flush_tlb_page(mm, vaddr) + #endif /* ! CONFIG_SMP */ #endif /* _SPARC64_TLBFLUSH_H */ diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild index ce175aff71b7..b5843ee09fb5 100644 --- a/arch/sparc/include/uapi/asm/Kbuild +++ b/arch/sparc/include/uapi/asm/Kbuild @@ -44,7 +44,6 @@ header-y += swab.h header-y += termbits.h header-y += termios.h header-y += traps.h -header-y += types.h header-y += uctx.h header-y += unistd.h header-y += utrap.h diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index cbbad74b2e06..89f49b68a21c 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -68,6 +68,8 @@ #define SO_LOCK_FILTER 0x0028 +#define SO_SELECT_ERR_QUEUE 0x0029 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/sparc/include/uapi/asm/types.h b/arch/sparc/include/uapi/asm/types.h deleted file mode 100644 index 383d156cde9c..000000000000 --- a/arch/sparc/include/uapi/asm/types.h +++ /dev/null @@ -1,17 +0,0 @@ 
-#ifndef _SPARC_TYPES_H -#define _SPARC_TYPES_H -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - */ - -#if defined(__sparc__) - -#include <asm-generic/int-ll64.h> - -#endif /* defined(__sparc__) */ - -#endif /* defined(_SPARC_TYPES_H) */ diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index a6c94a2bf9d4..5c5125895db8 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -493,6 +493,12 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara5"; break; + case SUN4V_CHIP_SPARC64X: + sparc_cpu_type = "SPARC64-X"; + sparc_fpu_type = "SPARC64-X integrated FPU"; + sparc_pmu_type = "sparc64-x"; + break; + default: printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", prom_cpu_compatible); diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 2feb15c35d9e..26b706a1867d 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -134,6 +134,8 @@ prom_niagara_prefix: .asciz "SUNW,UltraSPARC-T" prom_sparc_prefix: .asciz "SPARC-" +prom_sparc64x_prefix: + .asciz "SPARC64-X" .align 4 prom_root_compatible: .skip 64 @@ -412,7 +414,7 @@ sun4v_chip_type: cmp %g2, 'T' be,pt %xcc, 70f cmp %g2, 'M' - bne,pn %xcc, 4f + bne,pn %xcc, 49f nop 70: ldub [%g1 + 7], %g2 @@ -425,7 +427,7 @@ sun4v_chip_type: cmp %g2, '5' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 - ba,pt %xcc, 4f + ba,pt %xcc, 49f nop 91: sethi %hi(prom_cpu_compatible), %g1 @@ -439,6 +441,25 @@ sun4v_chip_type: mov SUN4V_CHIP_NIAGARA2, %g4 4: + /* Athena */ + sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 + sethi %hi(prom_sparc64x_prefix), %g7 + or %g7, %lo(prom_sparc64x_prefix), %g7 + mov 9, %g3 +41: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp 
%g2, %g4 + bne,pn %icc, 49f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 41b + add %g1, 1, %g1 + mov SUN4V_CHIP_SPARC64X, %g4 + ba,pt %xcc, 5f + nop + +49: mov SUN4V_CHIP_UNKNOWN, %g4 5: sethi %hi(sun4v_chip_type), %g2 or %g2, %lo(sun4v_chip_type), %g2 diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index fc4320886a3a..4d1487138d26 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -186,6 +186,8 @@ struct grpci2_cap_first { #define CAP9_IOMAP_OFS 0x20 #define CAP9_BARSIZE_OFS 0x24 +#define TGT 256 + struct grpci2_priv { struct leon_pci_info info; /* must be on top of this structure */ struct grpci2_regs *regs; @@ -237,8 +239,12 @@ static int grpci2_cfg_r32(struct grpci2_priv *priv, unsigned int bus, if (where & 0x3) return -EINVAL; - if (bus == 0 && PCI_SLOT(devfn) != 0) - devfn += (0x8 * 6); + if (bus == 0) { + devfn += (0x8 * 6); /* start at AD16=Device0 */ + } else if (bus == TGT) { + bus = 0; + devfn = 0; /* special case: bridge controller itself */ + } /* Select bus */ spin_lock_irqsave(&grpci2_dev_lock, flags); @@ -303,8 +309,12 @@ static int grpci2_cfg_w32(struct grpci2_priv *priv, unsigned int bus, if (where & 0x3) return -EINVAL; - if (bus == 0 && PCI_SLOT(devfn) != 0) - devfn += (0x8 * 6); + if (bus == 0) { + devfn += (0x8 * 6); /* start at AD16=Device0 */ + } else if (bus == TGT) { + bus = 0; + devfn = 0; /* special case: bridge controller itself */ + } /* Select bus */ spin_lock_irqsave(&grpci2_dev_lock, flags); @@ -368,7 +378,7 @@ static int grpci2_read_config(struct pci_bus *bus, unsigned int devfn, unsigned int busno = bus->number; int ret; - if (PCI_SLOT(devfn) > 15 || (PCI_SLOT(devfn) == 0 && busno == 0)) { + if (PCI_SLOT(devfn) > 15 || busno > 255) { *val = ~0; return 0; } @@ -406,7 +416,7 @@ static int grpci2_write_config(struct pci_bus *bus, unsigned int devfn, struct grpci2_priv *priv = grpci2priv; unsigned int busno = bus->number; - if (PCI_SLOT(devfn) > 
15 || (PCI_SLOT(devfn) == 0 && busno == 0)) + if (PCI_SLOT(devfn) > 15 || busno > 255) return 0; #ifdef GRPCI2_DEBUG_CFGACCESS @@ -578,15 +588,15 @@ void grpci2_hw_init(struct grpci2_priv *priv) REGSTORE(regs->ahbmst_map[i], priv->pci_area); /* Get the GRPCI2 Host PCI ID */ - grpci2_cfg_r32(priv, 0, 0, PCI_VENDOR_ID, &priv->pciid); + grpci2_cfg_r32(priv, TGT, 0, PCI_VENDOR_ID, &priv->pciid); /* Get address to first (always defined) capability structure */ - grpci2_cfg_r8(priv, 0, 0, PCI_CAPABILITY_LIST, &capptr); + grpci2_cfg_r8(priv, TGT, 0, PCI_CAPABILITY_LIST, &capptr); /* Enable/Disable Byte twisting */ - grpci2_cfg_r32(priv, 0, 0, capptr+CAP9_IOMAP_OFS, &io_map); + grpci2_cfg_r32(priv, TGT, 0, capptr+CAP9_IOMAP_OFS, &io_map); io_map = (io_map & ~0x1) | (priv->bt_enabled ? 1 : 0); - grpci2_cfg_w32(priv, 0, 0, capptr+CAP9_IOMAP_OFS, io_map); + grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_IOMAP_OFS, io_map); /* Setup the Host's PCI Target BARs for other peripherals to access, * and do DMA to the host's memory. The target BARs can be sized and @@ -617,17 +627,18 @@ void grpci2_hw_init(struct grpci2_priv *priv) pciadr = 0; } } - grpci2_cfg_w32(priv, 0, 0, capptr+CAP9_BARSIZE_OFS+i*4, bar_sz); - grpci2_cfg_w32(priv, 0, 0, PCI_BASE_ADDRESS_0+i*4, pciadr); - grpci2_cfg_w32(priv, 0, 0, capptr+CAP9_BAR_OFS+i*4, ahbadr); + grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_BARSIZE_OFS+i*4, + bar_sz); + grpci2_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_0+i*4, pciadr); + grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_BAR_OFS+i*4, ahbadr); printk(KERN_INFO " TGT BAR[%d]: 0x%08x (PCI)-> 0x%08x\n", i, pciadr, ahbadr); } /* set as bus master and enable pci memory responses */ - grpci2_cfg_r32(priv, 0, 0, PCI_COMMAND, &data); + grpci2_cfg_r32(priv, TGT, 0, PCI_COMMAND, &data); data |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); - grpci2_cfg_w32(priv, 0, 0, PCI_COMMAND, data); + grpci2_cfg_w32(priv, TGT, 0, PCI_COMMAND, data); /* Enable Error respone (CPU-TRAP) on illegal memory access. 
*/ REGSTORE(regs->ctrl, CTRL_ER | CTRL_PE); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 537eb66abd06..ca64d2a86ec0 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -849,7 +849,7 @@ void smp_tsb_sync(struct mm_struct *mm) } extern unsigned long xcall_flush_tlb_mm; -extern unsigned long xcall_flush_tlb_pending; +extern unsigned long xcall_flush_tlb_page; extern unsigned long xcall_flush_tlb_kernel_range; extern unsigned long xcall_fetch_glob_regs; extern unsigned long xcall_fetch_glob_pmu; @@ -1074,23 +1074,56 @@ local_flush_and_out: put_cpu(); } +struct tlb_pending_info { + unsigned long ctx; + unsigned long nr; + unsigned long *vaddrs; +}; + +static void tlb_pending_func(void *info) +{ + struct tlb_pending_info *t = info; + + __flush_tlb_pending(t->ctx, t->nr, t->vaddrs); +} + void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs) { u32 ctx = CTX_HWBITS(mm->context); + struct tlb_pending_info info; int cpu = get_cpu(); + info.ctx = ctx; + info.nr = nr; + info.vaddrs = vaddrs; + if (mm == current->mm && atomic_read(&mm->mm_users) == 1) cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); else - smp_cross_call_masked(&xcall_flush_tlb_pending, - ctx, nr, (unsigned long) vaddrs, - mm_cpumask(mm)); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); put_cpu(); } +void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long context = CTX_HWBITS(mm->context); + int cpu = get_cpu(); + + if (mm == current->mm && atomic_read(&mm->mm_users) == 1) + cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); + else + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); + + put_cpu(); +} + void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end) { start &= PAGE_MASK; diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c index 
48d00e72ce15..8ec4e9c0251a 100644 --- a/arch/sparc/lib/bitext.c +++ b/arch/sparc/lib/bitext.c @@ -119,11 +119,7 @@ void bit_map_clear(struct bit_map *t, int offset, int len) void bit_map_init(struct bit_map *t, unsigned long *map, int size) { - - if ((size & 07) != 0) - BUG(); - memset(map, 0, size>>3); - + bitmap_zero(map, size); memset(t, 0, sizeof *t); spin_lock_init(&t->lock); t->map = map; diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 0f4f7191fbba..28f96f27c768 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -34,7 +34,7 @@ #define IOMMU_RNGE IOMMU_RNGE_256MB #define IOMMU_START 0xF0000000 #define IOMMU_WINSIZE (256*1024*1024U) -#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 265KB */ +#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 256KB */ #define IOMMU_ORDER 6 /* 4096 * (1<<6) */ /* srmmu.c */ diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c38bb72e3e80..036c2797dece 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -280,7 +280,9 @@ static void __init srmmu_nocache_init(void) SRMMU_NOCACHE_ALIGN_MAX, 0UL); memset(srmmu_nocache_pool, 0, srmmu_nocache_size); - srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL); + srmmu_nocache_bitmap = + __alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long), + SMP_CACHE_BYTES, 0UL); bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits); srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index ba6ae7ffdc2c..272aa4f7657e 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -24,11 +24,17 @@ static DEFINE_PER_CPU(struct tlb_batch, tlb_batch); void flush_tlb_pending(void) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); + struct mm_struct *mm = tb->mm; - if (tb->tlb_nr) { - flush_tsb_user(tb); + if (!tb->tlb_nr) + goto out; - if (CTX_VALID(tb->mm->context)) { + flush_tsb_user(tb); + + if 
(CTX_VALID(mm->context)) { + if (tb->tlb_nr == 1) { + global_flush_tlb_page(mm, tb->vaddrs[0]); + } else { #ifdef CONFIG_SMP smp_flush_tlb_pending(tb->mm, tb->tlb_nr, &tb->vaddrs[0]); @@ -37,12 +43,30 @@ void flush_tlb_pending(void) tb->tlb_nr, &tb->vaddrs[0]); #endif } - tb->tlb_nr = 0; } + tb->tlb_nr = 0; + +out: put_cpu_var(tlb_batch); } +void arch_enter_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + tb->active = 1; +} + +void arch_leave_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + if (tb->tlb_nr) + flush_tlb_pending(); + tb->active = 0; +} + static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, bool exec) { @@ -60,6 +84,12 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, nr = 0; } + if (!tb->active) { + global_flush_tlb_page(mm, vaddr); + flush_tsb_user_page(mm, vaddr); + return; + } + if (nr == 0) tb->mm = mm; diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 428982b9becf..2cc3bce5ee91 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -7,11 +7,10 @@ #include <linux/preempt.h> #include <linux/slab.h> #include <asm/page.h> -#include <asm/tlbflush.h> -#include <asm/tlb.h> -#include <asm/mmu_context.h> #include <asm/pgtable.h> +#include <asm/mmu_context.h> #include <asm/tsb.h> +#include <asm/tlb.h> #include <asm/oplib.h> extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; @@ -46,23 +45,27 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) } } -static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, - unsigned long tsb, unsigned long nentries) +static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v, + unsigned long hash_shift, + unsigned long nentries) { - unsigned long i; + unsigned long tag, ent, hash; - for (i = 0; i < tb->tlb_nr; i++) { - unsigned long v = tb->vaddrs[i]; - unsigned long tag, ent, hash; + v &= ~0x1UL; + hash = tsb_hash(v, hash_shift, nentries); + ent = tsb + 
(hash * sizeof(struct tsb)); + tag = (v >> 22UL); - v &= ~0x1UL; + tsb_flush(ent, tag); +} - hash = tsb_hash(v, hash_shift, nentries); - ent = tsb + (hash * sizeof(struct tsb)); - tag = (v >> 22UL); +static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, + unsigned long tsb, unsigned long nentries) +{ + unsigned long i; - tsb_flush(ent, tag); - } + for (i = 0; i < tb->tlb_nr; i++) + __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries); } void flush_tsb_user(struct tlb_batch *tb) @@ -90,6 +93,30 @@ void flush_tsb_user(struct tlb_batch *tb) spin_unlock_irqrestore(&mm->context.lock, flags); } +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long nentries, base, flags; + + spin_lock_irqsave(&mm->context.lock, flags); + + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries); + } +#endif + spin_unlock_irqrestore(&mm->context.lock, flags); +} + #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index f8e13d421fcb..432aa0cb1b38 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -53,6 +53,33 @@ __flush_tlb_mm: /* 18 insns */ nop .align 32 + .globl __flush_tlb_page +__flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, %pstate + mov 
SECONDARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + andn %o1, 1, %o3 + be,pn %icc, 1f + or %o3, 0x10, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + retl + wrpr %g7, 0x0, %pstate + nop + nop + nop + nop + + .align 32 .globl __flush_tlb_pending __flush_tlb_pending: /* 26 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ @@ -203,6 +230,31 @@ __cheetah_flush_tlb_mm: /* 19 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3 + sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3 + or %o0, %o3, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + be,pn %icc, 1f + andn %o1, 1, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + __cheetah_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 @@ -269,6 +321,20 @@ __hypervisor_flush_tlb_mm: /* 10 insns */ retl nop +__hypervisor_flush_tlb_page: /* 11 insns */ + /* %o0 = context, %o1 = vaddr */ + mov %o0, %g2 + mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g2, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + retl + nop + __hypervisor_flush_tlb_pending: /* 16 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 @@ -339,6 +405,13 @@ cheetah_patch_cachetlbops: call tlb_patch_one mov 19, %o2 + sethi 
%hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__cheetah_flush_tlb_pending), %o1 @@ -397,10 +470,9 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop - .globl xcall_flush_tlb_pending -xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=context, %g1=nr, %g7=vaddrs[] */ - sllx %g1, 3, %g1 + .globl xcall_flush_tlb_page +xcall_flush_tlb_page: /* 17 insns */ + /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -408,20 +480,16 @@ xcall_flush_tlb_pending: /* 21 insns */ or %g5, %g4, %g5 mov PRIMARY_CONTEXT, %g4 stxa %g5, [%g4] ASI_DMMU -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %g5 - andcc %g5, 0x1, %g0 + andcc %g1, 0x1, %g0 be,pn %icc, 2f - - andn %g5, 0x1, %g5 + andn %g1, 0x1, %g5 stxa %g0, [%g5] ASI_IMMU_DEMAP 2: stxa %g0, [%g5] ASI_DMMU_DEMAP membar #Sync - brnz,pt %g1, 1b - nop stxa %g2, [%g4] ASI_DMMU retry nop + nop .globl xcall_flush_tlb_kernel_range xcall_flush_tlb_kernel_range: /* 25 insns */ @@ -656,15 +724,13 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ membar #Sync retry - .globl __hypervisor_xcall_flush_tlb_pending -__hypervisor_xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */ - sllx %g1, 3, %g1 + .globl __hypervisor_xcall_flush_tlb_page +__hypervisor_xcall_flush_tlb_page: /* 17 insns */ + /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 mov %o2, %g4 -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %o0 /* ARG0: virtual address */ + mov %g1, %o0 /* ARG0: virtual address */ mov %g5, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ srlx %o0, PAGE_SHIFT, %o0 @@ -673,8 +739,6 @@ __hypervisor_xcall_flush_tlb_pending: /* 21 insns */ mov HV_MMU_UNMAP_ADDR_TRAP, %g6 brnz,a,pn %o0, __hypervisor_tlb_xcall_error 
mov %o0, %g5 - brnz,pt %g1, 1b - nop mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 @@ -757,6 +821,13 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 10, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__hypervisor_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_flush_tlb_page), %o1 + call tlb_patch_one + mov 11, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 @@ -788,12 +859,12 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 21, %o2 - sethi %hi(xcall_flush_tlb_pending), %o0 - or %o0, %lo(xcall_flush_tlb_pending), %o0 - sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1 + sethi %hi(xcall_flush_tlb_page), %o0 + or %o0, %lo(xcall_flush_tlb_page), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 21, %o2 + mov 17, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 3109ca684a99..d36a85ebb5e0 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -795,13 +795,9 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; } if (bpf_jit_enable > 1) - pr_err("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + bpf_jit_dump(flen, proglen, pass, image); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, - 16, 1, image, proglen, false); bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; } diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index ff496ab1e794..25877aebc685 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -17,7 +17,7 @@ config TILE select GENERIC_IRQ_SHOW select HAVE_DEBUG_BUGVERBOSE select HAVE_SYSCALL_WRAPPERS if TILEGX - select HAVE_VIRT_TO_BUS + select 
VIRT_TO_BUS select SYS_HYPERVISOR select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CLOCKEVENTS diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 8c5eff6d6df5..47684815e5c8 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -330,7 +330,6 @@ CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m CONFIG_MD_RAID456=m -CONFIG_MULTICORE_RAID456=y CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m CONFIG_DM_DEBUG=y diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index e7a3dfcbcda7..dd2b8f0c631f 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -324,7 +324,6 @@ CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m CONFIG_MD_RAID456=m -CONFIG_MULTICORE_RAID456=y CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m CONFIG_DM_DEBUG=y diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 001d418a8957..78f1f2ded86c 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -288,6 +288,9 @@ long compat_sys_sync_file_range2(int fd, unsigned int flags, long compat_sys_fallocate(int fd, int mode, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi); +long compat_sys_llseek(unsigned int fd, unsigned int offset_high, + unsigned int offset_low, loff_t __user * result, + unsigned int origin); /* Assembly trampoline to avoid clobbering r0. */ long _compat_sys_rt_sigreturn(void); diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index 241c0bb60b12..c96f9bbb760d 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -40,7 +40,15 @@ #include <asm/percpu.h> #include <arch/spr_def.h> -/* Set and clear kernel interrupt masks. */ +/* + * Set and clear kernel interrupt masks. + * + * NOTE: __insn_mtspr() is a compiler builtin marked as a memory + * clobber. 
We rely on it being equivalent to a compiler barrier in + * this code since arch_local_irq_save() and friends must act as + * compiler barriers. This compiler semantic is baked into enough + * places that the compiler will maintain it going forward. + */ #if CHIP_HAS_SPLIT_INTR_MASK() #if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32 # error Fix assumptions about which word various interrupts are in diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 7f72401b4f45..6ea4cdb3c6a0 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -32,50 +32,65 @@ * adapt the usual convention. */ -long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE4(truncate64, char __user *, filename, u32, dummy, + u32, low, u32, high) { return sys_truncate(filename, ((loff_t)high << 32) | low); } -long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE4(ftruncate64, unsigned int, fd, u32, dummy, + u32, low, u32, high) { return sys_ftruncate(fd, ((loff_t)high << 32) | low); } -long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, - u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE6(pread64, unsigned int, fd, char __user *, ubuf, + size_t, count, u32, dummy, u32, low, u32, high) { return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low); } -long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, - u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE6(pwrite64, unsigned int, fd, char __user *, ubuf, + size_t, count, u32, dummy, u32, low, u32, high) { return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); } -long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len) +COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, low, u32, high, + char __user *, buf, size_t, len) { return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len); } -long compat_sys_sync_file_range2(int 
fd, unsigned int flags, - u32 offset_lo, u32 offset_hi, - u32 nbytes_lo, u32 nbytes_hi) +COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags, + u32, offset_lo, u32, offset_hi, + u32, nbytes_lo, u32, nbytes_hi) { return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)nbytes_hi << 32) | nbytes_lo, flags); } -long compat_sys_fallocate(int fd, int mode, - u32 offset_lo, u32 offset_hi, - u32 len_lo, u32 len_hi) +COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, + u32, offset_lo, u32, offset_hi, + u32, len_lo, u32, len_hi) { return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)len_hi << 32) | len_lo); } +/* + * Avoid bug in generic sys_llseek() that specifies offset_high and + * offset_low as "unsigned long", thus making it possible to pass + * a sign-extended high 32 bits in offset_low. + */ +COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, + unsigned int, offset_low, loff_t __user *, result, + unsigned int, origin) +{ + return sys_llseek(fd, offset_high, offset_low, result, origin); +} + /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), @@ -83,6 +98,7 @@ long compat_sys_fallocate(int fd, int mode, /* See comments in sys.c */ #define compat_sys_fadvise64_64 sys32_fadvise64_64 #define compat_sys_readahead sys32_readahead +#define sys_llseek compat_sys_llseek /* Call the assembly trampolines where necessary. */ #define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index d1e15f7b59c6..7a5aa1a7864e 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1004,15 +1004,8 @@ void __cpuinit setup_cpu(int boot) #ifdef CONFIG_BLK_DEV_INITRD -/* - * Note that the kernel can potentially support other compression - * techniques than gz, though we don't do so by default. 
If we ever - * decide to do so we can either look for other filename extensions, - * or just allow a file with this name to be compressed with an - * arbitrary compressor (somewhat counterintuitively). - */ static int __initdata set_initramfs_file; -static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; +static char __initdata initramfs_file[128] = "initramfs"; static int __init setup_initramfs_file(char *str) { @@ -1026,9 +1019,9 @@ static int __init setup_initramfs_file(char *str) early_param("initramfs_file", setup_initramfs_file); /* - * We look for an "initramfs.cpio.gz" file in the hvfs. - * If there is one, we allocate some memory for it and it will be - * unpacked to the initramfs. + * We look for a file called "initramfs" in the hvfs. If there is one, we + * allocate some memory for it and it will be unpacked to the initramfs. + * If it's compressed, the initd code will uncompress it first. */ static void __init load_hv_initrd(void) { @@ -1038,10 +1031,16 @@ static void __init load_hv_initrd(void) fd = hv_fs_findfile((HV_VirtAddr) initramfs_file); if (fd == HV_ENOENT) { - if (set_initramfs_file) + if (set_initramfs_file) { pr_warning("No such hvfs initramfs file '%s'\n", initramfs_file); - return; + return; + } else { + /* Try old backwards-compatible name. 
*/ + fd = hv_fs_findfile((HV_VirtAddr)"initramfs.cpio.gz"); + if (fd == HV_ENOENT) + return; + } } BUG_ON(fd < 0); stat = hv_fs_fstat(fd); diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h index 78f1b8999964..c512b0306dd4 100644 --- a/arch/um/drivers/chan.h +++ b/arch/um/drivers/chan.h @@ -37,7 +37,7 @@ extern int console_write_chan(struct chan *chan, const char *buf, extern int console_open_chan(struct line *line, struct console *co); extern void deactivate_chan(struct chan *chan, int irq); extern void reactivate_chan(struct chan *chan, int irq); -extern void chan_enable_winch(struct chan *chan, struct tty_struct *tty); +extern void chan_enable_winch(struct chan *chan, struct tty_port *port); extern int enable_chan(struct line *line); extern void close_chan(struct line *line); extern int chan_window_size(struct line *line, diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 15c553c239a1..80b47cb71e0a 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -122,10 +122,10 @@ static int open_chan(struct list_head *chans) return err; } -void chan_enable_winch(struct chan *chan, struct tty_struct *tty) +void chan_enable_winch(struct chan *chan, struct tty_port *port) { if (chan && chan->primary && chan->ops->winch) - register_winch(chan->fd, tty); + register_winch(chan->fd, port); } static void line_timer_cb(struct work_struct *work) diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 9be670ad23b5..3fd7c3efdb18 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -216,7 +216,7 @@ static int winch_thread(void *arg) } } -static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out, +static int winch_tramp(int fd, struct tty_port *port, int *fd_out, unsigned long *stack_out) { struct winch_data data; @@ -271,7 +271,7 @@ static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out, return err; } -void register_winch(int fd, struct tty_struct *tty) +void 
register_winch(int fd, struct tty_port *port) { unsigned long stack; int pid, thread, count, thread_fd = -1; @@ -281,17 +281,17 @@ void register_winch(int fd, struct tty_struct *tty) return; pid = tcgetpgrp(fd); - if (is_skas_winch(pid, fd, tty)) { - register_winch_irq(-1, fd, -1, tty, 0); + if (is_skas_winch(pid, fd, port)) { + register_winch_irq(-1, fd, -1, port, 0); return; } if (pid == -1) { - thread = winch_tramp(fd, tty, &thread_fd, &stack); + thread = winch_tramp(fd, port, &thread_fd, &stack); if (thread < 0) return; - register_winch_irq(thread_fd, fd, thread, tty, stack); + register_winch_irq(thread_fd, fd, thread, port, stack); count = write(thread_fd, &c, sizeof(c)); if (count != sizeof(c)) diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h index dc693298eb8f..03f1b565c5f9 100644 --- a/arch/um/drivers/chan_user.h +++ b/arch/um/drivers/chan_user.h @@ -38,10 +38,10 @@ extern int generic_window_size(int fd, void *unused, unsigned short *rows_out, unsigned short *cols_out); extern void generic_free(void *data); -struct tty_struct; -extern void register_winch(int fd, struct tty_struct *tty); +struct tty_port; +extern void register_winch(int fd, struct tty_port *port); extern void register_winch_irq(int fd, int tty_fd, int pid, - struct tty_struct *tty, unsigned long stack); + struct tty_port *port, unsigned long stack); #define __channel_help(fn, prefix) \ __uml_help(fn, prefix "[0-9]*=<channel description>\n" \ diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index f1b38571f94e..be541cf69fd2 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -305,7 +305,7 @@ static int line_activate(struct tty_port *port, struct tty_struct *tty) return ret; if (!line->sigio) { - chan_enable_winch(line->chan_out, tty); + chan_enable_winch(line->chan_out, port); line->sigio = 1; } @@ -315,8 +315,22 @@ static int line_activate(struct tty_port *port, struct tty_struct *tty) return 0; } +static void unregister_winch(struct 
tty_struct *tty); + +static void line_destruct(struct tty_port *port) +{ + struct tty_struct *tty = tty_port_tty_get(port); + struct line *line = tty->driver_data; + + if (line->sigio) { + unregister_winch(tty); + line->sigio = 0; + } +} + static const struct tty_port_operations line_port_ops = { .activate = line_activate, + .destruct = line_destruct, }; int line_open(struct tty_struct *tty, struct file *filp) @@ -340,18 +354,6 @@ int line_install(struct tty_driver *driver, struct tty_struct *tty, return 0; } -static void unregister_winch(struct tty_struct *tty); - -void line_cleanup(struct tty_struct *tty) -{ - struct line *line = tty->driver_data; - - if (line->sigio) { - unregister_winch(tty); - line->sigio = 0; - } -} - void line_close(struct tty_struct *tty, struct file * filp) { struct line *line = tty->driver_data; @@ -601,7 +603,7 @@ struct winch { int fd; int tty_fd; int pid; - struct tty_struct *tty; + struct tty_port *port; unsigned long stack; struct work_struct work; }; @@ -655,7 +657,7 @@ static irqreturn_t winch_interrupt(int irq, void *data) goto out; } } - tty = winch->tty; + tty = tty_port_tty_get(winch->port); if (tty != NULL) { line = tty->driver_data; if (line != NULL) { @@ -663,6 +665,7 @@ static irqreturn_t winch_interrupt(int irq, void *data) &tty->winsize.ws_col); kill_pgrp(tty->pgrp, SIGWINCH, 1); } + tty_kref_put(tty); } out: if (winch->fd != -1) @@ -670,7 +673,7 @@ static irqreturn_t winch_interrupt(int irq, void *data) return IRQ_HANDLED; } -void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty, +void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port, unsigned long stack) { struct winch *winch; @@ -685,7 +688,7 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty, .fd = fd, .tty_fd = tty_fd, .pid = pid, - .tty = tty, + .port = port, .stack = stack }); if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, @@ -714,15 +717,18 @@ static void unregister_winch(struct 
tty_struct *tty) { struct list_head *ele, *next; struct winch *winch; + struct tty_struct *wtty; spin_lock(&winch_handler_lock); list_for_each_safe(ele, next, &winch_handlers) { winch = list_entry(ele, struct winch, list); - if (winch->tty == tty) { + wtty = tty_port_tty_get(winch->port); + if (wtty == tty) { free_winch(winch); break; } + tty_kref_put(wtty); } spin_unlock(&winch_handler_lock); } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index d8926c303629..39f186252e02 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -218,6 +218,7 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&lp->lock, flags); len = (*lp->write)(lp->fd, skb, lp); + skb_tx_timestamp(skb); if (len == skb->len) { dev->stats.tx_packets++; @@ -281,6 +282,7 @@ static void uml_net_get_drvinfo(struct net_device *dev, static const struct ethtool_ops uml_net_ethtool_ops = { .get_drvinfo = uml_net_get_drvinfo, .get_link = ethtool_op_get_link, + .get_ts_info = ethtool_op_get_ts_info, }; static void uml_net_user_timer_expire(unsigned long _conn) diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index 16fdd0a0f9d6..b8d14fa52059 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -105,7 +105,6 @@ static const struct tty_operations ssl_ops = { .throttle = line_throttle, .unthrottle = line_unthrottle, .install = ssl_install, - .cleanup = line_cleanup, .hangup = line_hangup, }; diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index 827777af3f6d..7b361f36ca96 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -110,7 +110,6 @@ static const struct tty_operations console_ops = { .set_termios = line_set_termios, .throttle = line_throttle, .unthrottle = line_unthrottle, - .cleanup = line_cleanup, .hangup = line_hangup, }; diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index b1469fe93295..9d9f1b4bf826 100644 --- 
a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -15,7 +15,7 @@ #include <sysdep/mcontext.h> #include "internal.h" -void (*sig_info[NSIG])(int, siginfo_t *, struct uml_pt_regs *) = { +void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGTRAP] = relay_signal, [SIGFPE] = relay_signal, [SIGILL] = relay_signal, diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index da4b9e9999fd..337518c5042a 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -15,6 +15,8 @@ #include <sys/mman.h> #include <sys/stat.h> #include <sys/wait.h> +#include <sys/time.h> +#include <sys/resource.h> #include <asm/unistd.h> #include <init.h> #include <os.h> diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index dc50b157fc83..2943e3acdf0c 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -9,7 +9,7 @@ config UNICORE32 select GENERIC_ATOMIC64 select HAVE_KERNEL_LZO select HAVE_KERNEL_LZMA - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select ARCH_HAVE_CUSTOM_GPIO_H select GENERIC_FIND_FIRST_BIT select GENERIC_IRQ_PROBE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4f24f5b1218..15b5cef4aa38 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -112,7 +112,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select MODULES_USE_ELF_REL if X86_32 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 @@ -1549,6 +1549,7 @@ config X86_SMAP config EFI bool "EFI runtime service support" depends on ACPI + select UCS2_STRING ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). 
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 8a84501acb1b..5ef205c5f37b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,7 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -29,7 +29,6 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/piggy.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone -$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone ifeq ($(CONFIG_EFI_STUB), y) VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o @@ -43,7 +42,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -targets += vmlinux.bin.all vmlinux.relocs +targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c205035a6b96..8615f7581820 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -251,6 +251,51 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) *size = len; } +static efi_status_t setup_efi_vars(struct boot_params *params) +{ + struct setup_data *data; + struct efi_var_bootdata *efidata; + u64 store_size, remaining_size, var_size; + efi_status_t status; + + if (!sys_table->runtime->query_variable_info) + return EFI_UNSUPPORTED; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && 
data->next) + data = (struct setup_data *)(unsigned long)data->next; + + status = efi_call_phys4(sys_table->runtime->query_variable_info, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, &store_size, + &remaining_size, &var_size); + + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*efidata), &efidata); + + if (status != EFI_SUCCESS) + return status; + + efidata->data.type = SETUP_EFI_VARS; + efidata->data.len = sizeof(struct efi_var_bootdata) - + sizeof(struct setup_data); + efidata->data.next = 0; + efidata->store_size = store_size; + efidata->remaining_size = remaining_size; + efidata->max_var_size = var_size; + + if (data) + data->next = (unsigned long)efidata; + else + params->hdr.setup_data = (unsigned long)efidata; + +} + static efi_status_t setup_efi_pci(struct boot_params *params) { efi_pci_io_protocol *pci; @@ -1157,6 +1202,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, setup_graphics(boot_params); + setup_efi_vars(boot_params); + setup_efi_pci(boot_params); status = efi_call_phys3(sys_table->boottime->allocate_pool, diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 5b5e9cb774b5..653668d140f9 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -14,13 +14,29 @@ * analysis of kexec-tools; if other broken bootloaders initialize a * different set of fields we will need to figure out how to disambiguate. * + * Note: efi_info is commonly left uninitialized, but that field has a + * private magic, so it is better to leave it unchanged. */ static void sanitize_boot_params(struct boot_params *boot_params) { + /* + * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear + * this field. 
The purpose of this field is to guarantee + * compliance with the x86 boot spec located in + * Documentation/x86/boot.txt . That spec says that the + * *whole* structure should be cleared, after which only the + * portion defined by struct setup_header (boot_params->hdr) + * should be copied in. + * + * If you're having an issue because the sentinel is set, you + * need to change the whole structure to be cleared, not this + * (or any other) individual field, or you will soon have + * problems again. + */ if (boot_params->sentinel) { - /*fields in boot_params are not valid, clear them */ + /* fields in boot_params are left uninitialized, clear them */ memset(&boot_params->olpc_ofw_header, 0, - (char *)&boot_params->alt_mem_k - + (char *)&boot_params->efi_info - (char *)&boot_params->olpc_ofw_header); memset(&boot_params->kbd_status, 0, (char *)&boot_params->hdr - diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 60c89f30c727..2fb5d5884e23 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -102,6 +102,13 @@ extern void efi_call_phys_epilog(void); extern void efi_unmap_memmap(void); extern void efi_memory_uc(u64 addr, unsigned long size); +struct efi_var_bootdata { + struct setup_data data; + u64 store_size; + u64 remaining_size; + u64 max_var_size; +}; + #ifdef CONFIG_EFI static inline bool efi_is_native(void) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index d3ddd17405d0..5a6d2873f80e 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -77,6 +77,7 @@ struct arch_specific_insn { * a post_handler or break_handler). 
*/ int boostable; + bool if_modifier; }; struct arch_optimized_insn { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 635a74d22409..4979778cc7fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -414,8 +414,8 @@ struct kvm_vcpu_arch { gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; - unsigned int time_offset; - struct page *time_page; + struct gfn_to_hva_cache pv_time; + bool pv_time_enabled; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5edd1742cfd0..7361e47db79f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -703,7 +703,10 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -void arch_flush_lazy_mmu_mode(void); +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush); +} static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 142236ed83af..b3b0ec1dac86 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -91,6 +91,7 @@ struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. 
*/ void (*enter)(void); void (*leave)(void); + void (*flush)(void); }; struct pv_time_ops { @@ -679,6 +680,7 @@ void paravirt_end_context_switch(struct task_struct *next); void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); +void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); u32 _paravirt_ident_32(u32); diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 1ace47b62592..2e188d68397c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -29,13 +29,13 @@ extern const unsigned long sys_call_table[]; */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->orig_ax & __SYSCALL_MASK; + return regs->orig_ax; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - regs->ax = regs->orig_ax & __SYSCALL_MASK; + regs->ax = regs->orig_ax; } static inline long syscall_get_error(struct task_struct *task, diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 4fef20773b8f..c7797307fc2b 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -7,7 +7,7 @@ #define tlb_flush(tlb) \ { \ - if (tlb->fullmm == 0) \ + if (!tlb->fullmm && !tlb->need_flush_all) \ flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \ else \ flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index c20d1ce62dc6..e709884d0ef9 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -382,14 +382,14 @@ HYPERVISOR_console_io(int cmd, int count, char *str) return _hypercall3(int, console_io, cmd, count, str); } -extern int __must_check HYPERVISOR_physdev_op_compat(int, void *); +extern int __must_check xen_physdev_op_compat(int, void *); static inline int HYPERVISOR_physdev_op(int cmd, void *arg) { int rc = _hypercall2(int, physdev_op, cmd, arg); if (unlikely(rc == -ENOSYS)) - 
rc = HYPERVISOR_physdev_op_compat(cmd, arg); + rc = xen_physdev_op_compat(cmd, arg); return rc; } diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index c15ddaf90710..08744242b8d2 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -6,6 +6,7 @@ #define SETUP_E820_EXT 1 #define SETUP_DTB 2 #define SETUP_PCI 3 +#define SETUP_EFI_VARS 4 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 892ce40a7470..7a060f4b411f 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -44,6 +44,7 @@ #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define MSR_PLATFORM_INFO 0x000000ce #define MSR_MTRRcap 0x000000fe #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index a7d26d83fb70..8f4be53ea04b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -35,13 +35,6 @@ static bool __init ms_hyperv_platform(void) if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - /* - * Xen emulates Hyper-V to support enlightened Windows. - * Check to see first if we are on a Xen Hypervisor. - */ - if (xen_cpuid_base()) - return false; - cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); @@ -82,12 +75,6 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); -#if IS_ENABLED(CONFIG_HYPERV) - /* - * Setup the IDT for hypervisor callback. 
- */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); -#endif } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { @@ -103,6 +90,11 @@ static irq_handler_t vmbus_isr; void hv_register_vmbus_handler(int irq, irq_handler_t handler) { + /* + * Setup the IDT for hypervisor callback. + */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + vmbus_irq = irq; vmbus_isr = handler; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 529c8931fc02..cc45deb791b0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -101,6 +101,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ + INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ @@ -149,8 +153,14 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = }; static struct extra_reg intel_snb_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_snbep_extra_regs[] 
__read_mostly = { + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), EVENT_EXTRA_END }; @@ -2093,7 +2103,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 45) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; @@ -2119,7 +2132,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_ivb_event_constraints; x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 62) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 826054a4f2ee..26830f3af0df 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -314,10 +314,11 @@ int intel_pmu_drain_bts_buffer(void) if (top <= at) return 0; + memset(&regs, 0, sizeof(regs)); + ds->bts_index = ds->bts_buffer_base; perf_sample_data_init(&data, 0, event->hw.last_period); - regs.ip = 0; /* * Prepare a generic sample, i.e. fill in the invariant fields. 
@@ -729,3 +730,13 @@ void intel_ds_init(void) } } } + +void perf_restore_debug_store(void) +{ + struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); + + if (!x86_pmu.bts && !x86_pmu.pebs) + return; + + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); +} diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3f06e6149981..7bfe318d3d8a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -375,6 +375,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) else p->ainsn.boostable = -1; + /* Check whether the instruction modifies Interrupt Flag or not */ + p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); + /* Also, displacement change doesn't affect the first byte */ p->opcode = p->ainsn.insn[0]; } @@ -434,7 +437,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - if (is_IF_modifier(p->ainsn.insn)) + if (p->ainsn.if_modifier) kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; } diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c index 577db8417d15..833d51d6ee06 100644 --- a/arch/x86/kernel/microcode_core_early.c +++ b/arch/x86/kernel/microcode_core_early.c @@ -45,9 +45,6 @@ static int __cpuinit x86_vendor(void) u32 eax = 0x00000000; u32 ebx, ecx = 0, edx; - if (!have_cpuid_p()) - return X86_VENDOR_UNKNOWN; - native_cpuid(&eax, &ebx, &ecx, &edx); if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) @@ -59,18 +56,45 @@ static int __cpuinit x86_vendor(void) return X86_VENDOR_UNKNOWN; } +static int __cpuinit x86_family(void) +{ + u32 eax = 0x00000001; + u32 ebx, ecx = 0, edx; + int x86; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + x86 = (eax >> 8) & 0xf; + if (x86 == 15) + x86 += (eax >> 20) & 0xff; + + return x86; +} + void __init load_ucode_bsp(void) { - int vendor = x86_vendor(); 
+ int vendor, x86; + + if (!have_cpuid_p()) + return; - if (vendor == X86_VENDOR_INTEL) + vendor = x86_vendor(); + x86 = x86_family(); + + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_bsp(); } void __cpuinit load_ucode_ap(void) { - int vendor = x86_vendor(); + int vendor, x86; + + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + x86 = x86_family(); - if (vendor == X86_VENDOR_INTEL) + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_ap(); } diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index 7890bc838952..d893e8ed8ac9 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -90,13 +90,13 @@ microcode_phys(struct microcode_intel **mc_saved_tmp, struct microcode_intel ***mc_saved; mc_saved = (struct microcode_intel ***) - __pa_symbol(&mc_saved_data->mc_saved); + __pa_nodebug(&mc_saved_data->mc_saved); for (i = 0; i < mc_saved_data->mc_saved_count; i++) { struct microcode_intel *p; p = *(struct microcode_intel **) - __pa(mc_saved_data->mc_saved + i); - mc_saved_tmp[i] = (struct microcode_intel *)__pa(p); + __pa_nodebug(mc_saved_data->mc_saved + i); + mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); } } #endif @@ -562,7 +562,7 @@ scan_microcode(unsigned long start, unsigned long end, struct cpio_data cd; long offset = 0; #ifdef CONFIG_X86_32 - char *p = (char *)__pa_symbol(ucode_name); + char *p = (char *)__pa_nodebug(ucode_name); #else char *p = ucode_name; #endif @@ -630,8 +630,8 @@ static void __cpuinit print_ucode(struct ucode_cpu_info *uci) if (mc_intel == NULL) return; - delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info); - current_mc_date_p = (int *)__pa_symbol(&current_mc_date); + delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); + current_mc_date_p = (int *)__pa_nodebug(&current_mc_date); *delay_ucode_info_p = 1; *current_mc_date_p = mc_intel->hdr.date; @@ -659,8 +659,8 @@ static inline void __cpuinit 
print_ucode(struct ucode_cpu_info *uci) } #endif -static int apply_microcode_early(struct mc_saved_data *mc_saved_data, - struct ucode_cpu_info *uci) +static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data, + struct ucode_cpu_info *uci) { struct microcode_intel *mc_intel; unsigned int val[2]; @@ -741,15 +741,15 @@ load_ucode_intel_bsp(void) #ifdef CONFIG_X86_32 struct boot_params *boot_params_p; - boot_params_p = (struct boot_params *)__pa_symbol(&boot_params); + boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params); ramdisk_image = boot_params_p->hdr.ramdisk_image; ramdisk_size = boot_params_p->hdr.ramdisk_size; initrd_start_early = ramdisk_image; initrd_end_early = initrd_start_early + ramdisk_size; _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_symbol(&mc_saved_data), - (unsigned long *)__pa_symbol(&mc_saved_in_initrd), + (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), initrd_start_early, initrd_end_early, &uci); #else ramdisk_image = boot_params.hdr.ramdisk_image; @@ -772,10 +772,10 @@ void __cpuinit load_ucode_intel_ap(void) unsigned long *initrd_start_p; mc_saved_in_initrd_p = - (unsigned long *)__pa_symbol(mc_saved_in_initrd); - mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start); - initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p); + (unsigned long *)__pa_nodebug(mc_saved_in_initrd); + mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); + initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); #else mc_saved_data_p = &mc_saved_data; mc_saved_in_initrd_p = mc_saved_in_initrd; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 17fff18a1031..8bfb335f74bb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -263,6 +263,18 @@ 
void paravirt_leave_lazy_mmu(void) leave_lazy(PARAVIRT_LAZY_MMU); } +void paravirt_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + void paravirt_start_context_switch(struct task_struct *prev) { BUG_ON(preemptible()); @@ -292,18 +304,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return this_cpu_read(paravirt_lazy_mode); } -void arch_flush_lazy_mmu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -475,6 +475,7 @@ struct pv_mmu_ops pv_mmu_ops = { .lazy_mode = { .enter = paravirt_nop, .leave = paravirt_nop, + .flush = paravirt_nop, }, .set_fixmap = native_set_fixmap, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84d32855f65c..fae9134a2de9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -171,9 +171,15 @@ static struct resource bss_resource = { #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { + .wp_works_ok = -1, + .fdiv_bug = -1, +}; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { + .wp_works_ok = -1, + .fdiv_bug = -1, +}; EXPORT_SYMBOL(boot_cpu_data); unsigned int def_to_bigsmp; @@ -501,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void) /* * Keep the crash kernel below this limit. On 32 bits earlier kernels * would limit the kernel to the low 512 MiB due to mapping restrictions. + * On 64bit, old kexec-tools need to under 896MiB. 
*/ #ifdef CONFIG_X86_32 -# define CRASH_KERNEL_ADDR_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20) #else -# define CRASH_KERNEL_ADDR_MAX MAXMEM +# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20) +# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM #endif static void __init reserve_crashkernel_low(void) @@ -515,19 +524,35 @@ static void __init reserve_crashkernel_low(void) unsigned long long low_base = 0, low_size = 0; unsigned long total_low_mem; unsigned long long base; + bool auto_set = false; int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); + /* crashkernel=Y,low */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); - if (ret != 0 || low_size <= 0) - return; + if (ret != 0) { + /* + * two parts from lib/swiotlb.c: + * swiotlb size: user specified with swiotlb= or default. + * swiotlb overflow buffer: now is hardcoded to 32k. + * We round it to 8M for other buffers that + * may need to stay low too. + */ + low_size = swiotlb_size_or_default() + (8UL<<20); + auto_set = true; + } else { + /* passed with crashkernel=0,low ? 
*/ + if (!low_size) + return; + } low_base = memblock_find_in_range(low_size, (1ULL<<32), low_size, alignment); if (!low_base) { - pr_info("crashkernel low reservation failed - No suitable area found.\n"); + if (!auto_set) + pr_info("crashkernel low reservation failed - No suitable area found.\n"); return; } @@ -548,14 +573,22 @@ static void __init reserve_crashkernel(void) const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long total_mem; unsigned long long crash_size, crash_base; + bool high = false; int ret; total_mem = memblock_phys_mem_size(); + /* crashkernel=XM */ ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) - return; + if (ret != 0 || crash_size <= 0) { + /* crashkernel=X,high */ + ret = parse_crashkernel_high(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret != 0 || crash_size <= 0) + return; + high = true; + } /* 0 means: find the address automatically */ if (crash_base <= 0) { @@ -563,7 +596,9 @@ static void __init reserve_crashkernel(void) * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX */ crash_base = memblock_find_in_range(alignment, - CRASH_KERNEL_ADDR_MAX, crash_size, alignment); + high ? 
CRASH_KERNEL_ADDR_HIGH_MAX : + CRASH_KERNEL_ADDR_LOW_MAX, + crash_size, alignment); if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a6ceaedc396a..9f190a2a00e9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1365,9 +1365,8 @@ static inline void mwait_play_dead(void) unsigned int eax, ebx, ecx, edx; unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; - int i; void *mwait_ptr; - struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); + int i; if (!this_cpu_has(X86_FEATURE_MWAIT)) return; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd4e4ad..f77df1c5de6e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1857,7 +1857,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) if (!pv_eoi_enabled(vcpu)) return 0; return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, - addr); + addr, sizeof(u8)); } void kvm_lapic_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f71500af1f81..e1721324c271 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1406,25 +1406,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long flags, this_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; - void *shared_kaddr; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp, host_tsc; - struct pvclock_vcpu_time_info *guest_hv_clock; + struct pvclock_vcpu_time_info guest_hv_clock; u8 pvclock_flags; bool use_master_clock; kernel_ns = 0; host_tsc = 0; - /* Keep irq disabled to prevent changes to the clock */ - local_irq_save(flags); - this_tsc_khz = __get_cpu_var(cpu_tsc_khz); - if (unlikely(this_tsc_khz == 0)) { - local_irq_restore(flags); - kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); - return 1; - } - /* * If the host uses TSC clock, then passthrough TSC as stable * to the guest. 
@@ -1436,6 +1426,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = ka->master_kernel_ns; } spin_unlock(&ka->pvclock_gtod_sync_lock); + + /* Keep irq disabled to prevent changes to the clock */ + local_irq_save(flags); + this_tsc_khz = __get_cpu_var(cpu_tsc_khz); + if (unlikely(this_tsc_khz == 0)) { + local_irq_restore(flags); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); + return 1; + } if (!use_master_clock) { host_tsc = native_read_tsc(); kernel_ns = get_kernel_ns(); @@ -1463,7 +1462,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_restore(flags); - if (!vcpu->time_page) + if (!vcpu->pv_time_enabled) return 0; /* @@ -1525,12 +1524,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) */ vcpu->hv_clock.version += 2; - shared_kaddr = kmap_atomic(vcpu->time_page); - - guest_hv_clock = shared_kaddr + vcpu->time_offset; + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return 0; /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ - pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); + pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); if (vcpu->pvclock_set_guest_stopped_request) { pvclock_flags |= PVCLOCK_GUEST_STOPPED; @@ -1543,12 +1542,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; - memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); - - kunmap_atomic(shared_kaddr); - - mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); return 0; } @@ -1827,7 +1823,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, + sizeof(u32))) return 1; vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); 
@@ -1837,10 +1834,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { - if (vcpu->arch.time_page) { - kvm_release_page_dirty(vcpu->arch.time_page); - vcpu->arch.time_page = NULL; - } + vcpu->arch.pv_time_enabled = false; } static void accumulate_steal_time(struct kvm_vcpu *vcpu) @@ -1947,6 +1941,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_KVM_SYSTEM_TIME_NEW: case MSR_KVM_SYSTEM_TIME: { + u64 gpa_offset; kvmclock_reset(vcpu); vcpu->arch.time = data; @@ -1956,14 +1951,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & 1)) break; - /* ...but clean it before doing the actual write */ - vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); - - vcpu->arch.time_page = - gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); + gpa_offset = data & ~(PAGE_MASK | 1); - if (is_error_page(vcpu->arch.time_page)) - vcpu->arch.time_page = NULL; + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.pv_time, data & ~1ULL, + sizeof(struct pvclock_vcpu_time_info))) + vcpu->arch.pv_time_enabled = false; + else + vcpu->arch.pv_time_enabled = true; break; } @@ -1980,7 +1975,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS)) + data & KVM_STEAL_VALID_BITS, + sizeof(struct kvm_steal_time))) return 1; vcpu->arch.st.msr_val = data; @@ -2967,7 +2963,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, */ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.time_page) + if (!vcpu->arch.pv_time_enabled) return -EINVAL; vcpu->arch.pvclock_set_guest_stopped_request = true; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -6718,6 +6714,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_wbinvd_dirty_mask; vcpu->arch.ia32_tsc_adjust_msr = 0x0; + vcpu->arch.pv_time_enabled 
= false; kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 1cbd89ca5569..7114c63f047d 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1334,6 +1334,7 @@ __init void lguest_init(void) pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; + pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu; pv_mmu_ops.pte_update = lguest_pte_update; pv_mmu_ops.pte_update_defer = lguest_pte_update; diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 05928aae911e..906fea315791 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -74,10 +74,10 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) char c; unsigned zero_len; - for (; len; --len) { + for (; len; --len, to++) { if (__get_user_nocheck(c, from++, sizeof(char))) break; - if (__put_user_nocheck(c, to++, sizeof(char))) + if (__put_user_nocheck(c, to, sizeof(char))) break; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2b97525246d4..0e883364abb5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -378,10 +378,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (pgd_none(*pgd_ref)) return -1; - if (pgd_none(*pgd)) + if (pgd_none(*pgd)) { set_pgd(pgd, *pgd_ref); - else + arch_flush_lazy_mmu_mode(); + } else { BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } /* * Below here mismatches are bugs because these lower tables diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4903a03ae876..59b7fc453277 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -410,9 +410,8 @@ void __init init_mem_mapping(void) /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); - /* xen has big range in reserved near end of ram, skip it at first */ - addr = 
memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, - PAGE_SIZE); + /* xen has big range in reserved near end of ram, skip it at first.*/ + addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index b0086567271c..0e38951e65eb 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -68,7 +68,7 @@ static int print_split(struct split_state *s) s->gpg++; i += GPS/PAGE_SIZE; } else if (level == PG_LEVEL_2M) { - if (!(pte_val(*pte) & _PAGE_PSE)) { + if ((pte_val(*pte) & _PAGE_PRESENT) && !(pte_val(*pte) & _PAGE_PSE)) { printk(KERN_ERR "%lx level %d but not PSE %Lx\n", addr, level, (u64)pte_val(*pte)); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 091934e1d0d9..fb4e73ec24d8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -467,7 +467,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * We are safe now. Check whether the new pgprot is the same: */ old_pte = *kpte; - old_prot = new_prot = req_prot = pte_pgprot(old_pte); + old_prot = req_prot = pte_pgprot(old_pte); pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); @@ -478,12 +478,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL * for the ancient hardware that doesn't support it. */ - if (pgprot_val(new_prot) & _PAGE_PRESENT) - pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL; + if (pgprot_val(req_prot) & _PAGE_PRESENT) + pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL; else - pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); + pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); - new_prot = canon_pgprot(new_prot); + req_prot = canon_pgprot(req_prot); /* * old_pte points to the large page base address. 
So we need @@ -1413,6 +1413,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) * but that can deadlock->flush only current cpu: */ __flush_tlb_all(); + + arch_flush_lazy_mmu_mode(); } #ifdef CONFIG_HIBERNATION diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2610bd93c896..657438858e83 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -563,6 +563,13 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) if (base > __pa(high_memory-1)) return 0; + /* + * some areas in the middle of the kernel identity range + * are not mapped, like the PCI space. + */ + if (!page_is_ram(base >> PAGE_SHIFT)) + return 0; + id_sz = (__pa(high_memory-1) <= base + size) ? __pa(high_memory) - base : size; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 193350b51f90..17fda6a8b3c2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -58,6 +58,13 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); + /* + * NOTE! For PAE, any changes to the top page-directory-pointer-table + * entries need a full cr3 reload to flush. 
+ */ +#ifdef CONFIG_X86_PAE + tlb->need_flush_all = 1; +#endif tlb_remove_page(tlb, virt_to_page(pmd)); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3cbe45381bbb..f66b54086ce5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -725,17 +725,12 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; } oldproglen = proglen; } + if (bpf_jit_enable > 1) - pr_err("flen=%d proglen=%u pass=%d image=%p\n", - flen, proglen, pass, image); + bpf_jit_dump(flen, proglen, pass, image); if (image) { - if (bpf_jit_enable > 1) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, - 16, 1, image, proglen, false); - bpf_flush_icache(image, image + proglen); - fp->bpf_func = (void *)image; } out: diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 5f2ecaf3f9d8..e4a86a677ce1 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -41,6 +41,7 @@ #include <linux/io.h> #include <linux/reboot.h> #include <linux/bcd.h> +#include <linux/ucs2_string.h> #include <asm/setup.h> #include <asm/efi.h> @@ -51,6 +52,13 @@ #define EFI_DEBUG 1 +/* + * There's some additional metadata associated with each + * variable. 
Intel's reference implementation is 60 bytes - bump that + * to account for potential alignment constraints + */ +#define VAR_METADATA_SIZE 64 + struct efi __read_mostly efi = { .mps = EFI_INVALID_TABLE_ADDR, .acpi = EFI_INVALID_TABLE_ADDR, @@ -69,6 +77,13 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_var_store_size; +static u64 efi_var_remaining_size; +static u64 efi_var_max_var_size; +static u64 boot_used_size; +static u64 boot_var_size; +static u64 active_size; + unsigned long x86_efi_facility; /* @@ -98,6 +113,15 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); +static bool efi_no_storage_paranoia; + +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { @@ -162,8 +186,53 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - return efi_call_virt3(get_next_variable, - name_size, name, vendor); + efi_status_t status; + static bool finished = false; + static u64 var_size; + + status = efi_call_virt3(get_next_variable, + name_size, name, vendor); + + if (status == EFI_NOT_FOUND) { + finished = true; + if (var_size < boot_used_size) { + boot_var_size = boot_used_size - var_size; + active_size += boot_var_size; + } else { + printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n"); + } + } + + if (boot_used_size && !finished) { + unsigned long size; + u32 attr; + efi_status_t s; + void *tmp; + + s = virt_efi_get_variable(name, vendor, &attr, &size, NULL); + + if (s != EFI_BUFFER_TOO_SMALL || !size) + return status; + + tmp = kmalloc(size, GFP_ATOMIC); + + if (!tmp) + return status; + + s = virt_efi_get_variable(name, vendor, &attr, &size, tmp); + + if (s == 
EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) { + var_size += size; + var_size += ucs2_strsize(name, 1024); + active_size += size; + active_size += VAR_METADATA_SIZE; + active_size += ucs2_strsize(name, 1024); + } + + kfree(tmp); + } + + return status; } static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -172,9 +241,34 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); + efi_status_t status; + u32 orig_attr = 0; + unsigned long orig_size = 0; + + status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size, + NULL); + + if (status != EFI_BUFFER_TOO_SMALL) + orig_size = 0; + + status = efi_call_virt5(set_variable, + name, vendor, attr, + data_size, data); + + if (status == EFI_SUCCESS) { + if (orig_size) { + active_size -= orig_size; + active_size -= ucs2_strsize(name, 1024); + active_size -= VAR_METADATA_SIZE; + } + if (data_size) { + active_size += data_size; + active_size += ucs2_strsize(name, 1024); + active_size += VAR_METADATA_SIZE; + } + } + + return status; } static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -682,6 +776,9 @@ void __init efi_init(void) char vendor[100] = "unknown"; int i = 0; void *tmp; + struct setup_data *data; + struct efi_var_bootdata *efi_var_data; + u64 pa_data; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -699,6 +796,22 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*efi_var_data)); + if (data->type == SETUP_EFI_VARS) { + efi_var_data = (struct efi_var_bootdata *)data; + + efi_var_store_size = efi_var_data->store_size; + efi_var_remaining_size = efi_var_data->remaining_size; + efi_var_max_var_size = efi_var_data->max_var_size; + } + pa_data = data->next; + early_iounmap(data, sizeof(*efi_var_data)); + } + + boot_used_size 
= efi_var_store_size - efi_var_remaining_size; + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* @@ -999,3 +1112,48 @@ u64 efi_mem_attributes(unsigned long phys_addr) } return 0; } + +/* + * Some firmware has serious problems when using more than 50% of the EFI + * variable store, i.e. it triggers bugs that can brick machines. Ensure that + * we never use more than this safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. + */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + if (!max_size && remaining_size > size) + printk_once(KERN_ERR FW_BUG "Broken EFI implementation" + " is returning MaxVariableSize=0\n"); + /* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. We account for that by refusing the + * write if permitting it would reduce the available space to under + * 50%. However, some firmware won't reclaim variable space until + * after the used (not merely the actively used) space drops below + * a threshold. We can approximate that case with the value calculated + * above. If both the firmware and our calculations indicate that the + * available space would drop below 50%, refuse the write. 
+ */ + + if (!storage_size || size > remaining_size || + (max_size && size > max_size)) + return EFI_OUT_OF_RESOURCES; + + if (!efi_no_storage_paranoia && + ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && + (remaining_size - size < storage_size / 2))) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} +EXPORT_SYMBOL_GPL(efi_query_variable_store); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 120cee1c3f8d..3c68768d7a75 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -11,6 +11,7 @@ #include <linux/suspend.h> #include <linux/export.h> #include <linux/smp.h> +#include <linux/perf_event.h> #include <asm/pgtable.h> #include <asm/proto.h> @@ -228,6 +229,7 @@ static void __restore_processor_state(struct saved_context *ctxt) do_fpu_end(); x86_platform.restore_sched_clock_state(); mtrr_bp_restore(); + perf_restore_debug_store(); } /* Needed by apm.c */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e8e34938c57d..e006c18d288a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1467,8 +1467,6 @@ static void __init xen_write_cr3_init(unsigned long cr3) __xen_write_cr3(true, cr3); xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ - - pv_mmu_ops.write_cr3 = &xen_write_cr3; } #endif @@ -1750,14 +1748,18 @@ static void *m2v(phys_addr_t maddr) } /* Set the page permissions on an identity-mapped pages */ -static void set_page_prot(void *addr, pgprot_t prot) +static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } +static void set_page_prot(void *addr, pgprot_t prot) +{ + return set_page_prot_flags(addr, prot, UVMF_NONE); +} #ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { @@ -1841,12 +1843,12 @@ static void 
__init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, unsigned long addr) { if (*pt_base == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_base)++; } if (*pt_end == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_end)--; } @@ -2122,6 +2124,7 @@ static void __init xen_post_allocator_init(void) #endif #ifdef CONFIG_X86_64 + pv_mmu_ops.write_cr3 = &xen_write_cr3; SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif xen_mark_init_mm_pinned(); @@ -2197,6 +2200,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .lazy_mode = { .enter = paravirt_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, + .flush = paravirt_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 35876ffac11d..b09de49dbec5 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -9,7 +9,7 @@ config XTENSA select HAVE_IDE select GENERIC_ATOMIC64 select HAVE_GENERIC_HARDIRQS - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select MODULES_USE_ELF_RELA diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 35905cb6e419..a8f44f50e651 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -83,4 +83,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* _XTENSA_SOCKET_H */ diff --git a/block/blk-core.c b/block/blk-core.c index 074b758efc42..7c288358a745 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,6 +39,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); diff --git 
a/block/blk-flush.c b/block/blk-flush.c index db8f1b507857..cc2b827a853c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -444,7 +444,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, * copied from blk_rq_pos(rq). */ if (error_sector) - *error_sector = bio->bi_sector; + *error_sector = bio->bi_sector; if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6206a934eb8c..5efc5a647183 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -229,6 +229,8 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \ unsigned long val; \ ssize_t ret; \ ret = queue_var_store(&val, page, count); \ + if (ret < 0) \ + return ret; \ if (neg) \ val = !val; \ \ diff --git a/crypto/gcm.c b/crypto/gcm.c index 137ad1ec5438..13ccbda34ff9 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -44,6 +44,7 @@ struct crypto_rfc4543_ctx { struct crypto_rfc4543_req_ctx { u8 auth_tag[16]; + u8 assocbuf[32]; struct scatterlist cipher[1]; struct scatterlist payload[2]; struct scatterlist assoc[2]; @@ -1133,9 +1134,19 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2); assoclen += 8 + req->cryptlen - (enc ? 
0 : authsize); - sg_init_table(assoc, 2); - sg_set_page(assoc, sg_page(req->assoc), req->assoc->length, - req->assoc->offset); + if (req->assoc->length == req->assoclen) { + sg_init_table(assoc, 2); + sg_set_page(assoc, sg_page(req->assoc), req->assoc->length, + req->assoc->offset); + } else { + BUG_ON(req->assoclen > sizeof(rctx->assocbuf)); + + scatterwalk_map_and_copy(rctx->assocbuf, req->assoc, 0, + req->assoclen, 0); + + sg_init_table(assoc, 2); + sg_set_buf(assoc, rctx->assocbuf, req->assoclen); + } scatterwalk_crypto_chain(assoc, payload, 0, 2); aead_request_set_tfm(subreq, ctx->child); diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 92ed9692c47e..4bf68c8d4797 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -396,7 +396,7 @@ config ACPI_CUSTOM_METHOD config ACPI_BGRT bool "Boottime Graphics Resource Table support" - depends on EFI + depends on EFI && X86 help This driver adds support for exposing the ACPI Boottime Graphics Resource Table, which allows the operating system to obtain diff --git a/drivers/acpi/acpi_i2c.c b/drivers/acpi/acpi_i2c.c index 82045e3f5cac..a82c7626aa9b 100644 --- a/drivers/acpi/acpi_i2c.c +++ b/drivers/acpi/acpi_i2c.c @@ -90,7 +90,7 @@ void acpi_i2c_register_devices(struct i2c_adapter *adapter) acpi_handle handle; acpi_status status; - handle = ACPI_HANDLE(&adapter->dev); + handle = ACPI_HANDLE(adapter->dev.parent); if (!handle) return; diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c index 1e5d8a40101e..fefc2ca7cc3e 100644 --- a/drivers/acpi/apei/cper.c +++ b/drivers/acpi/apei/cper.c @@ -405,7 +405,7 @@ int apei_estatus_check(const struct acpi_hest_generic_status *estatus) return rc; data_len = estatus->data_length; gdata = (struct acpi_hest_generic_data *)(estatus + 1); - while (data_len > sizeof(*gdata)) { + while (data_len >= sizeof(*gdata)) { gedata_len = gdata->error_data_length; if (gedata_len > data_len - sizeof(*gdata)) return -EINVAL; diff --git a/drivers/acpi/glue.c 
b/drivers/acpi/glue.c index ef6f155469b5..40a84cc6740c 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -36,12 +36,11 @@ int register_acpi_bus_type(struct acpi_bus_type *type) { if (acpi_disabled) return -ENODEV; - if (type && type->bus && type->find_device) { + if (type && type->match && type->find_device) { down_write(&bus_type_sem); list_add_tail(&type->list, &bus_type_list); up_write(&bus_type_sem); - printk(KERN_INFO PREFIX "bus type %s registered\n", - type->bus->name); + printk(KERN_INFO PREFIX "bus type %s registered\n", type->name); return 0; } return -ENODEV; @@ -56,24 +55,21 @@ int unregister_acpi_bus_type(struct acpi_bus_type *type) down_write(&bus_type_sem); list_del_init(&type->list); up_write(&bus_type_sem); - printk(KERN_INFO PREFIX "ACPI bus type %s unregistered\n", - type->bus->name); + printk(KERN_INFO PREFIX "bus type %s unregistered\n", + type->name); return 0; } return -ENODEV; } EXPORT_SYMBOL_GPL(unregister_acpi_bus_type); -static struct acpi_bus_type *acpi_get_bus_type(struct bus_type *type) +static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) { struct acpi_bus_type *tmp, *ret = NULL; - if (!type) - return NULL; - down_read(&bus_type_sem); list_for_each_entry(tmp, &bus_type_list, list) { - if (tmp->bus == type) { + if (tmp->match(dev)) { ret = tmp; break; } @@ -82,22 +78,6 @@ static struct acpi_bus_type *acpi_get_bus_type(struct bus_type *type) return ret; } -static int acpi_find_bridge_device(struct device *dev, acpi_handle * handle) -{ - struct acpi_bus_type *tmp; - int ret = -ENODEV; - - down_read(&bus_type_sem); - list_for_each_entry(tmp, &bus_type_list, list) { - if (tmp->find_bridge && !tmp->find_bridge(dev, handle)) { - ret = 0; - break; - } - } - up_read(&bus_type_sem); - return ret; -} - static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used, void *addr_p, void **ret_p) { @@ -261,29 +241,12 @@ err: static int acpi_platform_notify(struct device *dev) { - struct acpi_bus_type *type; + 
struct acpi_bus_type *type = acpi_get_bus_type(dev); acpi_handle handle; int ret; ret = acpi_bind_one(dev, NULL); - if (ret && (!dev->bus || !dev->parent)) { - /* bridge devices genernally haven't bus or parent */ - ret = acpi_find_bridge_device(dev, &handle); - if (!ret) { - ret = acpi_bind_one(dev, handle); - if (ret) - goto out; - } - } - - type = acpi_get_bus_type(dev->bus); - if (ret) { - if (!type || !type->find_device) { - DBG("No ACPI bus support for %s\n", dev_name(dev)); - ret = -EINVAL; - goto out; - } - + if (ret && type) { ret = type->find_device(dev, &handle); if (ret) { DBG("Unable to get handle for %s\n", dev_name(dev)); @@ -316,7 +279,7 @@ static int acpi_platform_notify_remove(struct device *dev) { struct acpi_bus_type *type; - type = acpi_get_bus_type(dev->bus); + type = acpi_get_bus_type(dev); if (type && type->cleanup) type->cleanup(dev); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 0ac546d5e53f..6ae5e440436e 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -415,7 +415,6 @@ static int acpi_pci_root_add(struct acpi_device *device, struct acpi_pci_root *root; struct acpi_pci_driver *driver; u32 flags, base_flags; - bool is_osc_granted = false; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -476,6 +475,30 @@ static int acpi_pci_root_add(struct acpi_device *device, flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; acpi_pci_osc_support(root, flags); + /* + * TBD: Need PCI interface for enumeration/configuration of roots. + */ + + mutex_lock(&acpi_pci_root_lock); + list_add_tail(&root->node, &acpi_pci_roots); + mutex_unlock(&acpi_pci_root_lock); + + /* + * Scan the Root Bridge + * -------------------- + * Must do this prior to any attempt to bind the root device, as the + * PCI namespace does not get created until this call is made (and + * thus the root bridge's pci_dev does not exist). 
+ */ + root->bus = pci_acpi_scan_root(root); + if (!root->bus) { + printk(KERN_ERR PREFIX + "Bus %04x:%02x not present in PCI namespace\n", + root->segment, (unsigned int)root->secondary.start); + result = -ENODEV; + goto out_del_root; + } + /* Indicate support for various _OSC capabilities. */ if (pci_ext_cfg_avail()) flags |= OSC_EXT_PCI_CONFIG_SUPPORT; @@ -494,6 +517,7 @@ static int acpi_pci_root_add(struct acpi_device *device, flags = base_flags; } } + if (!pcie_ports_disabled && (flags & ACPI_PCIE_REQ_SUPPORT) == ACPI_PCIE_REQ_SUPPORT) { flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL @@ -514,54 +538,28 @@ static int acpi_pci_root_add(struct acpi_device *device, status = acpi_pci_osc_control_set(device->handle, &flags, OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); if (ACPI_SUCCESS(status)) { - is_osc_granted = true; dev_info(&device->dev, "ACPI _OSC control (0x%02x) granted\n", flags); + if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { + /* + * We have ASPM control, but the FADT indicates + * that it's unsupported. Clear it. + */ + pcie_clear_aspm(root->bus); + } } else { - is_osc_granted = false; dev_info(&device->dev, "ACPI _OSC request failed (%s), " "returned control mask: 0x%02x\n", acpi_format_exception(status), flags); + pr_info("ACPI _OSC control for PCIe not granted, " + "disabling ASPM\n"); + pcie_no_aspm(); } } else { dev_info(&device->dev, - "Unable to request _OSC control " - "(_OSC support mask: 0x%02x)\n", flags); - } - - /* - * TBD: Need PCI interface for enumeration/configuration of roots. - */ - - mutex_lock(&acpi_pci_root_lock); - list_add_tail(&root->node, &acpi_pci_roots); - mutex_unlock(&acpi_pci_root_lock); - - /* - * Scan the Root Bridge - * -------------------- - * Must do this prior to any attempt to bind the root device, as the - * PCI namespace does not get created until this call is made (and - * thus the root bridge's pci_dev does not exist). 
- */ - root->bus = pci_acpi_scan_root(root); - if (!root->bus) { - printk(KERN_ERR PREFIX - "Bus %04x:%02x not present in PCI namespace\n", - root->segment, (unsigned int)root->secondary.start); - result = -ENODEV; - goto out_del_root; - } - - /* ASPM setting */ - if (is_osc_granted) { - if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) - pcie_clear_aspm(root->bus); - } else { - pr_info("ACPI _OSC control for PCIe not granted, " - "disabling ASPM\n"); - pcie_no_aspm(); + "Unable to request _OSC control " + "(_OSC support mask: 0x%02x)\n", flags); } pci_acpi_add_bus_pm_notifier(device, root->bus); @@ -646,6 +644,7 @@ static void handle_root_bridge_insertion(acpi_handle handle) static void handle_root_bridge_removal(struct acpi_device *device) { + acpi_status status; struct acpi_eject_event *ej_event; ej_event = kmalloc(sizeof(*ej_event), GFP_KERNEL); @@ -661,7 +660,9 @@ static void handle_root_bridge_removal(struct acpi_device *device) ej_event->device = device; ej_event->event = ACPI_NOTIFY_EJECT_REQUEST; - acpi_bus_hot_remove_device(ej_event); + status = acpi_os_hotplug_execute(acpi_bus_hot_remove_device, ej_event); + if (ACPI_FAILURE(status)) + kfree(ej_event); } static void _handle_hotplug_event_root(struct work_struct *work) @@ -676,8 +677,9 @@ static void _handle_hotplug_event_root(struct work_struct *work) handle = hp_work->handle; type = hp_work->type; - root = acpi_pci_find_root(handle); + acpi_scan_lock_acquire(); + root = acpi_pci_find_root(handle); acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); switch (type) { @@ -711,6 +713,7 @@ static void _handle_hotplug_event_root(struct work_struct *work) break; } + acpi_scan_lock_release(); kfree(hp_work); /* allocated in handle_hotplug_event_bridge */ kfree(buffer.pointer); } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index eff722278ff5..164d49569aeb 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -158,8 +158,7 @@ static int 
map_mat_entry(acpi_handle handle, int type, u32 acpi_id) } exit: - if (buffer.pointer) - kfree(buffer.pointer); + kfree(buffer.pointer); return apic_id; } diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index df34bd04ae62..bec717ffd25f 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -559,7 +559,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) return 0; #endif - BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); + BUG_ON(pr->id >= nr_cpu_ids); /* * Buggy BIOS check diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index fc95308e9a11..ee255c60bdac 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -66,7 +66,8 @@ module_param(latency_factor, uint, 0644); static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); -static struct acpi_processor_cx *acpi_cstate[CPUIDLE_STATE_MAX]; +static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], + acpi_cstate); static int disabled_by_idle_boot_param(void) { @@ -722,7 +723,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct acpi_processor_cx *cx = acpi_cstate[index]; + struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); pr = __this_cpu_read(processors); @@ -745,7 +746,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, */ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) { - struct acpi_processor_cx *cx = acpi_cstate[index]; + struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); ACPI_FLUSH_CPU_CACHE(); @@ -775,7 +776,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct acpi_processor_cx *cx = acpi_cstate[index]; + struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); pr = __this_cpu_read(processors); @@ 
-833,7 +834,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct acpi_processor_cx *cx = acpi_cstate[index]; + struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); pr = __this_cpu_read(processors); @@ -960,7 +961,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) continue; #endif - acpi_cstate[count] = cx; + per_cpu(acpi_cstate[count], dev->cpu) = cx; count++; if (count == CPUIDLE_STATE_MAX) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 53e7ac9403a7..e854582f29a6 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -465,7 +465,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) return result; } -static int acpi_processor_get_performance_info(struct acpi_processor *pr) +int acpi_processor_get_performance_info(struct acpi_processor *pr) { int result = 0; acpi_status status = AE_OK; @@ -509,7 +509,7 @@ static int acpi_processor_get_performance_info(struct acpi_processor *pr) #endif return result; } - +EXPORT_SYMBOL_GPL(acpi_processor_get_performance_info); int acpi_processor_notify_smm(struct module *calling_module) { acpi_status status; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6d3a06a629a1..9c1a435d10e6 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -193,6 +193,14 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { }, { .callback = init_nvs_nosave, + .ident = "Sony Vaio VGN-FW21M", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FW21M"), + }, + }, + { + .callback = init_nvs_nosave, .ident = "Sony Vaio VPCEB17FX", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), @@ -599,7 +607,6 @@ static void acpi_sleep_suspend_setup(void) status = acpi_get_sleep_type_data(i, &type_a, 
&type_b); if (ACPI_SUCCESS(status)) { sleep_states[i] = 1; - pr_cont(" S%d", i); } } @@ -742,7 +749,6 @@ static void acpi_sleep_hibernate_setup(void) hibernation_set_ops(old_suspend_ordering ? &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; - pr_cont(KERN_CONT " S4"); if (nosigcheck) return; @@ -788,6 +794,9 @@ int __init acpi_sleep_init(void) { acpi_status status; u8 type_a, type_b; + char supported[ACPI_S_STATE_COUNT * 3 + 1]; + char *pos = supported; + int i; if (acpi_disabled) return 0; @@ -795,7 +804,6 @@ int __init acpi_sleep_init(void) acpi_sleep_dmi_check(); sleep_states[ACPI_STATE_S0] = 1; - pr_info(PREFIX "(supports S0"); acpi_sleep_suspend_setup(); acpi_sleep_hibernate_setup(); @@ -803,11 +811,17 @@ int __init acpi_sleep_init(void) status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); if (ACPI_SUCCESS(status)) { sleep_states[ACPI_STATE_S5] = 1; - pr_cont(" S5"); pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; } - pr_cont(")\n"); + + supported[0] = 0; + for (i = 0; i < ACPI_S_STATE_COUNT; i++) { + if (sleep_states[i]) + pos += sprintf(pos, " S%d", i); + } + pr_info(PREFIX "(supports%s)\n", supported); + /* * Register the tts_notifier to reboot notifier list so that the _TTS * object can also be evaluated when the system enters S5. 
diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 093c43554963..1f44e56cc65d 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -158,7 +158,7 @@ int tegra_ahb_enable_smmu(struct device_node *dn) EXPORT_SYMBOL(tegra_ahb_enable_smmu); #endif -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM static int tegra_ahb_suspend(struct device *dev) { int i; diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 3e751b74615e..a5a3ebcbdd2c 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -59,15 +59,16 @@ config ATA_ACPI option libata.noacpi=1 config SATA_ZPODD - bool "SATA Zero Power ODD Support" + bool "SATA Zero Power Optical Disc Drive (ZPODD) support" depends on ATA_ACPI default n help - This option adds support for SATA ZPODD. It requires both - ODD and the platform support, and if enabled, will automatically - power on/off the ODD when certain condition is satisfied. This - does not impact user's experience of the ODD, only power is saved - when ODD is not in use(i.e. no disc inside). + This option adds support for SATA Zero Power Optical Disc + Drive (ZPODD). It requires both the ODD and the platform + support, and if enabled, will automatically power on/off the + ODD when certain condition is satisfied. This does not impact + end user's experience of the ODD, only power is saved when + the ODD is not in use (i.e. no disc inside). If unsure, say N. 
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index a99112cfd8b1..6a67b07de494 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -281,6 +281,8 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x1f37), board_ahci }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci }, /* Avoton RAID */ + { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg RAID */ + { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */ { PCI_VDEVICE(INTEL, 0x8d04), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d06), board_ahci }, /* Wellsburg RAID */ diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index d2ba439cfe54..2f48123d74c4 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -150,6 +150,7 @@ enum piix_controller_ids { tolapai_sata, piix_pata_vmw, /* PIIX4 for VMware, spurious DMA_ERR */ ich8_sata_snb, + ich8_2port_sata_snb, }; struct piix_map_db { @@ -304,7 +305,7 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (Lynx Point) */ { 0x8086, 0x8c01, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Lynx Point) */ - { 0x8086, 0x8c08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + { 0x8086, 0x8c08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb }, /* SATA Controller IDE (Lynx Point) */ { 0x8086, 0x8c09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, /* SATA Controller IDE (Lynx Point-LP) */ @@ -439,6 +440,7 @@ static const struct piix_map_db *piix_map_db_table[] = { [ich8m_apple_sata] = &ich8m_apple_map_db, [tolapai_sata] = &tolapai_map_db, [ich8_sata_snb] = &ich8_map_db, + [ich8_2port_sata_snb] = &ich8_2port_map_db, }; static struct pci_bits piix_enable_bits[] = { @@ -1242,6 +1244,16 @@ static struct ata_port_info piix_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &piix_sata_ops, }, + + 
[ich8_2port_sata_snb] = + { + .flags = PIIX_SATA_FLAGS | PIIX_FLAG_SIDPR + | PIIX_FLAG_PIO16, + .pio_mask = ATA_PIO4, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA6, + .port_ops = &piix_sata_ops, + }, }; #define AHCI_PCI_BAR 5 @@ -1547,6 +1559,10 @@ static bool piix_broken_system_poweroff(struct pci_dev *pdev) static int prefer_ms_hyperv = 1; module_param(prefer_ms_hyperv, int, 0); +MODULE_PARM_DESC(prefer_ms_hyperv, + "Prefer Hyper-V paravirtualization drivers instead of ATA, " + "0 - Use ATA drivers, " + "1 (Default) - Use the paravirtualization drivers."); static void piix_ignore_devices_quirk(struct ata_host *host) { diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 0ea1018280bd..8a52dab412e2 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -1027,7 +1027,7 @@ static void ata_acpi_register_power_resource(struct ata_device *dev) handle = ata_dev_acpi_handle(dev); if (handle) - acpi_dev_pm_remove_dependent(handle, &sdev->sdev_gendev); + acpi_dev_pm_add_dependent(handle, &sdev->sdev_gendev); } static void ata_acpi_unregister_power_resource(struct ata_device *dev) @@ -1144,13 +1144,8 @@ static int ata_acpi_find_device(struct device *dev, acpi_handle *handle) return -ENODEV; } -static int ata_acpi_find_dummy(struct device *dev, acpi_handle *handle) -{ - return -ENODEV; -} - static struct acpi_bus_type ata_acpi_bus = { - .find_bridge = ata_acpi_find_dummy, + .name = "ATA", .find_device = ata_acpi_find_device, }; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 497adea1f0d6..63c743baf920 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2329,7 +2329,7 @@ int ata_dev_configure(struct ata_device *dev) * from SATA Settings page of Identify Device Data Log. 
*/ if (ata_id_has_devslp(dev->id)) { - u8 sata_setting[ATA_SECT_SIZE]; + u8 *sata_setting = ap->sector_buf; int i, j; dev->flags |= ATA_DFLAG_DEVSLP; @@ -2439,6 +2439,9 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_LBA48) + dev->max_sectors = ATA_MAX_SECTORS_LBA48; + if (ap->ops->dev_config) ap->ops->dev_config(dev); @@ -4100,6 +4103,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, + { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, /* Devices we expect to fail diagnostics */ diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 318b41358187..ff44787e5a45 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -532,8 +532,8 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) struct scsi_sense_hdr sshdr; scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sshdr); - if (sshdr.sense_key == 0 && - sshdr.asc == 0 && sshdr.ascq == 0) + if (sshdr.sense_key == RECOVERED_ERROR && + sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; } @@ -618,8 +618,8 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) struct scsi_sense_hdr sshdr; scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sshdr); - if (sshdr.sense_key == 0 && - sshdr.asc == 0 && sshdr.ascq == 0) + if (sshdr.sense_key == RECOVERED_ERROR && + sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; } diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c index 70b0e01372b3..6ef27e98c508 100644 --- a/drivers/ata/pata_samsung_cf.c +++ b/drivers/ata/pata_samsung_cf.c @@ -661,18 +661,7 @@ static struct platform_driver pata_s3c_driver = { }, }; -static int __init 
pata_s3c_init(void) -{ - return platform_driver_probe(&pata_s3c_driver, pata_s3c_probe); -} - -static void __exit pata_s3c_exit(void) -{ - platform_driver_unregister(&pata_s3c_driver); -} - -module_init(pata_s3c_init); -module_exit(pata_s3c_exit); +module_platform_driver_probe(pata_s3c_driver, pata_s3c_probe); MODULE_AUTHOR("Abhilash Kesavan, <a.kesavan@samsung.com>"); MODULE_DESCRIPTION("low-level driver for Samsung PATA controller"); diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 124b2c1d9c0b..608f82fed632 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1511,8 +1511,7 @@ error_exit_with_cleanup: if (hcr_base) iounmap(hcr_base); - if (host_priv) - kfree(host_priv); + kfree(host_priv); return retval; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2b7f77d3fcb0..15beb500a4e4 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -99,7 +99,6 @@ void device_pm_add(struct device *dev) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); - dev_pm_qos_constraints_init(dev); mutex_unlock(&dpm_list_mtx); } @@ -113,7 +112,6 @@ void device_pm_remove(struct device *dev) dev->bus ? 
dev->bus->name : "No Bus", dev_name(dev)); complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); - dev_pm_qos_constraints_destroy(dev); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index b16686a0a5a2..cfc3226ec492 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -4,7 +4,7 @@ static inline void device_pm_init_common(struct device *dev) { if (!dev->power.early_init) { spin_lock_init(&dev->power.lock); - dev->power.power_state = PMSG_INVALID; + dev->power.qos = NULL; dev->power.early_init = true; } } @@ -56,14 +56,10 @@ extern void device_pm_move_last(struct device *); static inline void device_pm_sleep_init(struct device *dev) {} -static inline void device_pm_add(struct device *dev) -{ - dev_pm_qos_constraints_init(dev); -} +static inline void device_pm_add(struct device *dev) {} static inline void device_pm_remove(struct device *dev) { - dev_pm_qos_constraints_destroy(dev); pm_runtime_remove(dev); } diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 3d4d1f8aac5c..71671c42ef45 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -41,10 +41,12 @@ #include <linux/mutex.h> #include <linux/export.h> #include <linux/pm_runtime.h> +#include <linux/err.h> #include "power.h" static DEFINE_MUTEX(dev_pm_qos_mtx); +static DEFINE_MUTEX(dev_pm_qos_sysfs_mtx); static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); @@ -61,7 +63,7 @@ enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) struct pm_qos_flags *pqf; s32 val; - if (!qos) + if (IS_ERR_OR_NULL(qos)) return PM_QOS_FLAGS_UNDEFINED; pqf = &qos->flags; @@ -101,7 +103,8 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_flags); */ s32 __dev_pm_qos_read_value(struct device *dev) { - return dev->power.qos ? pm_qos_read_value(&dev->power.qos->latency) : 0; + return IS_ERR_OR_NULL(dev->power.qos) ? 
+ 0 : pm_qos_read_value(&dev->power.qos->latency); } /** @@ -198,20 +201,8 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) return 0; } -/** - * dev_pm_qos_constraints_init - Initalize device's PM QoS constraints pointer. - * @dev: target device - * - * Called from the device PM subsystem during device insertion under - * device_pm_lock(). - */ -void dev_pm_qos_constraints_init(struct device *dev) -{ - mutex_lock(&dev_pm_qos_mtx); - dev->power.qos = NULL; - dev->power.power_state = PMSG_ON; - mutex_unlock(&dev_pm_qos_mtx); -} +static void __dev_pm_qos_hide_latency_limit(struct device *dev); +static void __dev_pm_qos_hide_flags(struct device *dev); /** * dev_pm_qos_constraints_destroy @@ -226,16 +217,20 @@ void dev_pm_qos_constraints_destroy(struct device *dev) struct pm_qos_constraints *c; struct pm_qos_flags *f; + mutex_lock(&dev_pm_qos_sysfs_mtx); + /* * If the device's PM QoS resume latency limit or PM QoS flags have been * exposed to user space, they have to be hidden at this point. 
*/ - dev_pm_qos_hide_latency_limit(dev); - dev_pm_qos_hide_flags(dev); + pm_qos_sysfs_remove_latency(dev); + pm_qos_sysfs_remove_flags(dev); mutex_lock(&dev_pm_qos_mtx); - dev->power.power_state = PMSG_INVALID; + __dev_pm_qos_hide_latency_limit(dev); + __dev_pm_qos_hide_flags(dev); + qos = dev->power.qos; if (!qos) goto out; @@ -257,7 +252,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev) } spin_lock_irq(&dev->power.lock); - dev->power.qos = NULL; + dev->power.qos = ERR_PTR(-ENODEV); spin_unlock_irq(&dev->power.lock); kfree(c->notifiers); @@ -265,6 +260,8 @@ void dev_pm_qos_constraints_destroy(struct device *dev) out: mutex_unlock(&dev_pm_qos_mtx); + + mutex_unlock(&dev_pm_qos_sysfs_mtx); } /** @@ -301,32 +298,19 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, "%s() called for already added request\n", __func__)) return -EINVAL; - req->dev = dev; - mutex_lock(&dev_pm_qos_mtx); - if (!dev->power.qos) { - if (dev->power.power_state.event == PM_EVENT_INVALID) { - /* The device has been removed from the system. */ - req->dev = NULL; - ret = -ENODEV; - goto out; - } else { - /* - * Allocate the constraints data on the first call to - * add_request, i.e. only if the data is not already - * allocated and if the device has not been removed. 
- */ - ret = dev_pm_qos_constraints_allocate(dev); - } - } + if (IS_ERR(dev->power.qos)) + ret = -ENODEV; + else if (!dev->power.qos) + ret = dev_pm_qos_constraints_allocate(dev); if (!ret) { + req->dev = dev; req->type = type; ret = apply_constraint(req, PM_QOS_ADD_REQ, value); } - out: mutex_unlock(&dev_pm_qos_mtx); return ret; @@ -344,7 +328,14 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 curr_value; int ret = 0; - if (!req->dev->power.qos) + if (!req) /*guard against callers passing in null */ + return -EINVAL; + + if (WARN(!dev_pm_qos_request_active(req), + "%s() called for unknown object\n", __func__)) + return -EINVAL; + + if (IS_ERR_OR_NULL(req->dev->power.qos)) return -ENODEV; switch(req->type) { @@ -386,6 +377,17 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { int ret; + mutex_lock(&dev_pm_qos_mtx); + ret = __dev_pm_qos_update_request(req, new_value); + mutex_unlock(&dev_pm_qos_mtx); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); + +static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req) +{ + int ret; + if (!req) /*guard against callers passing in null */ return -EINVAL; @@ -393,13 +395,13 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) "%s() called for unknown object\n", __func__)) return -EINVAL; - mutex_lock(&dev_pm_qos_mtx); - ret = __dev_pm_qos_update_request(req, new_value); - mutex_unlock(&dev_pm_qos_mtx); + if (IS_ERR_OR_NULL(req->dev->power.qos)) + return -ENODEV; + ret = apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); return ret; } -EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); /** * dev_pm_qos_remove_request - modifies an existing qos request @@ -418,26 +420,10 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); */ int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { - int ret = 0; - - if (!req) /*guard against callers passing in null */ - return -EINVAL; - - 
if (WARN(!dev_pm_qos_request_active(req), - "%s() called for unknown object\n", __func__)) - return -EINVAL; + int ret; mutex_lock(&dev_pm_qos_mtx); - - if (req->dev->power.qos) { - ret = apply_constraint(req, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); - memset(req, 0, sizeof(*req)); - } else { - /* Return if the device has been removed */ - ret = -ENODEV; - } - + ret = __dev_pm_qos_remove_request(req); mutex_unlock(&dev_pm_qos_mtx); return ret; } @@ -462,9 +448,10 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) mutex_lock(&dev_pm_qos_mtx); - if (!dev->power.qos) - ret = dev->power.power_state.event != PM_EVENT_INVALID ? - dev_pm_qos_constraints_allocate(dev) : -ENODEV; + if (IS_ERR(dev->power.qos)) + ret = -ENODEV; + else if (!dev->power.qos) + ret = dev_pm_qos_constraints_allocate(dev); if (!ret) ret = blocking_notifier_chain_register( @@ -493,7 +480,7 @@ int dev_pm_qos_remove_notifier(struct device *dev, mutex_lock(&dev_pm_qos_mtx); /* Silently return if the constraints object is not present. 
*/ - if (dev->power.qos) + if (!IS_ERR_OR_NULL(dev->power.qos)) retval = blocking_notifier_chain_unregister( dev->power.qos->latency.notifiers, notifier); @@ -563,16 +550,28 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request); static void __dev_pm_qos_drop_user_request(struct device *dev, enum dev_pm_qos_req_type type) { + struct dev_pm_qos_request *req = NULL; + switch(type) { case DEV_PM_QOS_LATENCY: - dev_pm_qos_remove_request(dev->power.qos->latency_req); + req = dev->power.qos->latency_req; dev->power.qos->latency_req = NULL; break; case DEV_PM_QOS_FLAGS: - dev_pm_qos_remove_request(dev->power.qos->flags_req); + req = dev->power.qos->flags_req; dev->power.qos->flags_req = NULL; break; } + __dev_pm_qos_remove_request(req); + kfree(req); +} + +static void dev_pm_qos_drop_user_request(struct device *dev, + enum dev_pm_qos_req_type type) +{ + mutex_lock(&dev_pm_qos_mtx); + __dev_pm_qos_drop_user_request(dev, type); + mutex_unlock(&dev_pm_qos_mtx); } /** @@ -588,36 +587,66 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) if (!device_is_registered(dev) || value < 0) return -EINVAL; - if (dev->power.qos && dev->power.qos->latency_req) - return -EEXIST; - req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY, value); - if (ret < 0) + if (ret < 0) { + kfree(req); return ret; + } + mutex_lock(&dev_pm_qos_sysfs_mtx); + + mutex_lock(&dev_pm_qos_mtx); + + if (IS_ERR_OR_NULL(dev->power.qos)) + ret = -ENODEV; + else if (dev->power.qos->latency_req) + ret = -EEXIST; + + if (ret < 0) { + __dev_pm_qos_remove_request(req); + kfree(req); + mutex_unlock(&dev_pm_qos_mtx); + goto out; + } dev->power.qos->latency_req = req; + + mutex_unlock(&dev_pm_qos_mtx); + ret = pm_qos_sysfs_add_latency(dev); if (ret) - __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); + dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); + out: + mutex_unlock(&dev_pm_qos_sysfs_mtx); return ret; } 
EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); +static void __dev_pm_qos_hide_latency_limit(struct device *dev) +{ + if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->latency_req) + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); +} + /** * dev_pm_qos_hide_latency_limit - Hide PM QoS latency limit from user space. * @dev: Device whose PM QoS latency limit is to be hidden from user space. */ void dev_pm_qos_hide_latency_limit(struct device *dev) { - if (dev->power.qos && dev->power.qos->latency_req) { - pm_qos_sysfs_remove_latency(dev); - __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); - } + mutex_lock(&dev_pm_qos_sysfs_mtx); + + pm_qos_sysfs_remove_latency(dev); + + mutex_lock(&dev_pm_qos_mtx); + __dev_pm_qos_hide_latency_limit(dev); + mutex_unlock(&dev_pm_qos_mtx); + + mutex_unlock(&dev_pm_qos_sysfs_mtx); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit); @@ -634,41 +663,70 @@ int dev_pm_qos_expose_flags(struct device *dev, s32 val) if (!device_is_registered(dev)) return -EINVAL; - if (dev->power.qos && dev->power.qos->flags_req) - return -EEXIST; - req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; - pm_runtime_get_sync(dev); ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val); - if (ret < 0) - goto fail; + if (ret < 0) { + kfree(req); + return ret; + } + + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_sysfs_mtx); + + mutex_lock(&dev_pm_qos_mtx); + + if (IS_ERR_OR_NULL(dev->power.qos)) + ret = -ENODEV; + else if (dev->power.qos->flags_req) + ret = -EEXIST; + if (ret < 0) { + __dev_pm_qos_remove_request(req); + kfree(req); + mutex_unlock(&dev_pm_qos_mtx); + goto out; + } dev->power.qos->flags_req = req; + + mutex_unlock(&dev_pm_qos_mtx); + ret = pm_qos_sysfs_add_flags(dev); if (ret) - __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); + dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); -fail: + out: + mutex_unlock(&dev_pm_qos_sysfs_mtx); pm_runtime_put(dev); return ret; } 
EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags); +static void __dev_pm_qos_hide_flags(struct device *dev) +{ + if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->flags_req) + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); +} + /** * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space. * @dev: Device whose PM QoS flags are to be hidden from user space. */ void dev_pm_qos_hide_flags(struct device *dev) { - if (dev->power.qos && dev->power.qos->flags_req) { - pm_qos_sysfs_remove_flags(dev); - pm_runtime_get_sync(dev); - __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); - pm_runtime_put(dev); - } + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_sysfs_mtx); + + pm_qos_sysfs_remove_flags(dev); + + mutex_lock(&dev_pm_qos_mtx); + __dev_pm_qos_hide_flags(dev); + mutex_unlock(&dev_pm_qos_mtx); + + mutex_unlock(&dev_pm_qos_sysfs_mtx); + pm_runtime_put(dev); } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags); @@ -683,12 +741,14 @@ int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) s32 value; int ret; - if (!dev->power.qos || !dev->power.qos->flags_req) - return -EINVAL; - pm_runtime_get_sync(dev); mutex_lock(&dev_pm_qos_mtx); + if (IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->flags_req) { + ret = -EINVAL; + goto out; + } + value = dev_pm_qos_requested_flags(dev); if (set) value |= mask; @@ -697,9 +757,12 @@ int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value); + out: mutex_unlock(&dev_pm_qos_mtx); pm_runtime_put(dev); - return ret; } +#else /* !CONFIG_PM_RUNTIME */ +static void __dev_pm_qos_hide_latency_limit(struct device *dev) {} +static void __dev_pm_qos_hide_flags(struct device *dev) {} #endif /* CONFIG_PM_RUNTIME */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 50d16e3cb0a9..a53ebd265701 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -708,6 +708,7 @@ void rpm_sysfs_remove(struct device 
*dev) void dpm_sysfs_remove(struct device *dev) { + dev_pm_qos_constraints_destroy(dev); rpm_sysfs_remove(dev); sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index e6732cf7c06e..79f4fca9877a 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -398,7 +398,7 @@ static int regcache_rbtree_sync(struct regmap *map, unsigned int min, base = 0; if (max < rbnode->base_reg + rbnode->blklen) - end = rbnode->base_reg + rbnode->blklen - max; + end = max - rbnode->base_reg + 1; else end = rbnode->blklen; diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 4706c63d0bc6..020ea2b9fd2f 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -184,6 +184,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) if (ret < 0) { dev_err(map->dev, "IRQ thread failed to resume: %d\n", ret); + pm_runtime_put(map->dev); return IRQ_NONE; } } diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 3d2367501fd0..58cfb3232428 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -710,12 +710,12 @@ skip_format_initialization: } } + regmap_debugfs_init(map, config->name); + ret = regcache_init(map, config); if (ret != 0) goto err_range; - regmap_debugfs_init(map, config->name); - /* Add a devres resource for dev_get_regmap() */ m = devres_alloc(dev_get_regmap_release, sizeof(*m), GFP_KERNEL); if (!m) { @@ -1036,6 +1036,8 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg, kfree(async->work_buf); kfree(async); } + + return ret; } trace_regmap_hw_write_start(map->dev, reg, diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 5dc0daed8fac..b81ddfea1da0 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -532,11 +532,11 @@ config BLK_DEV_RBD If 
unsure, say N. config BLK_DEV_RSXX - tristate "RamSam PCIe Flash SSD Device Driver" + tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver" depends on PCI help Device driver for IBM's high speed PCIe SSD - storage devices: RamSan-70 and RamSan-80. + storage devices: FlashSystem-70 and FlashSystem-80. To compile this driver as a module, choose M here: the module will be called rsxx. diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 25ef5c014fca..92b6d7c51e39 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -51,8 +51,9 @@ new_skb(ulong len) { struct sk_buff *skb; - skb = alloc_skb(len, GFP_ATOMIC); + skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC); if (skb) { + skb_reserve(skb, MAX_HEADER); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb->protocol = __constant_htons(ETH_P_AOE); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ade58bc8f3c4..1c1b8e544aa2 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4206,7 +4206,7 @@ static int cciss_find_cfgtables(ctlr_info_t *h) if (rc) return rc; h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev, - cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable)); + cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable)); if (!h->cfgtable) return -ENOMEM; rc = write_driver_ver_to_cfgtable(h->cfgtable); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 747bb2af69dc..dfe758382eaf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -922,6 +922,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_flags |= LO_FLAGS_PARTSCAN; if (lo->lo_flags & LO_FLAGS_PARTSCAN) ioctl_by_bdev(bdev, BLKRRPART, 0); + + /* Grab the block_device to prevent its destruction after we + * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). 
+ */ + bdgrab(bdev); return 0; out_clr: @@ -1031,8 +1036,10 @@ static int loop_clr_fd(struct loop_device *lo) memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); memset(lo->lo_file_name, 0, LO_NAME_SIZE); - if (bdev) + if (bdev) { + bdput(bdev); invalidate_bdev(bdev); + } set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { @@ -1623,6 +1630,7 @@ static int loop_add(struct loop_device **l, int i) goto out_free_dev; i = err; + err = -ENOMEM; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) goto out_free_dev; diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 1788f491e0fb..076ae7f1b781 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -890,8 +890,10 @@ static int mg_probe(struct platform_device *plat_dev) gpio_direction_output(host->rst, 1); /* reset out pin */ - if (!(prv_data->dev_attr & MG_DEV_MASK)) + if (!(prv_data->dev_attr & MG_DEV_MASK)) { + err = -EINVAL; goto probe_err_3a; + } if (prv_data->dev_attr != MG_BOOT_DEV) { rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 11cc9522cdd4..32c678028e53 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -81,12 +81,17 @@ /* Device instance number, incremented each time a device is probed. */ static int instance; +struct list_head online_list; +struct list_head removing_list; +spinlock_t dev_lock; + /* * Global variable used to hold the major block device number * allocated in mtip_init(). */ static int mtip_major; static struct dentry *dfs_parent; +static struct dentry *dfs_device_status; static u32 cpu_use[NR_CPUS]; @@ -243,40 +248,31 @@ static inline void release_slot(struct mtip_port *port, int tag) /* * Reset the HBA (without sleeping) * - * Just like hba_reset, except does not call sleep, so can be - * run from interrupt/tasklet context. 
- * * @dd Pointer to the driver data structure. * * return value * 0 The reset was successful. * -1 The HBA Reset bit did not clear. */ -static int hba_reset_nosleep(struct driver_data *dd) +static int mtip_hba_reset(struct driver_data *dd) { unsigned long timeout; - /* Chip quirk: quiesce any chip function */ - mdelay(10); - /* Set the reset bit */ writel(HOST_RESET, dd->mmio + HOST_CTL); /* Flush */ readl(dd->mmio + HOST_CTL); - /* - * Wait 10ms then spin for up to 1 second - * waiting for reset acknowledgement - */ - timeout = jiffies + msecs_to_jiffies(1000); - mdelay(10); - while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) - && time_before(jiffies, timeout)) - mdelay(1); + /* Spin for up to 2 seconds, waiting for reset acknowledgement */ + timeout = jiffies + msecs_to_jiffies(2000); + do { + mdelay(10); + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) + return -1; - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) - return -1; + } while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) + && time_before(jiffies, timeout)); if (readl(dd->mmio + HOST_CTL) & HOST_RESET) return -1; @@ -481,7 +477,7 @@ static void mtip_restart_port(struct mtip_port *port) dev_warn(&port->dd->pdev->dev, "PxCMD.CR not clear, escalating reset\n"); - if (hba_reset_nosleep(port->dd)) + if (mtip_hba_reset(port->dd)) dev_err(&port->dd->pdev->dev, "HBA reset escalation failed.\n"); @@ -527,6 +523,26 @@ static void mtip_restart_port(struct mtip_port *port) } +static int mtip_device_reset(struct driver_data *dd) +{ + int rv = 0; + + if (mtip_check_surprise_removal(dd->pdev)) + return 0; + + if (mtip_hba_reset(dd) < 0) + rv = -EFAULT; + + mdelay(1); + mtip_init_port(dd->port); + mtip_start_port(dd->port); + + /* Enable interrupts on the HBA. 
*/ + writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, + dd->mmio + HOST_CTL); + return rv; +} + /* * Helper function for tag logging */ @@ -632,7 +648,7 @@ static void mtip_timeout_function(unsigned long int data) if (cmdto_cnt) { print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { - mtip_restart_port(port); + mtip_device_reset(port->dd); wake_up_interruptible(&port->svc_wait); } clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); @@ -1283,11 +1299,11 @@ static int mtip_exec_internal_command(struct mtip_port *port, int rv = 0, ready2go = 1; struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; unsigned long to; + struct driver_data *dd = port->dd; /* Make sure the buffer is 8 byte aligned. This is asic specific. */ if (buffer & 0x00000007) { - dev_err(&port->dd->pdev->dev, - "SG buffer is not 8 byte aligned\n"); + dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n"); return -EFAULT; } @@ -1300,23 +1316,21 @@ static int mtip_exec_internal_command(struct mtip_port *port, mdelay(100); } while (time_before(jiffies, to)); if (!ready2go) { - dev_warn(&port->dd->pdev->dev, + dev_warn(&dd->pdev->dev, "Internal cmd active. 
new cmd [%02X]\n", fis->command); return -EBUSY; } set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); port->ic_pause_timer = 0; - if (fis->command == ATA_CMD_SEC_ERASE_UNIT) - clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); - else if (fis->command == ATA_CMD_DOWNLOAD_MICRO) - clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); if (atomic == GFP_KERNEL) { if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ if (mtip_quiesce_io(port, 5000) < 0) { - dev_warn(&port->dd->pdev->dev, + dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); release_slot(port, MTIP_TAG_INTERNAL); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1361,58 +1375,84 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); - /* Poll if atomic, wait_for_completion otherwise */ if (atomic == GFP_KERNEL) { /* Wait for the command to complete or timeout. 
*/ - if (wait_for_completion_timeout( + if (wait_for_completion_interruptible_timeout( &wait, - msecs_to_jiffies(timeout)) == 0) { - dev_err(&port->dd->pdev->dev, - "Internal command did not complete [%d] " - "within timeout of %lu ms\n", - atomic, timeout); - if (mtip_check_surprise_removal(port->dd->pdev) || + msecs_to_jiffies(timeout)) <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ + dev_err(&dd->pdev->dev, + "Internal command [%02X] was interrupted after %lu ms\n", + fis->command, timeout); + rv = -EINTR; + goto exec_ic_exit; + } else if (rv == 0) /* timeout */ + dev_err(&dd->pdev->dev, + "Internal command did not complete [%02X] within timeout of %lu ms\n", + fis->command, timeout); + else + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", + fis->command, rv, timeout); + + if (mtip_check_surprise_removal(dd->pdev) || test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { + &dd->dd_flag)) { + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned due to SR\n", + fis->command); rv = -ENXIO; goto exec_ic_exit; } + mtip_device_reset(dd); /* recover from timeout issue */ rv = -EAGAIN; + goto exec_ic_exit; } } else { + u32 hba_stat, port_stat; + /* Spin for <timeout> checking if command still outstanding */ timeout = jiffies + msecs_to_jiffies(timeout); while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) && time_before(jiffies, timeout)) { - if (mtip_check_surprise_removal(port->dd->pdev)) { + if (mtip_check_surprise_removal(dd->pdev)) { rv = -ENXIO; goto exec_ic_exit; } if ((fis->command != ATA_CMD_STANDBYNOW1) && test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { + &dd->dd_flag)) { rv = -ENXIO; goto exec_ic_exit; } - if (readl(port->mmio + PORT_IRQ_STAT) & PORT_IRQ_ERR) { - atomic_inc(&int_cmd->active); /* error */ - break; + port_stat = readl(port->mmio + PORT_IRQ_STAT); + if (!port_stat) + continue; + + if (port_stat & PORT_IRQ_ERR) { + 
dev_err(&dd->pdev->dev, + "Internal command [%02X] failed\n", + fis->command); + mtip_device_reset(dd); + rv = -EIO; + goto exec_ic_exit; + } else { + writel(port_stat, port->mmio + PORT_IRQ_STAT); + hba_stat = readl(dd->mmio + HOST_IRQ_STAT); + if (hba_stat) + writel(hba_stat, + dd->mmio + HOST_IRQ_STAT); } + break; } } - if (atomic_read(&int_cmd->active) > 1) { - dev_err(&port->dd->pdev->dev, - "Internal command [%02X] failed\n", fis->command); - rv = -EIO; - } if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { rv = -ENXIO; - if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { - mtip_restart_port(port); + if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { + mtip_device_reset(dd); rv = -EAGAIN; } } @@ -1724,7 +1764,8 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, * -EINVAL Invalid parameters passed in, trim not supported * -EIO Error submitting trim request to hw */ -static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, + unsigned int len) { int i, rv = 0; u64 tlba, tlen, sect_left; @@ -1811,45 +1852,6 @@ static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors) } /* - * Reset the HBA. - * - * Resets the HBA by setting the HBA Reset bit in the Global - * HBA Control register. After setting the HBA Reset bit the - * function waits for 1 second before reading the HBA Reset - * bit to make sure it has cleared. If HBA Reset is not clear - * an error is returned. Cannot be used in non-blockable - * context. - * - * @dd Pointer to the driver data structure. - * - * return value - * 0 The reset was successful. - * -1 The HBA Reset bit did not clear. 
- */ -static int mtip_hba_reset(struct driver_data *dd) -{ - mtip_deinit_port(dd->port); - - /* Set the reset bit */ - writel(HOST_RESET, dd->mmio + HOST_CTL); - - /* Flush */ - readl(dd->mmio + HOST_CTL); - - /* Wait for reset to clear */ - ssleep(1); - - /* Check the bit has cleared */ - if (readl(dd->mmio + HOST_CTL) & HOST_RESET) { - dev_err(&dd->pdev->dev, - "Reset bit did not clear.\n"); - return -1; - } - - return 0; -} - -/* * Display the identify command data. * * @port Pointer to the port data structure. @@ -2710,6 +2712,100 @@ static ssize_t mtip_hw_show_status(struct device *dev, static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); +/* debugsfs entries */ + +static ssize_t show_device_status(struct device_driver *drv, char *buf) +{ + int size = 0; + struct driver_data *dd, *tmp; + unsigned long flags; + char id_buf[42]; + u16 status = 0; + + spin_lock_irqsave(&dev_lock, flags); + size += sprintf(&buf[size], "Devices Present:\n"); + list_for_each_entry_safe(dd, tmp, &online_list, online_list) { + if (dd->pdev) { + if (dd->port && + dd->port->identify && + dd->port->identify_valid) { + strlcpy(id_buf, + (char *) (dd->port->identify + 10), 21); + status = *(dd->port->identify + 141); + } else { + memset(id_buf, 0, 42); + status = 0; + } + + if (dd->port && + test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { + size += sprintf(&buf[size], + " device %s %s (ftl rebuild %d %%)\n", + dev_name(&dd->pdev->dev), + id_buf, + status); + } else { + size += sprintf(&buf[size], + " device %s %s\n", + dev_name(&dd->pdev->dev), + id_buf); + } + } + } + + size += sprintf(&buf[size], "Devices Being Removed:\n"); + list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) { + if (dd->pdev) { + if (dd->port && + dd->port->identify && + dd->port->identify_valid) { + strlcpy(id_buf, + (char *) (dd->port->identify+10), 21); + status = *(dd->port->identify + 141); + } else { + memset(id_buf, 0, 42); + status = 0; + } + + if (dd->port && + 
test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { + size += sprintf(&buf[size], + " device %s %s (ftl rebuild %d %%)\n", + dev_name(&dd->pdev->dev), + id_buf, + status); + } else { + size += sprintf(&buf[size], + " device %s %s\n", + dev_name(&dd->pdev->dev), + id_buf); + } + } + } + spin_unlock_irqrestore(&dev_lock, flags); + + return size; +} + +static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, + size_t len, loff_t *offset) +{ + int size = *offset; + char buf[MTIP_DFS_MAX_BUF_SIZE]; + + if (!len || *offset) + return 0; + + size += show_device_status(NULL, buf); + + *offset = size <= len ? size : len; + size = copy_to_user(ubuf, buf, *offset); + if (size) + return -EFAULT; + + return *offset; +} + static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { @@ -2804,6 +2900,13 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, return *offset; } +static const struct file_operations mtip_device_status_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mtip_hw_read_device_status, + .llseek = no_llseek, +}; + static const struct file_operations mtip_regs_fops = { .owner = THIS_MODULE, .open = simple_open, @@ -4161,6 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, const struct cpumask *node_mask; int cpu, i = 0, j = 0; int my_node = NUMA_NO_NODE; + unsigned long flags; /* Allocate memory for this devices private data. 
*/ my_node = pcibus_to_node(pdev->bus); @@ -4218,12 +4322,16 @@ static int mtip_pci_probe(struct pci_dev *pdev, dd->pdev = pdev; dd->numa_node = my_node; + INIT_LIST_HEAD(&dd->online_list); + INIT_LIST_HEAD(&dd->remove_list); + memset(dd->workq_name, 0, 32); snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); dd->isr_workq = create_workqueue(dd->workq_name); if (!dd->isr_workq) { dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + rv = -ENOMEM; goto block_initialize_err; } @@ -4282,7 +4390,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); pci_set_master(pdev); - if (pci_enable_msi(pdev)) { + rv = pci_enable_msi(pdev); + if (rv) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; @@ -4303,6 +4412,14 @@ static int mtip_pci_probe(struct pci_dev *pdev, instance++; if (rv != MTIP_FTL_REBUILD_MAGIC) set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); + else + rv = 0; /* device in rebuild state, return 0 from probe */ + + /* Add to online list even if in ftl rebuild */ + spin_lock_irqsave(&dev_lock, flags); + list_add(&dd->online_list, &online_list); + spin_unlock_irqrestore(&dev_lock, flags); + goto done; block_initialize_err: @@ -4336,9 +4453,15 @@ static void mtip_pci_remove(struct pci_dev *pdev) { struct driver_data *dd = pci_get_drvdata(pdev); int counter = 0; + unsigned long flags; set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + spin_lock_irqsave(&dev_lock, flags); + list_del_init(&dd->online_list); + list_add(&dd->remove_list, &removing_list); + spin_unlock_irqrestore(&dev_lock, flags); + if (mtip_check_surprise_removal(pdev)) { while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { counter++; @@ -4364,6 +4487,10 @@ static void mtip_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); + spin_lock_irqsave(&dev_lock, flags); + list_del_init(&dd->remove_list); + spin_unlock_irqrestore(&dev_lock, flags); + kfree(dd); pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); } @@ 
-4511,6 +4638,11 @@ static int __init mtip_init(void) pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); + spin_lock_init(&dev_lock); + + INIT_LIST_HEAD(&online_list); + INIT_LIST_HEAD(&removing_list); + /* Allocate a major block device number to use with this driver. */ error = register_blkdev(0, MTIP_DRV_NAME); if (error <= 0) { @@ -4520,11 +4652,18 @@ static int __init mtip_init(void) } mtip_major = error; - if (!dfs_parent) { - dfs_parent = debugfs_create_dir("rssd", NULL); - if (IS_ERR_OR_NULL(dfs_parent)) { - pr_warn("Error creating debugfs parent\n"); - dfs_parent = NULL; + dfs_parent = debugfs_create_dir("rssd", NULL); + if (IS_ERR_OR_NULL(dfs_parent)) { + pr_warn("Error creating debugfs parent\n"); + dfs_parent = NULL; + } + if (dfs_parent) { + dfs_device_status = debugfs_create_file("device_status", + S_IRUGO, dfs_parent, NULL, + &mtip_device_status_fops); + if (IS_ERR_OR_NULL(dfs_device_status)) { + pr_err("Error creating device_status node\n"); + dfs_device_status = NULL; } } diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 3bffff5f670c..8e8334c9dd0f 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -129,9 +129,9 @@ enum { MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */ MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */ MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */ - MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | \ - (1 << MTIP_PF_EH_ACTIVE_BIT) | \ - (1 << MTIP_PF_SE_ACTIVE_BIT) | \ + MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | + (1 << MTIP_PF_EH_ACTIVE_BIT) | + (1 << MTIP_PF_SE_ACTIVE_BIT) | (1 << MTIP_PF_DM_ACTIVE_BIT)), MTIP_PF_SVC_THD_ACTIVE_BIT = 4, @@ -144,9 +144,9 @@ enum { MTIP_DDF_REMOVE_PENDING_BIT = 1, MTIP_DDF_OVER_TEMP_BIT = 2, MTIP_DDF_WRITE_PROTECT_BIT = 3, - MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ - (1 << MTIP_DDF_SEC_LOCK_BIT) | \ - (1 << MTIP_DDF_OVER_TEMP_BIT) | \ + MTIP_DDF_STOP_IO = ((1 << 
MTIP_DDF_REMOVE_PENDING_BIT) | + (1 << MTIP_DDF_SEC_LOCK_BIT) | + (1 << MTIP_DDF_OVER_TEMP_BIT) | (1 << MTIP_DDF_WRITE_PROTECT_BIT)), MTIP_DDF_CLEANUP_BIT = 5, @@ -180,7 +180,7 @@ struct mtip_work { #define MTIP_TRIM_TIMEOUT_MS 240000 #define MTIP_MAX_TRIM_ENTRIES 8 -#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 struct mtip_trim_entry { u32 lba; /* starting lba of region */ @@ -501,6 +501,10 @@ struct driver_data { atomic_t irq_workers_active; int isr_binding; + + struct list_head online_list; /* linkage for online list */ + + struct list_head remove_list; /* linkage for removing list */ }; #endif diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 07fb2dfaae13..9dcefe40380b 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -135,6 +135,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); + BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); } typedef void (*nvme_completion_fn)(struct nvme_dev *, void *, @@ -237,7 +238,8 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, *fn = special_completion; return CMD_CTX_INVALID; } - *fn = info[cmdid].fn; + if (fn) + *fn = info[cmdid].fn; ctx = info[cmdid].ctx; info[cmdid].fn = special_completion; info[cmdid].ctx = CMD_CTX_COMPLETED; @@ -335,6 +337,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) iod->offset = offsetof(struct nvme_iod, sg[nseg]); iod->npages = -1; iod->length = nbytes; + iod->nents = 0; } return iod; @@ -375,7 +378,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; - dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, + if (iod->nents) + dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, bio_data_dir(bio) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); nvme_free_iod(dev, iod); if (status) { @@ -589,7 +593,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs); if (result < 0) - goto free_iod; + goto free_cmdid; length = result; cmnd->rw.command_id = cmdid; @@ -609,6 +613,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; + free_cmdid: + free_cmdid(nvmeq, cmdid, NULL); free_iod: nvme_free_iod(nvmeq->dev, iod); nomem: @@ -835,8 +841,8 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, return nvme_submit_admin_cmd(dev, &c, NULL); } -static int nvme_get_features(struct nvme_dev *dev, unsigned fid, - unsigned nsid, dma_addr_t dma_addr) +static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result) { struct nvme_command c; @@ -846,7 +852,7 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_admin_cmd(dev, &c, result); } static int nvme_set_features(struct nvme_dev *dev, unsigned fid, @@ -906,6 +912,10 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) spin_lock_irq(&nvmeq->q_lock); nvme_cancel_ios(nvmeq, false); + while (bio_list_peek(&nvmeq->sq_cong)) { + struct bio *bio = bio_list_pop(&nvmeq->sq_cong); + bio_endio(bio, -EIO); + } spin_unlock_irq(&nvmeq->q_lock); irq_set_affinity_hint(vector, NULL); @@ -1230,12 +1240,17 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, if (length != cmd.data_len) status = -ENOMEM; else - status = nvme_submit_admin_cmd(dev, &c, NULL); + status = nvme_submit_admin_cmd(dev, &c, &cmd.result); if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); nvme_free_iod(dev, iod); } + + if (!status && copy_to_user(&ucmd->result, &cmd.result, + 
sizeof(cmd.result))) + status = -EFAULT; + return status; } @@ -1523,9 +1538,9 @@ static int nvme_dev_add(struct nvme_dev *dev) continue; res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, - dma_addr + 4096); + dma_addr + 4096, NULL); if (res) - continue; + memset(mem + 4096, 0, 4096); ns = nvme_alloc_ns(dev, i, mem, mem + 4096); if (ns) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c81a4c040b9..b7b7a88d9f68 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1264,6 +1264,32 @@ static bool obj_request_done_test(struct rbd_obj_request *obj_request) return atomic_read(&obj_request->done) != 0; } +static void +rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, + obj_request, obj_request->img_request, obj_request->result, + obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the image. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + BUG_ON(obj_request->type != OBJ_REQUEST_BIO); + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; + } + obj_request_done_set(obj_request); +} + static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) { dout("%s: obj %p cb %p\n", __func__, obj_request, @@ -1284,23 +1310,10 @@ static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) { dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, obj_request->result, obj_request->xferred, obj_request->length); - /* - * ENOENT means a hole in the object. 
We zero-fill the - * entire length of the request. A short read also implies - * zero-fill to the end of the request. Either way we - * update the xferred count to indicate the whole request - * was satisfied. - */ - if (obj_request->result == -ENOENT) { - zero_bio_chain(obj_request->bio_list, 0); - obj_request->result = 0; - obj_request->xferred = obj_request->length; - } else if (obj_request->xferred < obj_request->length && - !obj_request->result) { - zero_bio_chain(obj_request->bio_list, obj_request->xferred); - obj_request->xferred = obj_request->length; - } - obj_request_done_set(obj_request); + if (obj_request->img_request) + rbd_img_obj_request_read_callback(obj_request); + else + obj_request_done_set(obj_request); } static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) @@ -1729,9 +1742,10 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request) struct rbd_device *rbd_dev = img_request->rbd_dev; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; dout("%s: img %p\n", __func__, img_request); - for_each_obj_request(img_request, obj_request) { + for_each_obj_request_safe(img_request, obj_request, next_obj_request) { int ret; obj_request->callback = rbd_img_obj_callback; diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile index f35cd0b71f7b..b1c53c0aa450 100644 --- a/drivers/block/rsxx/Makefile +++ b/drivers/block/rsxx/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o -rsxx-y := config.o core.o cregs.o dev.o dma.o +rsxx-objs := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c index a295e7e9ee41..10cd530d3e10 100644 --- a/drivers/block/rsxx/config.c +++ b/drivers/block/rsxx/config.c @@ -29,15 +29,13 @@ #include "rsxx_priv.h" #include "rsxx_cfg.h" -static void initialize_config(void *config) +static void initialize_config(struct rsxx_card_cfg 
*cfg) { - struct rsxx_card_cfg *cfg = config; - cfg->hdr.version = RSXX_CFG_VERSION; cfg->data.block_size = RSXX_HW_BLK_SIZE; cfg->data.stripe_size = RSXX_HW_BLK_SIZE; - cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.vendor_id = RSXX_VENDOR_ID_IBM; cfg->data.cache_order = (-1); cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; cfg->data.intr_coal.count = 0; @@ -181,7 +179,7 @@ int rsxx_load_config(struct rsxx_cardinfo *card) } else { dev_info(CARD_TO_DEV(card), "Initializing card configuration.\n"); - initialize_config(card); + initialize_config(&card->config); st = rsxx_save_config(card); if (st) return st; diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index e5162487686a..5af21f2db29c 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -30,6 +30,7 @@ #include <linux/reboot.h> #include <linux/slab.h> #include <linux/bitops.h> +#include <linux/delay.h> #include <linux/genhd.h> #include <linux/idr.h> @@ -39,8 +40,8 @@ #define NO_LEGACY 0 -MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); -MODULE_AUTHOR("IBM <support@ramsan.com>"); +MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver"); +MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); @@ -52,6 +53,13 @@ static DEFINE_IDA(rsxx_disk_ida); static DEFINE_SPINLOCK(rsxx_ida_lock); /*----------------- Interrupt Control & Handling -------------------*/ + +static void rsxx_mask_interrupts(struct rsxx_cardinfo *card) +{ + card->isr_mask = 0; + card->ier_mask = 0; +} + static void __enable_intr(unsigned int *mask, unsigned int intr) { *mask |= intr; @@ -71,7 +79,8 @@ static void __disable_intr(unsigned int *mask, unsigned int intr) */ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) { - if (unlikely(card->halt)) + if (unlikely(card->halt) || + unlikely(card->eeh_state)) return; __enable_intr(&card->ier_mask, intr); @@ -80,6 +89,9 @@ void rsxx_enable_ier(struct 
rsxx_cardinfo *card, unsigned int intr) void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) { + if (unlikely(card->eeh_state)) + return; + __disable_intr(&card->ier_mask, intr); iowrite32(card->ier_mask, card->regmap + IER); } @@ -87,7 +99,8 @@ void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, unsigned int intr) { - if (unlikely(card->halt)) + if (unlikely(card->halt) || + unlikely(card->eeh_state)) return; __enable_intr(&card->isr_mask, intr); @@ -97,6 +110,9 @@ void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, unsigned int intr) { + if (unlikely(card->eeh_state)) + return; + __disable_intr(&card->isr_mask, intr); __disable_intr(&card->ier_mask, intr); iowrite32(card->ier_mask, card->regmap + IER); @@ -115,6 +131,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) do { reread_isr = 0; + if (unlikely(card->eeh_state)) + break; + isr = ioread32(card->regmap + ISR); if (isr == 0xffffffff) { /* @@ -161,9 +180,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) } /*----------------- Card Event Handler -------------------*/ -static char *rsxx_card_state_to_str(unsigned int state) +static const char * const rsxx_card_state_to_str(unsigned int state) { - static char *state_strings[] = { + static const char * const state_strings[] = { "Unknown", "Shutdown", "Starting", "Formatting", "Uninitialized", "Good", "Shutting Down", "Fault", "Read Only Fault", "dStroying" @@ -304,6 +323,192 @@ static int card_shutdown(struct rsxx_cardinfo *card) return 0; } +static int rsxx_eeh_frozen(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + int i; + int st; + + dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); + + card->eeh_state = 1; + rsxx_mask_interrupts(card); + + /* + * We need to guarantee that the write for eeh_state and masking + * interrupts does not become reordered. 
This will prevent a possible + * race condition with the EEH code. + */ + wmb(); + + pci_disable_device(dev); + + st = rsxx_eeh_save_issued_dmas(card); + if (st) + return st; + + rsxx_eeh_save_issued_creg(card); + + for (i = 0; i < card->n_targets; i++) { + if (card->ctrl[i].status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); + if (card->ctrl[i].cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); + } + + return 0; +} + +static void rsxx_eeh_failure(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + int i; + + dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n"); + + card->eeh_state = 1; + + for (i = 0; i < card->n_targets; i++) + del_timer_sync(&card->ctrl[i].activity_timer); + + rsxx_eeh_cancel_dmas(card); +} + +static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) +{ + unsigned int status; + int iter = 0; + + /* We need to wait for the hardware to reset */ + while (iter++ < 10) { + status = ioread32(card->regmap + PCI_RECONFIG); + + if (status & RSXX_FLUSH_BUSY) { + ssleep(1); + continue; + } + + if (status & RSXX_FLUSH_TIMEOUT) + dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n"); + return 0; + } + + /* Hardware failed resetting itself. 
*/ + return -1; +} + +static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, + enum pci_channel_state error) +{ + int st; + + if (dev->revision < RSXX_EEH_SUPPORT) + return PCI_ERS_RESULT_NONE; + + if (error == pci_channel_io_perm_failure) { + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + } + + st = rsxx_eeh_frozen(dev); + if (st) { + dev_err(&dev->dev, "Slot reset setup failed\n"); + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + } + + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + int st; + + dev_warn(&dev->dev, + "IBM FlashSystem PCI: recovering from slot reset.\n"); + + st = pci_enable_device(dev); + if (st) + goto failed_hw_setup; + + pci_set_master(dev); + + st = rsxx_eeh_fifo_flush_poll(card); + if (st) + goto failed_hw_setup; + + rsxx_dma_queue_reset(card); + + for (i = 0; i < card->n_targets; i++) { + st = rsxx_hw_buffers_init(dev, &card->ctrl[i]); + if (st) + goto failed_hw_buffers_init; + } + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Clears the ISR register from spurious interrupts */ + st = ioread32(card->regmap + ISR); + + card->eeh_state = 0; + + st = rsxx_eeh_remap_dmas(card); + if (st) + goto failed_remap_dmas; + + spin_lock_irqsave(&card->irq_lock, flags); + if (card->n_targets & RSXX_MAX_TARGETS) + rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G); + else + rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C); + spin_unlock_irqrestore(&card->irq_lock, flags); + + rsxx_kick_creg_queue(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + if (list_empty(&card->ctrl[i].queue)) { + spin_unlock(&card->ctrl[i].queue_lock); + continue; + } + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + + dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n"); + + 
return PCI_ERS_RESULT_RECOVERED; + +failed_hw_buffers_init: +failed_remap_dmas: + for (i = 0; i < card->n_targets; i++) { + if (card->ctrl[i].status.buf) + pci_free_consistent(card->dev, + STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); + if (card->ctrl[i].cmd.buf) + pci_free_consistent(card->dev, + COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); + } +failed_hw_setup: + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + +} + /*----------------- Driver Initialization & Setup -------------------*/ /* Returns: 0 if the driver is compatible with the device -1 if the driver is NOT compatible with the device */ @@ -383,6 +588,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, spin_lock_init(&card->irq_lock); card->halt = 0; + card->eeh_state = 0; spin_lock_irq(&card->irq_lock); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); @@ -538,9 +744,6 @@ static void rsxx_pci_remove(struct pci_dev *dev) rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); spin_unlock_irqrestore(&card->irq_lock, flags); - /* Prevent work_structs from re-queuing themselves. */ - card->halt = 1; - cancel_work_sync(&card->event_work); rsxx_destroy_dev(card); @@ -549,6 +752,10 @@ static void rsxx_pci_remove(struct pci_dev *dev) spin_lock_irqsave(&card->irq_lock, flags); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. 
*/ + card->halt = 1; + free_irq(dev->irq, card); if (!force_legacy) @@ -592,11 +799,14 @@ static void rsxx_pci_shutdown(struct pci_dev *dev) card_shutdown(card); } +static const struct pci_error_handlers rsxx_err_handler = { + .error_detected = rsxx_error_detected, + .slot_reset = rsxx_slot_reset, +}; + static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)}, {0,}, }; @@ -609,6 +819,7 @@ static struct pci_driver rsxx_pci_driver = { .remove = rsxx_pci_remove, .suspend = rsxx_pci_suspend, .shutdown = rsxx_pci_shutdown, + .err_handler = &rsxx_err_handler, }; static int __init rsxx_core_init(void) diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 80bbe639fccd..4b5c020a0a65 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -58,7 +58,7 @@ static struct kmem_cache *creg_cmd_pool; #error Unknown endianess!!! Aborting... 
#endif -static void copy_to_creg_data(struct rsxx_cardinfo *card, +static int copy_to_creg_data(struct rsxx_cardinfo *card, int cnt8, void *buf, unsigned int stream) @@ -66,6 +66,9 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card, int i = 0; u32 *data = buf; + if (unlikely(card->eeh_state)) + return -EIO; + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { /* * Firmware implementation makes it necessary to byte swap on @@ -76,10 +79,12 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card, else iowrite32(data[i], card->regmap + CREG_DATA(i)); } + + return 0; } -static void copy_from_creg_data(struct rsxx_cardinfo *card, +static int copy_from_creg_data(struct rsxx_cardinfo *card, int cnt8, void *buf, unsigned int stream) @@ -87,6 +92,9 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, int i = 0; u32 *data = buf; + if (unlikely(card->eeh_state)) + return -EIO; + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { /* * Firmware implementation makes it necessary to byte swap on @@ -97,41 +105,31 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, else data[i] = ioread32(card->regmap + CREG_DATA(i)); } -} - -static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) -{ - struct creg_cmd *cmd; - /* - * Spin lock is needed because this can be called in atomic/interrupt - * context. 
- */ - spin_lock_bh(&card->creg_ctrl.lock); - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - spin_unlock_bh(&card->creg_ctrl.lock); - - return cmd; + return 0; } static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) { + int st; + + if (unlikely(card->eeh_state)) + return; + iowrite32(cmd->addr, card->regmap + CREG_ADD); iowrite32(cmd->cnt8, card->regmap + CREG_CNT); if (cmd->op == CREG_OP_WRITE) { - if (cmd->buf) - copy_to_creg_data(card, cmd->cnt8, - cmd->buf, cmd->stream); + if (cmd->buf) { + st = copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + if (st) + return; + } } - /* - * Data copy must complete before initiating the command. This is - * needed for weakly ordered processors (i.e. PowerPC), so that all - * neccessary registers are written before we kick the hardware. - */ - wmb(); + if (unlikely(card->eeh_state)) + return; /* Setting the valid bit will kick off the command. */ iowrite32(cmd->op, card->regmap + CREG_CMD); @@ -196,11 +194,11 @@ static int creg_queue_cmd(struct rsxx_cardinfo *card, cmd->cb_private = cb_private; cmd->status = 0; - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_add_tail(&cmd->list, &card->creg_ctrl.queue); card->creg_ctrl.q_depth++; creg_kick_queue(card); - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); return 0; } @@ -210,7 +208,11 @@ static void creg_cmd_timed_out(unsigned long data) struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; struct creg_cmd *cmd; - cmd = pop_active_cmd(card); + spin_lock(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock(&card->creg_ctrl.lock); + if (cmd == NULL) { card->creg_ctrl.creg_stats.creg_timeout++; dev_warn(CARD_TO_DEV(card), @@ -247,7 +249,11 @@ static void creg_cmd_done(struct work_struct *work) if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) card->creg_ctrl.creg_stats.failed_cancel_timer++; - cmd 
= pop_active_cmd(card); + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + if (cmd == NULL) { dev_err(CARD_TO_DEV(card), "Spurious creg interrupt!\n"); @@ -287,7 +293,7 @@ static void creg_cmd_done(struct work_struct *work) goto creg_done; } - copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); } creg_done: @@ -296,10 +302,10 @@ creg_done: kmem_cache_free(creg_cmd_pool, cmd); - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); card->creg_ctrl.active = 0; creg_kick_queue(card); - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); } static void creg_reset(struct rsxx_cardinfo *card) @@ -324,7 +330,7 @@ static void creg_reset(struct rsxx_cardinfo *card) "Resetting creg interface for recovery\n"); /* Cancel outstanding commands */ - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { list_del(&cmd->list); card->creg_ctrl.q_depth--; @@ -345,7 +351,7 @@ static void creg_reset(struct rsxx_cardinfo *card) card->creg_ctrl.active = 0; } - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); card->creg_ctrl.reset = 0; spin_lock_irqsave(&card->irq_lock, flags); @@ -399,12 +405,12 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card, return st; /* - * This timeout is neccessary for unresponsive hardware. The additional + * This timeout is necessary for unresponsive hardware. The additional * 20 seconds to used to guarantee that each cregs requests has time to * complete. */ - timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * - card->creg_ctrl.q_depth) + 20000); + timeout = msecs_to_jiffies(CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth + 20000); /* * The creg interface is guaranteed to complete. 
It has a timeout @@ -690,6 +696,32 @@ int rsxx_reg_access(struct rsxx_cardinfo *card, return 0; } +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + + if (cmd) { + del_timer_sync(&card->creg_ctrl.cmd_timer); + + spin_lock_bh(&card->creg_ctrl.lock); + list_add(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + card->creg_ctrl.active = 0; + spin_unlock_bh(&card->creg_ctrl.lock); + } +} + +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card) +{ + spin_lock_bh(&card->creg_ctrl.lock); + if (!list_empty(&card->creg_ctrl.queue)) + creg_kick_queue(card); + spin_unlock_bh(&card->creg_ctrl.lock); +} + /*------------ Initialization & Setup --------------*/ int rsxx_creg_setup(struct rsxx_cardinfo *card) { @@ -712,7 +744,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card) int cnt = 0; /* Cancel outstanding commands */ - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { list_del(&cmd->list); if (cmd->cb) @@ -737,7 +769,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card) "Canceled active creg command\n"); kmem_cache_free(creg_cmd_pool, cmd); } - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); cancel_work_sync(&card->creg_ctrl.done_work); } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 63176e67662f..0607513cfb41 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -28,7 +28,7 @@ struct rsxx_dma { struct list_head list; u8 cmd; - unsigned int laddr; /* Logical address on the ramsan */ + unsigned int laddr; /* Logical address */ struct { u32 off; u32 cnt; @@ -81,9 +81,6 @@ enum rsxx_hw_status { HW_STATUS_FAULT = 0x08, }; -#define STATUS_BUFFER_SIZE8 4096 -#define COMMAND_BUFFER_SIZE8 4096 - static struct kmem_cache *rsxx_dma_pool; struct dma_tracker { @@ -122,7 +119,7 @@ static 
unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) return tgt; } -static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) { /* Reset all DMA Command/Status Queues */ iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); @@ -210,7 +207,8 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) u32 q_depth = 0; u32 intr_coal; - if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE || + unlikely(card->eeh_state)) return; for (i = 0; i < card->n_targets; i++) @@ -223,31 +221,26 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) } /*----------------- RSXX DMA Handling -------------------*/ -static void rsxx_complete_dma(struct rsxx_cardinfo *card, +static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma, unsigned int status) { if (status & DMA_SW_ERR) - printk_ratelimited(KERN_ERR - "SW Error in DMA(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_sw_err++; if (status & DMA_HW_FAULT) - printk_ratelimited(KERN_ERR - "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_hw_fault++; if (status & DMA_CANCELLED) - printk_ratelimited(KERN_ERR - "DMA Cancelled(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_cancelled++; if (dma->dma_addr) - pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + pci_unmap_page(ctrl->card->dev, dma->dma_addr, + get_dma_size(dma), dma->cmd == HW_CMD_BLK_WRITE ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (dma->cb) - dma->cb(card, dma->cb_data, status ? 1 : 0); + dma->cb(ctrl->card, dma->cb_data, status ? 
1 : 0); kmem_cache_free(rsxx_dma_pool, dma); } @@ -330,14 +323,15 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, if (requeue_cmd) rsxx_requeue_dma(ctrl, dma); else - rsxx_complete_dma(ctrl->card, dma, status); + rsxx_complete_dma(ctrl, dma, status); } static void dma_engine_stalled(unsigned long data) { struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; - if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + if (atomic_read(&ctrl->stats.hw_q_depth) == 0 || + unlikely(ctrl->card->eeh_state)) return; if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { @@ -369,7 +363,8 @@ static void rsxx_issue_dmas(struct work_struct *work) ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); hw_cmd_buf = ctrl->cmd.buf; - if (unlikely(ctrl->card->halt)) + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->eeh_state)) return; while (1) { @@ -397,7 +392,7 @@ static void rsxx_issue_dmas(struct work_struct *work) */ if (unlikely(ctrl->card->dma_fault)) { push_tracker(ctrl->trackers, tag); - rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); continue; } @@ -432,19 +427,15 @@ static void rsxx_issue_dmas(struct work_struct *work) /* Let HW know we've queued commands. */ if (cmds_pending) { - /* - * We must guarantee that the CPU writes to 'ctrl->cmd.buf' - * (which is in PCI-consistent system-memory) from the loop - * above make it into the coherency domain before the - * following PIO "trigger" updating the cmd.idx. A WMB is - * sufficient. We need not explicitly CPU cache-flush since - * the memory is a PCI-consistent (ie; coherent) mapping. 
- */ - wmb(); - atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); mod_timer(&ctrl->activity_timer, jiffies + DMA_ACTIVITY_TIMEOUT); + + if (unlikely(ctrl->card->eeh_state)) { + del_timer_sync(&ctrl->activity_timer); + return; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); } } @@ -463,7 +454,8 @@ static void rsxx_dma_done(struct work_struct *work) hw_st_buf = ctrl->status.buf; if (unlikely(ctrl->card->halt) || - unlikely(ctrl->card->dma_fault)) + unlikely(ctrl->card->dma_fault) || + unlikely(ctrl->card->eeh_state)) return; count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); @@ -508,7 +500,7 @@ static void rsxx_dma_done(struct work_struct *work) if (status) rsxx_handle_dma_error(ctrl, dma, status); else - rsxx_complete_dma(ctrl->card, dma, 0); + rsxx_complete_dma(ctrl, dma, 0); push_tracker(ctrl->trackers, tag); @@ -727,20 +719,54 @@ bvec_err: /*----------------- DMA Engine Initialization & Setup -------------------*/ +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) +{ + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + 
iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + + return 0; +} + static int rsxx_dma_ctrl_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) { int i; + int st; memset(&ctrl->stats, 0, sizeof(ctrl->stats)); - ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, - &ctrl->status.dma_addr); - ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, - &ctrl->cmd.dma_addr); - if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) - return -ENOMEM; - ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); if (!ctrl->trackers) return -ENOMEM; @@ -770,35 +796,9 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); - memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_LO); - iowrite32(upper_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_HI); - - memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); - iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); - - ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); - if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading status cnt x%x\n", - ctrl->status.idx); - return -EINVAL; - } - iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); - iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); - - ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); - if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", - ctrl->status.idx); - 
return -EINVAL; - } - iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); - iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); - - wmb(); + st = rsxx_hw_buffers_init(dev, ctrl); + if (st) + return st; return 0; } @@ -834,7 +834,7 @@ static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, return 0; } -static int rsxx_dma_configure(struct rsxx_cardinfo *card) +int rsxx_dma_configure(struct rsxx_cardinfo *card) { u32 intr_coal; @@ -980,6 +980,103 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) } } +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) +{ + int i; + int j; + int cnt; + struct rsxx_dma *dma; + struct list_head *issued_dmas; + + issued_dmas = kzalloc(sizeof(*issued_dmas) * card->n_targets, + GFP_KERNEL); + if (!issued_dmas) + return -ENOMEM; + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&issued_dmas[i]); + cnt = 0; + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = get_tracker_dma(card->ctrl[i].trackers, j); + if (dma == NULL) + continue; + + if (dma->cmd == HW_CMD_BLK_WRITE) + card->ctrl[i].stats.writes_issued--; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + card->ctrl[i].stats.discards_issued--; + else + card->ctrl[i].stats.reads_issued--; + + list_add_tail(&dma->list, &issued_dmas[i]); + push_tracker(card->ctrl[i].trackers, j); + cnt++; + } + + spin_lock(&card->ctrl[i].queue_lock); + list_splice(&issued_dmas[i], &card->ctrl[i].queue); + + atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); + card->ctrl[i].stats.sw_q_depth += cnt; + card->ctrl[i].e_cnt = 0; + + list_for_each_entry(dma, &card->ctrl[i].queue, list) { + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? 
+ PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + } + spin_unlock(&card->ctrl[i].queue_lock); + } + + kfree(issued_dmas); + + return 0; +} + +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int i; + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) { + list_del(&dma->list); + + rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED); + } + spin_unlock(&card->ctrl[i].queue_lock); + } +} + +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) +{ + struct rsxx_dma *dma; + int i; + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + list_for_each_entry(dma, &card->ctrl[i].queue, list) { + dma->dma_addr = pci_map_page(card->dev, dma->page, + dma->pg_off, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + spin_unlock(&card->ctrl[i].queue_lock); + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + } + spin_unlock(&card->ctrl[i].queue_lock); + } + + return 0; +} int rsxx_dma_init(void) { diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h index 2e50b65902b7..24ba3642bd89 100644 --- a/drivers/block/rsxx/rsxx.h +++ b/drivers/block/rsxx/rsxx.h @@ -27,15 +27,17 @@ /*----------------- IOCTL Definitions -------------------*/ +#define RSXX_MAX_DATA 8 + struct rsxx_reg_access { __u32 addr; __u32 cnt; __u32 stat; __u32 stream; - __u32 data[8]; + __u32 data[RSXX_MAX_DATA]; }; -#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) +#define RSXX_MAX_REG_CNT (RSXX_MAX_DATA * (sizeof(__u32))) #define RSXX_IOC_MAGIC 'r' diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h index c025fe5fdb70..f384c943846d 100644 --- a/drivers/block/rsxx/rsxx_cfg.h +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -58,7 +58,7 @@ struct rsxx_card_cfg { }; /* Vendor ID Values */ -#define RSXX_VENDOR_ID_TMS_IBM 0 +#define 
RSXX_VENDOR_ID_IBM 0 #define RSXX_VENDOR_ID_DSI 1 #define RSXX_VENDOR_COUNT 2 diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index a1ac907d8f4c..382e8bf5c03b 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -45,16 +45,13 @@ struct proc_cmd; -#define PCI_VENDOR_ID_TMS_IBM 0x15B6 -#define PCI_DEVICE_ID_RS70_FLASH 0x0019 -#define PCI_DEVICE_ID_RS70D_FLASH 0x001A -#define PCI_DEVICE_ID_RS80_FLASH 0x001C -#define PCI_DEVICE_ID_RS81_FLASH 0x001E +#define PCI_DEVICE_ID_FS70_FLASH 0x04A9 +#define PCI_DEVICE_ID_FS80_FLASH 0x04AA #define RS70_PCI_REV_SUPPORTED 4 #define DRIVER_NAME "rsxx" -#define DRIVER_VERSION "3.7" +#define DRIVER_VERSION "4.0" /* Block size is 4096 */ #define RSXX_HW_BLK_SHIFT 12 @@ -67,6 +64,9 @@ struct proc_cmd; #define RSXX_MAX_OUTSTANDING_CMDS 255 #define RSXX_CS_IDX_MASK 0xff +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + #define RSXX_MAX_TARGETS 8 struct dma_tracker_list; @@ -91,6 +91,9 @@ struct rsxx_dma_stats { u32 discards_failed; u32 done_rescheduled; u32 issue_rescheduled; + u32 dma_sw_err; + u32 dma_hw_fault; + u32 dma_cancelled; u32 sw_q_depth; /* Number of DMAs on the SW queue. */ atomic_t hw_q_depth; /* Number of DMAs queued to HW. 
*/ }; @@ -116,6 +119,7 @@ struct rsxx_dma_ctrl { struct rsxx_cardinfo { struct pci_dev *dev; unsigned int halt; + unsigned int eeh_state; void __iomem *regmap; spinlock_t irq_lock; @@ -224,6 +228,7 @@ enum rsxx_pci_regmap { PERF_RD512_HI = 0xac, PERF_WR512_LO = 0xb0, PERF_WR512_HI = 0xb4, + PCI_RECONFIG = 0xb8, }; enum rsxx_intr { @@ -237,6 +242,8 @@ enum rsxx_intr { CR_INTR_DMA5 = 0x00000080, CR_INTR_DMA6 = 0x00000100, CR_INTR_DMA7 = 0x00000200, + CR_INTR_ALL_C = 0x0000003f, + CR_INTR_ALL_G = 0x000003ff, CR_INTR_DMA_ALL = 0x000003f5, CR_INTR_ALL = 0xffffffff, }; @@ -253,8 +260,14 @@ enum rsxx_pci_reset { DMA_QUEUE_RESET = 0x00000001, }; +enum rsxx_hw_fifo_flush { + RSXX_FLUSH_BUSY = 0x00000002, + RSXX_FLUSH_TIMEOUT = 0x00000004, +}; + enum rsxx_pci_revision { RSXX_DISCARD_SUPPORT = 2, + RSXX_EEH_SUPPORT = 3, }; enum rsxx_creg_cmd { @@ -360,11 +373,17 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card); void rsxx_dma_destroy(struct rsxx_cardinfo *card); int rsxx_dma_init(void); void rsxx_dma_cleanup(void); +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); +int rsxx_dma_configure(struct rsxx_cardinfo *card); int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, struct bio *bio, atomic_t *n_dmas, rsxx_dma_cb cb, void *cb_data); +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); /***** cregs.c *****/ int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, @@ -389,10 +408,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card); void rsxx_creg_destroy(struct rsxx_cardinfo *card); int rsxx_creg_init(void); void rsxx_creg_cleanup(void); - int rsxx_reg_access(struct rsxx_cardinfo *card, struct rsxx_reg_access __user *ucmd, int read); +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card); +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card); diff --git 
a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index de1f319f7bd7..dd5b2fed97e9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -164,7 +164,7 @@ static void make_response(struct xen_blkif *blkif, u64 id, #define foreach_grant_safe(pos, n, rbtree, node) \ for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \ - (n) = rb_next(&(pos)->node); \ + (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL; \ &(pos)->node != NULL; \ (pos) = container_of(n, typeof(*(pos)), node), \ (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) @@ -381,8 +381,8 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" - " | ds %4d\n", + pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" + " | ds %4llu\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req, blkif->st_ds_req); @@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg) } struct seg_buf { - unsigned long buf; + unsigned int offset; unsigned int nsec; }; /* @@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req, * If this is a new persistent grant * save the handler */ - persistent_gnts[i]->handle = map[j].handle; - persistent_gnts[i]->dev_bus_addr = - map[j++].dev_bus_addr; + persistent_gnts[i]->handle = map[j++].handle; } pending_handle(pending_req, i) = persistent_gnts[i]->handle; if (ret) continue; - - seg[i].buf = persistent_gnts[i]->dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } else { - pending_handle(pending_req, i) = map[j].handle; + pending_handle(pending_req, i) = map[j++].handle; bitmap_set(pending_req->unmap_seg, i, 1); - if (ret) { - j++; + if (ret) continue; - } - - seg[i].buf = map[j++].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } + seg[i].offset = (req->u.rw.seg[i].first_sect << 9); } return ret; } @@ -679,6 +670,16 
@@ static int dispatch_discard_io(struct xen_blkif *blkif, return err; } +static int dispatch_other_io(struct xen_blkif *blkif, + struct blkif_request *req, + struct pending_req *pending_req) +{ + free_req(pending_req); + make_response(blkif, req->u.other.id, req->operation, + BLKIF_RSP_EOPNOTSUPP); + return -EIO; +} + static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); @@ -800,17 +801,30 @@ __do_block_io_op(struct xen_blkif *blkif) /* Apply all sanity checks to /private copy/ of request. */ barrier(); - if (unlikely(req.operation == BLKIF_OP_DISCARD)) { + + switch (req.operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + if (dispatch_rw_block_io(blkif, &req, pending_req)) + goto done; + break; + case BLKIF_OP_DISCARD: free_req(pending_req); if (dispatch_discard_io(blkif, &req)) - break; - } else if (dispatch_rw_block_io(blkif, &req, pending_req)) + goto done; break; + default: + if (dispatch_other_io(blkif, &req, pending_req)) + goto done; + break; + } /* Yield point for this unbounded loop. */ cond_resched(); } - +done: return more_to_do; } @@ -904,7 +918,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? "read" : "write", preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); + preq.sector_number + preq.nr_sects, + blkif->vbd.pdevice); goto fail_response; } @@ -947,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, (bio_add_page(bio, pages[i], seg[i].nsec << 9, - seg[i].buf & ~PAGE_MASK) == 0)) { + seg[i].offset) == 0)) { bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) @@ -977,13 +992,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, bio->bi_end_io = end_block_io_op; } - /* - * We set it one so that the last submit_bio does not have to call - * atomic_inc. 
- */ atomic_set(&pending_req->pendcnt, nbio); - - /* Get a reference count for the disk queue and start sending I/O */ blk_start_plug(&plug); for (i = 0; i < nbio; i++) @@ -1011,6 +1020,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, fail_put_bio: for (i = 0; i < nbio; i++) bio_put(biolist[i]); + atomic_set(&pending_req->pendcnt, 1); __end_block_io_op(pending_req, -EINVAL); msleep(1); /* back off a bit */ return -EIO; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6072390c7f57..60103e2517ba 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -77,11 +77,18 @@ struct blkif_x86_32_request_discard { uint64_t nr_sectors; } __attribute__((__packed__)); +struct blkif_x86_32_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; + struct blkif_x86_32_request_other other; } u; } __attribute__((__packed__)); @@ -113,11 +120,19 @@ struct blkif_x86_64_request_discard { uint64_t nr_sectors; } __attribute__((__packed__)); +struct blkif_x86_64_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; + uint32_t _pad3; /* offsetof(blkif_..,u.discard.id)==8 */ + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? 
*/ union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; + struct blkif_x86_64_request_other other; } u; } __attribute__((__packed__)); @@ -172,7 +187,6 @@ struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; - uint64_t dev_bus_addr; struct rb_node node; }; @@ -208,13 +222,13 @@ struct xen_blkif { /* statistics */ unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_f_req; - int st_ds_req; - int st_rd_sect; - int st_wr_sect; + unsigned long long st_rd_req; + unsigned long long st_wr_req; + unsigned long long st_oo_req; + unsigned long long st_f_req; + unsigned long long st_ds_req; + unsigned long long st_rd_sect; + unsigned long long st_wr_sect; wait_queue_head_t waiting_to_free; }; @@ -278,6 +292,11 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. + */ + dst->u.other.id = src->u.other.id; break; } } @@ -309,6 +328,11 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. 
+ */ + dst->u.other.id = src->u.other.id; break; } } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 5e237f630c47..8bfd1bcf95ec 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -230,13 +230,13 @@ int __init xen_blkif_interface_init(void) } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) -VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); -VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); -VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); -VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); -VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); -VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); -VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); +VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req); +VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req); +VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req); +VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect); static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_oo_req.attr, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c3dae2e0f290..a894f88762d8 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -44,7 +44,7 @@ #include <linux/mutex.h> #include <linux/scatterlist.h> #include <linux/bitmap.h> -#include <linux/llist.h> +#include <linux/list.h> #include <xen/xen.h> #include <xen/xenbus.h> @@ -68,13 +68,12 @@ enum blkif_state { struct grant { grant_ref_t gref; unsigned long pfn; - struct llist_node node; + struct list_head node; }; struct blk_shadow { struct blkif_request req; struct request *request; - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; @@ -105,7 +104,7 @@ struct blkfront_info struct work_struct work; struct gnttab_free_callback callback; struct blk_shadow 
shadow[BLK_RING_SIZE]; - struct llist_head persistent_gnts; + struct list_head persistent_gnts; unsigned int persistent_gnts_c; unsigned long shadow_free; unsigned int feature_flush; @@ -165,6 +164,69 @@ static int add_id_to_freelist(struct blkfront_info *info, return 0; } +static int fill_grant_buffer(struct blkfront_info *info, int num) +{ + struct page *granted_page; + struct grant *gnt_list_entry, *n; + int i = 0; + + while(i < num) { + gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO); + if (!gnt_list_entry) + goto out_of_memory; + + granted_page = alloc_page(GFP_NOIO); + if (!granted_page) { + kfree(gnt_list_entry); + goto out_of_memory; + } + + gnt_list_entry->pfn = page_to_pfn(granted_page); + gnt_list_entry->gref = GRANT_INVALID_REF; + list_add(&gnt_list_entry->node, &info->persistent_gnts); + i++; + } + + return 0; + +out_of_memory: + list_for_each_entry_safe(gnt_list_entry, n, + &info->persistent_gnts, node) { + list_del(&gnt_list_entry->node); + __free_page(pfn_to_page(gnt_list_entry->pfn)); + kfree(gnt_list_entry); + i--; + } + BUG_ON(i != 0); + return -ENOMEM; +} + +static struct grant *get_grant(grant_ref_t *gref_head, + struct blkfront_info *info) +{ + struct grant *gnt_list_entry; + unsigned long buffer_mfn; + + BUG_ON(list_empty(&info->persistent_gnts)); + gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant, + node); + list_del(&gnt_list_entry->node); + + if (gnt_list_entry->gref != GRANT_INVALID_REF) { + info->persistent_gnts_c--; + return gnt_list_entry; + } + + /* Assign a gref to this page */ + gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); + BUG_ON(gnt_list_entry->gref == -ENOSPC); + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); + gnttab_grant_foreign_access_ref(gnt_list_entry->gref, + info->xbdev->otherend_id, + buffer_mfn, 0); + return gnt_list_entry; +} + static const char *op_name(int op) { static const char *const names[] = { @@ -293,7 +355,6 @@ static int blkif_ioctl(struct block_device *bdev, 
fmode_t mode, static int blkif_queue_request(struct request *req) { struct blkfront_info *info = req->rq_disk->private_data; - unsigned long buffer_mfn; struct blkif_request *ring_req; unsigned long id; unsigned int fsect, lsect; @@ -306,7 +367,6 @@ static int blkif_queue_request(struct request *req) */ bool new_persistent_gnts; grant_ref_t gref_head; - struct page *granted_page; struct grant *gnt_list_entry = NULL; struct scatterlist *sg; @@ -370,41 +430,8 @@ static int blkif_queue_request(struct request *req) fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; - if (info->persistent_gnts_c) { - BUG_ON(llist_empty(&info->persistent_gnts)); - gnt_list_entry = llist_entry( - llist_del_first(&info->persistent_gnts), - struct grant, node); - - ref = gnt_list_entry->gref; - buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); - info->persistent_gnts_c--; - } else { - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); - - gnt_list_entry = - kmalloc(sizeof(struct grant), - GFP_ATOMIC); - if (!gnt_list_entry) - return -ENOMEM; - - granted_page = alloc_page(GFP_ATOMIC); - if (!granted_page) { - kfree(gnt_list_entry); - return -ENOMEM; - } - - gnt_list_entry->pfn = - page_to_pfn(granted_page); - gnt_list_entry->gref = ref; - - buffer_mfn = pfn_to_mfn(page_to_pfn( - granted_page)); - gnttab_grant_foreign_access_ref(ref, - info->xbdev->otherend_id, - buffer_mfn, 0); - } + gnt_list_entry = get_grant(&gref_head, info); + ref = gnt_list_entry->gref; info->shadow[id].grants_used[i] = gnt_list_entry; @@ -435,7 +462,6 @@ static int blkif_queue_request(struct request *req) kunmap_atomic(shared_data); } - info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); ring_req->u.rw.seg[i] = (struct blkif_request_segment) { .gref = ref, @@ -790,9 +816,8 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { - struct llist_node *all_gnts; - struct grant *persistent_gnt, *tmp; - struct llist_node 
*n; + struct grant *persistent_gnt; + struct grant *n; /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); @@ -803,22 +828,20 @@ static void blkif_free(struct blkfront_info *info, int suspend) blk_stop_queue(info->rq); /* Remove all persistent grants */ - if (info->persistent_gnts_c) { - all_gnts = llist_del_all(&info->persistent_gnts); - persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); - while (persistent_gnt) { - gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + if (!list_empty(&info->persistent_gnts)) { + list_for_each_entry_safe(persistent_gnt, n, + &info->persistent_gnts, node) { + list_del(&persistent_gnt->node); + if (persistent_gnt->gref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(persistent_gnt->gref, + 0, 0UL); + info->persistent_gnts_c--; + } __free_page(pfn_to_page(persistent_gnt->pfn)); - tmp = persistent_gnt; - n = persistent_gnt->node.next; - if (n) - persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); - else - persistent_gnt = NULL; - kfree(tmp); + kfree(persistent_gnt); } - info->persistent_gnts_c = 0; } + BUG_ON(info->persistent_gnts_c != 0); /* No more gnttab callback work. 
*/ gnttab_cancel_free_callback(&info->callback); @@ -875,7 +898,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < s->req.u.rw.nr_segments; i++) { - llist_add(&s->grants_used[i]->node, &info->persistent_gnts); + list_add(&s->grants_used[i]->node, &info->persistent_gnts); info->persistent_gnts_c++; } } @@ -1013,6 +1036,12 @@ static int setup_blkring(struct xenbus_device *dev, sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + /* Allocate memory for grants */ + err = fill_grant_buffer(info, BLK_RING_SIZE * + BLKIF_MAX_SEGMENTS_PER_REQUEST); + if (err) + goto fail; + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); @@ -1171,7 +1200,7 @@ static int blkfront_probe(struct xenbus_device *dev, spin_lock_init(&info->io_lock); info->xbdev = dev; info->vdevice = vdevice; - init_llist_head(&info->persistent_gnts); + INIT_LIST_HEAD(&info->persistent_gnts); info->persistent_gnts_c = 0; info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); @@ -1203,11 +1232,10 @@ static int blkif_recover(struct blkfront_info *info) int j; /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmalloc(sizeof(info->shadow), + copy = kmemdup(info->shadow, sizeof(info->shadow), GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); if (!copy) return -ENOMEM; - memcpy(copy, info->shadow, sizeof(info->shadow)); /* Stage 2: Set up free list. 
*/ memset(&info->shadow, 0, sizeof(info->shadow)); @@ -1236,7 +1264,7 @@ static int blkif_recover(struct blkfront_info *info) gnttab_grant_foreign_access_ref( req->u.rw.seg[j].gref, info->xbdev->otherend_id, - pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), + pfn_to_mfn(copy[i].grants_used[j]->pfn), 0); } info->shadow[req->u.rw.id].req = *req; diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index e3f9a99b8522..d784650d14f0 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -373,26 +373,14 @@ static int hpet_mmap(struct file *file, struct vm_area_struct *vma) struct hpet_dev *devp; unsigned long addr; - if (((vma->vm_end - vma->vm_start) != PAGE_SIZE) || vma->vm_pgoff) - return -EINVAL; - devp = file->private_data; addr = devp->hd_hpets->hp_hpet_phys; if (addr & (PAGE_SIZE - 1)) return -ENOSYS; - vma->vm_flags |= VM_IO; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - printk(KERN_ERR "%s: io_remap_pfn_range failed\n", - __func__); - return -EAGAIN; - } - - return 0; + return vm_iomap_memory(vma, addr, PAGE_SIZE); #else return -ENOSYS; #endif diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 69ae5972713c..a0f7724852eb 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -380,6 +380,15 @@ void hwrng_unregister(struct hwrng *rng) } EXPORT_SYMBOL_GPL(hwrng_unregister); +static void __exit hwrng_exit(void) +{ + mutex_lock(&rng_mutex); + BUG_ON(current_rng); + kfree(rng_buffer); + mutex_unlock(&rng_mutex); +} + +module_exit(hwrng_exit); MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 10fd71ccf587..6bf4d47324eb 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -92,14 +92,22 @@ static int probe_common(struct 
virtio_device *vdev) { int err; + if (vq) { + /* We only support one device for now */ + return -EBUSY; + } /* We expect a single virtqueue. */ vq = virtio_find_single_vq(vdev, random_recv_done, "input"); - if (IS_ERR(vq)) - return PTR_ERR(vq); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + vq = NULL; + return err; + } err = hwrng_register(&virtio_hwrng); if (err) { vdev->config->del_vqs(vdev); + vq = NULL; return err; } @@ -112,6 +120,7 @@ static void remove_common(struct virtio_device *vdev) busy = false; hwrng_unregister(&virtio_hwrng); vdev->config->del_vqs(vdev); + vq = NULL; } static int virtrng_probe(struct virtio_device *vdev) diff --git a/drivers/char/random.c b/drivers/char/random.c index 594bda9dcfc8..32a6c5764950 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -852,6 +852,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, int reserved) { unsigned long flags; + int wakeup_write = 0; /* Hold lock while accounting */ spin_lock_irqsave(&r->lock, flags); @@ -873,10 +874,8 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, else r->entropy_count = reserved; - if (r->entropy_count < random_write_wakeup_thresh) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } + if (r->entropy_count < random_write_wakeup_thresh) + wakeup_write = 1; } DEBUG_ENT("debiting %zu entropy credits from %s%s\n", @@ -884,6 +883,11 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, spin_unlock_irqrestore(&r->lock, flags); + if (wakeup_write) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } + return nbytes; } diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index e905d5f53051..ce5f3fc25d6d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -149,7 +149,8 @@ struct ports_device { spinlock_t ports_lock; /* To protect the vq operations for the control channel */ - 
spinlock_t cvq_lock; + spinlock_t c_ivq_lock; + spinlock_t c_ovq_lock; /* The current config space is stored here */ struct virtio_console_config config; @@ -569,11 +570,14 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, vq = portdev->c_ovq; sg_init_one(sg, &cpkt, sizeof(cpkt)); + + spin_lock(&portdev->c_ovq_lock); if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) == 0) { virtqueue_kick(vq); while (!virtqueue_get_buf(vq, &len)) cpu_relax(); } + spin_unlock(&portdev->c_ovq_lock); return 0; } @@ -1436,7 +1440,7 @@ static int add_port(struct ports_device *portdev, u32 id) * rproc_serial does not want the console port, only * the generic port implementation. */ - port->host_connected = port->guest_connected = true; + port->host_connected = true; else if (!use_multiport(port->portdev)) { /* * If we're not using multiport support, @@ -1709,23 +1713,23 @@ static void control_work_handler(struct work_struct *work) portdev = container_of(work, struct ports_device, control_work); vq = portdev->c_ivq; - spin_lock(&portdev->cvq_lock); + spin_lock(&portdev->c_ivq_lock); while ((buf = virtqueue_get_buf(vq, &len))) { - spin_unlock(&portdev->cvq_lock); + spin_unlock(&portdev->c_ivq_lock); buf->len = len; buf->offset = 0; handle_control_message(portdev, buf); - spin_lock(&portdev->cvq_lock); + spin_lock(&portdev->c_ivq_lock); if (add_inbuf(portdev->c_ivq, buf) < 0) { dev_warn(&portdev->vdev->dev, "Error adding buffer to queue\n"); free_buf(buf, false); } } - spin_unlock(&portdev->cvq_lock); + spin_unlock(&portdev->c_ivq_lock); } static void out_intr(struct virtqueue *vq) @@ -1752,13 +1756,23 @@ static void in_intr(struct virtqueue *vq) port->inbuf = get_inbuf(port); /* - * Don't queue up data when port is closed. This condition + * Normally the port should not accept data when the port is + * closed. For generic serial ports, the host won't (shouldn't) + * send data till the guest is connected. 
But this condition * can be reached when a console port is not yet connected (no - * tty is spawned) and the host sends out data to console - * ports. For generic serial ports, the host won't - * (shouldn't) send data till the guest is connected. + * tty is spawned) and the other side sends out data over the + * vring, or when a remote devices start sending data before + * the ports are opened. + * + * A generic serial port will discard data if not connected, + * while console ports and rproc-serial ports accepts data at + * any time. rproc-serial is initiated with guest_connected to + * false because port_fops_open expects this. Console ports are + * hooked up with an HVC console and is initialized with + * guest_connected to true. */ - if (!port->guest_connected) + + if (!port->guest_connected && !is_rproc_serial(port->portdev->vdev)) discard_port_data(port); spin_unlock_irqrestore(&port->inbuf_lock, flags); @@ -1986,10 +2000,12 @@ static int virtcons_probe(struct virtio_device *vdev) if (multiport) { unsigned int nr_added_bufs; - spin_lock_init(&portdev->cvq_lock); + spin_lock_init(&portdev->c_ivq_lock); + spin_lock_init(&portdev->c_ovq_lock); INIT_WORK(&portdev->control_work, &control_work_handler); - nr_added_bufs = fill_queue(portdev->c_ivq, &portdev->cvq_lock); + nr_added_bufs = fill_queue(portdev->c_ivq, + &portdev->c_ivq_lock); if (!nr_added_bufs) { dev_err(&vdev->dev, "Error allocating buffers for control queue\n"); @@ -2140,7 +2156,7 @@ static int virtcons_restore(struct virtio_device *vdev) return ret; if (use_multiport(portdev)) - fill_queue(portdev->c_ivq, &portdev->cvq_lock); + fill_queue(portdev->c_ivq, &portdev->c_ivq_lock); list_for_each_entry(port, &portdev->ports, list) { port->in_vq = portdev->in_vqs[port->id]; diff --git a/drivers/clk/clk-vt8500.c b/drivers/clk/clk-vt8500.c index b5538bba7a10..09c63315e579 100644 --- a/drivers/clk/clk-vt8500.c +++ b/drivers/clk/clk-vt8500.c @@ -157,7 +157,7 @@ static int vt8500_dclk_set_rate(struct clk_hw *hw, 
unsigned long rate, divisor = parent_rate / rate; /* If prate / rate would be decimal, incr the divisor */ - if (rate * divisor < *prate) + if (rate * divisor < parent_rate) divisor++; if (divisor == cdev->div_mask + 1) diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 143ce1f899ad..f873dcefe0de 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -703,7 +703,7 @@ static void tegra20_pll_init(void) clks[pll_a_out0] = clk; /* PLLE */ - clk = tegra_clk_register_plle("pll_e", "pll_ref", clk_base, NULL, + clk = tegra_clk_register_plle("pll_e", "pll_ref", clk_base, pmc_base, 0, 100000000, &pll_e_params, 0, pll_e_freq_table, NULL); clk_register_clkdev(clk, "pll_e", NULL); @@ -1292,7 +1292,6 @@ static struct tegra_clk_duplicate tegra_clk_duplicates[] = { TEGRA_CLK_DUPLICATE(usbd, "tegra-ehci.0", NULL), TEGRA_CLK_DUPLICATE(usbd, "tegra-otg", NULL), TEGRA_CLK_DUPLICATE(cclk, NULL, "cpu"), - TEGRA_CLK_DUPLICATE(twd, "smp_twd", NULL), TEGRA_CLK_DUPLICATE(clk_max, NULL, NULL), /* Must be the last entry */ }; diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 32c61cb6d0bb..ba6f51bc9f3b 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -1931,7 +1931,6 @@ static struct tegra_clk_duplicate tegra_clk_duplicates[] = { TEGRA_CLK_DUPLICATE(cml1, "tegra_sata_cml", NULL), TEGRA_CLK_DUPLICATE(cml0, "tegra_pcie", "cml"), TEGRA_CLK_DUPLICATE(pciex, "tegra_pcie", "pciex"), - TEGRA_CLK_DUPLICATE(twd, "smp_twd", NULL), TEGRA_CLK_DUPLICATE(vcp, "nvavp", "vcp"), TEGRA_CLK_DUPLICATE(clk_max, NULL, NULL), /* MUST be the last entry */ }; diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 1110478dd0fd..08ae128cce9b 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -232,6 +232,31 @@ void proc_comm_connector(struct task_struct *task) cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } +void 
proc_coredump_connector(struct task_struct *task) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + struct timespec ts; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg *)buffer; + ev = (struct proc_event *)msg->data; + get_seq(&msg->seq, &ev->cpu); + ktime_get_ts(&ts); /* get high res monotonic timestamp */ + put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->what = PROC_EVENT_COREDUMP; + ev->event_data.coredump.process_pid = task->pid; + ev->event_data.coredump.process_tgid = task->tgid; + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + void proc_exit_connector(struct task_struct *task) { struct cn_msg *msg; diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index f1b7e244bfc1..6ecfa758942c 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -23,7 +23,7 @@ #include <linux/module.h> #include <linux/list.h> #include <linux/skbuff.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/moduleparam.h> #include <linux/connector.h> #include <linux/slab.h> @@ -95,13 +95,13 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask) if (!netlink_has_listeners(dev->nls, group)) return -ESRCH; - size = NLMSG_SPACE(sizeof(*msg) + msg->len); + size = sizeof(*msg) + msg->len; - skb = alloc_skb(size, gfp_mask); + skb = nlmsg_new(size, gfp_mask); if (!skb) return -ENOMEM; - nlh = nlmsg_put(skb, 0, msg->seq, NLMSG_DONE, size - sizeof(*nlh), 0); + nlh = nlmsg_put(skb, 0, msg->seq, NLMSG_DONE, size, 0); if (!nlh) { kfree_skb(skb); return -EMSGSIZE; @@ -124,7 +124,7 @@ static int cn_call_callback(struct sk_buff *skb) { struct cn_callback_entry *i, *cbq = NULL; struct cn_dev *dev = &cdev; - struct cn_msg *msg = NLMSG_DATA(nlmsg_hdr(skb)); + struct cn_msg *msg = nlmsg_data(nlmsg_hdr(skb)); 
struct netlink_skb_parms *nsp = &NETLINK_CB(skb); int err = -ENODEV; @@ -162,7 +162,7 @@ static void cn_rx_skb(struct sk_buff *__skb) skb = skb_get(__skb); - if (skb->len >= NLMSG_SPACE(0)) { + if (skb->len >= NLMSG_HDRLEN) { nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(struct cn_msg) || diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 937bc286591f..57a8774f0b4e 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -730,7 +730,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { cpumask_copy(policy->cpus, perf->shared_cpu_map); } - cpumask_copy(policy->related_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); @@ -742,7 +741,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); - cpumask_copy(policy->related_cpus, cpu_sibling_mask(cpu)); policy->shared_type = CPUFREQ_SHARED_TYPE_HW; pr_info_once(PFX "overriding BIOS provided _PSD data\n"); } diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index 4e5b7fb8927c..37d23a0f8c56 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -178,10 +178,16 @@ static struct cpufreq_driver cpu0_cpufreq_driver = { static int cpu0_cpufreq_probe(struct platform_device *pdev) { - struct device_node *np; + struct device_node *np, *parent; int ret; - for_each_child_of_node(of_find_node_by_path("/cpus"), np) { + parent = of_find_node_by_path("/cpus"); + if (!parent) { + pr_err("failed to find OF /cpus\n"); + return -ENOENT; + } + + for_each_child_of_node(parent, np) { if (of_get_property(np, "operating-points", NULL)) break; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index d2ac91150600..cc4bd2f6838a 100644 --- 
a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -14,8 +14,8 @@ * published by the Free Software Foundation. */ -#ifndef _CPUFREQ_GOVERNER_H -#define _CPUFREQ_GOVERNER_H +#ifndef _CPUFREQ_GOVERNOR_H +#define _CPUFREQ_GOVERNOR_H #include <linux/cpufreq.h> #include <linux/kobject.h> @@ -64,7 +64,7 @@ static void *get_cpu_dbs_info_s(int cpu) \ * dbs: used as a shortform for demand based switching It helps to keep variable * names smaller, simpler * cdbs: common dbs - * on_*: On-demand governor + * od_*: On-demand governor * cs_*: Conservative governor */ @@ -175,4 +175,4 @@ bool need_load_eval(struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate); int cpufreq_governor_dbs(struct dbs_data *dbs_data, struct cpufreq_policy *policy, unsigned int event); -#endif /* _CPUFREQ_GOVERNER_H */ +#endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 2fd779eb1ed1..bfd6273fd873 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -180,15 +180,19 @@ static void cpufreq_stats_free_sysfs(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - if (!cpufreq_frequency_get_table(cpu)) + if (!policy) return; - if (policy && !policy_is_shared(policy)) { + if (!cpufreq_frequency_get_table(cpu)) + goto put_ref; + + if (!policy_is_shared(policy)) { pr_debug("%s: Free sysfs stat\n", __func__); sysfs_remove_group(&policy->kobj, &stats_attr_group); } - if (policy) - cpufreq_cpu_put(policy); + +put_ref: + cpufreq_cpu_put(policy); } static int cpufreq_stats_create_table(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index 66e3a71b81a3..b61b5a3fad64 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -28,13 +28,7 @@ static int hb_voltage_change(unsigned int freq) { - int i; - u32 msg[HB_CPUFREQ_IPC_LEN]; - - msg[0] = 
HB_CPUFREQ_CHANGE_NOTE; - msg[1] = freq / 1000000; - for (i = 2; i < HB_CPUFREQ_IPC_LEN; i++) - msg[i] = 0; + u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000}; return pl320_ipc_transmit(msg); } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 096fde0ebcb5..6133ef5cf671 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -358,14 +358,14 @@ static void intel_pstate_sysfs_expose_params(void) static int intel_pstate_min_pstate(void) { u64 value; - rdmsrl(0xCE, value); + rdmsrl(MSR_PLATFORM_INFO, value); return (value >> 40) & 0xFF; } static int intel_pstate_max_pstate(void) { u64 value; - rdmsrl(0xCE, value); + rdmsrl(MSR_PLATFORM_INFO, value); return (value >> 8) & 0xFF; } @@ -373,7 +373,7 @@ static int intel_pstate_turbo_pstate(void) { u64 value; int nont, ret; - rdmsrl(0x1AD, value); + rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); nont = intel_pstate_max_pstate(); ret = ((value) & 255); if (ret <= nont) @@ -454,7 +454,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, sample->idletime_us * 100, sample->duration_us); core_pct = div64_u64(sample->aperf * 100, sample->mperf); - sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000; + sample->freq = cpu->pstate.max_pstate * core_pct * 1000; sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct), 100); @@ -502,7 +502,6 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) sample_time = cpu->pstate_policy->sample_rate_ms; delay = msecs_to_jiffies(sample_time); - delay -= jiffies % delay; mod_timer_pinned(&cpu->timer, jiffies + delay); } @@ -662,6 +661,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; + if (!policy->cpuinfo.max_freq) + return -ENODEV; + intel_pstate_get_min_max(cpu, &min, &max); limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; @@ -747,37 +749,34 @@ static struct cpufreq_driver intel_pstate_driver = 
{ .owner = THIS_MODULE, }; -static void intel_pstate_exit(void) +static int __initdata no_load; + +static int intel_pstate_msrs_not_valid(void) { - int cpu; + /* Check that all the msr's we are using are valid. */ + u64 aperf, mperf, tmp; - sysfs_remove_group(intel_pstate_kobject, - &intel_pstate_attr_group); - debugfs_remove_recursive(debugfs_parent); + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); - cpufreq_unregister_driver(&intel_pstate_driver); + if (!intel_pstate_min_pstate() || + !intel_pstate_max_pstate() || + !intel_pstate_turbo_pstate()) + return -ENODEV; - if (!all_cpu_data) - return; + rdmsrl(MSR_IA32_APERF, tmp); + if (!(tmp - aperf)) + return -ENODEV; - get_online_cpus(); - for_each_online_cpu(cpu) { - if (all_cpu_data[cpu]) { - del_timer_sync(&all_cpu_data[cpu]->timer); - kfree(all_cpu_data[cpu]); - } - } + rdmsrl(MSR_IA32_MPERF, tmp); + if (!(tmp - mperf)) + return -ENODEV; - put_online_cpus(); - vfree(all_cpu_data); + return 0; } -module_exit(intel_pstate_exit); - -static int __initdata no_load; - static int __init intel_pstate_init(void) { - int rc = 0; + int cpu, rc = 0; const struct x86_cpu_id *id; if (no_load) @@ -787,6 +786,9 @@ static int __init intel_pstate_init(void) if (!id) return -ENODEV; + if (intel_pstate_msrs_not_valid()) + return -ENODEV; + pr_info("Intel P-state driver initializing.\n"); all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus()); @@ -802,7 +804,16 @@ static int __init intel_pstate_init(void) intel_pstate_sysfs_expose_params(); return rc; out: - intel_pstate_exit(); + get_online_cpus(); + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) { + del_timer_sync(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + } + } + + put_online_cpus(); + vfree(all_cpu_data); return -ENODEV; } device_initcall(intel_pstate_init); diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index b2a0a0726a54..cf268b14ae9a 100644 --- a/drivers/crypto/caam/caamalg.c +++ 
b/drivers/crypto/caam/caamalg.c @@ -1650,11 +1650,7 @@ struct caam_alg_template { }; static struct caam_alg_template driver_algs[] = { - /* - * single-pass ipsec_esp descriptor - * authencesn(*,*) is also registered, although not present - * explicitly here. - */ + /* single-pass ipsec_esp descriptor */ { .name = "authenc(hmac(md5),cbc(aes))", .driver_name = "authenc-hmac-md5-cbc-aes-caam", @@ -2217,9 +2213,7 @@ static int __init caam_algapi_init(void) for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { /* TODO: check if h/w supports alg */ struct caam_crypto_alg *t_alg; - bool done = false; -authencesn: t_alg = caam_alg_alloc(ctrldev, &driver_algs[i]); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); @@ -2233,25 +2227,8 @@ authencesn: dev_warn(ctrldev, "%s alg registration failed\n", t_alg->crypto_alg.cra_driver_name); kfree(t_alg); - } else { + } else list_add_tail(&t_alg->entry, &priv->alg_list); - if (driver_algs[i].type == CRYPTO_ALG_TYPE_AEAD && - !memcmp(driver_algs[i].name, "authenc", 7) && - !done) { - char *name; - - name = driver_algs[i].name; - memmove(name + 10, name + 7, strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - name = driver_algs[i].driver_name; - memmove(name + 10, name + 7, strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - done = true; - goto authencesn; - } - } } if (!list_empty(&priv->alg_list)) dev_info(ctrldev, "%s algorithms registered in /proc/crypto\n", diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index cf15e7813801..762aeff626ac 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -23,7 +23,6 @@ #include <linux/types.h> #include <linux/debugfs.h> #include <linux/circ_buf.h> -#include <linux/string.h> #include <net/xfrm.h> #include <crypto/algapi.h> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 09b184adf31b..5b2b5e61e4f9 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -38,7 +38,6 @@ #include <linux/spinlock.h> #include 
<linux/rtnetlink.h> #include <linux/slab.h> -#include <linux/string.h> #include <crypto/algapi.h> #include <crypto/aes.h> @@ -1974,11 +1973,7 @@ struct talitos_alg_template { }; static struct talitos_alg_template driver_algs[] = { - /* - * AEAD algorithms. These use a single-pass ipsec_esp descriptor. - * authencesn(*,*) is also registered, although not present - * explicitly here. - */ + /* AEAD algorithms. These use a single-pass ipsec_esp descriptor */ { .type = CRYPTO_ALG_TYPE_AEAD, .alg.crypto = { .cra_name = "authenc(hmac(sha1),cbc(aes))", @@ -2820,9 +2815,7 @@ static int talitos_probe(struct platform_device *ofdev) if (hw_supports(dev, driver_algs[i].desc_hdr_template)) { struct talitos_crypto_alg *t_alg; char *name = NULL; - bool authenc = false; -authencesn: t_alg = talitos_alg_alloc(dev, &driver_algs[i]); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); @@ -2837,8 +2830,6 @@ authencesn: err = crypto_register_alg( &t_alg->algt.alg.crypto); name = t_alg->algt.alg.crypto.cra_driver_name; - authenc = authenc ? 
!authenc : - !(bool)memcmp(name, "authenc", 7); break; case CRYPTO_ALG_TYPE_AHASH: err = crypto_register_ahash( @@ -2851,25 +2842,8 @@ authencesn: dev_err(dev, "%s alg registration failed\n", name); kfree(t_alg); - } else { + } else list_add_tail(&t_alg->entry, &priv->alg_list); - if (authenc) { - struct crypto_alg *alg = - &driver_algs[i].alg.crypto; - - name = alg->cra_name; - memmove(name + 10, name + 7, - strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - name = alg->cra_driver_name; - memmove(name + 10, name + 7, - strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - goto authencesn; - } - } } } if (!list_empty(&priv->alg_list)) diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 8bc5fef07e7a..22c9063e0120 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1750,7 +1750,7 @@ static struct platform_driver cryp_driver = { .shutdown = ux500_cryp_shutdown, .driver = { .owner = THIS_MODULE, - .name = "cryp1" + .name = "cryp1", .pm = &ux500_cryp_pm, } }; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 80b69971cf28..aeaea32bcfda 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -83,6 +83,7 @@ config INTEL_IOP_ADMA config DW_DMAC tristate "Synopsys DesignWare AHB DMA support" + depends on GENERIC_HARDIRQS select DMA_ENGINE default y if CPU_AT32AP7000 help diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 6e13f262139a..88cfc61329d2 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -310,8 +310,6 @@ static void atc_complete_all(struct at_dma_chan *atchan) dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); - BUG_ON(atc_chan_is_enabled(atchan)); - /* * Submit queued descriptors ASAP, i.e. before we go through * the completed ones. 
@@ -368,6 +366,9 @@ static void atc_advance_work(struct at_dma_chan *atchan) { dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); + if (atc_chan_is_enabled(atchan)) + return; + if (list_empty(&atchan->active_list) || list_is_singular(&atchan->active_list)) { atc_complete_all(atchan); @@ -1078,9 +1079,7 @@ static void atc_issue_pending(struct dma_chan *chan) return; spin_lock_irqsave(&atchan->lock, flags); - if (!atc_chan_is_enabled(atchan)) { - atc_advance_work(atchan); - } + atc_advance_work(atchan); spin_unlock_irqrestore(&atchan->lock, flags); } diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index c599558faeda..43a5329d4483 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1001,6 +1001,13 @@ static inline void convert_burst(u32 *maxburst) *maxburst = 0; } +static inline void convert_slave_id(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + dwc->dma_sconfig.slave_id -= dw->request_line_base; +} + static int set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig) { @@ -1015,6 +1022,7 @@ set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig) convert_burst(&dwc->dma_sconfig.src_maxburst); convert_burst(&dwc->dma_sconfig.dst_maxburst); + convert_slave_id(dwc); return 0; } @@ -1276,9 +1284,9 @@ static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec, if (dma_spec->args_count != 3) return NULL; - fargs.req = be32_to_cpup(dma_spec->args+0); - fargs.src = be32_to_cpup(dma_spec->args+1); - fargs.dst = be32_to_cpup(dma_spec->args+2); + fargs.req = dma_spec->args[0]; + fargs.src = dma_spec->args[1]; + fargs.dst = dma_spec->args[2]; if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS || fargs.src >= dw->nr_masters || @@ -1628,6 +1636,7 @@ dw_dma_parse_dt(struct platform_device *pdev) static int dw_probe(struct platform_device *pdev) { + const struct platform_device_id *match; struct dw_dma_platform_data *pdata; struct resource *io; struct dw_dma *dw; @@ 
-1711,6 +1720,11 @@ static int dw_probe(struct platform_device *pdev) memcpy(dw->data_width, pdata->data_width, 4); } + /* Get the base request line if set */ + match = platform_get_device_id(pdev); + if (match) + dw->request_line_base = (unsigned int)match->driver_data; + /* Calculate all channel mask before DMA setup */ dw->all_chan_mask = (1 << nr_channels) - 1; @@ -1906,7 +1920,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_id_table); #endif static const struct platform_device_id dw_dma_ids[] = { - { "INTL9C60", 0 }, + /* Name, Request Line Base */ + { "INTL9C60", (kernel_ulong_t)16 }, { } }; diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index cf0ce5c77d60..4d02c3669b75 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -247,6 +247,7 @@ struct dw_dma { /* hardware configuration */ unsigned char nr_masters; unsigned char data_width[4]; + unsigned int request_line_base; struct dw_dma_chan chan[0]; }; diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 9b041858d10d..9e84d5bc9307 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -470,8 +470,10 @@ struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) } if (!dca2_tag_map_valid(ioatdca->tag_map)) { - dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, " - "disabling DCA\n"); + WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, + "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", + dev_driver_string(&pdev->dev), + dev_name(&pdev->dev)); free_dca_provider(dca); return NULL; } @@ -689,7 +691,10 @@ struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) } if (dca3_tag_map_invalid(ioatdca->tag_map)) { - dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n"); + WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, + "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", + dev_driver_string(&pdev->dev), + dev_name(&pdev->dev)); free_dca_provider(dca); return 
NULL; } diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index c4b4fd2acc42..08b43bf37158 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -276,12 +276,20 @@ static void omap_dma_issue_pending(struct dma_chan *chan) spin_lock_irqsave(&c->vc.lock, flags); if (vchan_issue_pending(&c->vc) && !c->desc) { - struct omap_dmadev *d = to_omap_dma_dev(chan->device); - spin_lock(&d->lock); - if (list_empty(&c->node)) - list_add_tail(&c->node, &d->pending); - spin_unlock(&d->lock); - tasklet_schedule(&d->task); + /* + * c->cyclic is used only by audio and in this case the DMA need + * to be started without delay. + */ + if (!c->cyclic) { + struct omap_dmadev *d = to_omap_dma_dev(chan->device); + spin_lock(&d->lock); + if (list_empty(&c->node)) + list_add_tail(&c->node, &d->pending); + spin_unlock(&d->lock); + tasklet_schedule(&d->task); + } else { + omap_dma_start_desc(c); + } } spin_unlock_irqrestore(&c->vc.lock, flags); } diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 718153122759..5dbc5946c4c3 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2882,7 +2882,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) { struct dma_pl330_platdata *pdat; struct dma_pl330_dmac *pdmac; - struct dma_pl330_chan *pch; + struct dma_pl330_chan *pch, *_p; struct pl330_info *pi; struct dma_device *pd; struct resource *res; @@ -2984,7 +2984,16 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) ret = dma_async_device_register(pd); if (ret) { dev_err(&adev->dev, "unable to register DMAC\n"); - goto probe_err2; + goto probe_err3; + } + + if (adev->dev.of_node) { + ret = of_dma_controller_register(adev->dev.of_node, + of_dma_pl330_xlate, pdmac); + if (ret) { + dev_err(&adev->dev, + "unable to register DMA to the generic DT DMA helpers\n"); + } } dev_info(&adev->dev, @@ -2995,16 +3004,21 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pi->pcfg.data_bus_width / 8, pi->pcfg.num_chan, 
pi->pcfg.num_peri, pi->pcfg.num_events); - ret = of_dma_controller_register(adev->dev.of_node, - of_dma_pl330_xlate, pdmac); - if (ret) { - dev_err(&adev->dev, - "unable to register DMA to the generic DT DMA helpers\n"); - goto probe_err2; - } - return 0; +probe_err3: + amba_set_drvdata(adev, NULL); + /* Idle the DMAC */ + list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels, + chan.device_node) { + + /* Remove the channel */ + list_del(&pch->chan.device_node); + + /* Flush the channel */ + pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0); + pl330_free_chan_resources(&pch->chan); + } probe_err2: pl330_del(pi); probe_err1: @@ -3023,8 +3037,10 @@ static int pl330_remove(struct amba_device *adev) if (!pdmac) return 0; - of_dma_controller_free(adev->dev.of_node); + if (adev->dev.of_node) + of_dma_controller_free(adev->dev.of_node); + dma_async_device_unregister(&pdmac->ddma); amba_set_drvdata(adev, NULL); /* Idle the DMAC */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 910b0116c128..e1d13c463c90 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2048,12 +2048,18 @@ static int init_csrows(struct mem_ctl_info *mci) edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, i); - if (row_dct0) + if (row_dct0) { nr_pages = amd64_csrow_nr_pages(pvt, 0, i); + csrow->channels[0]->dimm->nr_pages = nr_pages; + } /* K8 has only one DCT */ - if (boot_cpu_data.x86 != 0xf && row_dct1) - nr_pages += amd64_csrow_nr_pages(pvt, 1, i); + if (boot_cpu_data.x86 != 0xf && row_dct1) { + int row_dct1_pages = amd64_csrow_nr_pages(pvt, 1, i); + + csrow->channels[1]->dimm->nr_pages = row_dct1_pages; + nr_pages += row_dct1_pages; + } mtype = amd64_determine_memory_type(pvt, i); @@ -2072,9 +2078,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = mtype; dimm->edac_mode = edac_mode; - dimm->nr_pages = nr_pages; } - csrow->nr_pages = nr_pages; } return empty; @@ -2419,7 +2423,6 @@ static int 
amd64_init_one_instance(struct pci_dev *F2) mci->pvt_info = pvt; mci->pdev = &pvt->F2->dev; - mci->csbased = 1; setup_mci_misc_attrs(mci, fam_type); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index cdb81aa73ab7..27e86d938262 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -86,7 +86,7 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm, int number) edac_dimm_info_location(dimm, location, sizeof(location)); edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n", - dimm->mci->mem_is_per_rank ? "rank" : "dimm", + dimm->mci->csbased ? "rank" : "dimm", number, location, dimm->csrow, dimm->cschannel); edac_dbg(4, " dimm = %p\n", dimm); edac_dbg(4, " dimm->label = '%s'\n", dimm->label); @@ -341,7 +341,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, memcpy(mci->layers, layers, sizeof(*layer) * n_layers); mci->nr_csrows = tot_csrows; mci->num_cschannel = tot_channels; - mci->mem_is_per_rank = per_rank; + mci->csbased = per_rank; /* * Alocate and fill the csrow/channels structs @@ -1235,7 +1235,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * incrementing the compat API counters */ edac_dbg(4, "%s csrows map: (%d,%d)\n", - mci->mem_is_per_rank ? "rank" : "dimm", + mci->csbased ? 
"rank" : "dimm", dimm->csrow, dimm->cschannel); if (row == -1) row = dimm->csrow; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 4f4b6137d74e..5899a76eec3b 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -143,7 +143,7 @@ static const char *edac_caps[] = { * and the per-dimm/per-rank one */ #define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) + static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) struct dev_ch_attribute { struct device_attribute attr; @@ -180,9 +180,6 @@ static ssize_t csrow_size_show(struct device *dev, int i; u32 nr_pages = 0; - if (csrow->mci->csbased) - return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages)); - for (i = 0; i < csrow->nr_channels; i++) nr_pages += csrow->channels[i]->dimm->nr_pages; return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); @@ -612,7 +609,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; - if (mci->mem_is_per_rank) + if (mci->csbased) dev_set_name(&dimm->dev, "rank%d", index); else dev_set_name(&dimm->dev, "dimm%d", index); @@ -778,14 +775,10 @@ static ssize_t mci_size_mb_show(struct device *dev, for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) { struct csrow_info *csrow = mci->csrows[csrow_idx]; - if (csrow->mci->csbased) { - total_pages += csrow->nr_pages; - } else { - for (j = 0; j < csrow->nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + for (j = 0; j < csrow->nr_channels; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; - total_pages += dimm->nr_pages; - } + total_pages += dimm->nr_pages; } } diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index cdae207028a7..6c3fca97d346 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -19,10 +19,10 @@ /* There is only 
*one* pci_eisa device per machine, right ? */ static struct eisa_root_device pci_eisa_root; -static int __init pci_eisa_init(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int __init pci_eisa_init(struct pci_dev *pdev) { - int rc; + int rc, i; + struct resource *res, *bus_res = NULL; if ((rc = pci_enable_device (pdev))) { printk (KERN_ERR "pci_eisa : Could not enable device %s\n", @@ -30,9 +30,30 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return rc; } + /* + * The Intel 82375 PCI-EISA bridge is a subtractive-decode PCI + * device, so the resources available on EISA are the same as those + * available on the 82375 bus. This works the same as a PCI-PCI + * bridge in subtractive-decode mode (see pci_read_bridge_bases()). + * We assume other PCI-EISA bridges are similar. + * + * eisa_root_register() can only deal with a single io port resource, + * so we use the first valid io port resource. + */ + pci_bus_for_each_resource(pdev->bus, res, i) + if (res && (res->flags & IORESOURCE_IO)) { + bus_res = res; + break; + } + + if (!bus_res) { + dev_err(&pdev->dev, "No resources available\n"); + return -1; + } + pci_eisa_root.dev = &pdev->dev; - pci_eisa_root.res = pdev->bus->resource[0]; - pci_eisa_root.bus_base_addr = pdev->bus->resource[0]->start; + pci_eisa_root.res = bus_res; + pci_eisa_root.bus_base_addr = bus_res->start; pci_eisa_root.slots = EISA_MAX_SLOTS; pci_eisa_root.dma_mask = pdev->dma_mask; dev_set_drvdata(pci_eisa_root.dev, &pci_eisa_root); @@ -45,22 +66,26 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return 0; } -static struct pci_device_id pci_eisa_pci_tbl[] = { - { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, - { 0, } -}; +/* + * We have to call pci_eisa_init_early() before pnpacpi_init()/isapnp_init(). + * Otherwise pnp resource will get enabled early and could prevent eisa + * to be initialized. 
+ * Also need to make sure pci_eisa_init_early() is called after + * x86/pci_subsys_init(). + * So need to use subsys_initcall_sync with it. + */ +static int __init pci_eisa_init_early(void) +{ + struct pci_dev *dev = NULL; + int ret; -static struct pci_driver __refdata pci_eisa_driver = { - .name = "pci_eisa", - .id_table = pci_eisa_pci_tbl, - .probe = pci_eisa_init, -}; + for_each_pci_dev(dev) + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_EISA) { + ret = pci_eisa_init(dev); + if (ret) + return ret; + } -static int __init pci_eisa_init_module (void) -{ - return pci_register_driver (&pci_eisa_driver); + return 0; } - -device_initcall(pci_eisa_init_module); -MODULE_DEVICE_TABLE(pci, pci_eisa_pci_tbl); +subsys_initcall_sync(pci_eisa_init_early); diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index b70e3815c459..8f3c947b0029 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -32,6 +32,38 @@ #define DEV_NAME "max77693-muic" #define DELAY_MS_DEFAULT 20000 /* unit: millisecond */ +/* + * Default value of MAX77693 register to bring up MUIC device. + * If user don't set some initial value for MUIC device through platform data, + * extcon-max77693 driver use 'default_init_data' to bring up base operation + * of MAX77693 MUIC device. 
+ */ +struct max77693_reg_data default_init_data[] = { + { + /* STATUS2 - [3]ChgDetRun */ + .addr = MAX77693_MUIC_REG_STATUS2, + .data = STATUS2_CHGDETRUN_MASK, + }, { + /* INTMASK1 - Unmask [3]ADC1KM,[0]ADCM */ + .addr = MAX77693_MUIC_REG_INTMASK1, + .data = INTMASK1_ADC1K_MASK + | INTMASK1_ADC_MASK, + }, { + /* INTMASK2 - Unmask [0]ChgTypM */ + .addr = MAX77693_MUIC_REG_INTMASK2, + .data = INTMASK2_CHGTYP_MASK, + }, { + /* INTMASK3 - Mask all of interrupts */ + .addr = MAX77693_MUIC_REG_INTMASK3, + .data = 0x0, + }, { + /* CDETCTRL2 */ + .addr = MAX77693_MUIC_REG_CDETCTRL2, + .data = CDETCTRL2_VIDRMEN_MASK + | CDETCTRL2_DXOVPEN_MASK, + }, +}; + enum max77693_muic_adc_debounce_time { ADC_DEBOUNCE_TIME_5MS = 0, ADC_DEBOUNCE_TIME_10MS, @@ -1045,8 +1077,9 @@ static int max77693_muic_probe(struct platform_device *pdev) { struct max77693_dev *max77693 = dev_get_drvdata(pdev->dev.parent); struct max77693_platform_data *pdata = dev_get_platdata(max77693->dev); - struct max77693_muic_platform_data *muic_pdata = pdata->muic_data; struct max77693_muic_info *info; + struct max77693_reg_data *init_data; + int num_init_data; int delay_jiffies; int ret; int i; @@ -1145,15 +1178,25 @@ static int max77693_muic_probe(struct platform_device *pdev) goto err_irq; } - /* Initialize MUIC register by using platform data */ - for (i = 0 ; i < muic_pdata->num_init_data ; i++) { - enum max77693_irq_source irq_src = MAX77693_IRQ_GROUP_NR; + + /* Initialize MUIC register by using platform data or default data */ + if (pdata->muic_data) { + init_data = pdata->muic_data->init_data; + num_init_data = pdata->muic_data->num_init_data; + } else { + init_data = default_init_data; + num_init_data = ARRAY_SIZE(default_init_data); + } + + for (i = 0 ; i < num_init_data ; i++) { + enum max77693_irq_source irq_src + = MAX77693_IRQ_GROUP_NR; max77693_write_reg(info->max77693->regmap_muic, - muic_pdata->init_data[i].addr, - muic_pdata->init_data[i].data); + init_data[i].addr, + init_data[i].data); - 
switch (muic_pdata->init_data[i].addr) { + switch (init_data[i].addr) { case MAX77693_MUIC_REG_INTMASK1: irq_src = MUIC_INT1; break; @@ -1167,22 +1210,40 @@ static int max77693_muic_probe(struct platform_device *pdev) if (irq_src < MAX77693_IRQ_GROUP_NR) info->max77693->irq_masks_cur[irq_src] - = muic_pdata->init_data[i].data; + = init_data[i].data; } - /* - * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB - * h/w path of COMP2/COMN1 on CONTROL1 register. - */ - if (muic_pdata->path_uart) - info->path_uart = muic_pdata->path_uart; - else - info->path_uart = CONTROL1_SW_UART; + if (pdata->muic_data) { + struct max77693_muic_platform_data *muic_pdata = pdata->muic_data; - if (muic_pdata->path_usb) - info->path_usb = muic_pdata->path_usb; - else + /* + * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB + * h/w path of COMP2/COMN1 on CONTROL1 register. + */ + if (muic_pdata->path_uart) + info->path_uart = muic_pdata->path_uart; + else + info->path_uart = CONTROL1_SW_UART; + + if (muic_pdata->path_usb) + info->path_usb = muic_pdata->path_usb; + else + info->path_usb = CONTROL1_SW_USB; + + /* + * Default delay time for detecting cable state + * after certain time. + */ + if (muic_pdata->detcable_delay_ms) + delay_jiffies = + msecs_to_jiffies(muic_pdata->detcable_delay_ms); + else + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } else { info->path_usb = CONTROL1_SW_USB; + info->path_uart = CONTROL1_SW_UART; + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } /* Set initial path for UART */ max77693_muic_set_path(info, info->path_uart, true); @@ -1208,10 +1269,6 @@ static int max77693_muic_probe(struct platform_device *pdev) * driver should notify cable state to upper layer. 
*/ INIT_DELAYED_WORK(&info->wq_detcable, max77693_muic_detect_cable_wq); - if (muic_pdata->detcable_delay_ms) - delay_jiffies = msecs_to_jiffies(muic_pdata->detcable_delay_ms); - else - delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); schedule_delayed_work(&info->wq_detcable, delay_jiffies); return ret; diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c index e636d950ad6c..69641bcae325 100644 --- a/drivers/extcon/extcon-max8997.c +++ b/drivers/extcon/extcon-max8997.c @@ -712,29 +712,45 @@ static int max8997_muic_probe(struct platform_device *pdev) goto err_irq; } - /* Initialize registers according to platform data */ if (pdata->muic_pdata) { - struct max8997_muic_platform_data *mdata = info->muic_pdata; - - for (i = 0; i < mdata->num_init_data; i++) { - max8997_write_reg(info->muic, mdata->init_data[i].addr, - mdata->init_data[i].data); + struct max8997_muic_platform_data *muic_pdata + = pdata->muic_pdata; + + /* Initialize registers according to platform data */ + for (i = 0; i < muic_pdata->num_init_data; i++) { + max8997_write_reg(info->muic, + muic_pdata->init_data[i].addr, + muic_pdata->init_data[i].data); } - } - /* - * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB - * h/w path of COMP2/COMN1 on CONTROL1 register. - */ - if (pdata->muic_pdata->path_uart) - info->path_uart = pdata->muic_pdata->path_uart; - else - info->path_uart = CONTROL1_SW_UART; + /* + * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB + * h/w path of COMP2/COMN1 on CONTROL1 register. + */ + if (muic_pdata->path_uart) + info->path_uart = muic_pdata->path_uart; + else + info->path_uart = CONTROL1_SW_UART; - if (pdata->muic_pdata->path_usb) - info->path_usb = pdata->muic_pdata->path_usb; - else + if (muic_pdata->path_usb) + info->path_usb = muic_pdata->path_usb; + else + info->path_usb = CONTROL1_SW_USB; + + /* + * Default delay time for detecting cable state + * after certain time. 
+ */ + if (muic_pdata->detcable_delay_ms) + delay_jiffies = + msecs_to_jiffies(muic_pdata->detcable_delay_ms); + else + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } else { + info->path_uart = CONTROL1_SW_UART; info->path_usb = CONTROL1_SW_USB; + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } /* Set initial path for UART */ max8997_muic_set_path(info, info->path_uart, true); @@ -751,10 +767,6 @@ static int max8997_muic_probe(struct platform_device *pdev) * driver should notify cable state to upper layer. */ INIT_DELAYED_WORK(&info->wq_detcable, max8997_muic_detect_cable_wq); - if (pdata->muic_pdata->detcable_delay_ms) - delay_jiffies = msecs_to_jiffies(pdata->muic_pdata->detcable_delay_ms); - else - delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); schedule_delayed_work(&info->wq_detcable, delay_jiffies); return 0; diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig index 7224533e8ca6..7a701a58bbf0 100644 --- a/drivers/firewire/Kconfig +++ b/drivers/firewire/Kconfig @@ -47,9 +47,9 @@ config FIREWIRE_NET tristate "IP networking over 1394" depends on FIREWIRE && INET help - This enables IPv4 over IEEE 1394, providing IP connectivity with - other implementations of RFC 2734 as found on several operating - systems. Multicast support is currently limited. + This enables IPv4/IPv6 over IEEE 1394, providing IP connectivity + with other implementations of RFC 2734/3146 as found on several + operating systems. Multicast support is currently limited. To compile this driver as a module, say M here: The module will be called firewire-net. 
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 2b27bff2591a..4d565365e476 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -1,5 +1,6 @@ /* * IPv4 over IEEE 1394, per RFC 2734 + * IPv6 over IEEE 1394, per RFC 3146 * * Copyright (C) 2009 Jay Fenlason <fenlason@redhat.com> * @@ -28,6 +29,7 @@ #include <asm/unaligned.h> #include <net/arp.h> +#include <net/firewire.h> /* rx limits */ #define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */ @@ -45,6 +47,7 @@ #define IANA_SPECIFIER_ID 0x00005eU #define RFC2734_SW_VERSION 0x000001U +#define RFC3146_SW_VERSION 0x000002U #define IEEE1394_GASP_HDR_SIZE 8 @@ -57,32 +60,10 @@ #define RFC2374_HDR_LASTFRAG 2 /* last fragment */ #define RFC2374_HDR_INTFRAG 3 /* interior fragment */ -#define RFC2734_HW_ADDR_LEN 16 - -struct rfc2734_arp { - __be16 hw_type; /* 0x0018 */ - __be16 proto_type; /* 0x0806 */ - u8 hw_addr_len; /* 16 */ - u8 ip_addr_len; /* 4 */ - __be16 opcode; /* ARP Opcode */ - /* Above is exactly the same format as struct arphdr */ - - __be64 s_uniq_id; /* Sender's 64bit EUI */ - u8 max_rec; /* Sender's max packet size */ - u8 sspd; /* Sender's max speed */ - __be16 fifo_hi; /* hi 16bits of sender's FIFO addr */ - __be32 fifo_lo; /* lo 32bits of sender's FIFO addr */ - __be32 sip; /* Sender's IP Address */ - __be32 tip; /* IP Address of requested hw addr */ -} __packed; - -/* This header format is specific to this driver implementation. 
*/ -#define FWNET_ALEN 8 -#define FWNET_HLEN 10 -struct fwnet_header { - u8 h_dest[FWNET_ALEN]; /* destination address */ - __be16 h_proto; /* packet type ID field */ -} __packed; +static bool fwnet_hwaddr_is_multicast(u8 *ha) +{ + return !!(*ha & 1); +} /* IPv4 and IPv6 encapsulation header */ struct rfc2734_header { @@ -191,8 +172,6 @@ struct fwnet_peer { struct list_head peer_link; struct fwnet_device *dev; u64 guid; - u64 fifo; - __be32 ip; /* guarded by dev->lock */ struct list_head pd_list; /* received partial datagrams */ @@ -222,6 +201,15 @@ struct fwnet_packet_task { }; /* + * Get fifo address embedded in hwaddr + */ +static __u64 fwnet_hwaddr_fifo(union fwnet_hwaddr *ha) +{ + return (u64)get_unaligned_be16(&ha->uc.fifo_hi) << 32 + | get_unaligned_be32(&ha->uc.fifo_lo); +} + +/* * saddr == NULL means use device source address. * daddr == NULL means leave destination address (eg unresolved arp). */ @@ -513,10 +501,20 @@ static int fwnet_finish_incoming_packet(struct net_device *net, bool is_broadcast, u16 ether_type) { struct fwnet_device *dev; - static const __be64 broadcast_hw = cpu_to_be64(~0ULL); int status; __be64 guid; + switch (ether_type) { + case ETH_P_ARP: + case ETH_P_IP: +#if IS_ENABLED(CONFIG_IPV6) + case ETH_P_IPV6: +#endif + break; + default: + goto err; + } + dev = netdev_priv(net); /* Write metadata, and then pass to the receive level */ skb->dev = net; @@ -524,92 +522,11 @@ static int fwnet_finish_incoming_packet(struct net_device *net, /* * Parse the encapsulation header. This actually does the job of - * converting to an ethernet frame header, as well as arp - * conversion if needed. ARP conversion is easier in this - * direction, since we are using ethernet as our backend. + * converting to an ethernet-like pseudo frame header. */ - /* - * If this is an ARP packet, convert it. First, we want to make - * use of some of the fields, since they tell us a little bit - * about the sending machine. 
- */ - if (ether_type == ETH_P_ARP) { - struct rfc2734_arp *arp1394; - struct arphdr *arp; - unsigned char *arp_ptr; - u64 fifo_addr; - u64 peer_guid; - unsigned sspd; - u16 max_payload; - struct fwnet_peer *peer; - unsigned long flags; - - arp1394 = (struct rfc2734_arp *)skb->data; - arp = (struct arphdr *)skb->data; - arp_ptr = (unsigned char *)(arp + 1); - peer_guid = get_unaligned_be64(&arp1394->s_uniq_id); - fifo_addr = (u64)get_unaligned_be16(&arp1394->fifo_hi) << 32 - | get_unaligned_be32(&arp1394->fifo_lo); - - sspd = arp1394->sspd; - /* Sanity check. OS X 10.3 PPC reportedly sends 131. */ - if (sspd > SCODE_3200) { - dev_notice(&net->dev, "sspd %x out of range\n", sspd); - sspd = SCODE_3200; - } - max_payload = fwnet_max_payload(arp1394->max_rec, sspd); - - spin_lock_irqsave(&dev->lock, flags); - peer = fwnet_peer_find_by_guid(dev, peer_guid); - if (peer) { - peer->fifo = fifo_addr; - - if (peer->speed > sspd) - peer->speed = sspd; - if (peer->max_payload > max_payload) - peer->max_payload = max_payload; - - peer->ip = arp1394->sip; - } - spin_unlock_irqrestore(&dev->lock, flags); - - if (!peer) { - dev_notice(&net->dev, - "no peer for ARP packet from %016llx\n", - (unsigned long long)peer_guid); - goto no_peer; - } - - /* - * Now that we're done with the 1394 specific stuff, we'll - * need to alter some of the data. Believe it or not, all - * that needs to be done is sender_IP_address needs to be - * moved, the destination hardware address get stuffed - * in and the hardware address length set to 8. - * - * IMPORTANT: The code below overwrites 1394 specific data - * needed above so keep the munging of the data for the - * higher level IP stack last. 
- */ - - arp->ar_hln = 8; - /* skip over sender unique id */ - arp_ptr += arp->ar_hln; - /* move sender IP addr */ - put_unaligned(arp1394->sip, (u32 *)arp_ptr); - /* skip over sender IP addr */ - arp_ptr += arp->ar_pln; - - if (arp->ar_op == htons(ARPOP_REQUEST)) - memset(arp_ptr, 0, sizeof(u64)); - else - memcpy(arp_ptr, net->dev_addr, sizeof(u64)); - } - - /* Now add the ethernet header. */ guid = cpu_to_be64(dev->card->guid); if (dev_hard_header(skb, net, ether_type, - is_broadcast ? &broadcast_hw : &guid, + is_broadcast ? net->broadcast : net->dev_addr, NULL, skb->len) >= 0) { struct fwnet_header *eth; u16 *rawp; @@ -618,7 +535,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, skb_reset_mac_header(skb); skb_pull(skb, sizeof(*eth)); eth = (struct fwnet_header *)skb_mac_header(skb); - if (*eth->h_dest & 1) { + if (fwnet_hwaddr_is_multicast(eth->h_dest)) { if (memcmp(eth->h_dest, net->broadcast, net->addr_len) == 0) skb->pkt_type = PACKET_BROADCAST; @@ -630,7 +547,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, if (memcmp(eth->h_dest, net->dev_addr, net->addr_len)) skb->pkt_type = PACKET_OTHERHOST; } - if (ntohs(eth->h_proto) >= 1536) { + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) { protocol = eth->h_proto; } else { rawp = (u16 *)skb->data; @@ -652,7 +569,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, return 0; - no_peer: + err: net->stats.rx_errors++; net->stats.rx_dropped++; @@ -856,7 +773,12 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context, ver = be32_to_cpu(buf_ptr[1]) & 0xffffff; source_node_id = be32_to_cpu(buf_ptr[0]) >> 16; - if (specifier_id == IANA_SPECIFIER_ID && ver == RFC2734_SW_VERSION) { + if (specifier_id == IANA_SPECIFIER_ID && + (ver == RFC2734_SW_VERSION +#if IS_ENABLED(CONFIG_IPV6) + || ver == RFC3146_SW_VERSION +#endif + )) { buf_ptr += 2; length -= IEEE1394_GASP_HDR_SIZE; fwnet_incoming_packet(dev, buf_ptr, length, source_node_id, @@ -1059,16 +981,27 @@ 
static int fwnet_send_packet(struct fwnet_packet_task *ptask) u8 *p; int generation; int node_id; + unsigned int sw_version; /* ptask->generation may not have been set yet */ generation = dev->card->generation; smp_rmb(); node_id = dev->card->node_id; + switch (ptask->skb->protocol) { + default: + sw_version = RFC2734_SW_VERSION; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + sw_version = RFC3146_SW_VERSION; +#endif + } + p = skb_push(ptask->skb, IEEE1394_GASP_HDR_SIZE); put_unaligned_be32(node_id << 16 | IANA_SPECIFIER_ID >> 8, p); put_unaligned_be32((IANA_SPECIFIER_ID & 0xff) << 24 - | RFC2734_SW_VERSION, &p[4]); + | sw_version, &p[4]); /* We should not transmit if broadcast_channel.valid == 0. */ fw_send_request(dev->card, &ptask->transaction, @@ -1116,6 +1049,62 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) return 0; } +static void fwnet_fifo_stop(struct fwnet_device *dev) +{ + if (dev->local_fifo == FWNET_NO_FIFO_ADDR) + return; + + fw_core_remove_address_handler(&dev->handler); + dev->local_fifo = FWNET_NO_FIFO_ADDR; +} + +static int fwnet_fifo_start(struct fwnet_device *dev) +{ + int retval; + + if (dev->local_fifo != FWNET_NO_FIFO_ADDR) + return 0; + + dev->handler.length = 4096; + dev->handler.address_callback = fwnet_receive_packet; + dev->handler.callback_data = dev; + + retval = fw_core_add_address_handler(&dev->handler, + &fw_high_memory_region); + if (retval < 0) + return retval; + + dev->local_fifo = dev->handler.offset; + + return 0; +} + +static void __fwnet_broadcast_stop(struct fwnet_device *dev) +{ + unsigned u; + + if (dev->broadcast_state != FWNET_BROADCAST_ERROR) { + for (u = 0; u < FWNET_ISO_PAGE_COUNT; u++) + kunmap(dev->broadcast_rcv_buffer.pages[u]); + fw_iso_buffer_destroy(&dev->broadcast_rcv_buffer, dev->card); + } + if (dev->broadcast_rcv_context) { + fw_iso_context_destroy(dev->broadcast_rcv_context); + dev->broadcast_rcv_context = NULL; + } + kfree(dev->broadcast_rcv_buffer_ptrs); + 
dev->broadcast_rcv_buffer_ptrs = NULL; + dev->broadcast_state = FWNET_BROADCAST_ERROR; +} + +static void fwnet_broadcast_stop(struct fwnet_device *dev) +{ + if (dev->broadcast_state == FWNET_BROADCAST_ERROR) + return; + fw_iso_context_stop(dev->broadcast_rcv_context); + __fwnet_broadcast_stop(dev); +} + static int fwnet_broadcast_start(struct fwnet_device *dev) { struct fw_iso_context *context; @@ -1124,60 +1113,47 @@ static int fwnet_broadcast_start(struct fwnet_device *dev) unsigned max_receive; struct fw_iso_packet packet; unsigned long offset; + void **ptrptr; unsigned u; - if (dev->local_fifo == FWNET_NO_FIFO_ADDR) { - dev->handler.length = 4096; - dev->handler.address_callback = fwnet_receive_packet; - dev->handler.callback_data = dev; - - retval = fw_core_add_address_handler(&dev->handler, - &fw_high_memory_region); - if (retval < 0) - goto failed_initial; - - dev->local_fifo = dev->handler.offset; - } + if (dev->broadcast_state != FWNET_BROADCAST_ERROR) + return 0; max_receive = 1U << (dev->card->max_receive + 1); num_packets = (FWNET_ISO_PAGE_COUNT * PAGE_SIZE) / max_receive; - if (!dev->broadcast_rcv_context) { - void **ptrptr; - - context = fw_iso_context_create(dev->card, - FW_ISO_CONTEXT_RECEIVE, IEEE1394_BROADCAST_CHANNEL, - dev->card->link_speed, 8, fwnet_receive_broadcast, dev); - if (IS_ERR(context)) { - retval = PTR_ERR(context); - goto failed_context_create; - } + ptrptr = kmalloc(sizeof(void *) * num_packets, GFP_KERNEL); + if (!ptrptr) { + retval = -ENOMEM; + goto failed; + } + dev->broadcast_rcv_buffer_ptrs = ptrptr; + + context = fw_iso_context_create(dev->card, FW_ISO_CONTEXT_RECEIVE, + IEEE1394_BROADCAST_CHANNEL, + dev->card->link_speed, 8, + fwnet_receive_broadcast, dev); + if (IS_ERR(context)) { + retval = PTR_ERR(context); + goto failed; + } - retval = fw_iso_buffer_init(&dev->broadcast_rcv_buffer, - dev->card, FWNET_ISO_PAGE_COUNT, DMA_FROM_DEVICE); - if (retval < 0) - goto failed_buffer_init; + retval = 
fw_iso_buffer_init(&dev->broadcast_rcv_buffer, dev->card, + FWNET_ISO_PAGE_COUNT, DMA_FROM_DEVICE); + if (retval < 0) + goto failed; - ptrptr = kmalloc(sizeof(void *) * num_packets, GFP_KERNEL); - if (!ptrptr) { - retval = -ENOMEM; - goto failed_ptrs_alloc; - } + dev->broadcast_state = FWNET_BROADCAST_STOPPED; - dev->broadcast_rcv_buffer_ptrs = ptrptr; - for (u = 0; u < FWNET_ISO_PAGE_COUNT; u++) { - void *ptr; - unsigned v; + for (u = 0; u < FWNET_ISO_PAGE_COUNT; u++) { + void *ptr; + unsigned v; - ptr = kmap(dev->broadcast_rcv_buffer.pages[u]); - for (v = 0; v < num_packets / FWNET_ISO_PAGE_COUNT; v++) - *ptrptr++ = (void *) - ((char *)ptr + v * max_receive); - } - dev->broadcast_rcv_context = context; - } else { - context = dev->broadcast_rcv_context; + ptr = kmap(dev->broadcast_rcv_buffer.pages[u]); + for (v = 0; v < num_packets / FWNET_ISO_PAGE_COUNT; v++) + *ptrptr++ = (void *) ((char *)ptr + v * max_receive); } + dev->broadcast_rcv_context = context; packet.payload_length = max_receive; packet.interrupt = 1; @@ -1191,7 +1167,7 @@ static int fwnet_broadcast_start(struct fwnet_device *dev) retval = fw_iso_context_queue(context, &packet, &dev->broadcast_rcv_buffer, offset); if (retval < 0) - goto failed_rcv_queue; + goto failed; offset += max_receive; } @@ -1201,7 +1177,7 @@ static int fwnet_broadcast_start(struct fwnet_device *dev) retval = fw_iso_context_start(context, -1, 0, FW_ISO_CONTEXT_MATCH_ALL_TAGS); /* ??? sync */ if (retval < 0) - goto failed_rcv_queue; + goto failed; /* FIXME: adjust it according to the min. speed of all known peers? 
*/ dev->broadcast_xmt_max_payload = IEEE1394_MAX_PAYLOAD_S100 @@ -1210,19 +1186,8 @@ static int fwnet_broadcast_start(struct fwnet_device *dev) return 0; - failed_rcv_queue: - kfree(dev->broadcast_rcv_buffer_ptrs); - dev->broadcast_rcv_buffer_ptrs = NULL; - failed_ptrs_alloc: - fw_iso_buffer_destroy(&dev->broadcast_rcv_buffer, dev->card); - failed_buffer_init: - fw_iso_context_destroy(context); - dev->broadcast_rcv_context = NULL; - failed_context_create: - fw_core_remove_address_handler(&dev->handler); - failed_initial: - dev->local_fifo = FWNET_NO_FIFO_ADDR; - + failed: + __fwnet_broadcast_stop(dev); return retval; } @@ -1240,11 +1205,10 @@ static int fwnet_open(struct net_device *net) struct fwnet_device *dev = netdev_priv(net); int ret; - if (dev->broadcast_state == FWNET_BROADCAST_ERROR) { - ret = fwnet_broadcast_start(dev); - if (ret) - return ret; - } + ret = fwnet_broadcast_start(dev); + if (ret) + return ret; + netif_start_queue(net); spin_lock_irq(&dev->lock); @@ -1257,9 +1221,10 @@ static int fwnet_open(struct net_device *net) /* ifdown */ static int fwnet_stop(struct net_device *net) { - netif_stop_queue(net); + struct fwnet_device *dev = netdev_priv(net); - /* Deallocate iso context for use by other applications? */ + netif_stop_queue(net); + fwnet_broadcast_stop(dev); return 0; } @@ -1299,19 +1264,27 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) * We might need to rebuild the header on tx failure. */ memcpy(&hdr_buf, skb->data, sizeof(hdr_buf)); - skb_pull(skb, sizeof(hdr_buf)); - proto = hdr_buf.h_proto; + + switch (proto) { + case htons(ETH_P_ARP): + case htons(ETH_P_IP): +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): +#endif + break; + default: + goto fail; + } + + skb_pull(skb, sizeof(hdr_buf)); dg_size = skb->len; /* * Set the transmission type for the packet. ARP packets and IP * broadcast packets are sent via GASP. 
*/ - if (memcmp(hdr_buf.h_dest, net->broadcast, FWNET_ALEN) == 0 - || proto == htons(ETH_P_ARP) - || (proto == htons(ETH_P_IP) - && IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) { + if (fwnet_hwaddr_is_multicast(hdr_buf.h_dest)) { max_payload = dev->broadcast_xmt_max_payload; datagram_label_ptr = &dev->broadcast_xmt_datagramlabel; @@ -1320,11 +1293,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) ptask->dest_node = IEEE1394_ALL_NODES; ptask->speed = SCODE_100; } else { - __be64 guid = get_unaligned((__be64 *)hdr_buf.h_dest); + union fwnet_hwaddr *ha = (union fwnet_hwaddr *)hdr_buf.h_dest; + __be64 guid = get_unaligned(&ha->uc.uniq_id); u8 generation; peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); - if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) + if (!peer) goto fail; generation = peer->generation; @@ -1332,32 +1306,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload = peer->max_payload; datagram_label_ptr = &peer->datagram_label; - ptask->fifo_addr = peer->fifo; + ptask->fifo_addr = fwnet_hwaddr_fifo(ha); ptask->generation = generation; ptask->dest_node = dest_node; ptask->speed = peer->speed; } - /* If this is an ARP packet, convert it */ - if (proto == htons(ETH_P_ARP)) { - struct arphdr *arp = (struct arphdr *)skb->data; - unsigned char *arp_ptr = (unsigned char *)(arp + 1); - struct rfc2734_arp *arp1394 = (struct rfc2734_arp *)skb->data; - __be32 ipaddr; - - ipaddr = get_unaligned((__be32 *)(arp_ptr + FWNET_ALEN)); - - arp1394->hw_addr_len = RFC2734_HW_ADDR_LEN; - arp1394->max_rec = dev->card->max_receive; - arp1394->sspd = dev->card->link_speed; - - put_unaligned_be16(dev->local_fifo >> 32, - &arp1394->fifo_hi); - put_unaligned_be32(dev->local_fifo & 0xffffffff, - &arp1394->fifo_lo); - put_unaligned(ipaddr, &arp1394->sip); - } - ptask->hdr.w0 = 0; ptask->hdr.w1 = 0; ptask->skb = skb; @@ -1472,8 +1426,6 @@ static int fwnet_add_peer(struct fwnet_device *dev, peer->dev = dev; peer->guid = 
(u64)device->config_rom[3] << 32 | device->config_rom[4]; - peer->fifo = FWNET_NO_FIFO_ADDR; - peer->ip = 0; INIT_LIST_HEAD(&peer->pd_list); peer->pdg_size = 0; peer->datagram_label = 0; @@ -1503,6 +1455,7 @@ static int fwnet_probe(struct device *_dev) struct fwnet_device *dev; unsigned max_mtu; int ret; + union fwnet_hwaddr *ha; mutex_lock(&fwnet_device_mutex); @@ -1533,6 +1486,11 @@ static int fwnet_probe(struct device *_dev) dev->card = card; dev->netdev = net; + ret = fwnet_fifo_start(dev); + if (ret < 0) + goto out; + dev->local_fifo = dev->handler.offset; + /* * Use the RFC 2734 default 1500 octets or the maximum payload * as initial MTU @@ -1542,24 +1500,31 @@ static int fwnet_probe(struct device *_dev) net->mtu = min(1500U, max_mtu); /* Set our hardware address while we're at it */ - put_unaligned_be64(card->guid, net->dev_addr); - put_unaligned_be64(~0ULL, net->broadcast); + ha = (union fwnet_hwaddr *)net->dev_addr; + put_unaligned_be64(card->guid, &ha->uc.uniq_id); + ha->uc.max_rec = dev->card->max_receive; + ha->uc.sspd = dev->card->link_speed; + put_unaligned_be16(dev->local_fifo >> 32, &ha->uc.fifo_hi); + put_unaligned_be32(dev->local_fifo & 0xffffffff, &ha->uc.fifo_lo); + + memset(net->broadcast, -1, net->addr_len); + ret = register_netdev(net); if (ret) goto out; list_add_tail(&dev->dev_link, &fwnet_device_list); - dev_notice(&net->dev, "IPv4 over IEEE 1394 on card %s\n", + dev_notice(&net->dev, "IP over IEEE 1394 on card %s\n", dev_name(card->device)); have_dev: ret = fwnet_add_peer(dev, unit, device); if (ret && allocated_netdev) { unregister_netdev(net); list_del(&dev->dev_link); - } out: - if (ret && allocated_netdev) + fwnet_fifo_stop(dev); free_netdev(net); + } mutex_unlock(&fwnet_device_mutex); @@ -1592,22 +1557,14 @@ static int fwnet_remove(struct device *_dev) mutex_lock(&fwnet_device_mutex); net = dev->netdev; - if (net && peer->ip) - arp_invalidate(net, peer->ip); fwnet_remove_peer(peer, dev); if (list_empty(&dev->peer_list)) { 
unregister_netdev(net); - if (dev->local_fifo != FWNET_NO_FIFO_ADDR) - fw_core_remove_address_handler(&dev->handler); - if (dev->broadcast_rcv_context) { - fw_iso_context_stop(dev->broadcast_rcv_context); - fw_iso_buffer_destroy(&dev->broadcast_rcv_buffer, - dev->card); - fw_iso_context_destroy(dev->broadcast_rcv_context); - } + fwnet_fifo_stop(dev); + for (i = 0; dev->queued_datagrams && i < 5; i++) ssleep(1); WARN_ON(dev->queued_datagrams); @@ -1646,6 +1603,14 @@ static const struct ieee1394_device_id fwnet_id_table[] = { .specifier_id = IANA_SPECIFIER_ID, .version = RFC2734_SW_VERSION, }, +#if IS_ENABLED(CONFIG_IPV6) + { + .match_flags = IEEE1394_MATCH_SPECIFIER_ID | + IEEE1394_MATCH_VERSION, + .specifier_id = IANA_SPECIFIER_ID, + .version = RFC3146_SW_VERSION, + }, +#endif { } }; @@ -1683,6 +1648,30 @@ static struct fw_descriptor rfc2374_unit_directory = { .data = rfc2374_unit_directory_data }; +#if IS_ENABLED(CONFIG_IPV6) +static const u32 rfc3146_unit_directory_data[] = { + 0x00040000, /* directory_length */ + 0x1200005e, /* unit_specifier_id: IANA */ + 0x81000003, /* textual descriptor offset */ + 0x13000002, /* unit_sw_version: RFC 3146 */ + 0x81000005, /* textual descriptor offset */ + 0x00030000, /* descriptor_length */ + 0x00000000, /* text */ + 0x00000000, /* minimal ASCII, en */ + 0x49414e41, /* I A N A */ + 0x00030000, /* descriptor_length */ + 0x00000000, /* text */ + 0x00000000, /* minimal ASCII, en */ + 0x49507636, /* I P v 6 */ +}; + +static struct fw_descriptor rfc3146_unit_directory = { + .length = ARRAY_SIZE(rfc3146_unit_directory_data), + .key = (CSR_DIRECTORY | CSR_UNIT) << 24, + .data = rfc3146_unit_directory_data +}; +#endif + static int __init fwnet_init(void) { int err; @@ -1691,11 +1680,17 @@ static int __init fwnet_init(void) if (err) return err; +#if IS_ENABLED(CONFIG_IPV6) + err = fw_core_add_descriptor(&rfc3146_unit_directory); + if (err) + goto out; +#endif + fwnet_packet_task_cache = kmem_cache_create("packet_task", sizeof(struct 
fwnet_packet_task), 0, 0, NULL); if (!fwnet_packet_task_cache) { err = -ENOMEM; - goto out; + goto out2; } err = driver_register(&fwnet_driver.driver); @@ -1703,7 +1698,11 @@ static int __init fwnet_init(void) return 0; kmem_cache_destroy(fwnet_packet_task_cache); +out2: +#if IS_ENABLED(CONFIG_IPV6) + fw_core_remove_descriptor(&rfc3146_unit_directory); out: +#endif fw_core_remove_descriptor(&rfc2374_unit_directory); return err; @@ -1714,11 +1713,14 @@ static void __exit fwnet_cleanup(void) { driver_unregister(&fwnet_driver.driver); kmem_cache_destroy(fwnet_packet_task_cache); +#if IS_ENABLED(CONFIG_IPV6) + fw_core_remove_descriptor(&rfc3146_unit_directory); +#endif fw_core_remove_descriptor(&rfc2374_unit_directory); } module_exit(fwnet_cleanup); MODULE_AUTHOR("Jay Fenlason <fenlason@redhat.com>"); -MODULE_DESCRIPTION("IPv4 over IEEE1394 as per RFC 2734"); +MODULE_DESCRIPTION("IP over IEEE1394 as per RFC 2734/3146"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(ieee1394, fwnet_id_table); diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 9b00072a020f..3e532002e4d1 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -39,6 +39,7 @@ config FIRMWARE_MEMMAP config EFI_VARS tristate "EFI Variable Support via sysfs" depends on EFI + select UCS2_STRING default n help If you say Y here, you are able to get EFI (Extensible Firmware @@ -53,6 +54,24 @@ config EFI_VARS Subsequent efibootmgr releases may be found at: <http://linux.dell.com/efibootmgr> +config EFI_VARS_PSTORE + bool "Register efivars backend for pstore" + depends on EFI_VARS && PSTORE + default y + help + Say Y here to enable use efivars as a backend to pstore. This + will allow writing console messages, crash dumps, or anything + else supported by pstore to EFI variables. 
+ +config EFI_VARS_PSTORE_DEFAULT_DISABLE + bool "Disable using efivars as a pstore backend by default" + depends on EFI_VARS_PSTORE + default n + help + Saying Y here will disable the use of efivars as a storage + backend for pstore by default. This setting can be overridden + using the efivars module's pstore_disable parameter. + config EFI_PCDP bool "Console device selection via EFI PCDP or HCDP table" depends on ACPI && EFI && IA64 diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 982f1f5f5742..4cd392dbf115 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -442,7 +442,6 @@ static int __init dmi_present(const char __iomem *p) static int __init smbios_present(const char __iomem *p) { u8 buf[32]; - int offset = 0; memcpy_fromio(buf, p, 32); if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) { @@ -461,9 +460,9 @@ static int __init smbios_present(const char __iomem *p) dmi_ver = 0x0206; break; } - offset = 16; + return memcmp(p + 16, "_DMI_", 5) || dmi_present(p + 16); } - return dmi_present(buf + offset); + return 1; } void __init dmi_scan_machine(void) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 7320bf891706..182ce9471175 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -80,6 +80,7 @@ #include <linux/slab.h> #include <linux/pstore.h> #include <linux/ctype.h> +#include <linux/ucs2_string.h> #include <linux/fs.h> #include <linux/ramfs.h> @@ -103,6 +104,11 @@ MODULE_VERSION(EFIVARS_VERSION); */ #define GUID_LEN 36 +static bool efivars_pstore_disable = + IS_ENABLED(CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE); + +module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644); + /* * The maximum size of VariableName + Data = 1024 * Therefore, it's reasonable to save that much @@ -165,51 +171,7 @@ efivar_create_sysfs_entry(struct efivars *efivars, static void efivar_update_sysfs_entries(struct work_struct *); static DECLARE_WORK(efivar_work, 
efivar_update_sysfs_entries); - -/* Return the number of unicode characters in data */ -static unsigned long -utf16_strnlen(efi_char16_t *s, size_t maxlength) -{ - unsigned long length = 0; - - while (*s++ != 0 && length < maxlength) - length++; - return length; -} - -static inline unsigned long -utf16_strlen(efi_char16_t *s) -{ - return utf16_strnlen(s, ~0UL); -} - -/* - * Return the number of bytes is the length of this string - * Note: this is NOT the same as the number of unicode characters - */ -static inline unsigned long -utf16_strsize(efi_char16_t *data, unsigned long maxlength) -{ - return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); -} - -static inline int -utf16_strncmp(const efi_char16_t *a, const efi_char16_t *b, size_t len) -{ - while (1) { - if (len == 0) - return 0; - if (*a < *b) - return -1; - if (*a > *b) - return 1; - if (*a == 0) /* implies *b == 0 */ - return 0; - a++; - b++; - len--; - } -} +static bool efivar_wq_enabled = true; static bool validate_device_path(struct efi_variable *var, int match, u8 *buffer, @@ -262,7 +224,7 @@ validate_load_option(struct efi_variable *var, int match, u8 *buffer, u16 filepathlength; int i, desclength = 0, namelen; - namelen = utf16_strnlen(var->VariableName, sizeof(var->VariableName)); + namelen = ucs2_strnlen(var->VariableName, sizeof(var->VariableName)); /* Either "Boot" or "Driver" followed by four digits of hex */ for (i = match; i < match+4; i++) { @@ -285,7 +247,7 @@ validate_load_option(struct efi_variable *var, int match, u8 *buffer, * There's no stored length for the description, so it has to be * found by hand */ - desclength = utf16_strsize((efi_char16_t *)(buffer + 6), len - 6) + 2; + desclength = ucs2_strsize((efi_char16_t *)(buffer + 6), len - 6) + 2; /* Each boot entry must have a descriptor */ if (!desclength) @@ -426,6 +388,32 @@ get_var_data(struct efivars *efivars, struct efi_variable *var) return status; } +static efi_status_t +check_var_size_locked(struct 
efivars *efivars, u32 attributes, + unsigned long size) +{ + const struct efivar_operations *fops = efivars->ops; + + if (!efivars->ops->query_variable_store) + return EFI_UNSUPPORTED; + + return fops->query_variable_store(attributes, size); +} + + +static efi_status_t +check_var_size(struct efivars *efivars, u32 attributes, unsigned long size) +{ + efi_status_t status; + unsigned long flags; + + spin_lock_irqsave(&efivars->lock, flags); + status = check_var_size_locked(efivars, attributes, size); + spin_unlock_irqrestore(&efivars->lock, flags); + + return status; +} + static ssize_t efivar_guid_read(struct efivar_entry *entry, char *buf) { @@ -547,11 +535,16 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) } spin_lock_irq(&efivars->lock); - status = efivars->ops->set_variable(new_var->VariableName, - &new_var->VendorGuid, - new_var->Attributes, - new_var->DataSize, - new_var->Data); + + status = check_var_size_locked(efivars, new_var->Attributes, + new_var->DataSize + ucs2_strsize(new_var->VariableName, 1024)); + + if (status == EFI_SUCCESS || status == EFI_UNSUPPORTED) + status = efivars->ops->set_variable(new_var->VariableName, + &new_var->VendorGuid, + new_var->Attributes, + new_var->DataSize, + new_var->Data); spin_unlock_irq(&efivars->lock); @@ -702,8 +695,7 @@ static ssize_t efivarfs_file_write(struct file *file, u32 attributes; struct inode *inode = file->f_mapping->host; unsigned long datasize = count - sizeof(attributes); - unsigned long newdatasize; - u64 storage_size, remaining_size, max_size; + unsigned long newdatasize, varsize; ssize_t bytes = 0; if (count < sizeof(attributes)) @@ -722,28 +714,18 @@ static ssize_t efivarfs_file_write(struct file *file, * amounts of memory. Pick a default size of 64K if * QueryVariableInfo() isn't supported by the firmware. 
*/ - spin_lock_irq(&efivars->lock); - if (!efivars->ops->query_variable_info) - status = EFI_UNSUPPORTED; - else { - const struct efivar_operations *fops = efivars->ops; - status = fops->query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); - } - - spin_unlock_irq(&efivars->lock); + varsize = datasize + ucs2_strsize(var->var.VariableName, 1024); + status = check_var_size(efivars, attributes, varsize); if (status != EFI_SUCCESS) { if (status != EFI_UNSUPPORTED) return efi_status_to_err(status); - remaining_size = 65536; + if (datasize > 65536) + return -ENOSPC; } - if (datasize > remaining_size) - return -ENOSPC; - data = kmalloc(datasize, GFP_KERNEL); if (!data) return -ENOMEM; @@ -765,6 +747,19 @@ static ssize_t efivarfs_file_write(struct file *file, */ spin_lock_irq(&efivars->lock); + /* + * Ensure that the available space hasn't shrunk below the safe level + */ + + status = check_var_size_locked(efivars, attributes, varsize); + + if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) { + spin_unlock_irq(&efivars->lock); + kfree(data); + + return efi_status_to_err(status); + } + status = efivars->ops->set_variable(var->var.VariableName, &var->var.VendorGuid, attributes, datasize, @@ -929,8 +924,8 @@ static bool efivarfs_valid_name(const char *str, int len) if (len < GUID_LEN + 2) return false; - /* GUID should be right after the first '-' */ - if (s - 1 != strchr(str, '-')) + /* GUID must be preceded by a '-' */ + if (*(s - 1) != '-') return false; /* @@ -1118,15 +1113,22 @@ static struct dentry_operations efivarfs_d_ops = { static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) { + struct dentry *d; struct qstr q; + int err; q.name = name; q.len = strlen(name); - if (efivarfs_d_hash(NULL, NULL, &q)) - return NULL; + err = efivarfs_d_hash(NULL, NULL, &q); + if (err) + return ERR_PTR(err); + + d = d_alloc(parent, &q); + if (d) + return d; - return d_alloc(parent, &q); + return ERR_PTR(-ENOMEM); } static int 
efivarfs_fill_super(struct super_block *sb, void *data, int silent) @@ -1136,6 +1138,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) struct efivar_entry *entry, *n; struct efivars *efivars = &__efivars; char *name; + int err = -ENOMEM; efivarfs_sb = sb; @@ -1164,7 +1167,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) inode = NULL; - len = utf16_strlen(entry->var.VariableName); + len = ucs2_strlen(entry->var.VariableName); /* name, plus '-', plus GUID, plus NUL*/ name = kmalloc(len + 1 + GUID_LEN + 1, GFP_ATOMIC); @@ -1186,8 +1189,10 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) goto fail_name; dentry = efivarfs_alloc_dentry(root, name); - if (!dentry) + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); goto fail_inode; + } /* copied by the above to local storage in the dentry. */ kfree(name); @@ -1214,7 +1219,7 @@ fail_inode: fail_name: kfree(name); fail: - return -ENOMEM; + return err; } static struct dentry *efivarfs_mount(struct file_system_type *fs_type, @@ -1234,6 +1239,7 @@ static struct file_system_type efivarfs_type = { .mount = efivarfs_mount, .kill_sb = efivarfs_kill_sb, }; +MODULE_ALIAS_FS("efivarfs"); /* * Handle negative dentry. 
@@ -1253,9 +1259,7 @@ static const struct inode_operations efivarfs_dir_inode_operations = { .create = efivarfs_create, }; -static struct pstore_info efi_pstore_info; - -#ifdef CONFIG_PSTORE +#ifdef CONFIG_EFI_VARS_PSTORE static int efi_pstore_open(struct pstore_info *psi) { @@ -1345,7 +1349,6 @@ static int efi_pstore_write(enum pstore_type_id type, efi_guid_t vendor = LINUX_EFI_CRASH_GUID; struct efivars *efivars = psi->data; int i, ret = 0; - u64 storage_space, remaining_space, max_variable_size; efi_status_t status = EFI_NOT_FOUND; unsigned long flags; @@ -1365,11 +1368,11 @@ static int efi_pstore_write(enum pstore_type_id type, * size: a size of logging data * DUMP_NAME_LEN * 2: a maximum size of variable name */ - status = efivars->ops->query_variable_info(PSTORE_EFI_ATTRIBUTES, - &storage_space, - &remaining_space, - &max_variable_size); - if (status || remaining_space < size + DUMP_NAME_LEN * 2) { + + status = check_var_size_locked(efivars, PSTORE_EFI_ATTRIBUTES, + size + DUMP_NAME_LEN * 2); + + if (status) { spin_unlock_irqrestore(&efivars->lock, flags); *id = part; return -ENOSPC; @@ -1386,7 +1389,7 @@ static int efi_pstore_write(enum pstore_type_id type, spin_unlock_irqrestore(&efivars->lock, flags); - if (reason == KMSG_DUMP_OOPS) + if (reason == KMSG_DUMP_OOPS && efivar_wq_enabled) schedule_work(&efivar_work); *id = part; @@ -1422,8 +1425,8 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, if (efi_guidcmp(entry->var.VendorGuid, vendor)) continue; - if (utf16_strncmp(entry->var.VariableName, efi_name, - utf16_strlen(efi_name))) { + if (ucs2_strncmp(entry->var.VariableName, efi_name, + ucs2_strlen(efi_name))) { /* * Check if an old format, * which doesn't support holding @@ -1435,8 +1438,8 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, for (i = 0; i < DUMP_NAME_LEN; i++) efi_name_old[i] = name_old[i]; - if (utf16_strncmp(entry->var.VariableName, efi_name_old, - utf16_strlen(efi_name_old))) + if 
(ucs2_strncmp(entry->var.VariableName, efi_name_old, + ucs2_strlen(efi_name_old))) continue; } @@ -1459,38 +1462,6 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, return 0; } -#else -static int efi_pstore_open(struct pstore_info *psi) -{ - return 0; -} - -static int efi_pstore_close(struct pstore_info *psi) -{ - return 0; -} - -static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, int *count, - struct timespec *timespec, - char **buf, struct pstore_info *psi) -{ - return -1; -} - -static int efi_pstore_write(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, int count, size_t size, - struct pstore_info *psi) -{ - return 0; -} - -static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, - struct timespec time, struct pstore_info *psi) -{ - return 0; -} -#endif static struct pstore_info efi_pstore_info = { .owner = THIS_MODULE, @@ -1502,6 +1473,24 @@ static struct pstore_info efi_pstore_info = { .erase = efi_pstore_erase, }; +static void efivar_pstore_register(struct efivars *efivars) +{ + efivars->efi_pstore_info = efi_pstore_info; + efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL); + if (efivars->efi_pstore_info.buf) { + efivars->efi_pstore_info.bufsize = 1024; + efivars->efi_pstore_info.data = efivars; + spin_lock_init(&efivars->efi_pstore_info.buf_lock); + pstore_register(&efivars->efi_pstore_info); + } +} +#else +static void efivar_pstore_register(struct efivars *efivars) +{ + return; +} +#endif + static ssize_t efivar_create(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) @@ -1528,8 +1517,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, * Does this variable already exist? 
*/ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf16_strsize(new_var->VariableName, 1024); + strsize1 = ucs2_strsize(search_efivar->var.VariableName, 1024); + strsize2 = ucs2_strsize(new_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), new_var->VariableName, strsize1) && @@ -1544,6 +1533,14 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, return -EINVAL; } + status = check_var_size_locked(efivars, new_var->Attributes, + new_var->DataSize + ucs2_strsize(new_var->VariableName, 1024)); + + if (status && status != EFI_UNSUPPORTED) { + spin_unlock_irq(&efivars->lock); + return efi_status_to_err(status); + } + /* now *really* create the variable via EFI */ status = efivars->ops->set_variable(new_var->VariableName, &new_var->VendorGuid, @@ -1561,7 +1558,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, /* Create the entry in sysfs. Locking is not required here */ status = efivar_create_sysfs_entry(efivars, - utf16_strsize(new_var->VariableName, + ucs2_strsize(new_var->VariableName, 1024), new_var->VariableName, &new_var->VendorGuid); @@ -1591,8 +1588,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, * Does this variable already exist? 
*/ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf16_strsize(del_var->VariableName, 1024); + strsize1 = ucs2_strsize(search_efivar->var.VariableName, 1024); + strsize2 = ucs2_strsize(del_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), del_var->VariableName, strsize1) && @@ -1638,9 +1635,9 @@ static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor) unsigned long strsize1, strsize2; bool found = false; - strsize1 = utf16_strsize(variable_name, 1024); + strsize1 = ucs2_strsize(variable_name, 1024); list_for_each_entry_safe(entry, n, &efivars->list, list) { - strsize2 = utf16_strsize(entry->var.VariableName, 1024); + strsize2 = ucs2_strsize(entry->var.VariableName, 1024); if (strsize1 == strsize2 && !memcmp(variable_name, &(entry->var.VariableName), strsize2) && @@ -1653,6 +1650,31 @@ static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor) return found; } +/* + * Returns the size of variable_name, in bytes, including the + * terminating NULL character, or variable_name_size if no NULL + * character is found among the first variable_name_size bytes. + */ +static unsigned long var_name_strnsize(efi_char16_t *variable_name, + unsigned long variable_name_size) +{ + unsigned long len; + efi_char16_t c; + + /* + * The variable name is, by definition, a NULL-terminated + * string, so make absolutely sure that variable_name_size is + * the value we expect it to be. If not, return the real size. 
+ */ + for (len = 2; len <= variable_name_size; len += sizeof(c)) { + c = variable_name[(len / sizeof(c)) - 1]; + if (!c) + break; + } + + return min(len, variable_name_size); +} + static void efivar_update_sysfs_entries(struct work_struct *work) { struct efivars *efivars = &__efivars; @@ -1693,10 +1715,13 @@ static void efivar_update_sysfs_entries(struct work_struct *work) if (!found) { kfree(variable_name); break; - } else + } else { + variable_name_size = var_name_strnsize(variable_name, + variable_name_size); efivar_create_sysfs_entry(efivars, variable_name_size, variable_name, &vendor); + } } } @@ -1895,6 +1920,35 @@ void unregister_efivars(struct efivars *efivars) } EXPORT_SYMBOL_GPL(unregister_efivars); +/* + * Print a warning when duplicate EFI variables are encountered and + * disable the sysfs workqueue since the firmware is buggy. + */ +static void dup_variable_bug(efi_char16_t *s16, efi_guid_t *vendor_guid, + unsigned long len16) +{ + size_t i, len8 = len16 / sizeof(efi_char16_t); + char *s8; + + /* + * Disable the workqueue since the algorithm it uses for + * detecting new variables won't work with this buggy + * implementation of GetNextVariableName(). + */ + efivar_wq_enabled = false; + + s8 = kzalloc(len8, GFP_KERNEL); + if (!s8) + return; + + for (i = 0; i < len8; i++) + s8[i] = s16[i]; + + printk(KERN_WARNING "efivars: duplicate variable: %s-%pUl\n", + s8, vendor_guid); + kfree(s8); +} + int register_efivars(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *parent_kobj) @@ -1943,6 +1997,24 @@ int register_efivars(struct efivars *efivars, &vendor_guid); switch (status) { case EFI_SUCCESS: + variable_name_size = var_name_strnsize(variable_name, + variable_name_size); + + /* + * Some firmware implementations return the + * same variable name on multiple calls to + * get_next_variable(). 
Terminate the loop + * immediately as there is no guarantee that + * we'll ever see a different variable name, + * and may end up looping here forever. + */ + if (variable_is_present(variable_name, &vendor_guid)) { + dup_variable_bug(variable_name, &vendor_guid, + variable_name_size); + status = EFI_NOT_FOUND; + break; + } + efivar_create_sysfs_entry(efivars, variable_name_size, variable_name, @@ -1962,15 +2034,8 @@ int register_efivars(struct efivars *efivars, if (error) unregister_efivars(efivars); - efivars->efi_pstore_info = efi_pstore_info; - - efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL); - if (efivars->efi_pstore_info.buf) { - efivars->efi_pstore_info.bufsize = 1024; - efivars->efi_pstore_info.data = efivars; - spin_lock_init(&efivars->efi_pstore_info.buf_lock); - pstore_register(&efivars->efi_pstore_info); - } + if (!efivars_pstore_disable) + efivar_pstore_register(efivars); register_filesystem(&efivarfs_type); @@ -2010,7 +2075,7 @@ efivars_init(void) ops.get_variable = efi.get_variable; ops.set_variable = efi.set_variable; ops.get_next_variable = efi.get_next_variable; - ops.query_variable_info = efi.query_variable_info; + ops.query_variable_store = efi_query_variable_store; error = register_efivars(&__efivars, &ops, efi_kobj); if (error) diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index f9dbd503fc40..de3c317bd3e2 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -214,7 +214,7 @@ static int ichx_gpio_request(struct gpio_chip *chip, unsigned nr) * If it can't be trusted, assume that the pin can be used as a GPIO. */ if (ichx_priv.desc->use_sel_ignore[nr / 32] & (1 << (nr & 0x1f))) - return 1; + return 0; return ichx_read_bit(GPIO_USE_SEL, nr) ? 
0 : -ENODEV; } diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 7472182967ce..61a6fde6c089 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -42,6 +42,7 @@ #include <linux/io.h> #include <linux/of_irq.h> #include <linux/of_device.h> +#include <linux/clk.h> #include <linux/pinctrl/consumer.h> /* @@ -496,6 +497,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) struct resource *res; struct irq_chip_generic *gc; struct irq_chip_type *ct; + struct clk *clk; unsigned int ngpios; int soc_variant; int i, cpu, id; @@ -529,6 +531,11 @@ static int mvebu_gpio_probe(struct platform_device *pdev) return id; } + clk = devm_clk_get(&pdev->dev, NULL); + /* Not all SoCs require a clock.*/ + if (!IS_ERR(clk)) + clk_prepare_enable(clk); + mvchip->soc_variant = soc_variant; mvchip->chip.label = dev_name(&pdev->dev); mvchip->chip.dev = &pdev->dev; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 24059462c87f..9391cf16e990 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -575,7 +575,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, chip->gpio_chip.ngpio, irq_base, &pca953x_irq_simple_ops, - NULL); + chip); if (!chip->domain) return -ENODEV; diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 770476a9da87..3ce5bc38ac31 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -307,11 +307,15 @@ static const struct irq_domain_ops stmpe_gpio_irq_simple_ops = { .xlate = irq_domain_xlate_twocell, }; -static int stmpe_gpio_irq_init(struct stmpe_gpio *stmpe_gpio) +static int stmpe_gpio_irq_init(struct stmpe_gpio *stmpe_gpio, + struct device_node *np) { - int base = stmpe_gpio->irq_base; + int base = 0; - stmpe_gpio->domain = irq_domain_add_simple(NULL, + if (!np) + base = stmpe_gpio->irq_base; + + stmpe_gpio->domain = irq_domain_add_simple(np, stmpe_gpio->chip.ngpio, base, &stmpe_gpio_irq_simple_ops, stmpe_gpio); if 
(!stmpe_gpio->domain) { @@ -346,6 +350,9 @@ static int stmpe_gpio_probe(struct platform_device *pdev) stmpe_gpio->chip = template_chip; stmpe_gpio->chip.ngpio = stmpe->num_gpios; stmpe_gpio->chip.dev = &pdev->dev; +#ifdef CONFIG_OF + stmpe_gpio->chip.of_node = np; +#endif stmpe_gpio->chip.base = pdata ? pdata->gpio_base : -1; if (pdata) @@ -366,7 +373,7 @@ static int stmpe_gpio_probe(struct platform_device *pdev) goto out_free; if (irq >= 0) { - ret = stmpe_gpio_irq_init(stmpe_gpio); + ret = stmpe_gpio_irq_init(stmpe_gpio, np); if (ret) goto out_disable; diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index a71a54a3e3f7..5150df6cba08 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -193,7 +193,7 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) if (!np) return; - do { + for (;; index++) { ret = of_parse_phandle_with_args(np, "gpio-ranges", "#gpio-range-cells", index, &pinspec); if (ret) @@ -222,8 +222,7 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) if (ret) break; - - } while (index++); + } } #else diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 792c3e3795ca..dd64a06dc5b4 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2326,7 +2326,6 @@ int drm_mode_addfb(struct drm_device *dev, fb = dev->mode_config.funcs->fb_create(dev, file_priv, &r); if (IS_ERR(fb)) { DRM_DEBUG_KMS("could not create framebuffer\n"); - drm_modeset_unlock_all(dev); return PTR_ERR(fb); } @@ -2506,7 +2505,6 @@ int drm_mode_addfb2(struct drm_device *dev, fb = dev->mode_config.funcs->fb_create(dev, file_priv, r); if (IS_ERR(fb)) { DRM_DEBUG_KMS("could not create framebuffer\n"); - drm_modeset_unlock_all(dev); return PTR_ERR(fb); } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c194f4e680ad..e2acfdbf7d3c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1634,7 +1634,7 @@ static struct drm_display_mode 
*drm_mode_detailed(struct drm_device *dev, unsigned vblank = (pt->vactive_vblank_hi & 0xf) << 8 | pt->vblank_lo; unsigned hsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc0) << 2 | pt->hsync_offset_lo; unsigned hsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0x30) << 4 | pt->hsync_pulse_width_lo; - unsigned vsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc) >> 2 | pt->vsync_offset_pulse_width_lo >> 4; + unsigned vsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc) << 2 | pt->vsync_offset_pulse_width_lo >> 4; unsigned vsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0x3) << 4 | (pt->vsync_offset_pulse_width_lo & 0xf); /* ignore tiny modes */ @@ -1715,6 +1715,7 @@ set_size: } mode->type = DRM_MODE_TYPE_DRIVER; + mode->vrefresh = drm_mode_vrefresh(mode); drm_mode_set_name(mode); return mode; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 59d6b9bf204b..892ff9f95975 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1544,10 +1544,10 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) if (!fb_helper->fb) return 0; - drm_modeset_lock_all(dev); + mutex_lock(&fb_helper->dev->mode_config.mutex); if (!drm_fb_helper_is_bound(fb_helper)) { fb_helper->delayed_hotplug = true; - drm_modeset_unlock_all(dev); + mutex_unlock(&fb_helper->dev->mode_config.mutex); return 0; } DRM_DEBUG_KMS("\n"); @@ -1558,9 +1558,11 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) count = drm_fb_helper_probe_connector_modes(fb_helper, max_width, max_height); + mutex_unlock(&fb_helper->dev->mode_config.mutex); + + drm_modeset_lock_all(dev); drm_setup_crtcs(fb_helper); drm_modeset_unlock_all(dev); - drm_fb_helper_set_par(fb_helper->fbdev); return 0; diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 13fdcd10a605..429e07d0b0f1 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -123,6 +123,7 @@ int 
drm_open(struct inode *inode, struct file *filp) int retcode = 0; int need_setup = 0; struct address_space *old_mapping; + struct address_space *old_imapping; minor = idr_find(&drm_minors_idr, minor_id); if (!minor) @@ -137,6 +138,7 @@ int drm_open(struct inode *inode, struct file *filp) if (!dev->open_count++) need_setup = 1; mutex_lock(&dev->struct_mutex); + old_imapping = inode->i_mapping; old_mapping = dev->dev_mapping; if (old_mapping == NULL) dev->dev_mapping = &inode->i_data; @@ -159,8 +161,8 @@ int drm_open(struct inode *inode, struct file *filp) err_undo: mutex_lock(&dev->struct_mutex); - filp->f_mapping = old_mapping; - inode->i_mapping = old_mapping; + filp->f_mapping = old_imapping; + inode->i_mapping = old_imapping; iput(container_of(dev->dev_mapping, struct inode, i_data)); dev->dev_mapping = old_mapping; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 36493ce71f9a..98cc14725ba9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -38,11 +38,12 @@ /* position control register for hardware window 0, 2 ~ 4.*/ #define VIDOSD_A(win) (VIDOSD_BASE + 0x00 + (win) * 16) #define VIDOSD_B(win) (VIDOSD_BASE + 0x04 + (win) * 16) -/* size control register for hardware window 0. */ -#define VIDOSD_C_SIZE_W0 (VIDOSD_BASE + 0x08) -/* alpha control register for hardware window 1 ~ 4. */ -#define VIDOSD_C(win) (VIDOSD_BASE + 0x18 + (win) * 16) -/* size control register for hardware window 1 ~ 4. */ +/* + * size control register for hardware windows 0 and alpha control register + * for hardware windows 1 ~ 4 + */ +#define VIDOSD_C(win) (VIDOSD_BASE + 0x08 + (win) * 16) +/* size control register for hardware windows 1 ~ 2. 
*/ #define VIDOSD_D(win) (VIDOSD_BASE + 0x0C + (win) * 16) #define VIDWx_BUF_START(win, buf) (VIDW_BUF_START(buf) + (win) * 8) @@ -50,9 +51,9 @@ #define VIDWx_BUF_SIZE(win, buf) (VIDW_BUF_SIZE(buf) + (win) * 4) /* color key control register for hardware window 1 ~ 4. */ -#define WKEYCON0_BASE(x) ((WKEYCON0 + 0x140) + (x * 8)) +#define WKEYCON0_BASE(x) ((WKEYCON0 + 0x140) + ((x - 1) * 8)) /* color key value register for hardware window 1 ~ 4. */ -#define WKEYCON1_BASE(x) ((WKEYCON1 + 0x140) + (x * 8)) +#define WKEYCON1_BASE(x) ((WKEYCON1 + 0x140) + ((x - 1) * 8)) /* FIMD has totally five hardware windows. */ #define WINDOWS_NR 5 @@ -109,9 +110,9 @@ struct fimd_context { #ifdef CONFIG_OF static const struct of_device_id fimd_driver_dt_match[] = { - { .compatible = "samsung,exynos4-fimd", + { .compatible = "samsung,exynos4210-fimd", .data = &exynos4_fimd_driver_data }, - { .compatible = "samsung,exynos5-fimd", + { .compatible = "samsung,exynos5250-fimd", .data = &exynos5_fimd_driver_data }, {}, }; @@ -581,7 +582,7 @@ static void fimd_win_commit(struct device *dev, int zpos) if (win != 3 && win != 4) { u32 offset = VIDOSD_D(win); if (win == 0) - offset = VIDOSD_C_SIZE_W0; + offset = VIDOSD_C(win); val = win_data->ovl_width * win_data->ovl_height; writel(val, ctx->regs + offset); diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 3b0da0378acf..47a493c8a71f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -48,8 +48,14 @@ /* registers for base address */ #define G2D_SRC_BASE_ADDR 0x0304 +#define G2D_SRC_COLOR_MODE 0x030C +#define G2D_SRC_LEFT_TOP 0x0310 +#define G2D_SRC_RIGHT_BOTTOM 0x0314 #define G2D_SRC_PLANE2_BASE_ADDR 0x0318 #define G2D_DST_BASE_ADDR 0x0404 +#define G2D_DST_COLOR_MODE 0x040C +#define G2D_DST_LEFT_TOP 0x0410 +#define G2D_DST_RIGHT_BOTTOM 0x0414 #define G2D_DST_PLANE2_BASE_ADDR 0x0418 #define G2D_PAT_BASE_ADDR 0x0500 #define G2D_MSK_BASE_ADDR 0x0520 @@ 
-82,7 +88,7 @@ #define G2D_DMA_LIST_DONE_COUNT_OFFSET 17 /* G2D_DMA_HOLD_CMD */ -#define G2D_USET_HOLD (1 << 2) +#define G2D_USER_HOLD (1 << 2) #define G2D_LIST_HOLD (1 << 1) #define G2D_BITBLT_HOLD (1 << 0) @@ -91,13 +97,27 @@ #define G2D_START_NHOLT (1 << 1) #define G2D_START_BITBLT (1 << 0) +/* buffer color format */ +#define G2D_FMT_XRGB8888 0 +#define G2D_FMT_ARGB8888 1 +#define G2D_FMT_RGB565 2 +#define G2D_FMT_XRGB1555 3 +#define G2D_FMT_ARGB1555 4 +#define G2D_FMT_XRGB4444 5 +#define G2D_FMT_ARGB4444 6 +#define G2D_FMT_PACKED_RGB888 7 +#define G2D_FMT_A8 11 +#define G2D_FMT_L8 12 + +/* buffer valid length */ +#define G2D_LEN_MIN 1 +#define G2D_LEN_MAX 8000 + #define G2D_CMDLIST_SIZE (PAGE_SIZE / 4) #define G2D_CMDLIST_NUM 64 #define G2D_CMDLIST_POOL_SIZE (G2D_CMDLIST_SIZE * G2D_CMDLIST_NUM) #define G2D_CMDLIST_DATA_NUM (G2D_CMDLIST_SIZE / sizeof(u32) - 2) -#define MAX_BUF_ADDR_NR 6 - /* maximum buffer pool size of userptr is 64MB as default */ #define MAX_POOL (64 * 1024 * 1024) @@ -106,6 +126,17 @@ enum { BUF_TYPE_USERPTR, }; +enum g2d_reg_type { + REG_TYPE_NONE = -1, + REG_TYPE_SRC, + REG_TYPE_SRC_PLANE2, + REG_TYPE_DST, + REG_TYPE_DST_PLANE2, + REG_TYPE_PAT, + REG_TYPE_MSK, + MAX_REG_TYPE_NR +}; + /* cmdlist data structure */ struct g2d_cmdlist { u32 head; @@ -113,6 +144,42 @@ struct g2d_cmdlist { u32 last; /* last data offset */ }; +/* + * A structure of buffer description + * + * @format: color format + * @left_x: the x coordinates of left top corner + * @top_y: the y coordinates of left top corner + * @right_x: the x coordinates of right bottom corner + * @bottom_y: the y coordinates of right bottom corner + * + */ +struct g2d_buf_desc { + unsigned int format; + unsigned int left_x; + unsigned int top_y; + unsigned int right_x; + unsigned int bottom_y; +}; + +/* + * A structure of buffer information + * + * @map_nr: manages the number of mapped buffers + * @reg_types: stores regitster type in the order of requested command + * @handles: stores buffer 
handle in its reg_type position + * @types: stores buffer type in its reg_type position + * @descs: stores buffer description in its reg_type position + * + */ +struct g2d_buf_info { + unsigned int map_nr; + enum g2d_reg_type reg_types[MAX_REG_TYPE_NR]; + unsigned long handles[MAX_REG_TYPE_NR]; + unsigned int types[MAX_REG_TYPE_NR]; + struct g2d_buf_desc descs[MAX_REG_TYPE_NR]; +}; + struct drm_exynos_pending_g2d_event { struct drm_pending_event base; struct drm_exynos_g2d_event event; @@ -131,14 +198,11 @@ struct g2d_cmdlist_userptr { bool in_pool; bool out_of_list; }; - struct g2d_cmdlist_node { struct list_head list; struct g2d_cmdlist *cmdlist; - unsigned int map_nr; - unsigned long handles[MAX_BUF_ADDR_NR]; - unsigned int obj_type[MAX_BUF_ADDR_NR]; dma_addr_t dma_addr; + struct g2d_buf_info buf_info; struct drm_exynos_pending_g2d_event *event; }; @@ -188,6 +252,7 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) struct exynos_drm_subdrv *subdrv = &g2d->subdrv; int nr; int ret; + struct g2d_buf_info *buf_info; init_dma_attrs(&g2d->cmdlist_dma_attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &g2d->cmdlist_dma_attrs); @@ -209,11 +274,17 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) } for (nr = 0; nr < G2D_CMDLIST_NUM; nr++) { + unsigned int i; + node[nr].cmdlist = g2d->cmdlist_pool_virt + nr * G2D_CMDLIST_SIZE; node[nr].dma_addr = g2d->cmdlist_pool + nr * G2D_CMDLIST_SIZE; + buf_info = &node[nr].buf_info; + for (i = 0; i < MAX_REG_TYPE_NR; i++) + buf_info->reg_types[i] = REG_TYPE_NONE; + list_add_tail(&node[nr].list, &g2d->free_cmdlist); } @@ -450,7 +521,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, DMA_BIDIRECTIONAL); if (ret < 0) { DRM_ERROR("failed to map sgt with dma region.\n"); - goto err_free_sgt; + goto err_sg_free_table; } g2d_userptr->dma_addr = sgt->sgl[0].dma_address; @@ -467,8 +538,10 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, return &g2d_userptr->dma_addr; -err_free_sgt: 
+err_sg_free_table: sg_free_table(sgt); + +err_free_sgt: kfree(sgt); sgt = NULL; @@ -506,36 +579,172 @@ static void g2d_userptr_free_all(struct drm_device *drm_dev, g2d->current_pool = 0; } +static enum g2d_reg_type g2d_get_reg_type(int reg_offset) +{ + enum g2d_reg_type reg_type; + + switch (reg_offset) { + case G2D_SRC_BASE_ADDR: + case G2D_SRC_COLOR_MODE: + case G2D_SRC_LEFT_TOP: + case G2D_SRC_RIGHT_BOTTOM: + reg_type = REG_TYPE_SRC; + break; + case G2D_SRC_PLANE2_BASE_ADDR: + reg_type = REG_TYPE_SRC_PLANE2; + break; + case G2D_DST_BASE_ADDR: + case G2D_DST_COLOR_MODE: + case G2D_DST_LEFT_TOP: + case G2D_DST_RIGHT_BOTTOM: + reg_type = REG_TYPE_DST; + break; + case G2D_DST_PLANE2_BASE_ADDR: + reg_type = REG_TYPE_DST_PLANE2; + break; + case G2D_PAT_BASE_ADDR: + reg_type = REG_TYPE_PAT; + break; + case G2D_MSK_BASE_ADDR: + reg_type = REG_TYPE_MSK; + break; + default: + reg_type = REG_TYPE_NONE; + DRM_ERROR("Unknown register offset![%d]\n", reg_offset); + break; + }; + + return reg_type; +} + +static unsigned long g2d_get_buf_bpp(unsigned int format) +{ + unsigned long bpp; + + switch (format) { + case G2D_FMT_XRGB8888: + case G2D_FMT_ARGB8888: + bpp = 4; + break; + case G2D_FMT_RGB565: + case G2D_FMT_XRGB1555: + case G2D_FMT_ARGB1555: + case G2D_FMT_XRGB4444: + case G2D_FMT_ARGB4444: + bpp = 2; + break; + case G2D_FMT_PACKED_RGB888: + bpp = 3; + break; + default: + bpp = 1; + break; + } + + return bpp; +} + +static bool g2d_check_buf_desc_is_valid(struct g2d_buf_desc *buf_desc, + enum g2d_reg_type reg_type, + unsigned long size) +{ + unsigned int width, height; + unsigned long area; + + /* + * check source and destination buffers only. + * so the others are always valid. 
+ */ + if (reg_type != REG_TYPE_SRC && reg_type != REG_TYPE_DST) + return true; + + width = buf_desc->right_x - buf_desc->left_x; + if (width < G2D_LEN_MIN || width > G2D_LEN_MAX) { + DRM_ERROR("width[%u] is out of range!\n", width); + return false; + } + + height = buf_desc->bottom_y - buf_desc->top_y; + if (height < G2D_LEN_MIN || height > G2D_LEN_MAX) { + DRM_ERROR("height[%u] is out of range!\n", height); + return false; + } + + area = (unsigned long)width * (unsigned long)height * + g2d_get_buf_bpp(buf_desc->format); + if (area > size) { + DRM_ERROR("area[%lu] is out of range[%lu]!\n", area, size); + return false; + } + + return true; +} + static int g2d_map_cmdlist_gem(struct g2d_data *g2d, struct g2d_cmdlist_node *node, struct drm_device *drm_dev, struct drm_file *file) { struct g2d_cmdlist *cmdlist = node->cmdlist; + struct g2d_buf_info *buf_info = &node->buf_info; int offset; + int ret; int i; - for (i = 0; i < node->map_nr; i++) { + for (i = 0; i < buf_info->map_nr; i++) { + struct g2d_buf_desc *buf_desc; + enum g2d_reg_type reg_type; + int reg_pos; unsigned long handle; dma_addr_t *addr; - offset = cmdlist->last - (i * 2 + 1); - handle = cmdlist->data[offset]; + reg_pos = cmdlist->last - 2 * (i + 1); + + offset = cmdlist->data[reg_pos]; + handle = cmdlist->data[reg_pos + 1]; + + reg_type = g2d_get_reg_type(offset); + if (reg_type == REG_TYPE_NONE) { + ret = -EFAULT; + goto err; + } + + buf_desc = &buf_info->descs[reg_type]; + + if (buf_info->types[reg_type] == BUF_TYPE_GEM) { + unsigned long size; + + size = exynos_drm_gem_get_size(drm_dev, handle, file); + if (!size) { + ret = -EFAULT; + goto err; + } + + if (!g2d_check_buf_desc_is_valid(buf_desc, reg_type, + size)) { + ret = -EFAULT; + goto err; + } - if (node->obj_type[i] == BUF_TYPE_GEM) { addr = exynos_drm_gem_get_dma_addr(drm_dev, handle, file); if (IS_ERR(addr)) { - node->map_nr = i; - return -EFAULT; + ret = -EFAULT; + goto err; } } else { struct drm_exynos_g2d_userptr g2d_userptr; if 
(copy_from_user(&g2d_userptr, (void __user *)handle, sizeof(struct drm_exynos_g2d_userptr))) { - node->map_nr = i; - return -EFAULT; + ret = -EFAULT; + goto err; + } + + if (!g2d_check_buf_desc_is_valid(buf_desc, reg_type, + g2d_userptr.size)) { + ret = -EFAULT; + goto err; } addr = g2d_userptr_get_dma_addr(drm_dev, @@ -544,16 +753,21 @@ static int g2d_map_cmdlist_gem(struct g2d_data *g2d, file, &handle); if (IS_ERR(addr)) { - node->map_nr = i; - return -EFAULT; + ret = -EFAULT; + goto err; } } - cmdlist->data[offset] = *addr; - node->handles[i] = handle; + cmdlist->data[reg_pos + 1] = *addr; + buf_info->reg_types[i] = reg_type; + buf_info->handles[reg_type] = handle; } return 0; + +err: + buf_info->map_nr = i; + return ret; } static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, @@ -561,22 +775,33 @@ static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, struct drm_file *filp) { struct exynos_drm_subdrv *subdrv = &g2d->subdrv; + struct g2d_buf_info *buf_info = &node->buf_info; int i; - for (i = 0; i < node->map_nr; i++) { - unsigned long handle = node->handles[i]; + for (i = 0; i < buf_info->map_nr; i++) { + struct g2d_buf_desc *buf_desc; + enum g2d_reg_type reg_type; + unsigned long handle; + + reg_type = buf_info->reg_types[i]; + + buf_desc = &buf_info->descs[reg_type]; + handle = buf_info->handles[reg_type]; - if (node->obj_type[i] == BUF_TYPE_GEM) + if (buf_info->types[reg_type] == BUF_TYPE_GEM) exynos_drm_gem_put_dma_addr(subdrv->drm_dev, handle, filp); else g2d_userptr_put_dma_addr(subdrv->drm_dev, handle, false); - node->handles[i] = 0; + buf_info->reg_types[i] = REG_TYPE_NONE; + buf_info->handles[reg_type] = 0; + buf_info->types[reg_type] = 0; + memset(buf_desc, 0x00, sizeof(*buf_desc)); } - node->map_nr = 0; + buf_info->map_nr = 0; } static void g2d_dma_start(struct g2d_data *g2d, @@ -589,10 +814,6 @@ static void g2d_dma_start(struct g2d_data *g2d, pm_runtime_get_sync(g2d->dev); clk_enable(g2d->gate_clk); - /* interrupt enable */ - 
writel_relaxed(G2D_INTEN_ACF | G2D_INTEN_UCF | G2D_INTEN_GCF, - g2d->regs + G2D_INTEN); - writel_relaxed(node->dma_addr, g2d->regs + G2D_DMA_SFR_BASE_ADDR); writel_relaxed(G2D_DMA_START, g2d->regs + G2D_DMA_COMMAND); } @@ -643,7 +864,6 @@ static void g2d_runqueue_worker(struct work_struct *work) struct g2d_data *g2d = container_of(work, struct g2d_data, runqueue_work); - mutex_lock(&g2d->runqueue_mutex); clk_disable(g2d->gate_clk); pm_runtime_put_sync(g2d->dev); @@ -724,20 +944,14 @@ static int g2d_check_reg_offset(struct device *dev, int i; for (i = 0; i < nr; i++) { - index = cmdlist->last - 2 * (i + 1); + struct g2d_buf_info *buf_info = &node->buf_info; + struct g2d_buf_desc *buf_desc; + enum g2d_reg_type reg_type; + unsigned long value; - if (for_addr) { - /* check userptr buffer type. */ - reg_offset = (cmdlist->data[index] & - ~0x7fffffff) >> 31; - if (reg_offset) { - node->obj_type[i] = BUF_TYPE_USERPTR; - cmdlist->data[index] &= ~G2D_BUF_USERPTR; - } - } + index = cmdlist->last - 2 * (i + 1); reg_offset = cmdlist->data[index] & ~0xfffff000; - if (reg_offset < G2D_VALID_START || reg_offset > G2D_VALID_END) goto err; if (reg_offset % 4) @@ -753,8 +967,60 @@ static int g2d_check_reg_offset(struct device *dev, if (!for_addr) goto err; - if (node->obj_type[i] != BUF_TYPE_USERPTR) - node->obj_type[i] = BUF_TYPE_GEM; + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + goto err; + + /* check userptr buffer type. 
*/ + if ((cmdlist->data[index] & ~0x7fffffff) >> 31) { + buf_info->types[reg_type] = BUF_TYPE_USERPTR; + cmdlist->data[index] &= ~G2D_BUF_USERPTR; + } else + buf_info->types[reg_type] = BUF_TYPE_GEM; + break; + case G2D_SRC_COLOR_MODE: + case G2D_DST_COLOR_MODE: + if (for_addr) + goto err; + + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + goto err; + + buf_desc = &buf_info->descs[reg_type]; + value = cmdlist->data[index + 1]; + + buf_desc->format = value & 0xf; + break; + case G2D_SRC_LEFT_TOP: + case G2D_DST_LEFT_TOP: + if (for_addr) + goto err; + + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + goto err; + + buf_desc = &buf_info->descs[reg_type]; + value = cmdlist->data[index + 1]; + + buf_desc->left_x = value & 0x1fff; + buf_desc->top_y = (value & 0x1fff0000) >> 16; + break; + case G2D_SRC_RIGHT_BOTTOM: + case G2D_DST_RIGHT_BOTTOM: + if (for_addr) + goto err; + + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + goto err; + + buf_desc = &buf_info->descs[reg_type]; + value = cmdlist->data[index + 1]; + + buf_desc->right_x = value & 0x1fff; + buf_desc->bottom_y = (value & 0x1fff0000) >> 16; break; default: if (for_addr) @@ -860,9 +1126,23 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data, cmdlist->data[cmdlist->last++] = G2D_SRC_BASE_ADDR; cmdlist->data[cmdlist->last++] = 0; + /* + * 'LIST_HOLD' command should be set to the DMA_HOLD_CMD_REG + * and GCF bit should be set to INTEN register if user wants + * G2D interrupt event once current command list execution is + * finished. + * Otherwise only ACF bit should be set to INTEN register so + * that one interrupt is occured after all command lists + * have been completed. 
+ */ if (node->event) { + cmdlist->data[cmdlist->last++] = G2D_INTEN; + cmdlist->data[cmdlist->last++] = G2D_INTEN_ACF | G2D_INTEN_GCF; cmdlist->data[cmdlist->last++] = G2D_DMA_HOLD_CMD; cmdlist->data[cmdlist->last++] = G2D_LIST_HOLD; + } else { + cmdlist->data[cmdlist->last++] = G2D_INTEN; + cmdlist->data[cmdlist->last++] = G2D_INTEN_ACF; } /* Check size of cmdlist: last 2 is about G2D_BITBLT_START */ @@ -887,7 +1167,7 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data, if (ret < 0) goto err_free_event; - node->map_nr = req->cmd_buf_nr; + node->buf_info.map_nr = req->cmd_buf_nr; if (req->cmd_buf_nr) { struct drm_exynos_g2d_cmd *cmd_buf; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 67e17ce112b6..0e6fe000578c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -164,6 +164,27 @@ out: exynos_gem_obj = NULL; } +unsigned long exynos_drm_gem_get_size(struct drm_device *dev, + unsigned int gem_handle, + struct drm_file *file_priv) +{ + struct exynos_drm_gem_obj *exynos_gem_obj; + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(dev, file_priv, gem_handle); + if (!obj) { + DRM_ERROR("failed to lookup gem object.\n"); + return 0; + } + + exynos_gem_obj = to_exynos_gem_obj(obj); + + drm_gem_object_unreference_unlocked(obj); + + return exynos_gem_obj->buffer->size; +} + + struct exynos_drm_gem_obj *exynos_drm_gem_init(struct drm_device *dev, unsigned long size) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 35ebac47dc2b..468766bee450 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -130,6 +130,11 @@ int exynos_drm_gem_userptr_ioctl(struct drm_device *dev, void *data, int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +/* get buffer size to gem handle. 
*/ +unsigned long exynos_drm_gem_get_size(struct drm_device *dev, + unsigned int gem_handle, + struct drm_file *file_priv); + /* initialize gem object. */ int exynos_drm_gem_init_object(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 13ccbd4bcfaa..9504b0cd825a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -117,13 +117,12 @@ static struct edid *vidi_get_edid(struct device *dev, } edid_len = (1 + ctx->raw_edid->extensions) * EDID_LENGTH; - edid = kzalloc(edid_len, GFP_KERNEL); + edid = kmemdup(ctx->raw_edid, edid_len, GFP_KERNEL); if (!edid) { DRM_DEBUG_KMS("failed to allocate edid\n"); return ERR_PTR(-ENOMEM); } - memcpy(edid, ctx->raw_edid, edid_len); return edid; } @@ -563,12 +562,11 @@ int vidi_connection_ioctl(struct drm_device *drm_dev, void *data, return -EINVAL; } edid_len = (1 + raw_edid->extensions) * EDID_LENGTH; - ctx->raw_edid = kzalloc(edid_len, GFP_KERNEL); + ctx->raw_edid = kmemdup(raw_edid, edid_len, GFP_KERNEL); if (!ctx->raw_edid) { DRM_DEBUG_KMS("failed to allocate raw_edid.\n"); return -ENOMEM; } - memcpy(ctx->raw_edid, raw_edid, edid_len); } else { /* * with connection = 0, free raw_edid diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index e919aba29b3d..2f4f72f07047 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -818,7 +818,7 @@ static void mixer_win_disable(void *ctx, int win) mixer_ctx->win_data[win].enabled = false; } -int mixer_check_timing(void *ctx, struct fb_videomode *timing) +static int mixer_check_timing(void *ctx, struct fb_videomode *timing) { struct mixer_context *mixer_ctx = ctx; u32 w, h; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aae31489c893..7299ea45dd03 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ 
-103,7 +103,7 @@ static const char *cache_level_str(int type) static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { - seq_printf(m, "%p: %s%s %8zdKiB %02x %02x %d %d %d%s%s%s", + seq_printf(m, "%pK: %s%s %8zdKiB %02x %02x %d %d %d%s%s%s", &obj->base, get_pin_flag(obj), get_tiling_flag(obj), diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c5b8c81b9440..e9b57893db2b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -125,6 +125,11 @@ MODULE_PARM_DESC(preliminary_hw_support, "Enable Haswell and ValleyView Support. " "(default: false)"); +int i915_disable_power_well __read_mostly = 0; +module_param_named(disable_power_well, i915_disable_power_well, int, 0600); +MODULE_PARM_DESC(disable_power_well, + "Disable the power well when possible (default: false)"); + static struct drm_driver driver; extern int intel_agp_enabled; @@ -379,15 +384,15 @@ static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_VGA_DEVICE(0x0A06, &intel_haswell_m_info), /* ULT GT1 mobile */ INTEL_VGA_DEVICE(0x0A16, &intel_haswell_m_info), /* ULT GT2 mobile */ INTEL_VGA_DEVICE(0x0A26, &intel_haswell_m_info), /* ULT GT2 mobile */ - INTEL_VGA_DEVICE(0x0D12, &intel_haswell_d_info), /* CRW GT1 desktop */ + INTEL_VGA_DEVICE(0x0D02, &intel_haswell_d_info), /* CRW GT1 desktop */ + INTEL_VGA_DEVICE(0x0D12, &intel_haswell_d_info), /* CRW GT2 desktop */ INTEL_VGA_DEVICE(0x0D22, &intel_haswell_d_info), /* CRW GT2 desktop */ - INTEL_VGA_DEVICE(0x0D32, &intel_haswell_d_info), /* CRW GT2 desktop */ - INTEL_VGA_DEVICE(0x0D1A, &intel_haswell_d_info), /* CRW GT1 server */ + INTEL_VGA_DEVICE(0x0D0A, &intel_haswell_d_info), /* CRW GT1 server */ + INTEL_VGA_DEVICE(0x0D1A, &intel_haswell_d_info), /* CRW GT2 server */ INTEL_VGA_DEVICE(0x0D2A, &intel_haswell_d_info), /* CRW GT2 server */ - INTEL_VGA_DEVICE(0x0D3A, &intel_haswell_d_info), /* CRW GT2 server */ - INTEL_VGA_DEVICE(0x0D16, &intel_haswell_m_info), /* CRW 
GT1 mobile */ + INTEL_VGA_DEVICE(0x0D06, &intel_haswell_m_info), /* CRW GT1 mobile */ + INTEL_VGA_DEVICE(0x0D16, &intel_haswell_m_info), /* CRW GT2 mobile */ INTEL_VGA_DEVICE(0x0D26, &intel_haswell_m_info), /* CRW GT2 mobile */ - INTEL_VGA_DEVICE(0x0D36, &intel_haswell_m_info), /* CRW GT2 mobile */ INTEL_VGA_DEVICE(0x0f30, &intel_valleyview_m_info), INTEL_VGA_DEVICE(0x0157, &intel_valleyview_m_info), INTEL_VGA_DEVICE(0x0155, &intel_valleyview_d_info), @@ -495,6 +500,7 @@ static int i915_drm_freeze(struct drm_device *dev) intel_modeset_disable(dev); drm_irq_uninstall(dev); + dev_priv->enable_hotplug_processing = false; } i915_save_state(dev); @@ -568,10 +574,20 @@ static int __i915_drm_thaw(struct drm_device *dev) error = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); + /* We need working interrupts for modeset enabling ... */ + drm_irq_install(dev); + intel_modeset_init_hw(dev); intel_modeset_setup_hw_state(dev, false); - drm_irq_install(dev); + + /* + * ... but also need to make sure that hotplug processing + * doesn't cause havoc. Like in the driver load code we don't + * bother with the tiny race here where we might loose hotplug + * notifications. 
+ * */ intel_hpd_init(dev); + dev_priv->enable_hotplug_processing = true; } intel_opregion_init(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e95337c97459..01769e2a9953 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1398,6 +1398,7 @@ extern int i915_enable_fbc __read_mostly; extern bool i915_enable_hangcheck __read_mostly; extern int i915_enable_ppgtt __read_mostly; extern unsigned int i915_preliminary_hw_support __read_mostly; +extern int i915_disable_power_well __read_mostly; extern int i915_suspend(struct drm_device *dev, pm_message_t state); extern int i915_resume(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2f2daebd0eef..9a48e1a2d417 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -57,7 +57,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args) if (eb == NULL) { int size = args->buffer_count; int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; - BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); + BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head)); while (count > 2*size) count >>= 1; eb = kzalloc(count*sizeof(struct hlist_head) + @@ -732,6 +732,8 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count) { int i; + int relocs_total = 0; + int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry); for (i = 0; i < count; i++) { char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr; @@ -740,10 +742,13 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) return -EINVAL; - /* First check for malicious input causing overflow */ - if (exec[i].relocation_count > - INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) + /* First check for malicious input causing overflow in + * the worst case where we need to 
allocate the entire + * relocation tree as a single array. + */ + if (exec[i].relocation_count > relocs_max - relocs_total) return -EINVAL; + relocs_total += exec[i].relocation_count; length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 2cd97d1cc920..3c7bb0410b51 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -701,7 +701,7 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - u32 de_iir, gt_iir, de_ier, pm_iir; + u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier; irqreturn_t ret = IRQ_NONE; int i; @@ -711,6 +711,15 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg) de_ier = I915_READ(DEIER); I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); + /* Disable south interrupts. We'll only write to SDEIIR once, so further + * interrupts will will be stored on its back queue, and then we'll be + * able to process them after we restore SDEIER (as soon as we restore + * it, we'll get an interrupt if SDEIIR still has something to process + * due to its back queue). 
*/ + sde_ier = I915_READ(SDEIER); + I915_WRITE(SDEIER, 0); + POSTING_READ(SDEIER); + gt_iir = I915_READ(GTIIR); if (gt_iir) { snb_gt_irq_handler(dev, dev_priv, gt_iir); @@ -759,6 +768,8 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg) I915_WRITE(DEIER, de_ier); POSTING_READ(DEIER); + I915_WRITE(SDEIER, sde_ier); + POSTING_READ(SDEIER); return ret; } @@ -778,7 +789,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; int ret = IRQ_NONE; - u32 de_iir, gt_iir, de_ier, pm_iir; + u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier; atomic_inc(&dev_priv->irq_received); @@ -787,6 +798,15 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); POSTING_READ(DEIER); + /* Disable south interrupts. We'll only write to SDEIIR once, so further + * interrupts will will be stored on its back queue, and then we'll be + * able to process them after we restore SDEIER (as soon as we restore + * it, we'll get an interrupt if SDEIIR still has something to process + * due to its back queue). 
*/ + sde_ier = I915_READ(SDEIER); + I915_WRITE(SDEIER, 0); + POSTING_READ(SDEIER); + de_iir = I915_READ(DEIIR); gt_iir = I915_READ(GTIIR); pm_iir = I915_READ(GEN6_PMIIR); @@ -849,6 +869,8 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) done: I915_WRITE(DEIER, de_ier); POSTING_READ(DEIER); + I915_WRITE(SDEIER, sde_ier); + POSTING_READ(SDEIER); return ret; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 527b664d3434..848992f67d56 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1613,9 +1613,9 @@ #define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16) #define ADPA_USE_VGA_HVPOLARITY (1<<15) #define ADPA_SETS_HVPOLARITY 0 -#define ADPA_VSYNC_CNTL_DISABLE (1<<11) +#define ADPA_VSYNC_CNTL_DISABLE (1<<10) #define ADPA_VSYNC_CNTL_ENABLE 0 -#define ADPA_HSYNC_CNTL_DISABLE (1<<10) +#define ADPA_HSYNC_CNTL_DISABLE (1<<11) #define ADPA_HSYNC_CNTL_ENABLE 0 #define ADPA_VSYNC_ACTIVE_HIGH (1<<4) #define ADPA_VSYNC_ACTIVE_LOW 0 diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 969d08c72d10..1ce45a0a2d3e 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -45,6 +45,9 @@ struct intel_crt { struct intel_encoder base; + /* DPMS state is stored in the connector, which we need in the + * encoder's enable/disable callbacks */ + struct intel_connector *connector; bool force_hotplug_required; u32 adpa_reg; }; @@ -81,29 +84,6 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, return true; } -static void intel_disable_crt(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; - struct intel_crt *crt = intel_encoder_to_crt(encoder); - u32 temp; - - temp = I915_READ(crt->adpa_reg); - temp &= ~(ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE); - temp &= ~ADPA_DAC_ENABLE; - I915_WRITE(crt->adpa_reg, temp); -} - -static void intel_enable_crt(struct intel_encoder *encoder) -{ 
- struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; - struct intel_crt *crt = intel_encoder_to_crt(encoder); - u32 temp; - - temp = I915_READ(crt->adpa_reg); - temp |= ADPA_DAC_ENABLE; - I915_WRITE(crt->adpa_reg, temp); -} - /* Note: The caller is required to filter out dpms modes not supported by the * platform. */ static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode) @@ -135,6 +115,19 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode) I915_WRITE(crt->adpa_reg, temp); } +static void intel_disable_crt(struct intel_encoder *encoder) +{ + intel_crt_set_dpms(encoder, DRM_MODE_DPMS_OFF); +} + +static void intel_enable_crt(struct intel_encoder *encoder) +{ + struct intel_crt *crt = intel_encoder_to_crt(encoder); + + intel_crt_set_dpms(encoder, crt->connector->base.dpms); +} + + static void intel_crt_dpms(struct drm_connector *connector, int mode) { struct drm_device *dev = connector->dev; @@ -746,6 +739,7 @@ void intel_crt_init(struct drm_device *dev) } connector = &intel_connector->base; + crt->connector = intel_connector; drm_connector_init(dev, &intel_connector->base, &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index d64af5aa4a1c..8d0bac3c35d7 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1391,8 +1391,8 @@ void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder) struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = encoder->dev->dev_private; enum port port = intel_dig_port->port; - bool wait; uint32_t val; + bool wait = false; if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) { val = I915_READ(DDI_BUF_CTL(port)); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a05ac2c91ba2..b20d50192fcc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3604,6 
+3604,30 @@ static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable) */ } +/** + * i9xx_fixup_plane - ugly workaround for G45 to fire up the hardware + * cursor plane briefly if not already running after enabling the display + * plane. + * This workaround avoids occasional blank screens when self refresh is + * enabled. + */ +static void +g4x_fixup_plane(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + u32 cntl = I915_READ(CURCNTR(pipe)); + + if ((cntl & CURSOR_MODE) == 0) { + u32 fw_bcl_self = I915_READ(FW_BLC_SELF); + + I915_WRITE(FW_BLC_SELF, fw_bcl_self & ~FW_BLC_SELF_EN); + I915_WRITE(CURCNTR(pipe), CURSOR_MODE_64_ARGB_AX); + intel_wait_for_vblank(dev_priv->dev, pipe); + I915_WRITE(CURCNTR(pipe), cntl); + I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); + I915_WRITE(FW_BLC_SELF, fw_bcl_self); + } +} + static void i9xx_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -3629,6 +3653,8 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) intel_enable_pipe(dev_priv, pipe, false); intel_enable_plane(dev_priv, plane, pipe); + if (IS_G4X(dev)) + g4x_fixup_plane(dev_priv, pipe); intel_crtc_load_lut(crtc); intel_update_fbc(dev); @@ -5745,6 +5771,11 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, num_connectors++; } + if (is_cpu_edp) + intel_crtc->cpu_transcoder = TRANSCODER_EDP; + else + intel_crtc->cpu_transcoder = pipe; + /* We are not sure yet this won't happen. 
*/ WARN(!HAS_PCH_LPT(dev), "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev)); @@ -5811,11 +5842,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int pipe = intel_crtc->pipe; int ret; - if (IS_HASWELL(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) - intel_crtc->cpu_transcoder = TRANSCODER_EDP; - else - intel_crtc->cpu_transcoder = pipe; - drm_vblank_pre_modeset(dev, pipe); ret = dev_priv->display.crtc_mode_set(crtc, mode, adjusted_mode, @@ -7256,8 +7282,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_framebuffer *intel_fb; - struct drm_i915_gem_object *obj; + struct drm_framebuffer *old_fb = crtc->fb; + struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)->obj; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_unpin_work *work; unsigned long flags; @@ -7282,8 +7308,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, work->event = event; work->crtc = crtc; - intel_fb = to_intel_framebuffer(crtc->fb); - work->old_fb_obj = intel_fb->obj; + work->old_fb_obj = to_intel_framebuffer(old_fb)->obj; INIT_WORK(&work->work, intel_unpin_work_fn); ret = drm_vblank_get(dev, intel_crtc->pipe); @@ -7303,9 +7328,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_crtc->unpin_work = work; spin_unlock_irqrestore(&dev->event_lock, flags); - intel_fb = to_intel_framebuffer(fb); - obj = intel_fb->obj; - if (atomic_read(&intel_crtc->unpin_work_count) >= 2) flush_workqueue(dev_priv->wq); @@ -7340,6 +7362,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, cleanup_pending: atomic_dec(&intel_crtc->unpin_work_count); + crtc->fb = old_fb; drm_gem_object_unreference(&work->old_fb_obj->base); drm_gem_object_unreference(&obj->base); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f61cb7998c72..8fc93f90a7cd 100644 --- 
a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -353,7 +353,8 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) #define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) if (has_aux_irq) - done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, 10); + done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, + msecs_to_jiffies(10)); else done = wait_for_atomic(C, 10) == 0; if (!done) @@ -819,6 +820,7 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, struct intel_link_m_n m_n; int pipe = intel_crtc->pipe; enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder; + int target_clock; /* * Find the lane count in the intel_encoder private @@ -834,13 +836,22 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, } } + target_clock = mode->clock; + for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + if (intel_encoder->type == INTEL_OUTPUT_EDP) { + target_clock = intel_edp_target_clock(intel_encoder, + mode); + break; + } + } + /* * Compute the GMCH and Link ratios. The '3' here is * the number of bytes_per_pixel post-LUT, which we always * set up for 8-bits of R/G/B, or 3 bytes total. 
*/ intel_link_compute_m_n(intel_crtc->bpp, lane_count, - mode->clock, adjusted_mode->clock, &m_n); + target_clock, adjusted_mode->clock, &m_n); if (IS_HASWELL(dev)) { I915_WRITE(PIPE_DATA_M1(cpu_transcoder), @@ -1929,7 +1940,7 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) for (i = 0; i < intel_dp->lane_count; i++) if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0) break; - if (i == intel_dp->lane_count && voltage_tries == 5) { + if (i == intel_dp->lane_count) { ++loop_tries; if (loop_tries == 5) { DRM_DEBUG_KMS("too many full retries, give up\n"); @@ -2548,12 +2559,15 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); struct intel_dp *intel_dp = &intel_dig_port->dp; + struct drm_device *dev = intel_dp_to_dev(intel_dp); i2c_del_adapter(&intel_dp->adapter); drm_encoder_cleanup(encoder); if (is_edp(intel_dp)) { cancel_delayed_work_sync(&intel_dp->panel_vdd_work); + mutex_lock(&dev->mode_config.mutex); ironlake_panel_vdd_off_sync(intel_dp); + mutex_unlock(&dev->mode_config.mutex); } kfree(intel_dig_port); } diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index acf8aec9ada7..ef4744e1bf0b 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -203,7 +203,13 @@ intel_gpio_setup(struct intel_gmbus *bus, u32 pin) algo->data = bus; } -#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 4) +/* + * gmbus on gen4 seems to be able to generate legacy interrupts even when in MSI + * mode. This results in spurious interrupt warnings if the legacy irq no. is + * shared with another device. The kernel then disables that interrupt source + * and so prevents the other device from working properly. 
+ */ +#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) static int gmbus_wait_hw_status(struct drm_i915_private *dev_priv, u32 gmbus2_status, @@ -214,6 +220,9 @@ gmbus_wait_hw_status(struct drm_i915_private *dev_priv, u32 gmbus2 = 0; DEFINE_WAIT(wait); + if (!HAS_GMBUS_IRQ(dev_priv->dev)) + gmbus4_irq_en = 0; + /* Important: The hw handles only the first bit, so set only one! Since * we also need to check for NAKs besides the hw ready/idle signal, we * need to wake up periodically and check that ourselves. */ diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index a3730e0289e5..bee8cb6108a7 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -321,9 +321,6 @@ void intel_panel_enable_backlight(struct drm_device *dev, if (dev_priv->backlight_level == 0) dev_priv->backlight_level = intel_panel_get_max_backlight(dev); - dev_priv->backlight_enabled = true; - intel_panel_actually_set_backlight(dev, dev_priv->backlight_level); - if (INTEL_INFO(dev)->gen >= 4) { uint32_t reg, tmp; @@ -359,12 +356,12 @@ void intel_panel_enable_backlight(struct drm_device *dev, } set_level: - /* Check the current backlight level and try to set again if it's zero. - * On some machines, BLC_PWM_CPU_CTL is cleared to zero automatically - * when BLC_PWM_CPU_CTL2 and BLC_PWM_PCH_CTL1 are written. + /* Call below after setting BLC_PWM_CPU_CTL2 and BLC_PWM_PCH_CTL1. + * BLC_PWM_CPU_CTL may be cleared to zero automatically when these + * registers are set. 
*/ - if (!intel_panel_get_backlight(dev)) - intel_panel_actually_set_backlight(dev, dev_priv->backlight_level); + dev_priv->backlight_enabled = true; + intel_panel_actually_set_backlight(dev, dev_priv->backlight_level); } static void intel_panel_init_backlight(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 61fee7fcdc2c..adca00783e61 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2574,7 +2574,7 @@ static void gen6_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC_SLEEP, 0); I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 100000); + I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ /* Check if we are enabling RC6 */ @@ -4079,6 +4079,9 @@ void intel_set_power_well(struct drm_device *dev, bool enable) if (!IS_HASWELL(dev)) return; + if (!i915_disable_power_well && !enable) + return; + tmp = I915_READ(HSW_PWR_WELL_DRIVER); is_enabled = tmp & HSW_PWR_WELL_STATE; enable_requested = tmp & HSW_PWR_WELL_ENABLE; diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 5ea5033eae0a..4d932c46725d 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -112,7 +112,6 @@ struct mga_framebuffer { struct mga_fbdev { struct drm_fb_helper helper; struct mga_framebuffer mfb; - struct list_head fbdev_list; void *sysram; int size; struct ttm_bo_kmap_obj mapping; diff --git a/drivers/gpu/drm/mgag200/mgag200_i2c.c b/drivers/gpu/drm/mgag200/mgag200_i2c.c index 5a88ec51b513..d3dcf54e6233 100644 --- a/drivers/gpu/drm/mgag200/mgag200_i2c.c +++ b/drivers/gpu/drm/mgag200/mgag200_i2c.c @@ -92,6 +92,7 @@ struct mga_i2c_chan *mgag200_i2c_create(struct drm_device *dev) int ret; int data, clock; + WREG_DAC(MGA1064_GEN_IO_CTL2, 1); WREG_DAC(MGA1064_GEN_IO_DATA, 0xff); WREG_DAC(MGA1064_GEN_IO_CTL, 0); diff 
--git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index d3d99a28ddef..78d8e919509f 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -382,19 +382,19 @@ static int mga_g200eh_set_plls(struct mga_device *mdev, long clock) m = n = p = 0; vcomax = 800000; vcomin = 400000; - pllreffreq = 3333; + pllreffreq = 33333; delta = 0xffffffff; permitteddelta = clock * 5 / 1000; - for (testp = 16; testp > 0; testp--) { + for (testp = 16; testp > 0; testp >>= 1) { if (clock * testp > vcomax) continue; if (clock * testp < vcomin) continue; for (testm = 1; testm < 33; testm++) { - for (testn = 1; testn < 257; testn++) { + for (testn = 17; testn < 257; testn++) { computed = (pllreffreq * testn) / (testm * testp); if (computed > clock) @@ -404,11 +404,11 @@ static int mga_g200eh_set_plls(struct mga_device *mdev, long clock) if (tmpdelta < delta) { delta = tmpdelta; n = testn - 1; - m = (testm - 1) | ((n >> 1) & 0x80); + m = (testm - 1); p = testp - 1; } if ((clock * testp) >= 600000) - p |= 80; + p |= 0x80; } } } @@ -751,8 +751,6 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, int i; unsigned char misc = 0; unsigned char ext_vga[6]; - unsigned char ext_vga_index24; - unsigned char dac_index90 = 0; u8 bppshift; static unsigned char dacvalue[] = { @@ -803,7 +801,6 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, option2 = 0x0000b000; break; case G200_ER: - dac_index90 = 0; break; } @@ -852,10 +849,8 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, WREG_DAC(i, dacvalue[i]); } - if (mdev->type == G200_ER) { - WREG_DAC(0x90, dac_index90); - } - + if (mdev->type == G200_ER) + WREG_DAC(0x90, 0); if (option) pci_write_config_dword(dev->pdev, PCI_MGA_OPTION, option); @@ -952,8 +947,6 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, if (mdev->type == G200_WB) ext_vga[1] |= 0x88; - ext_vga_index24 = 0x05; - /* Set pixel clocks */ misc = 0x2d; WREG8(MGA_MISC_OUT, misc); @@ -965,7 
+958,7 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, } if (mdev->type == G200_ER) - WREG_ECRT(24, ext_vga_index24); + WREG_ECRT(0x24, 0x5); if (mdev->type == G200_EV) { WREG_ECRT(6, 0); @@ -1406,6 +1399,14 @@ static int mga_vga_get_modes(struct drm_connector *connector) static int mga_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_device *dev = connector->dev; + struct mga_device *mdev = (struct mga_device*)dev->dev_private; + struct mga_fbdev *mfbdev = mdev->mfbdev; + struct drm_fb_helper *fb_helper = &mfbdev->helper; + struct drm_fb_helper_connector *fb_helper_conn = NULL; + int bpp = 32; + int i = 0; + /* FIXME: Add bandwidth and g200se limitations */ if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 || @@ -1415,6 +1416,25 @@ static int mga_vga_mode_valid(struct drm_connector *connector, return MODE_BAD; } + /* Validate the mode input by the user */ + for (i = 0; i < fb_helper->connector_count; i++) { + if (fb_helper->connector_info[i]->connector == connector) { + /* Found the helper for this connector */ + fb_helper_conn = fb_helper->connector_info[i]; + if (fb_helper_conn->cmdline_mode.specified) { + if (fb_helper_conn->cmdline_mode.bpp_specified) { + bpp = fb_helper_conn->cmdline_mode.bpp; + } + } + } + } + + if ((mode->hdisplay * mode->vdisplay * (bpp/8)) > mdev->mc.vram_size) { + if (fb_helper_conn) + fb_helper_conn->cmdline_mode.specified = false; + return MODE_BAD; + } + return MODE_OK; } diff --git a/drivers/gpu/drm/nouveau/core/core/object.c b/drivers/gpu/drm/nouveau/core/core/object.c index 0daab62ea14c..3b2e7b6304d3 100644 --- a/drivers/gpu/drm/nouveau/core/core/object.c +++ b/drivers/gpu/drm/nouveau/core/core/object.c @@ -278,7 +278,6 @@ nouveau_object_del(struct nouveau_object *client, u32 _parent, u32 _handle) struct nouveau_object *parent = NULL; struct nouveau_object *namedb = NULL; struct nouveau_handle *handle = NULL; - int ret = -EINVAL; parent = nouveau_handle_ref(client, 
_parent); if (!parent) @@ -295,7 +294,7 @@ nouveau_object_del(struct nouveau_object *client, u32 _parent, u32 _handle) } nouveau_object_ref(NULL, &parent); - return ret; + return handle ? 0 : -EINVAL; } int diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c index 5fa13267bd9f..02e369f80449 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c @@ -544,13 +544,13 @@ nv50_disp_curs_ofuncs = { static void nv50_disp_base_vblank_enable(struct nouveau_event *event, int head) { - nv_mask(event->priv, 0x61002c, (1 << head), (1 << head)); + nv_mask(event->priv, 0x61002c, (4 << head), (4 << head)); } static void nv50_disp_base_vblank_disable(struct nouveau_event *event, int head) { - nv_mask(event->priv, 0x61002c, (1 << head), (0 << head)); + nv_mask(event->priv, 0x61002c, (4 << head), 0); } static int diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c index 61cec0f6ff1c..4857f913efdd 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c @@ -350,7 +350,7 @@ nve0_graph_init_gpc_0(struct nvc0_graph_priv *priv) nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918); } - nv_wr32(priv, GPC_BCAST(0x1bd4), magicgpc918); + nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918); nv_wr32(priv, GPC_BCAST(0x08ac), nv_rd32(priv, 0x100800)); } diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/therm.h b/drivers/gpu/drm/nouveau/core/include/subdev/therm.h index 6b17b614629f..0b20fc0d19c1 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/therm.h @@ -4,7 +4,7 @@ #include <core/device.h> #include <core/subdev.h> -enum nouveau_therm_mode { +enum nouveau_therm_fan_mode { NOUVEAU_THERM_CTRL_NONE = 0, NOUVEAU_THERM_CTRL_MANUAL = 1, NOUVEAU_THERM_CTRL_AUTO = 2, diff --git 
a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c index e816f06637a7..0e2c1a4f1659 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c @@ -248,6 +248,22 @@ nouveau_bios_shadow_pci(struct nouveau_bios *bios) } } +static void +nouveau_bios_shadow_platform(struct nouveau_bios *bios) +{ + struct pci_dev *pdev = nv_device(bios)->pdev; + size_t size; + + void __iomem *rom = pci_platform_rom(pdev, &size); + if (rom && size) { + bios->data = kmalloc(size, GFP_KERNEL); + if (bios->data) { + memcpy_fromio(bios->data, rom, size); + bios->size = size; + } + } +} + static int nouveau_bios_score(struct nouveau_bios *bios, const bool writeable) { @@ -288,6 +304,7 @@ nouveau_bios_shadow(struct nouveau_bios *bios) { "PROM", nouveau_bios_shadow_prom, false, 0, 0, NULL }, { "ACPI", nouveau_bios_shadow_acpi, true, 0, 0, NULL }, { "PCIROM", nouveau_bios_shadow_pci, true, 0, 0, NULL }, + { "PLATFORM", nouveau_bios_shadow_platform, true, 0, 0, NULL }, {} }; struct methods *mthd, *best; diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c index 2cc1e6a5eb6a..9c41b58d57e2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c @@ -869,7 +869,7 @@ init_idx_addr_latched(struct nvbios_init *init) init->offset += 2; init_wr32(init, dreg, idata); - init_mask(init, creg, ~mask, data | idata); + init_mask(init, creg, ~mask, data | iaddr); } } diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c index a114a0ed7e98..2e98e8a3f1aa 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c @@ -142,6 +142,7 @@ nouveau_i2c_port_create_(struct nouveau_object *parent, /* drop port's i2c subdev refcount, i2c handles this itself */ if (ret == 0) { list_add_tail(&port->head, 
&i2c->ports); + atomic_dec(&parent->refcount); atomic_dec(&engine->refcount); } diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c index f794dc89a3b2..a00a5a76e2d6 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c @@ -134,7 +134,7 @@ nouveau_therm_alarm(struct nouveau_alarm *alarm) } int -nouveau_therm_mode(struct nouveau_therm *therm, int mode) +nouveau_therm_fan_mode(struct nouveau_therm *therm, int mode) { struct nouveau_therm_priv *priv = (void *)therm; struct nouveau_device *device = nv_device(therm); @@ -149,10 +149,15 @@ nouveau_therm_mode(struct nouveau_therm *therm, int mode) (mode != NOUVEAU_THERM_CTRL_NONE && device->card_type >= NV_C0)) return -EINVAL; + /* do not allow automatic fan management if the thermal sensor is + * not available */ + if (priv->mode == 2 && therm->temp_get(therm) < 0) + return -EINVAL; + if (priv->mode == mode) return 0; - nv_info(therm, "Thermal management: %s\n", name[mode]); + nv_info(therm, "fan management: %s\n", name[mode]); nouveau_therm_update(therm, mode); return 0; } @@ -213,7 +218,7 @@ nouveau_therm_attr_set(struct nouveau_therm *therm, priv->fan->bios.max_duty = value; return 0; case NOUVEAU_THERM_ATTR_FAN_MODE: - return nouveau_therm_mode(therm, value); + return nouveau_therm_fan_mode(therm, value); case NOUVEAU_THERM_ATTR_THRS_FAN_BOOST: priv->bios_sensor.thrs_fan_boost.temp = value; priv->sensor.program_alarms(therm); @@ -263,7 +268,7 @@ _nouveau_therm_init(struct nouveau_object *object) return ret; if (priv->suspend >= 0) - nouveau_therm_mode(therm, priv->mode); + nouveau_therm_fan_mode(therm, priv->mode); priv->sensor.program_alarms(therm); return 0; } @@ -313,11 +318,12 @@ nouveau_therm_create_(struct nouveau_object *parent, int nouveau_therm_preinit(struct nouveau_therm *therm) { - nouveau_therm_ic_ctor(therm); nouveau_therm_sensor_ctor(therm); + nouveau_therm_ic_ctor(therm); 
nouveau_therm_fan_ctor(therm); - nouveau_therm_mode(therm, NOUVEAU_THERM_CTRL_NONE); + nouveau_therm_fan_mode(therm, NOUVEAU_THERM_CTRL_NONE); + nouveau_therm_sensor_preinit(therm); return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c b/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c index e24090bac195..8b3adec5fbb1 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c @@ -32,6 +32,7 @@ probe_monitoring_device(struct nouveau_i2c_port *i2c, struct i2c_board_info *info) { struct nouveau_therm_priv *priv = (void *)nouveau_therm(i2c); + struct nvbios_therm_sensor *sensor = &priv->bios_sensor; struct i2c_client *client; request_module("%s%s", I2C_MODULE_PREFIX, info->type); @@ -46,8 +47,9 @@ probe_monitoring_device(struct nouveau_i2c_port *i2c, } nv_info(priv, - "Found an %s at address 0x%x (controlled by lm_sensors)\n", - info->type, info->addr); + "Found an %s at address 0x%x (controlled by lm_sensors, " + "temp offset %+i C)\n", + info->type, info->addr, sensor->offset_constant); priv->ic = client; return true; diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c index 0f5363edb964..a70d1b7e397b 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c @@ -29,54 +29,83 @@ struct nv40_therm_priv { struct nouveau_therm_priv base; }; +enum nv40_sensor_style { INVALID_STYLE = -1, OLD_STYLE = 0, NEW_STYLE = 1 }; + +static enum nv40_sensor_style +nv40_sensor_style(struct nouveau_therm *therm) +{ + struct nouveau_device *device = nv_device(therm); + + switch (device->chipset) { + case 0x43: + case 0x44: + case 0x4a: + case 0x47: + return OLD_STYLE; + + case 0x46: + case 0x49: + case 0x4b: + case 0x4e: + case 0x4c: + case 0x67: + case 0x68: + case 0x63: + return NEW_STYLE; + default: + return INVALID_STYLE; + } +} + static int nv40_sensor_setup(struct nouveau_therm *therm) { - 
struct nouveau_device *device = nv_device(therm); + enum nv40_sensor_style style = nv40_sensor_style(therm); /* enable ADC readout and disable the ALARM threshold */ - if (device->chipset >= 0x46) { + if (style == NEW_STYLE) { nv_mask(therm, 0x15b8, 0x80000000, 0); nv_wr32(therm, 0x15b0, 0x80003fff); - mdelay(10); /* wait for the temperature to stabilize */ + mdelay(20); /* wait for the temperature to stabilize */ return nv_rd32(therm, 0x15b4) & 0x3fff; - } else { + } else if (style == OLD_STYLE) { nv_wr32(therm, 0x15b0, 0xff); + mdelay(20); /* wait for the temperature to stabilize */ return nv_rd32(therm, 0x15b4) & 0xff; - } + } else + return -ENODEV; } static int nv40_temp_get(struct nouveau_therm *therm) { struct nouveau_therm_priv *priv = (void *)therm; - struct nouveau_device *device = nv_device(therm); struct nvbios_therm_sensor *sensor = &priv->bios_sensor; + enum nv40_sensor_style style = nv40_sensor_style(therm); int core_temp; - if (device->chipset >= 0x46) { + if (style == NEW_STYLE) { nv_wr32(therm, 0x15b0, 0x80003fff); core_temp = nv_rd32(therm, 0x15b4) & 0x3fff; - } else { + } else if (style == OLD_STYLE) { nv_wr32(therm, 0x15b0, 0xff); core_temp = nv_rd32(therm, 0x15b4) & 0xff; - } - - /* Setup the sensor if the temperature is 0 */ - if (core_temp == 0) - core_temp = nv40_sensor_setup(therm); + } else + return -ENODEV; - if (sensor->slope_div == 0) - sensor->slope_div = 1; - if (sensor->offset_den == 0) - sensor->offset_den = 1; - if (sensor->slope_mult < 1) - sensor->slope_mult = 1; + /* if the slope or the offset is unset, do no use the sensor */ + if (!sensor->slope_div || !sensor->slope_mult || + !sensor->offset_num || !sensor->offset_den) + return -ENODEV; core_temp = core_temp * sensor->slope_mult / sensor->slope_div; core_temp = core_temp + sensor->offset_num / sensor->offset_den; core_temp = core_temp + sensor->offset_constant - 8; + /* reserve negative temperatures for errors */ + if (core_temp < 0) + core_temp = 0; + return core_temp; } 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h index 06b98706b3fc..438d9824b774 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h @@ -102,7 +102,7 @@ struct nouveau_therm_priv { struct i2c_client *ic; }; -int nouveau_therm_mode(struct nouveau_therm *therm, int mode); +int nouveau_therm_fan_mode(struct nouveau_therm *therm, int mode); int nouveau_therm_attr_get(struct nouveau_therm *therm, enum nouveau_therm_attr_type type); int nouveau_therm_attr_set(struct nouveau_therm *therm, @@ -122,6 +122,7 @@ int nouveau_therm_fan_sense(struct nouveau_therm *therm); int nouveau_therm_preinit(struct nouveau_therm *); +void nouveau_therm_sensor_preinit(struct nouveau_therm *); void nouveau_therm_sensor_set_threshold_state(struct nouveau_therm *therm, enum nouveau_therm_thrs thrs, enum nouveau_therm_thrs_state st); diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index b37624af8297..470f6a47b656 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -34,10 +34,6 @@ nouveau_therm_temp_set_defaults(struct nouveau_therm *therm) { struct nouveau_therm_priv *priv = (void *)therm; - priv->bios_sensor.slope_mult = 1; - priv->bios_sensor.slope_div = 1; - priv->bios_sensor.offset_num = 0; - priv->bios_sensor.offset_den = 1; priv->bios_sensor.offset_constant = 0; priv->bios_sensor.thrs_fan_boost.temp = 90; @@ -60,11 +56,6 @@ nouveau_therm_temp_safety_checks(struct nouveau_therm *therm) struct nouveau_therm_priv *priv = (void *)therm; struct nvbios_therm_sensor *s = &priv->bios_sensor; - if (!priv->bios_sensor.slope_div) - priv->bios_sensor.slope_div = 1; - if (!priv->bios_sensor.offset_den) - priv->bios_sensor.offset_den = 1; - /* enforce a minimum hysteresis on thresholds */ s->thrs_fan_boost.hysteresis = max_t(u8, 
s->thrs_fan_boost.hysteresis, 2); s->thrs_down_clock.hysteresis = max_t(u8, s->thrs_down_clock.hysteresis, 2); @@ -106,16 +97,16 @@ void nouveau_therm_sensor_event(struct nouveau_therm *therm, const char *thresolds[] = { "fanboost", "downclock", "critical", "shutdown" }; - uint8_t temperature = therm->temp_get(therm); + int temperature = therm->temp_get(therm); if (thrs < 0 || thrs > 3) return; if (dir == NOUVEAU_THERM_THRS_FALLING) - nv_info(therm, "temperature (%u C) went below the '%s' threshold\n", + nv_info(therm, "temperature (%i C) went below the '%s' threshold\n", temperature, thresolds[thrs]); else - nv_info(therm, "temperature (%u C) hit the '%s' threshold\n", + nv_info(therm, "temperature (%i C) hit the '%s' threshold\n", temperature, thresolds[thrs]); active = (dir == NOUVEAU_THERM_THRS_RISING); @@ -123,7 +114,7 @@ void nouveau_therm_sensor_event(struct nouveau_therm *therm, case NOUVEAU_THERM_THRS_FANBOOST: if (active) { nouveau_therm_fan_set(therm, true, 100); - nouveau_therm_mode(therm, NOUVEAU_THERM_CTRL_AUTO); + nouveau_therm_fan_mode(therm, NOUVEAU_THERM_CTRL_AUTO); } break; case NOUVEAU_THERM_THRS_DOWNCLOCK: @@ -202,7 +193,7 @@ alarm_timer_callback(struct nouveau_alarm *alarm) NOUVEAU_THERM_THRS_SHUTDOWN); /* schedule the next poll in one second */ - if (list_empty(&alarm->head)) + if (therm->temp_get(therm) >= 0 && list_empty(&alarm->head)) ptimer->alarm(ptimer, 1000 * 1000 * 1000, alarm); spin_unlock_irqrestore(&priv->sensor.alarm_program_lock, flags); @@ -225,6 +216,17 @@ nouveau_therm_program_alarms_polling(struct nouveau_therm *therm) alarm_timer_callback(&priv->sensor.therm_poll_alarm); } +void +nouveau_therm_sensor_preinit(struct nouveau_therm *therm) +{ + const char *sensor_avail = "yes"; + + if (therm->temp_get(therm) < 0) + sensor_avail = "no"; + + nv_info(therm, "internal sensor: %s\n", sensor_avail); +} + int nouveau_therm_sensor_ctor(struct nouveau_therm *therm) { diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c 
b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 41241922263f..5eb3e0da7c6e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -116,6 +116,11 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, { struct nouveau_abi16_ntfy *ntfy, *temp; + /* wait for all activity to stop before releasing notify object, which + * may be still in use */ + if (chan->chan && chan->ntfy) + nouveau_channel_idle(chan->chan); + /* cleanup notifier state */ list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { nouveau_abi16_ntfy_fini(chan, ntfy); @@ -386,7 +391,7 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS) struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_device *device = nv_device(drm->device); struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev); - struct nouveau_abi16_chan *chan, *temp; + struct nouveau_abi16_chan *chan = NULL, *temp; struct nouveau_abi16_ntfy *ntfy; struct nouveau_object *object; struct nv_dma_class args = {}; @@ -399,10 +404,11 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS) if (unlikely(nv_device(abi16->device)->card_type >= NV_C0)) return nouveau_abi16_put(abi16, -EINVAL); - list_for_each_entry_safe(chan, temp, &abi16->channels, head) { - if (chan->chan->handle == (NVDRM_CHAN | info->channel)) + list_for_each_entry(temp, &abi16->channels, head) { + if (temp->chan->handle == (NVDRM_CHAN | info->channel)) { + chan = temp; break; - chan = NULL; + } } if (!chan) @@ -454,17 +460,18 @@ nouveau_abi16_ioctl_gpuobj_free(ABI16_IOCTL_ARGS) { struct drm_nouveau_gpuobj_free *fini = data; struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev); - struct nouveau_abi16_chan *chan, *temp; + struct nouveau_abi16_chan *chan = NULL, *temp; struct nouveau_abi16_ntfy *ntfy; int ret; if (unlikely(!abi16)) return -ENOMEM; - list_for_each_entry_safe(chan, temp, &abi16->channels, head) { - if (chan->chan->handle == (NVDRM_CHAN | fini->channel)) + list_for_each_entry(temp, 
&abi16->channels, head) { + if (temp->chan->handle == (NVDRM_CHAN | fini->channel)) { + chan = temp; break; - chan = NULL; + } } if (!chan) diff --git a/drivers/gpu/drm/nouveau/nouveau_agp.c b/drivers/gpu/drm/nouveau/nouveau_agp.c index d28430cd2ba6..6e7a55f93a85 100644 --- a/drivers/gpu/drm/nouveau/nouveau_agp.c +++ b/drivers/gpu/drm/nouveau/nouveau_agp.c @@ -47,6 +47,18 @@ nouveau_agp_enabled(struct nouveau_drm *drm) if (drm->agp.stat == UNKNOWN) { if (!nouveau_agpmode) return false; +#ifdef __powerpc__ + /* Disable AGP by default on all PowerPC machines for + * now -- At least some UniNorth-2 AGP bridges are + * known to be broken: DMA from the host to the card + * works just fine, but writeback from the card to the + * host goes straight to memory untranslated bypassing + * the GATT somehow, making them quite painful to deal + * with... + */ + if (nouveau_agpmode == -1) + return false; +#endif return true; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 11ca82148edc..7ff10711a4d0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -801,7 +801,7 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, stride = 16 * 4; height = amount / stride; - if (new_mem->mem_type == TTM_PL_VRAM && + if (old_mem->mem_type == TTM_PL_VRAM && nouveau_bo_tile_layout(nvbo)) { ret = RING_SPACE(chan, 8); if (ret) @@ -823,7 +823,7 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, BEGIN_NV04(chan, NvSubCopy, 0x0200, 1); OUT_RING (chan, 1); } - if (old_mem->mem_type == TTM_PL_VRAM && + if (new_mem->mem_type == TTM_PL_VRAM && nouveau_bo_tile_layout(nvbo)) { ret = RING_SPACE(chan, 8); if (ret) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index d1099365bfc1..c95decf543e9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -72,11 +72,25 @@ 
module_param_named(modeset, nouveau_modeset, int, 0400); static struct drm_driver driver; static int +nouveau_drm_vblank_handler(struct nouveau_eventh *event, int head) +{ + struct nouveau_drm *drm = + container_of(event, struct nouveau_drm, vblank[head]); + drm_handle_vblank(drm->dev, head); + return NVKM_EVENT_KEEP; +} + +static int nouveau_drm_vblank_enable(struct drm_device *dev, int head) { struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_disp *pdisp = nouveau_disp(drm->device); - nouveau_event_get(pdisp->vblank, head, &drm->vblank); + + if (WARN_ON_ONCE(head > ARRAY_SIZE(drm->vblank))) + return -EIO; + WARN_ON_ONCE(drm->vblank[head].func); + drm->vblank[head].func = nouveau_drm_vblank_handler; + nouveau_event_get(pdisp->vblank, head, &drm->vblank[head]); return 0; } @@ -85,16 +99,11 @@ nouveau_drm_vblank_disable(struct drm_device *dev, int head) { struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_disp *pdisp = nouveau_disp(drm->device); - nouveau_event_put(pdisp->vblank, head, &drm->vblank); -} - -static int -nouveau_drm_vblank_handler(struct nouveau_eventh *event, int head) -{ - struct nouveau_drm *drm = - container_of(event, struct nouveau_drm, vblank); - drm_handle_vblank(drm->dev, head); - return NVKM_EVENT_KEEP; + if (drm->vblank[head].func) + nouveau_event_put(pdisp->vblank, head, &drm->vblank[head]); + else + WARN_ON_ONCE(1); + drm->vblank[head].func = NULL; } static u64 @@ -292,7 +301,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) dev->dev_private = drm; drm->dev = dev; - drm->vblank.func = nouveau_drm_vblank_handler; INIT_LIST_HEAD(&drm->clients); spin_lock_init(&drm->tile.lock); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h index b25df374c901..9c39bafbef2c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.h +++ b/drivers/gpu/drm/nouveau/nouveau_drm.h @@ -113,7 +113,7 @@ struct nouveau_drm { struct nvbios vbios; struct nouveau_display *display; struct backlight_device 
*backlight; - struct nouveau_eventh vblank; + struct nouveau_eventh vblank[4]; /* power management */ struct nouveau_pm *pm; diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c index bb54098c6d97..936b442a6ab7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_pm.c +++ b/drivers/gpu/drm/nouveau/nouveau_pm.c @@ -402,8 +402,12 @@ nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf) struct drm_device *dev = dev_get_drvdata(d); struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_therm *therm = nouveau_therm(drm->device); + int temp = therm->temp_get(therm); - return snprintf(buf, PAGE_SIZE, "%d\n", therm->temp_get(therm) * 1000); + if (temp < 0) + return temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp * 1000); } static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, nouveau_hwmon_show_temp, NULL, 0); @@ -871,7 +875,12 @@ static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO | S_IWUSR, nouveau_hwmon_get_pwm1_max, nouveau_hwmon_set_pwm1_max, 0); -static struct attribute *hwmon_attributes[] = { +static struct attribute *hwmon_default_attributes[] = { + &sensor_dev_attr_name.dev_attr.attr, + &sensor_dev_attr_update_rate.dev_attr.attr, + NULL +}; +static struct attribute *hwmon_temp_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_auto_point1_pwm.dev_attr.attr, &sensor_dev_attr_temp1_auto_point1_temp.dev_attr.attr, @@ -882,8 +891,6 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp1_emergency.dev_attr.attr, &sensor_dev_attr_temp1_emergency_hyst.dev_attr.attr, - &sensor_dev_attr_name.dev_attr.attr, - &sensor_dev_attr_update_rate.dev_attr.attr, NULL }; static struct attribute *hwmon_fan_rpm_attributes[] = { @@ -898,8 +905,11 @@ static struct attribute *hwmon_pwm_fan_attributes[] = { NULL }; -static const struct attribute_group hwmon_attrgroup = { - .attrs = hwmon_attributes, +static const struct attribute_group 
hwmon_default_attrgroup = { + .attrs = hwmon_default_attributes, +}; +static const struct attribute_group hwmon_temp_attrgroup = { + .attrs = hwmon_temp_attributes, }; static const struct attribute_group hwmon_fan_rpm_attrgroup = { .attrs = hwmon_fan_rpm_attributes, @@ -931,13 +941,22 @@ nouveau_hwmon_init(struct drm_device *dev) } dev_set_drvdata(hwmon_dev, dev); - /* default sysfs entries */ - ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_attrgroup); + /* set the default attributes */ + ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_default_attrgroup); if (ret) { if (ret) goto error; } + /* if the card has a working thermal sensor */ + if (therm->temp_get(therm) >= 0) { + ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_temp_attrgroup); + if (ret) { + if (ret) + goto error; + } + } + /* if the card has a pwm fan */ /*XXX: incorrect, need better detection for this, some boards have * the gpio entries for pwm fan control even when there's no @@ -979,11 +998,10 @@ nouveau_hwmon_fini(struct drm_device *dev) struct nouveau_pm *pm = nouveau_pm(dev); if (pm->hwmon) { - sysfs_remove_group(&pm->hwmon->kobj, &hwmon_attrgroup); - sysfs_remove_group(&pm->hwmon->kobj, - &hwmon_pwm_fan_attrgroup); - sysfs_remove_group(&pm->hwmon->kobj, - &hwmon_fan_rpm_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_default_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_temp_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_pwm_fan_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_fan_rpm_attrgroup); hwmon_device_unregister(pm->hwmon); } diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index a6237c9cbbc3..1ddc03e51bf4 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -55,9 +55,9 @@ /* offsets in shared sync bo of various structures */ #define EVO_SYNC(c, o) ((c) * 0x0100 + (o)) -#define EVO_MAST_NTFY EVO_SYNC( 0, 0x00) -#define EVO_FLIP_SEM0(c) 
EVO_SYNC((c), 0x00) -#define EVO_FLIP_SEM1(c) EVO_SYNC((c), 0x10) +#define EVO_MAST_NTFY EVO_SYNC( 0, 0x00) +#define EVO_FLIP_SEM0(c) EVO_SYNC((c) + 1, 0x00) +#define EVO_FLIP_SEM1(c) EVO_SYNC((c) + 1, 0x10) #define EVO_CORE_HANDLE (0xd1500000) #define EVO_CHAN_HANDLE(t,i) (0xd15c0000 | (((t) & 0x00ff) << 8) | (i)) @@ -341,10 +341,8 @@ struct nv50_curs { struct nv50_sync { struct nv50_dmac base; - struct { - u32 offset; - u16 value; - } sem; + u32 addr; + u32 data; }; struct nv50_ovly { @@ -471,13 +469,33 @@ nv50_display_crtc_sema(struct drm_device *dev, int crtc) return nv50_disp(dev)->sync; } +struct nv50_display_flip { + struct nv50_disp *disp; + struct nv50_sync *chan; +}; + +static bool +nv50_display_flip_wait(void *data) +{ + struct nv50_display_flip *flip = data; + if (nouveau_bo_rd32(flip->disp->sync, flip->chan->addr / 4) == + flip->chan->data) + return true; + usleep_range(1, 2); + return false; +} + void nv50_display_flip_stop(struct drm_crtc *crtc) { - struct nv50_sync *sync = nv50_sync(crtc); + struct nouveau_device *device = nouveau_dev(crtc->dev); + struct nv50_display_flip flip = { + .disp = nv50_disp(crtc->dev), + .chan = nv50_sync(crtc), + }; u32 *push; - push = evo_wait(sync, 8); + push = evo_wait(flip.chan, 8); if (push) { evo_mthd(push, 0x0084, 1); evo_data(push, 0x00000000); @@ -487,8 +505,10 @@ nv50_display_flip_stop(struct drm_crtc *crtc) evo_data(push, 0x00000000); evo_mthd(push, 0x0080, 1); evo_data(push, 0x00000000); - evo_kick(push, sync); + evo_kick(push, flip.chan); } + + nv_wait_cb(device, nv50_display_flip_wait, &flip); } int @@ -496,73 +516,78 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct nouveau_channel *chan, u32 swap_interval) { struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb); - struct nv50_disp *disp = nv50_disp(crtc->dev); struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); struct nv50_sync *sync = nv50_sync(crtc); + int head = nv_crtc->index, ret; u32 *push; - int ret; 
swap_interval <<= 4; if (swap_interval == 0) swap_interval |= 0x100; + if (chan == NULL) + evo_sync(crtc->dev); push = evo_wait(sync, 128); if (unlikely(push == NULL)) return -EBUSY; - /* synchronise with the rendering channel, if necessary */ - if (likely(chan)) { + if (chan && nv_mclass(chan->object) < NV84_CHANNEL_IND_CLASS) { + ret = RING_SPACE(chan, 8); + if (ret) + return ret; + + BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2); + OUT_RING (chan, NvEvoSema0 + head); + OUT_RING (chan, sync->addr ^ 0x10); + BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1); + OUT_RING (chan, sync->data + 1); + BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2); + OUT_RING (chan, sync->addr); + OUT_RING (chan, sync->data); + } else + if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) { + u64 addr = nv84_fence_crtc(chan, head) + sync->addr; + ret = RING_SPACE(chan, 12); + if (ret) + return ret; + + BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); + OUT_RING (chan, chan->vram); + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr ^ 0x10)); + OUT_RING (chan, lower_32_bits(addr ^ 0x10)); + OUT_RING (chan, sync->data + 1); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG); + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr)); + OUT_RING (chan, lower_32_bits(addr)); + OUT_RING (chan, sync->data); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); + } else + if (chan) { + u64 addr = nv84_fence_crtc(chan, head) + sync->addr; ret = RING_SPACE(chan, 10); if (ret) return ret; - if (nv_mclass(chan->object) < NV84_CHANNEL_IND_CLASS) { - BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2); - OUT_RING (chan, NvEvoSema0 + nv_crtc->index); - OUT_RING (chan, sync->sem.offset); - BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1); - OUT_RING (chan, 0xf00d0000 | sync->sem.value); - BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2); - OUT_RING 
(chan, sync->sem.offset ^ 0x10); - OUT_RING (chan, 0x74b1e000); - BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); - OUT_RING (chan, NvSema); - } else - if (nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) { - u64 offset = nv84_fence_crtc(chan, nv_crtc->index); - offset += sync->sem.offset; - - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset)); - OUT_RING (chan, 0xf00d0000 | sync->sem.value); - OUT_RING (chan, 0x00000002); - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset ^ 0x10)); - OUT_RING (chan, 0x74b1e000); - OUT_RING (chan, 0x00000001); - } else { - u64 offset = nv84_fence_crtc(chan, nv_crtc->index); - offset += sync->sem.offset; - - BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset)); - OUT_RING (chan, 0xf00d0000 | sync->sem.value); - OUT_RING (chan, 0x00001002); - BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); - OUT_RING (chan, upper_32_bits(offset)); - OUT_RING (chan, lower_32_bits(offset ^ 0x10)); - OUT_RING (chan, 0x74b1e000); - OUT_RING (chan, 0x00001001); - } + BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr ^ 0x10)); + OUT_RING (chan, lower_32_bits(addr ^ 0x10)); + OUT_RING (chan, sync->data + 1); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG | + NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD); + BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); + OUT_RING (chan, upper_32_bits(addr)); + OUT_RING (chan, lower_32_bits(addr)); + OUT_RING (chan, sync->data); + OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL | + NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD); + } + if (chan) { + sync->addr ^= 0x10; + sync->data++; FIRE_RING (chan); - } else { - nouveau_bo_wr32(disp->sync, sync->sem.offset / 4, - 
0xf00d0000 | sync->sem.value); - evo_sync(crtc->dev); } /* queue the flip */ @@ -575,9 +600,9 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, evo_data(push, 0x40000000); } evo_mthd(push, 0x0088, 4); - evo_data(push, sync->sem.offset); - evo_data(push, 0xf00d0000 | sync->sem.value); - evo_data(push, 0x74b1e000); + evo_data(push, sync->addr); + evo_data(push, sync->data++); + evo_data(push, sync->data); evo_data(push, NvEvoSync); evo_mthd(push, 0x00a0, 2); evo_data(push, 0x00000000); @@ -605,9 +630,6 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, evo_mthd(push, 0x0080, 1); evo_data(push, 0x00000000); evo_kick(push, sync); - - sync->sem.offset ^= 0x10; - sync->sem.value++; return 0; } @@ -1379,7 +1401,8 @@ nv50_crtc_create(struct drm_device *dev, struct nouveau_object *core, int index) if (ret) goto out; - head->sync.sem.offset = EVO_SYNC(1 + index, 0x00); + head->sync.addr = EVO_FLIP_SEM0(index); + head->sync.data = 0x00000000; /* allocate overlay resources */ ret = nv50_pioc_create(disp->core, NV50_DISP_OIMM_CLASS, index, @@ -2112,15 +2135,23 @@ nv50_display_fini(struct drm_device *dev) int nv50_display_init(struct drm_device *dev) { - u32 *push = evo_wait(nv50_mast(dev), 32); - if (push) { - evo_mthd(push, 0x0088, 1); - evo_data(push, NvEvoSync); - evo_kick(push, nv50_mast(dev)); - return 0; + struct nv50_disp *disp = nv50_disp(dev); + struct drm_crtc *crtc; + u32 *push; + + push = evo_wait(nv50_mast(dev), 32); + if (!push) + return -EBUSY; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nv50_sync *sync = nv50_sync(crtc); + nouveau_bo_wr32(disp->sync, sync->addr / 4, sync->data); } - return -EBUSY; + evo_mthd(push, 0x0088, 1); + evo_data(push, NvEvoSync); + evo_kick(push, nv50_mast(dev)); + return 0; } void @@ -2245,6 +2276,7 @@ nv50_display_create(struct drm_device *dev) NV_WARN(drm, "failed to create encoder %d/%d/%d: %d\n", dcbe->location, dcbe->type, ffs(dcbe->or) - 1, ret); 
+ ret = 0; } } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 3c38ea46531c..305a657bf215 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2438,6 +2438,12 @@ static u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev) if (tmp & L2_BUSY) reset_mask |= RADEON_RESET_VMC; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 99fb13286fd0..eb8ac315f92f 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -834,7 +834,7 @@ static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, __func__, __LINE__, toffset, surf.base_align); return -EINVAL; } - if (moffset & (surf.base_align - 1)) { + if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) { dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n", __func__, __LINE__, moffset, surf.base_align); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 7cead763be9e..27769e724b6d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -468,13 +468,19 @@ static void cayman_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x9907) || (rdev->pdev->device == 0x9908) || (rdev->pdev->device == 0x9909) || + (rdev->pdev->device == 0x990B) || + (rdev->pdev->device == 0x990C) || + (rdev->pdev->device == 0x990F) || (rdev->pdev->device == 0x9910) || - (rdev->pdev->device == 0x9917)) { + (rdev->pdev->device == 0x9917) || + (rdev->pdev->device == 0x9999)) { rdev->config.cayman.max_simds_per_se = 6; rdev->config.cayman.max_backends_per_se = 2; } else if ((rdev->pdev->device == 0x9903) || (rdev->pdev->device == 0x9904) || (rdev->pdev->device == 
0x990A) || + (rdev->pdev->device == 0x990D) || + (rdev->pdev->device == 0x990E) || (rdev->pdev->device == 0x9913) || (rdev->pdev->device == 0x9918)) { rdev->config.cayman.max_simds_per_se = 4; @@ -483,6 +489,9 @@ static void cayman_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x9990) || (rdev->pdev->device == 0x9991) || (rdev->pdev->device == 0x9994) || + (rdev->pdev->device == 0x9995) || + (rdev->pdev->device == 0x9996) || + (rdev->pdev->device == 0x999A) || (rdev->pdev->device == 0x99A0)) { rdev->config.cayman.max_simds_per_se = 3; rdev->config.cayman.max_backends_per_se = 1; @@ -616,11 +625,22 @@ static void cayman_gpu_init(struct radeon_device *rdev) WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); - tmp = gb_addr_config & NUM_PIPES_MASK; - tmp = r6xx_remap_render_backend(rdev, tmp, - rdev->config.cayman.max_backends_per_se * - rdev->config.cayman.max_shader_engines, - CAYMAN_MAX_BACKENDS, disabled_rb_mask); + if ((rdev->config.cayman.max_backends_per_se == 1) && + (rdev->flags & RADEON_IS_IGP)) { + if ((disabled_rb_mask & 3) == 1) { + /* RB0 disabled, RB1 enabled */ + tmp = 0x11111111; + } else { + /* RB1 disabled, RB0 enabled */ + tmp = 0x00000000; + } + } else { + tmp = gb_addr_config & NUM_PIPES_MASK; + tmp = r6xx_remap_render_backend(rdev, tmp, + rdev->config.cayman.max_backends_per_se * + rdev->config.cayman.max_shader_engines, + CAYMAN_MAX_BACKENDS, disabled_rb_mask); + } WREG32(GB_BACKEND_MAP, tmp); cgts_tcc_disable = 0xffff0000; @@ -1381,6 +1401,12 @@ static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev) if (tmp & L2_BUSY) reset_mask |= RADEON_RESET_VMC; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } @@ -1765,6 +1791,7 @@ int cayman_resume(struct radeon_device *rdev) int 
cayman_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + radeon_vm_manager_fini(rdev); cayman_cp_enable(rdev, false); cayman_dma_stop(rdev); evergreen_irq_suspend(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 6d4b5611daf4..0740db3fcd22 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1394,6 +1394,12 @@ static u32 r600_gpu_check_soft_reset(struct radeon_device *rdev) if (r600_is_display_hung(rdev)) reset_mask |= RADEON_RESET_DISPLAY; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index bedda9caadd9..6e05a2e75a46 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -122,10 +122,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, goto out_cleanup; } - /* r100 doesn't have dma engine so skip the test */ - /* also, VRAM-to-VRAM test doesn't make much sense for DMA */ - /* skip it as well if domains are the same */ - if ((rdev->asic->copy.dma) && (sdomain != ddomain)) { + if (rdev->asic->copy.dma) { time = radeon_benchmark_do_move(rdev, size, saddr, daddr, RADEON_BENCHMARK_COPY_DMA, n); if (time < 0) @@ -135,13 +132,15 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, sdomain, ddomain, "dma"); } - time = radeon_benchmark_do_move(rdev, size, saddr, daddr, - RADEON_BENCHMARK_COPY_BLIT, n); - if (time < 0) - goto out_cleanup; - if (time > 0) - radeon_benchmark_log_results(n, size, time, - sdomain, ddomain, "blit"); + if (rdev->asic->copy.blit) { + time = radeon_benchmark_do_move(rdev, size, saddr, daddr, + RADEON_BENCHMARK_COPY_BLIT, n); + if (time < 0) + goto out_cleanup; + if (time > 0) + 
radeon_benchmark_log_results(n, size, time, + sdomain, ddomain, "blit"); + } out_cleanup: if (sobj) { diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index b8015913d382..fa3c56fba294 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -99,6 +99,29 @@ static bool radeon_read_bios(struct radeon_device *rdev) return true; } +static bool radeon_read_platform_bios(struct radeon_device *rdev) +{ + uint8_t __iomem *bios; + size_t size; + + rdev->bios = NULL; + + bios = pci_platform_rom(rdev->pdev, &size); + if (!bios) { + return false; + } + + if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) { + return false; + } + rdev->bios = kmemdup(bios, size, GFP_KERNEL); + if (rdev->bios == NULL) { + return false; + } + + return true; +} + #ifdef CONFIG_ACPI /* ATRM is used to get the BIOS on the discrete cards in * dual-gpu systems. @@ -620,6 +643,9 @@ bool radeon_get_bios(struct radeon_device *rdev) if (r == false) { r = radeon_read_disabled_bios(rdev); } + if (r == false) { + r = radeon_read_platform_bios(rdev); + } if (r == false || rdev->bios == NULL) { DRM_ERROR("Unable to locate a BIOS ROM\n"); rdev->bios = NULL; diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 3e403bdda58f..78edadc9e86b 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -970,6 +970,15 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct found = 1; } + /* quirks */ + /* Radeon 9100 (R200) */ + if ((dev->pdev->device == 0x514D) && + (dev->pdev->subsystem_vendor == 0x174B) && + (dev->pdev->subsystem_device == 0x7149)) { + /* vbios value is bad, use the default */ + found = 0; + } + if (!found) /* fallback to defaults */ radeon_legacy_get_primary_dac_info_from_table(rdev, p_dac); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 
167758488ed6..66a7f0fd9620 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -70,9 +70,10 @@ * 2.27.0 - r600-SI: Add CS ioctl support for async DMA * 2.28.0 - r600-eg: Add MEM_WRITE packet support * 2.29.0 - R500 FP16 color clear registers + * 2.30.0 - fix for FMASK texturing */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 29 +#define KMS_DRIVER_MINOR 30 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 90374dd77960..48f80cd42d8f 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -400,6 +400,9 @@ void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) { unsigned long irqflags; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.afmt[block] = true; radeon_irq_set(rdev); @@ -419,6 +422,9 @@ void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) { unsigned long irqflags; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.afmt[block] = false; radeon_irq_set(rdev); @@ -438,6 +444,9 @@ void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask) unsigned long irqflags; int i; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) rdev->irq.hpd[i] |= !!(hpd_mask & (1 << i)); @@ -458,6 +467,9 @@ void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask) unsigned long irqflags; int i; + if (!rdev->ddev->irq_enabled) + return; + spin_lock_irqsave(&rdev->irq.lock, irqflags); for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) rdev->irq.hpd[i] &= !(hpd_mask & (1 << i)); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c 
index 80979ed951eb..bafbe3216952 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2284,6 +2284,12 @@ static u32 si_gpu_check_soft_reset(struct radeon_device *rdev) if (tmp & L2_BUSY) reset_mask |= RADEON_RESET_VMC; + /* Skip MC reset as it's mostly likely not hung, just busy */ + if (reset_mask & RADEON_RESET_MC) { + DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); + reset_mask &= ~RADEON_RESET_MC; + } + return reset_mask; } @@ -4463,6 +4469,7 @@ int si_resume(struct radeon_device *rdev) int si_suspend(struct radeon_device *rdev) { + radeon_vm_manager_fini(rdev); si_cp_enable(rdev, false); cayman_dma_stop(rdev); si_irq_suspend(rdev); diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index c92955df0658..be1daf7344d3 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -4,7 +4,6 @@ config DRM_TEGRA select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER select DRM_KMS_CMA_HELPER - select DRM_HDMI select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index fe5cdbcf2636..b44d548c56f8 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -61,6 +61,10 @@ static int udl_get_modes(struct drm_connector *connector) int ret; edid = (struct edid *)udl_get_edid(udl); + if (!edid) { + drm_mode_connector_update_edid_property(connector, NULL); + return 0; + } /* * We only read the main block, but if the monitor reports extension diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 512b01c04ea7..aa341d135867 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2077,7 +2077,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HYBRID) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HEATCONTROL) }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_BEATPAD) 
}, - { HID_USB_DEVICE(USB_VENDOR_ID_MASTERKIT, USB_DEVICE_ID_MASTERKIT_MA901RADIO) }, { HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1024LS) }, { HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1208LS) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT1) }, @@ -2244,6 +2243,18 @@ bool hid_ignore(struct hid_device *hdev) hdev->product <= USB_DEVICE_ID_VELLEMAN_K8061_LAST)) return true; break; + case USB_VENDOR_ID_ATMEL_V_USB: + /* Masterkit MA901 usb radio based on Atmel tiny85 chip and + * it has the same USB ID as many Atmel V-USB devices. This + * usb radio is handled by radio-ma901.c driver so we want + * ignore the hid. Check the name, bus, product and ignore + * if we have MA901 usb radio. + */ + if (hdev->product == USB_DEVICE_ID_ATMEL_V_USB && + hdev->bus == BUS_USB && + strncmp(hdev->name, "www.masterkit.ru MA901", 22) == 0) + return true; + break; } if (hdev->type == HID_TYPE_USBMOUSE && diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 92e47e5c9564..5309fd5eb0eb 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -158,6 +158,8 @@ #define USB_VENDOR_ID_ATMEL 0x03eb #define USB_DEVICE_ID_ATMEL_MULTITOUCH 0x211c #define USB_DEVICE_ID_ATMEL_MXT_DIGITIZER 0x2118 +#define USB_VENDOR_ID_ATMEL_V_USB 0x16c0 +#define USB_DEVICE_ID_ATMEL_V_USB 0x05df #define USB_VENDOR_ID_AUREAL 0x0755 #define USB_DEVICE_ID_AUREAL_W01RN 0x2626 @@ -557,9 +559,6 @@ #define USB_VENDOR_ID_MADCATZ 0x0738 #define USB_DEVICE_ID_MADCATZ_BEATPAD 0x4540 -#define USB_VENDOR_ID_MASTERKIT 0x16c0 -#define USB_DEVICE_ID_MASTERKIT_MA901RADIO 0x05df - #define USB_VENDOR_ID_MCC 0x09db #define USB_DEVICE_ID_MCC_PMD1024LS 0x0076 #define USB_DEVICE_ID_MCC_PMD1208LS 0x007a @@ -590,6 +589,9 @@ #define USB_VENDOR_ID_MONTEREY 0x0566 #define USB_DEVICE_ID_GENIUS_KB29E 0x3004 +#define USB_VENDOR_ID_MSI 0x1770 +#define USB_DEVICE_ID_MSI_GX680R_LED_PANEL 0xff00 + #define USB_VENDOR_ID_NATIONAL_SEMICONDUCTOR 0x0400 #define USB_DEVICE_ID_N_S_HARMONY 
0xc359 @@ -684,6 +686,9 @@ #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001 0x3001 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008 +#define USB_VENDOR_ID_REALTEK 0x0bda +#define USB_DEVICE_ID_REALTEK_READER 0x0152 + #define USB_VENDOR_ID_ROCCAT 0x1e7d #define USB_DEVICE_ID_ROCCAT_ARVO 0x30d4 #define USB_DEVICE_ID_ROCCAT_ISKU 0x319c diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 9500f2f3f8fe..8758f38c948c 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -459,19 +459,25 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, struct dj_report *dj_report) { struct hid_device *hdev = djrcv_dev->hdev; - int sent_bytes; + struct hid_report *report; + struct hid_report_enum *output_report_enum; + u8 *data = (u8 *)(&dj_report->device_index); + int i; - if (!hdev->hid_output_raw_report) { - dev_err(&hdev->dev, "%s:" - "hid_output_raw_report is null\n", __func__); + output_report_enum = &hdev->report_enum[HID_OUTPUT_REPORT]; + report = output_report_enum->report_id_hash[REPORT_ID_DJ_SHORT]; + + if (!report) { + dev_err(&hdev->dev, "%s: unable to find dj report\n", __func__); return -ENODEV; } - sent_bytes = hdev->hid_output_raw_report(hdev, (u8 *) dj_report, - sizeof(struct dj_report), - HID_OUTPUT_REPORT); + for (i = 0; i < report->field[0]->report_count; i++) + report->field[0]->value[i] = data[i]; + + usbhid_submit_report(hdev, report, USB_DIR_OUT); - return (sent_bytes < 0) ? 
sent_bytes : 0; + return 0; } static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index f7f113ba083e..a8ce44296cfd 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -462,6 +462,21 @@ static int magicmouse_input_mapping(struct hid_device *hdev, return 0; } +static void magicmouse_input_configured(struct hid_device *hdev, + struct hid_input *hi) + +{ + struct magicmouse_sc *msc = hid_get_drvdata(hdev); + + int ret = magicmouse_setup_input(msc->input, hdev); + if (ret) { + hid_err(hdev, "magicmouse setup input failed (%d)\n", ret); + /* clean msc->input to notify probe() of the failure */ + msc->input = NULL; + } +} + + static int magicmouse_probe(struct hid_device *hdev, const struct hid_device_id *id) { @@ -493,15 +508,10 @@ static int magicmouse_probe(struct hid_device *hdev, goto err_free; } - /* We do this after hid-input is done parsing reports so that - * hid-input uses the most natural button and axis IDs. 
- */ - if (msc->input) { - ret = magicmouse_setup_input(msc->input, hdev); - if (ret) { - hid_err(hdev, "magicmouse setup input failed (%d)\n", ret); - goto err_stop_hw; - } + if (!msc->input) { + hid_err(hdev, "magicmouse input not registered\n"); + ret = -ENOMEM; + goto err_stop_hw; } if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE) @@ -568,6 +578,7 @@ static struct hid_driver magicmouse_driver = { .remove = magicmouse_remove, .raw_event = magicmouse_raw_event, .input_mapping = magicmouse_input_mapping, + .input_configured = magicmouse_input_configured, }; module_hid_driver(magicmouse_driver); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7a1ebb867cf4..82e9211b3ca9 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -621,6 +621,7 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field, { struct mt_device *td = hid_get_drvdata(hid); __s32 quirks = td->mtclass.quirks; + struct input_dev *input = field->hidinput->input; if (hid->claimed & HID_CLAIMED_INPUT) { switch (usage->hid) { @@ -670,13 +671,16 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field, break; default: + if (usage->type) + input_event(input, usage->type, usage->code, + value); return; } if (usage->usage_index + 1 == field->report_count) { /* we only take into account the last report. 
*/ if (usage->hid == td->last_slot_field) - mt_complete_slot(td, field->hidinput->input); + mt_complete_slot(td, input); if (field->index == td->last_field_index && td->num_received >= td->num_expected) diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index e0e6abf1cd3b..19b8360f2330 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -73,6 +73,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GX680R_LED_PANEL, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_MOUSE, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS }, @@ -80,6 +81,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_REALTEK, USB_DEVICE_ID_REALTEK_READER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SIGMATEL, USB_DEVICE_ID_SIGMATEL_STMP3780, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, diff --git a/drivers/hwmon/lineage-pem.c b/drivers/hwmon/lineage-pem.c index 41df29f59b0e..ebbb9f4f27a3 100644 --- a/drivers/hwmon/lineage-pem.c +++ b/drivers/hwmon/lineage-pem.c @@ -422,6 +422,7 @@ static struct attribute *pem_input_attributes[] = { &sensor_dev_attr_in2_input.dev_attr.attr, &sensor_dev_attr_curr1_input.dev_attr.attr, 
&sensor_dev_attr_power1_input.dev_attr.attr, + NULL }; static const struct attribute_group pem_input_group = { @@ -432,6 +433,7 @@ static struct attribute *pem_fan_attributes[] = { &sensor_dev_attr_fan1_input.dev_attr.attr, &sensor_dev_attr_fan2_input.dev_attr.attr, &sensor_dev_attr_fan3_input.dev_attr.attr, + NULL }; static const struct attribute_group pem_fan_group = { diff --git a/drivers/hwmon/lm75.h b/drivers/hwmon/lm75.h index 668ff4721323..5cde94e56f17 100644 --- a/drivers/hwmon/lm75.h +++ b/drivers/hwmon/lm75.h @@ -25,7 +25,7 @@ which contains this code, we don't worry about the wasted space. */ -#include <linux/hwmon.h> +#include <linux/kernel.h> /* straight from the datasheet */ #define LM75_TEMP_MIN (-55000) diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index 9652a2c92a24..6d6130752f94 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -59,10 +59,10 @@ enum chips { ltc2978, ltc3880 }; struct ltc2978_data { enum chips id; int vin_min, vin_max; - int temp_min, temp_max; + int temp_min, temp_max[2]; int vout_min[8], vout_max[8]; int iout_max[2]; - int temp2_max[2]; + int temp2_max; struct pmbus_driver_info info; }; @@ -113,9 +113,10 @@ static int ltc2978_read_word_data_common(struct i2c_client *client, int page, ret = pmbus_read_word_data(client, page, LTC2978_MFR_TEMPERATURE_PEAK); if (ret >= 0) { - if (lin11_to_val(ret) > lin11_to_val(data->temp_max)) - data->temp_max = ret; - ret = data->temp_max; + if (lin11_to_val(ret) + > lin11_to_val(data->temp_max[page])) + data->temp_max[page] = ret; + ret = data->temp_max[page]; } break; case PMBUS_VIRT_RESET_VOUT_HISTORY: @@ -204,10 +205,9 @@ static int ltc3880_read_word_data(struct i2c_client *client, int page, int reg) ret = pmbus_read_word_data(client, page, LTC3880_MFR_TEMPERATURE2_PEAK); if (ret >= 0) { - if (lin11_to_val(ret) - > lin11_to_val(data->temp2_max[page])) - data->temp2_max[page] = ret; - ret = data->temp2_max[page]; + if 
(lin11_to_val(ret) > lin11_to_val(data->temp2_max)) + data->temp2_max = ret; + ret = data->temp2_max; } break; case PMBUS_VIRT_READ_VIN_MIN: @@ -248,11 +248,11 @@ static int ltc2978_write_word_data(struct i2c_client *client, int page, switch (reg) { case PMBUS_VIRT_RESET_IOUT_HISTORY: - data->iout_max[page] = 0x7fff; + data->iout_max[page] = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; case PMBUS_VIRT_RESET_TEMP2_HISTORY: - data->temp2_max[page] = 0x7fff; + data->temp2_max = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; case PMBUS_VIRT_RESET_VOUT_HISTORY: @@ -262,12 +262,12 @@ static int ltc2978_write_word_data(struct i2c_client *client, int page, break; case PMBUS_VIRT_RESET_VIN_HISTORY: data->vin_min = 0x7bff; - data->vin_max = 0; + data->vin_max = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; case PMBUS_VIRT_RESET_TEMP_HISTORY: data->temp_min = 0x7bff; - data->temp_max = 0x7fff; + data->temp_max[page] = 0x7c00; ret = ltc2978_clear_peaks(client, page, data->id); break; default: @@ -321,12 +321,14 @@ static int ltc2978_probe(struct i2c_client *client, info = &data->info; info->write_word_data = ltc2978_write_word_data; - data->vout_min[0] = 0xffff; data->vin_min = 0x7bff; + data->vin_max = 0x7c00; data->temp_min = 0x7bff; - data->temp_max = 0x7fff; + for (i = 0; i < ARRAY_SIZE(data->temp_max); i++) + data->temp_max[i] = 0x7c00; + data->temp2_max = 0x7c00; - switch (id->driver_data) { + switch (data->id) { case ltc2978: info->read_word_data = ltc2978_read_word_data; info->pages = 8; @@ -336,7 +338,6 @@ static int ltc2978_probe(struct i2c_client *client, for (i = 1; i < 8; i++) { info->func[i] = PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT; - data->vout_min[i] = 0xffff; } break; case ltc3880: @@ -352,11 +353,14 @@ static int ltc2978_probe(struct i2c_client *client, | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_POUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; - data->vout_min[1] = 0xffff; + 
data->iout_max[0] = 0x7c00; + data->iout_max[1] = 0x7c00; break; default: return -ENODEV; } + for (i = 0; i < info->pages; i++) + data->vout_min[i] = 0xffff; return pmbus_do_probe(client, id, info); } diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 80eef50c50fd..9add60920ac0 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -766,12 +766,14 @@ static ssize_t pmbus_show_label(struct device *dev, static int pmbus_add_attribute(struct pmbus_data *data, struct attribute *attr) { if (data->num_attributes >= data->max_attributes - 1) { - data->max_attributes += PMBUS_ATTR_ALLOC_SIZE; - data->group.attrs = krealloc(data->group.attrs, - sizeof(struct attribute *) * - data->max_attributes, GFP_KERNEL); - if (data->group.attrs == NULL) + int new_max_attrs = data->max_attributes + PMBUS_ATTR_ALLOC_SIZE; + void *new_attrs = krealloc(data->group.attrs, + new_max_attrs * sizeof(void *), + GFP_KERNEL); + if (!new_attrs) return -ENOMEM; + data->group.attrs = new_attrs; + data->max_attributes = new_max_attrs; } data->group.attrs[data->num_attributes++] = attr; diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index bfe326e896df..2507f902fb7a 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -965,7 +965,13 @@ static int sht15_probe(struct platform_device *pdev) if (voltage) data->supply_uv = voltage; - regulator_enable(data->reg); + ret = regulator_enable(data->reg); + if (ret != 0) { + dev_err(&pdev->dev, + "failed to enable regulator: %d\n", ret); + return ret; + } + /* * Setup a notifier block to update this if another device * causes the voltage to change diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c index db713c0dfba4..461a0d739d75 100644 --- a/drivers/hwspinlock/hwspinlock_core.c +++ b/drivers/hwspinlock/hwspinlock_core.c @@ -416,6 +416,8 @@ static int __hwspin_lock_request(struct hwspinlock *hwlock) ret = pm_runtime_get_sync(dev); if 
(ret < 0) { dev_err(dev, "%s: can't power on device\n", __func__); + pm_runtime_put_noidle(dev); + module_put(dev->driver->owner); return ret; } diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 46cde098c11c..e380c6eef3af 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -4,7 +4,6 @@ menuconfig I2C tristate "I2C support" - depends on !S390 select RT_MUTEXES ---help--- I2C (pronounce: I-squared-C) is a slow serial bus protocol used in @@ -76,6 +75,7 @@ config I2C_HELPER_AUTO config I2C_SMBUS tristate "SMBus-specific protocols" if !I2C_HELPER_AUTO + depends on GENERIC_HARDIRQS help Say Y here if you want support for SMBus extensions to the I2C specification. At the moment, the only supported extension is diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a3725de92384..adfee98486b1 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -114,7 +114,7 @@ config I2C_I801 config I2C_ISCH tristate "Intel SCH SMBus 1.0" - depends on PCI + depends on PCI && GENERIC_HARDIRQS select LPC_SCH help Say Y here if you want to use SMBus controller on the Intel SCH @@ -543,6 +543,7 @@ config I2C_NUC900 config I2C_OCORES tristate "OpenCores I2C Controller" + depends on GENERIC_HARDIRQS help If you say yes to this option, support will be included for the OpenCores I2C controller. 
For details see @@ -777,7 +778,7 @@ config I2C_DIOLAN_U2C config I2C_PARPORT tristate "Parallel port adapter" - depends on PARPORT + depends on PARPORT && GENERIC_HARDIRQS select I2C_ALGOBIT select I2C_SMBUS help @@ -802,6 +803,7 @@ config I2C_PARPORT config I2C_PARPORT_LIGHT tristate "Parallel port adapter (light)" + depends on GENERIC_HARDIRQS select I2C_ALGOBIT select I2C_SMBUS help diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 0ceb6e1b0f65..e3085c487ace 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -182,7 +182,6 @@ static int dw_i2c_probe(struct platform_device *pdev) adap->algo = &i2c_dw_algo; adap->dev.parent = &pdev->dev; adap->dev.of_node = pdev->dev.of_node; - ACPI_HANDLE_SET(&adap->dev, ACPI_HANDLE(&pdev->dev)); r = i2c_add_numbered_adapter(adap); if (r) { diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index e9205ee8cf94..130f02cc9d94 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -80,6 +80,7 @@ /* PCI DIDs for the Intel SMBus Message Transport (SMT) Devices */ #define PCI_DEVICE_ID_INTEL_S1200_SMT0 0x0c59 #define PCI_DEVICE_ID_INTEL_S1200_SMT1 0x0c5a +#define PCI_DEVICE_ID_INTEL_AVOTON_SMT 0x1f15 #define ISMT_DESC_ENTRIES 32 /* number of descriptor entries */ #define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */ @@ -185,6 +186,7 @@ struct ismt_priv { static const DEFINE_PCI_DEVICE_TABLE(ismt_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT0) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AVOTON_SMT) }, { 0, } }; diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 36704e3ab3fa..b714776b6ddd 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -411,7 +411,11 @@ static int 
tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) int clk_multiplier = I2C_CLK_MULTIPLIER_STD_FAST_MODE; u32 clk_divisor; - tegra_i2c_clock_enable(i2c_dev); + err = tegra_i2c_clock_enable(i2c_dev); + if (err < 0) { + dev_err(i2c_dev->dev, "Clock enable failed %d\n", err); + return err; + } tegra_periph_reset_assert(i2c_dev->div_clk); udelay(2); @@ -628,7 +632,12 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], if (i2c_dev->is_suspended) return -EBUSY; - tegra_i2c_clock_enable(i2c_dev); + ret = tegra_i2c_clock_enable(i2c_dev); + if (ret < 0) { + dev_err(i2c_dev->dev, "Clock enable failed %d\n", ret); + return ret; + } + for (i = 0; i < num; i++) { enum msg_end_type end_type = MSG_END_STOP; if (i < (num - 1)) { diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c index f3b8f9a6a89b..966a18a5d12d 100644 --- a/drivers/i2c/muxes/i2c-mux-pca9541.c +++ b/drivers/i2c/muxes/i2c-mux-pca9541.c @@ -3,7 +3,7 @@ * * Copyright (c) 2010 Ericsson AB. 
* - * Author: Guenter Roeck <guenter.roeck@ericsson.com> + * Author: Guenter Roeck <linux@roeck-us.net> * * Derived from: * pca954x.c diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5d6675013864..1a38dd7dfe4e 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -465,6 +465,7 @@ static const struct x86_cpu_id intel_idle_ids[] = { ICPU(0x3c, idle_cpu_hsw), ICPU(0x3f, idle_cpu_hsw), ICPU(0x45, idle_cpu_hsw), + ICPU(0x46, idle_cpu_hsw), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 0198324a8b0c..bd33473f8e38 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -62,7 +62,7 @@ st_sensors_match_odr_error: int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr) { int err; - struct st_sensor_odr_avl odr_out; + struct st_sensor_odr_avl odr_out = {0, 0}; struct st_sensor_data *sdata = iio_priv(indio_dev); err = st_sensors_match_odr(sdata->sensor, odr, &odr_out); @@ -114,7 +114,7 @@ st_sensors_match_odr_error: static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs) { - int err, i; + int err, i = 0; struct st_sensor_data *sdata = iio_priv(indio_dev); err = st_sensors_match_fs(sdata->sensor, fs, &i); @@ -139,14 +139,13 @@ st_accel_set_fullscale_error: int st_sensors_set_enable(struct iio_dev *indio_dev, bool enable) { - bool found; u8 tmp_value; int err = -EINVAL; - struct st_sensor_odr_avl odr_out; + bool found = false; + struct st_sensor_odr_avl odr_out = {0, 0}; struct st_sensor_data *sdata = iio_priv(indio_dev); if (enable) { - found = false; tmp_value = sdata->sensor->pw.value_on; if ((sdata->sensor->odr.addr == sdata->sensor->pw.addr) && (sdata->sensor->odr.mask == sdata->sensor->pw.mask)) { diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index 2fe1d4edcb2f..74f2d52795f6 100644 --- 
a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -27,7 +27,6 @@ #define AD5064_ADDR(x) ((x) << 20) #define AD5064_CMD(x) ((x) << 24) -#define AD5064_ADDR_DAC(chan) (chan) #define AD5064_ADDR_ALL_DAC 0xF #define AD5064_CMD_WRITE_INPUT_N 0x0 @@ -131,15 +130,15 @@ static int ad5064_write(struct ad5064_state *st, unsigned int cmd, } static int ad5064_sync_powerdown_mode(struct ad5064_state *st, - unsigned int channel) + const struct iio_chan_spec *chan) { unsigned int val; int ret; - val = (0x1 << channel); + val = (0x1 << chan->address); - if (st->pwr_down[channel]) - val |= st->pwr_down_mode[channel] << 8; + if (st->pwr_down[chan->channel]) + val |= st->pwr_down_mode[chan->channel] << 8; ret = ad5064_write(st, AD5064_CMD_POWERDOWN_DAC, 0, val, 0); @@ -169,7 +168,7 @@ static int ad5064_set_powerdown_mode(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); st->pwr_down_mode[chan->channel] = mode + 1; - ret = ad5064_sync_powerdown_mode(st, chan->channel); + ret = ad5064_sync_powerdown_mode(st, chan); mutex_unlock(&indio_dev->mlock); return ret; @@ -205,7 +204,7 @@ static ssize_t ad5064_write_dac_powerdown(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); st->pwr_down[chan->channel] = pwr_down; - ret = ad5064_sync_powerdown_mode(st, chan->channel); + ret = ad5064_sync_powerdown_mode(st, chan); mutex_unlock(&indio_dev->mlock); return ret ? 
ret : len; } @@ -258,7 +257,7 @@ static int ad5064_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - if (val > (1 << chan->scan_type.realbits) || val < 0) + if (val >= (1 << chan->scan_type.realbits) || val < 0) return -EINVAL; mutex_lock(&indio_dev->mlock); @@ -292,34 +291,44 @@ static const struct iio_chan_spec_ext_info ad5064_ext_info[] = { { }, }; -#define AD5064_CHANNEL(chan, bits) { \ +#define AD5064_CHANNEL(chan, addr, bits) { \ .type = IIO_VOLTAGE, \ .indexed = 1, \ .output = 1, \ .channel = (chan), \ .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \ IIO_CHAN_INFO_SCALE_SEPARATE_BIT, \ - .address = AD5064_ADDR_DAC(chan), \ + .address = addr, \ .scan_type = IIO_ST('u', (bits), 16, 20 - (bits)), \ .ext_info = ad5064_ext_info, \ } #define DECLARE_AD5064_CHANNELS(name, bits) \ const struct iio_chan_spec name[] = { \ - AD5064_CHANNEL(0, bits), \ - AD5064_CHANNEL(1, bits), \ - AD5064_CHANNEL(2, bits), \ - AD5064_CHANNEL(3, bits), \ - AD5064_CHANNEL(4, bits), \ - AD5064_CHANNEL(5, bits), \ - AD5064_CHANNEL(6, bits), \ - AD5064_CHANNEL(7, bits), \ + AD5064_CHANNEL(0, 0, bits), \ + AD5064_CHANNEL(1, 1, bits), \ + AD5064_CHANNEL(2, 2, bits), \ + AD5064_CHANNEL(3, 3, bits), \ + AD5064_CHANNEL(4, 4, bits), \ + AD5064_CHANNEL(5, 5, bits), \ + AD5064_CHANNEL(6, 6, bits), \ + AD5064_CHANNEL(7, 7, bits), \ +} + +#define DECLARE_AD5065_CHANNELS(name, bits) \ +const struct iio_chan_spec name[] = { \ + AD5064_CHANNEL(0, 0, bits), \ + AD5064_CHANNEL(1, 3, bits), \ } static DECLARE_AD5064_CHANNELS(ad5024_channels, 12); static DECLARE_AD5064_CHANNELS(ad5044_channels, 14); static DECLARE_AD5064_CHANNELS(ad5064_channels, 16); +static DECLARE_AD5065_CHANNELS(ad5025_channels, 12); +static DECLARE_AD5065_CHANNELS(ad5045_channels, 14); +static DECLARE_AD5065_CHANNELS(ad5065_channels, 16); + static const struct ad5064_chip_info ad5064_chip_info_tbl[] = { [ID_AD5024] = { .shared_vref = false, @@ -328,7 +337,7 @@ static const struct ad5064_chip_info 
ad5064_chip_info_tbl[] = { }, [ID_AD5025] = { .shared_vref = false, - .channels = ad5024_channels, + .channels = ad5025_channels, .num_channels = 2, }, [ID_AD5044] = { @@ -338,7 +347,7 @@ static const struct ad5064_chip_info ad5064_chip_info_tbl[] = { }, [ID_AD5045] = { .shared_vref = false, - .channels = ad5044_channels, + .channels = ad5045_channels, .num_channels = 2, }, [ID_AD5064] = { @@ -353,7 +362,7 @@ static const struct ad5064_chip_info ad5064_chip_info_tbl[] = { }, [ID_AD5065] = { .shared_vref = false, - .channels = ad5064_channels, + .channels = ad5065_channels, .num_channels = 2, }, [ID_AD5628_1] = { @@ -429,6 +438,7 @@ static int ad5064_probe(struct device *dev, enum ad5064_type type, { struct iio_dev *indio_dev; struct ad5064_state *st; + unsigned int midscale; unsigned int i; int ret; @@ -465,11 +475,6 @@ static int ad5064_probe(struct device *dev, enum ad5064_type type, goto error_free_reg; } - for (i = 0; i < st->chip_info->num_channels; ++i) { - st->pwr_down_mode[i] = AD5064_LDAC_PWRDN_1K; - st->dac_cache[i] = 0x8000; - } - indio_dev->dev.parent = dev; indio_dev->name = name; indio_dev->info = &ad5064_info; @@ -477,6 +482,13 @@ static int ad5064_probe(struct device *dev, enum ad5064_type type, indio_dev->channels = st->chip_info->channels; indio_dev->num_channels = st->chip_info->num_channels; + midscale = (1 << indio_dev->channels[0].scan_type.realbits) / 2; + + for (i = 0; i < st->chip_info->num_channels; ++i) { + st->pwr_down_mode[i] = AD5064_LDAC_PWRDN_1K; + st->dac_cache[i] = midscale; + } + ret = iio_device_register(indio_dev); if (ret) goto error_disable_reg; diff --git a/drivers/iio/imu/inv_mpu6050/Kconfig b/drivers/iio/imu/inv_mpu6050/Kconfig index b5cfa3a354cf..361b2328453d 100644 --- a/drivers/iio/imu/inv_mpu6050/Kconfig +++ b/drivers/iio/imu/inv_mpu6050/Kconfig @@ -5,6 +5,7 @@ config INV_MPU6050_IIO tristate "Invensense MPU6050 devices" depends on I2C && SYSFS + select IIO_BUFFER select IIO_TRIGGERED_BUFFER help This driver supports 
the Invensense MPU6050 devices. diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 565bfb161c1a..65c30ea8c1a1 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -511,12 +511,16 @@ static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; + struct cpl_t5_act_open_req *t5_req; struct sk_buff *skb; u64 opt0; u32 opt2; unsigned int mtu_idx; int wscale; - int wrlen = roundup(sizeof *req, 16); + int size = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? + sizeof(struct cpl_act_open_req) : + sizeof(struct cpl_t5_act_open_req); + int wrlen = roundup(size, 16); PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid); @@ -552,17 +556,36 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= WND_SCALE_EN(1); t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); - req = (struct cpl_act_open_req *) skb_put(skb, wrlen); - INIT_TP_WR(req, 0); - OPCODE_TID(req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ((ep->rss_qid<<14)|ep->atid))); - req->local_port = ep->com.local_addr.sin_port; - req->peer_port = ep->com.remote_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; - req->opt0 = cpu_to_be64(opt0); - req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); - req->opt2 = cpu_to_be32(opt2); + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { + req = (struct cpl_act_open_req *) skb_put(skb, wrlen); + INIT_TP_WR(req, 0); + OPCODE_TID(req) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid << 14) | ep->atid))); + req->local_port = ep->com.local_addr.sin_port; + req->peer_port = ep->com.remote_addr.sin_port; + req->local_ip = ep->com.local_addr.sin_addr.s_addr; + req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + req->params = cpu_to_be32(select_ntuple(ep->com.dev, + ep->dst, 
ep->l2t)); + req->opt2 = cpu_to_be32(opt2); + } else { + t5_req = (struct cpl_t5_act_open_req *) skb_put(skb, wrlen); + INIT_TP_WR(t5_req, 0); + OPCODE_TID(t5_req) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid << 14) | ep->atid))); + t5_req->local_port = ep->com.local_addr.sin_port; + t5_req->peer_port = ep->com.remote_addr.sin_port; + t5_req->local_ip = ep->com.local_addr.sin_addr.s_addr; + t5_req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; + t5_req->opt0 = cpu_to_be64(opt0); + t5_req->params = cpu_to_be64(V_FILTER_TUPLE( + select_ntuple(ep->com.dev, ep->dst, ep->l2t))); + t5_req->opt2 = cpu_to_be32(opt2); + } + set_bit(ACT_OPEN_REQ, &ep->com.history); return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -1575,6 +1598,12 @@ static int c4iw_reconnect(struct c4iw_ep *ep) neigh = dst_neigh_lookup(ep->dst, &ep->com.cm_id->remote_addr.sin_addr.s_addr); + if (!neigh) { + pr_err("%s - cannot alloc neigh.\n", __func__); + err = -ENOMEM; + goto fail4; + } + /* get a l2t entry */ if (neigh->dev->flags & IFF_LOOPBACK) { PDBG("%s LOOPBACK\n", __func__); @@ -1670,9 +1699,9 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_TIMEDOUT: break; case CPL_ERR_TCAM_FULL: + dev->rdev.stats.tcam_full++; if (dev->rdev.lldi.enable_fw_ofld_conn) { mutex_lock(&dev->rdev.stats.lock); - dev->rdev.stats.tcam_full++; mutex_unlock(&dev->rdev.stats.lock); send_fw_act_open_req(ep, GET_TID_TID(GET_AOPEN_ATID( @@ -2869,12 +2898,14 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) { u32 l2info; - u16 vlantag, len, hdr_len; + u16 vlantag, len, hdr_len, eth_hdr_len; u8 intf; struct cpl_rx_pkt *cpl = cplhdr(skb); struct cpl_pass_accept_req *req; struct tcp_options_received tmp_opt; + struct c4iw_dev *dev; + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); /* Store values from cpl_rx_pkt in temporary location. 
*/ vlantag = (__force u16) cpl->vlan; len = (__force u16) cpl->len; @@ -2890,7 +2921,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); @@ -2898,14 +2929,16 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) V_SYN_MAC_IDX(G_RX_MACIDX( (__force int) htonl(l2info))) | F_SYN_XACT_MATCH); + eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? + G_RX_ETHHDR_LEN((__force int) htonl(l2info)) : + G_RX_T5_ETHHDR_LEN((__force int) htonl(l2info)); req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN( (__force int) htonl(l2info))) | V_TCP_HDR_LEN(G_RX_TCPHDR_LEN( (__force int) htons(hdr_len))) | V_IP_HDR_LEN(G_RX_IPHDR_LEN( (__force int) htons(hdr_len))) | - V_ETH_HDR_LEN(G_RX_ETHHDR_LEN( - (__force int) htonl(l2info)))); + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(eth_hdr_len))); req->vlan = (__force __be16) vlantag; req->len = (__force __be16) len; req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | @@ -2993,7 +3026,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) u16 window; struct port_info *pi; struct net_device *pdev; - u16 rss_qid; + u16 rss_qid, eth_hdr_len; int step; u32 tx_chan; struct neighbour *neigh; @@ -3022,7 +3055,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) { + eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? 
+ G_RX_ETHHDR_LEN(htonl(cpl->l2info)) : + G_RX_T5_ETHHDR_LEN(htonl(cpl->l2info)); + if (eth_hdr_len == ETH_HLEN) { eh = (struct ethhdr *)(req + 1); iph = (struct iphdr *)(eh + 1); } else { @@ -3053,6 +3089,12 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) dst = &rt->dst; neigh = dst_neigh_lookup_skb(dst, skb); + if (!neigh) { + pr_err("%s - failed to allocate neigh!\n", + __func__); + goto free_dst; + } + if (neigh->dev->flags & IFF_LOOPBACK) { pdev = ip_dev_find(&init_net, iph->daddr); e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 80069ad595c1..ae656016e1ae 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -41,10 +41,20 @@ #define DRV_VERSION "0.1" MODULE_AUTHOR("Steve Wise"); -MODULE_DESCRIPTION("Chelsio T4 RDMA Driver"); +MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +static int allow_db_fc_on_t5; +module_param(allow_db_fc_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_fc_on_t5, + "Allow DB Flow Control on T5 (default = 0)"); + +static int allow_db_coalescing_on_t5; +module_param(allow_db_coalescing_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_coalescing_on_t5, + "Allow DB Coalescing on T5 (default = 0)"); + struct uld_ctx { struct list_head entry; struct cxgb4_lld_info lldi; @@ -614,7 +624,7 @@ static int rdma_supported(const struct cxgb4_lld_info *infop) { return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 && infop->vr->rq.size > 0 && infop->vr->qp.size > 0 && - infop->vr->cq.size > 0 && infop->vr->ocq.size > 0; + infop->vr->cq.size > 0; } static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) @@ -627,6 +637,22 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pci_name(infop->pdev)); return ERR_PTR(-ENOSYS); } + if (!ocqp_supported(infop)) + pr_info("%s: On-Chip Queues not supported on this 
device.\n", + pci_name(infop->pdev)); + + if (!is_t4(infop->adapter_type)) { + if (!allow_db_fc_on_t5) { + db_fc_threshold = 100000; + pr_info("DB Flow Control Disabled.\n"); + } + + if (!allow_db_coalescing_on_t5) { + db_coalescing_threshold = -1; + pr_info("DB Coalescing Disabled.\n"); + } + } + devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); if (!devp) { printk(KERN_ERR MOD "Cannot allocate ib device\n"); @@ -678,8 +704,8 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop) int i; if (!vers_printed++) - printk(KERN_INFO MOD "Chelsio T4 RDMA Driver - version %s\n", - DRV_VERSION); + pr_info("Chelsio T4/T5 RDMA Driver - version %s\n", + DRV_VERSION); ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) { diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7eec5e13fa8c..485183ad34cd 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -162,7 +162,7 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5)); } -#define C4IW_WR_TO (10*HZ) +#define C4IW_WR_TO (30*HZ) struct c4iw_wr_wait { struct completion completion; @@ -369,7 +369,6 @@ struct c4iw_fr_page_list { DEFINE_DMA_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; - int size; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( @@ -817,6 +816,15 @@ static inline int compute_wscale(int win) return wscale; } +static inline int ocqp_supported(const struct cxgb4_lld_info *infop) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return infop->vr->ocq.size > 0; +#else + return 0; +#endif +} + u32 c4iw_id_alloc(struct c4iw_id_table *alloc); void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj); int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, @@ -930,6 +938,8 @@ extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; 
extern int c4iw_max_read_depth; extern int db_fc_threshold; +extern int db_coalescing_threshold; +extern int use_dsgl; #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 903a92d6f91d..4cb8eb24497c 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -30,16 +30,76 @@ * SOFTWARE. */ +#include <linux/module.h> +#include <linux/moduleparam.h> #include <rdma/ib_umem.h> #include <linux/atomic.h> #include "iw_cxgb4.h" +int use_dsgl = 1; +module_param(use_dsgl, int, 0644); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)"); + #define T4_ULPTX_MIN_IO 32 #define C4IW_MAX_INLINE_SIZE 96 +#define T4_ULPTX_MAX_DMA 1024 +#define C4IW_INLINE_THRESHOLD 128 -static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) +static int inline_threshold = C4IW_INLINE_THRESHOLD; +module_param(inline_threshold, int, 0644); +MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)"); + +static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, + u32 len, dma_addr_t data, int wait) +{ + struct sk_buff *skb; + struct ulp_mem_io *req; + struct ulptx_sgl *sgl; + u8 wr_len; + int ret = 0; + struct c4iw_wr_wait wr_wait; + + addr &= 0x7FFFFFF; + + if (wait) + c4iw_init_wr_wait(&wr_wait); + wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); + + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + req = (struct ulp_mem_io *)__skb_put(skb, wr_len); + memset(req, 0, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, 0); + req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | + (wait ? FW_WR_COMPL(1) : 0)); + req->wr.wr_lo = wait ? 
(__force __be64)&wr_wait : 0; + req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); + req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1)); + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(len>>5)); + req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr)); + + sgl = (struct ulptx_sgl *)(req + 1); + sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_DSGL) | + ULPTX_NSGE(1)); + sgl->len0 = cpu_to_be32(len); + sgl->addr0 = cpu_to_be64(data); + + ret = c4iw_ofld_send(rdev, skb); + if (ret) + return ret; + if (wait) + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + return ret; +} + +static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data) { struct sk_buff *skb; struct ulp_mem_io *req; @@ -47,6 +107,12 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, u8 wr_len, *to_dp, *from_dp; int copy_len, num_wqe, i, ret = 0; struct c4iw_wr_wait wr_wait; + __be32 cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + + if (is_t4(rdev->lldi.adapter_type)) + cmd |= cpu_to_be32(ULP_MEMIO_ORDER(1)); + else + cmd |= cpu_to_be32(V_T5_ULP_MEMIO_IMM(1)); addr &= 0x7FFFFFF; PDBG("%s addr 0x%x len %u\n", __func__, addr, len); @@ -77,7 +143,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, req->wr.wr_mid = cpu_to_be32( FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); - req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE) | (1<<23)); + req->cmd = cmd; req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN( DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO))); req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), @@ -107,6 +173,67 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, return ret; } +int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +{ + u32 remain = len; + u32 dmalen; + int ret = 0; + dma_addr_t daddr; + dma_addr_t save; + + daddr = 
dma_map_single(&rdev->lldi.pdev->dev, data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&rdev->lldi.pdev->dev, daddr)) + return -1; + save = daddr; + + while (remain > inline_threshold) { + if (remain < T4_ULPTX_MAX_DMA) { + if (remain & ~T4_ULPTX_MIN_IO) + dmalen = remain & ~(T4_ULPTX_MIN_IO-1); + else + dmalen = remain; + } else + dmalen = T4_ULPTX_MAX_DMA; + remain -= dmalen; + ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, + !remain); + if (ret) + goto out; + addr += dmalen >> 5; + data += dmalen; + daddr += dmalen; + } + if (remain) + ret = _c4iw_write_mem_inline(rdev, addr, remain, data); +out: + dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); + return ret; +} + +/* + * write len bytes of data into addr (32B aligned address) + * If data is NULL, clear len byte of memory to zero. + */ +static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data) +{ + if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { + if (len > inline_threshold) { + if (_c4iw_write_mem_dma(rdev, addr, len, data)) { + printk_ratelimited(KERN_WARNING + "%s: dma map" + " failure (non fatal)\n", + pci_name(rdev->lldi.pdev)); + return _c4iw_write_mem_inline(rdev, addr, len, + data); + } else + return 0; + } else + return _c4iw_write_mem_inline(rdev, addr, len, data); + } else + return _c4iw_write_mem_inline(rdev, addr, len, data); +} + /* * Build and write a TPT entry. 
* IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size, @@ -760,19 +887,23 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, struct c4iw_fr_page_list *c4pl; struct c4iw_dev *dev = to_c4iw_dev(device); dma_addr_t dma_addr; - int size = sizeof *c4pl + page_list_len * sizeof(u64); + int pll_len = roundup(page_list_len * sizeof(u64), 32); - c4pl = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, size, - &dma_addr, GFP_KERNEL); + c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL); if (!c4pl) return ERR_PTR(-ENOMEM); + c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, + pll_len, &dma_addr, + GFP_KERNEL); + if (!c4pl->ibpl.page_list) { + kfree(c4pl); + return ERR_PTR(-ENOMEM); + } dma_unmap_addr_set(c4pl, mapping, dma_addr); c4pl->dma_addr = dma_addr; c4pl->dev = dev; - c4pl->size = size; - c4pl->ibpl.page_list = (u64 *)(c4pl + 1); - c4pl->ibpl.max_page_list_len = page_list_len; + c4pl->ibpl.max_page_list_len = pll_len; return &c4pl->ibpl; } @@ -781,8 +912,10 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) { struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); - dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, c4pl->size, - c4pl, dma_unmap_addr(c4pl, mapping)); + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, + c4pl->ibpl.max_page_list_len, + c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); + kfree(c4pl); } int c4iw_dereg_mr(struct ib_mr *ib_mr) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e084fdc6da7f..7e94c9a656a1 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -162,8 +162,14 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) */ if (addr >= rdev->oc_mw_pa) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + else { + if (is_t5(rdev->lldi.adapter_type)) + vma->vm_page_prot = + 
t4_pgprot_wc(vma->vm_page_prot); + else + vma->vm_page_prot = + pgprot_noncached(vma->vm_page_prot); + } ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, len, vma->vm_page_prot); @@ -263,7 +269,7 @@ static int c4iw_query_device(struct ib_device *ibdev, dev = to_c4iw_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); - props->hw_ver = dev->rdev.lldi.adapter_type; + props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); props->fw_ver = dev->rdev.lldi.fw_vers; props->device_cap_flags = dev->device_cap_flags; props->page_size_cap = T4_PAGESIZE_MASK; @@ -346,7 +352,8 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev.dev); PDBG("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%d\n", c4iw_dev->rdev.lldi.adapter_type); + return sprintf(buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 17ba4f8bc12d..5b059e2d80cc 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -42,10 +42,21 @@ static int ocqp_support = 1; module_param(ocqp_support, int, 0644); MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); -int db_fc_threshold = 2000; +int db_fc_threshold = 1000; module_param(db_fc_threshold, int, 0644); -MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic " - "db flow control mode (default = 2000)"); +MODULE_PARM_DESC(db_fc_threshold, + "QP count/threshold that triggers" + " automatic db flow control mode (default = 1000)"); + +int db_coalescing_threshold; +module_param(db_coalescing_threshold, int, 0644); +MODULE_PARM_DESC(db_coalescing_threshold, + "QP count/threshold that triggers" + " disabling db coalescing (default = 0)"); 
+ +static int max_fr_immd = T4_MAX_FR_IMMD; +module_param(max_fr_immd, int, 0644); +MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate"); static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { @@ -76,7 +87,7 @@ static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { - if (!ocqp_support || !t4_ocqp_supported()) + if (!ocqp_support || !ocqp_supported(&rdev->lldi)) return -ENOSYS; sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize); if (!sq->dma_addr) @@ -129,7 +140,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, int wr_len; struct c4iw_wr_wait wr_wait; struct sk_buff *skb; - int ret; + int ret = 0; int eqsize; wq->sq.qid = c4iw_get_qpid(rdev, uctx); @@ -169,25 +180,24 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, } if (user) { - ret = alloc_oc_sq(rdev, &wq->sq); - if (ret) + if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq)) goto free_hwaddr; - - ret = alloc_host_sq(rdev, &wq->sq); - if (ret) - goto free_sq; - } else + } else { ret = alloc_host_sq(rdev, &wq->sq); if (ret) goto free_hwaddr; + } + memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, &(wq->rq.dma_addr), GFP_KERNEL); - if (!wq->rq.queue) + if (!wq->rq.queue) { + ret = -ENOMEM; goto free_sq; + } PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", __func__, wq->sq.queue, (unsigned long long)virt_to_phys(wq->sq.queue), @@ -532,7 +542,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, } static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + struct ib_send_wr *wr, u8 *len16, u8 t5dev) { struct fw_ri_immd *imdp; @@ -554,28 +564,51 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, wqe->fr.va_hi = 
cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); - WARN_ON(pbllen > T4_MAX_FR_IMMD); - imdp = (struct fw_ri_immd *)(&wqe->fr + 1); - imdp->op = FW_RI_DATA_IMMD; - imdp->r1 = 0; - imdp->r2 = 0; - imdp->immdlen = cpu_to_be32(pbllen); - p = (__be64 *)(imdp + 1); - rem = pbllen; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); - rem -= sizeof *p; - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; - } - BUG_ON(rem < 0); - while (rem) { - *p = 0; - rem -= sizeof *p; - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; + + if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { + struct c4iw_fr_page_list *c4pl = + to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); + struct fw_ri_dsgl *sglp; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + wr->wr.fast_reg.page_list->page_list[i] = (__force u64) + cpu_to_be64((u64) + wr->wr.fast_reg.page_list->page_list[i]); + } + + sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); + sglp->op = FW_RI_DATA_DSGL; + sglp->r1 = 0; + sglp->nsge = cpu_to_be16(1); + sglp->addr0 = cpu_to_be64(c4pl->dma_addr); + sglp->len0 = cpu_to_be32(pbllen); + + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); + } else { + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + *p = cpu_to_be64( + (u64)wr->wr.fast_reg.page_list->page_list[i]); + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) + + pbllen, 16); } - *len16 = DIV_ROUND_UP(sizeof 
wqe->fr + sizeof *imdp + pbllen, 16); return 0; } @@ -676,7 +709,10 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); + err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, + is_t5( + qhp->rhp->rdev.lldi.adapter_type) ? + 1 : 0); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -1448,6 +1484,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) rhp->db_state = NORMAL; idr_for_each(&rhp->qpidr, enable_qp_db, NULL); } + if (db_coalescing_threshold >= 0) + if (rhp->qpcnt <= db_coalescing_threshold) + cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]); spin_unlock_irq(&rhp->lock); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); @@ -1559,11 +1598,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, spin_lock_irq(&rhp->lock); if (rhp->db_state != NORMAL) t4_disable_wq_db(&qhp->wq); - if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { + rhp->qpcnt++; + if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { rhp->rdev.stats.db_state_transitions++; rhp->db_state = FLOW_CONTROL; idr_for_each(&rhp->qpidr, disable_qp_db, NULL); } + if (db_coalescing_threshold >= 0) + if (rhp->qpcnt > db_coalescing_threshold) + cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]); ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); spin_unlock_irq(&rhp->lock); if (ret) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 16f26ab29302..ebcb03bd1b72 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -84,7 +84,7 @@ struct t4_status_page { sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ sizeof(struct fw_ri_immd)) & ~31UL) -#define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define 
T4_MAX_FR_DEPTH (1024 / sizeof(u64)) #define T4_RQ_NUM_SLOTS 2 #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) @@ -280,15 +280,6 @@ static inline pgprot_t t4_pgprot_wc(pgprot_t prot) #endif } -static inline int t4_ocqp_supported(void) -{ -#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) - return 1; -#else - return 0; -#endif -} - enum { T4_SQ_ONCHIP = (1<<0), }; diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a479375a8fd8..e0c404bdc4a8 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -410,6 +410,7 @@ static struct file_system_type ipathfs_fs_type = { .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init ipath_init_ipathfs(void) { diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 439c35d4a669..ea93870266eb 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -620,7 +620,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, goto bail; } - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; dev->opstats[opcode].n_bytes += tlen; dev->opstats[opcode].n_packets++; diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index e0d79b2395e4..add98d01476c 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -362,7 +362,6 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) INIT_LIST_HEAD(&dev->sriov.cm_list); dev->sriov.sl_id_map = RB_ROOT; idr_init(&dev->sriov.pv_id_table); - idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL); } /* slave = -1 ==> all slaves */ diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 67647e264611..418004c93feb 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2948,7 
+2948,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", nesvnic->netdev->name, vlan_tag); - __vlan_hwaccel_put_tag(rx_skb, vlan_tag); + __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); } if (nes_use_lro) lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 85cf4d1ac442..49eb5111d2cd 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1599,7 +1599,7 @@ static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, /* Enable/Disable VLAN Stripping */ u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) u32temp &= 0xfdffffff; else u32temp |= 0x02000000; @@ -1614,10 +1614,10 @@ static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_feat * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -1628,7 +1628,7 @@ static int nes_set_features(struct net_device *netdev, netdev_features_t feature struct nes_device *nesdev = nesvnic->nesdev; u32 changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) nes_vlan_mode(netdev, nesdev, features); return 0; @@ -1706,11 +1706,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->dev_addr[4] = (u8)(u64temp>>8); netdev->dev_addr[5] = (u8)u64temp; - netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX; + netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX; if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) netdev->hw_features |= NETIF_F_TSO; - netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_LRO; nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 8349f9c5064c..1e603a375069 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,7 +1,7 @@ config INFINIBAND_QIB - tristate "QLogic PCIe HCA support" + tristate "Intel PCIe HCA support" depends on 64BIT ---help--- - This is a low-level driver for QLogic PCIe QLE InfiniBand host - channel adapters. This driver does not support the QLogic + This is a low-level driver for Intel PCIe QLE InfiniBand host + channel adapters. This driver does not support the Intel HyperTransport card (model QHT7140). 
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 5423edcab51f..216092477dfc 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -63,8 +64,8 @@ MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("QLogic <support@qlogic.com>"); -MODULE_DESCRIPTION("QLogic IB driver"); +MODULE_AUTHOR("Intel <ibsupport@intel.com>"); +MODULE_DESCRIPTION("Intel IB driver"); MODULE_VERSION(QIB_DRIVER_VERSION); /* diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 644bd6f6467c..f247fc6e6182 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -604,6 +604,7 @@ static struct file_system_type qibfs_fs_type = { .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init qib_init_qibfs(void) { diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index a099ac171e22..0232ae56b1fa 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -51,7 +52,7 @@ static u32 qib_6120_iblink_state(u64); /* * This file contains all the chip-specific register information and - * access functions for the QLogic QLogic_IB PCI-Express chip. + * access functions for the Intel Intel_IB PCI-Express chip. 
* */ diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 50e33aa0b4e3..173f805790da 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -1138,7 +1138,7 @@ void qib_disable_after_error(struct qib_devdata *dd) static void qib_remove_one(struct pci_dev *); static int qib_init_one(struct pci_dev *, const struct pci_device_id *); -#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " +#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { @@ -1355,7 +1355,7 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dd = qib_init_iba6120_funcs(pdev, ent); #else qib_early_err(&pdev->dev, - "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", ent->device); dd = ERR_PTR(-ENODEV); #endif @@ -1371,7 +1371,7 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) default: qib_early_err(&pdev->dev, - "Failing on unknown QLogic deviceid 0x%x\n", + "Failing on unknown Intel deviceid 0x%x\n", ent->device); ret = -ENODEV; } diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index 50a8a0d4fe67..911205d3d5a0 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index ba51a4715a1d..7c0ab16a2fe2 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -2224,7 +2224,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->dma_ops = &qib_dma_mapping_ops; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), - "QLogic Infiniband HCA %s", init_utsname()->nodename); + "Intel Infiniband HCA %s", init_utsname()->nodename); ret = ib_register_device(ibdev, qib_create_port_files); if (ret) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 67b0c1d23678..1ef880de3a41 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -758,9 +758,13 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); - if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) - ipoib_warn(priv, "request notify on send CQ failed\n"); netif_stop_queue(dev); + rc = ib_req_notify_cq(priv->send_cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + if (rc < 0) + ipoib_warn(priv, "request notify on send CQ failed\n"); + else if (rc) + ipoib_send_comp_handler(priv->send_cq, dev); } } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8534afd04e7c..554b9063da54 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -730,7 
+730,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((header->proto != htons(ETH_P_IP)) && (header->proto != htons(ETH_P_IPV6)) && (header->proto != htons(ETH_P_ARP)) && - (header->proto != htons(ETH_P_RARP))) { + (header->proto != htons(ETH_P_RARP)) && + (header->proto != htons(ETH_P_TIPC))) { /* ethertype not supported by IPoIB */ ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -751,6 +752,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) switch (header->proto) { case htons(ETH_P_IP): case htons(ETH_P_IPV6): + case htons(ETH_P_TIPC): neigh = ipoib_neigh_get(dev, cb->hwaddr); if (unlikely(!neigh)) { neigh_add_path(skb, cb->hwaddr, dev); diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 7cd74e29cbc8..9135606c8649 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -158,14 +158,10 @@ static unsigned int get_time_pit(void) #define GET_TIME(x) rdtscl(x) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" -#elif defined(__alpha__) +#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_TILE) #define GET_TIME(x) do { x = get_cycles(); } while (0) #define DELTA(x,y) ((y)-(x)) -#define TIME_NAME "PCC" -#elif defined(CONFIG_MN10300) || defined(CONFIG_TILE) -#define GET_TIME(x) do { x = get_cycles(); } while (0) -#define DELTA(x, y) ((x) - (y)) -#define TIME_NAME "TSC" +#define TIME_NAME "get_cycles" #else #define FAKE_TIME static unsigned long analog_faketime = 0; diff --git a/drivers/input/keyboard/tc3589x-keypad.c b/drivers/input/keyboard/tc3589x-keypad.c index 2fb0d76a04c4..208de7cbb7fa 100644 --- a/drivers/input/keyboard/tc3589x-keypad.c +++ b/drivers/input/keyboard/tc3589x-keypad.c @@ -70,8 +70,6 @@ #define TC3589x_EVT_INT_CLR 0x2 #define TC3589x_KBD_INT_CLR 0x1 -#define TC3589x_KBD_KEYMAP_SIZE 64 - /** * struct tc_keypad - data structure used by keypad driver * @tc3589x: pointer to tc35893 @@ 
-88,7 +86,7 @@ struct tc_keypad { const struct tc3589x_keypad_platform_data *board; unsigned int krow; unsigned int kcol; - unsigned short keymap[TC3589x_KBD_KEYMAP_SIZE]; + unsigned short *keymap; bool keypad_stopped; }; @@ -338,12 +336,14 @@ static int tc3589x_keypad_probe(struct platform_device *pdev) error = matrix_keypad_build_keymap(plat->keymap_data, NULL, TC3589x_MAX_KPROW, TC3589x_MAX_KPCOL, - keypad->keymap, input); + NULL, input); if (error) { dev_err(&pdev->dev, "Failed to build keymap\n"); goto err_free_mem; } + keypad->keymap = input->keycode; + input_set_capability(input, EV_MSC, MSC_SCAN); if (!plat->no_autorepeat) __set_bit(EV_REP, input->evbit); diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 7b99fc7c9438..0238e0e14335 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -490,6 +490,29 @@ static void alps_decode_rushmore(struct alps_fields *f, unsigned char *p) f->y_map |= (p[5] & 0x20) << 6; } +static void alps_decode_dolphin(struct alps_fields *f, unsigned char *p) +{ + f->first_mp = !!(p[0] & 0x02); + f->is_mp = !!(p[0] & 0x20); + + f->fingers = ((p[0] & 0x6) >> 1 | + (p[0] & 0x10) >> 2); + f->x_map = ((p[2] & 0x60) >> 5) | + ((p[4] & 0x7f) << 2) | + ((p[5] & 0x7f) << 9) | + ((p[3] & 0x07) << 16) | + ((p[3] & 0x70) << 15) | + ((p[0] & 0x01) << 22); + f->y_map = (p[1] & 0x7f) | + ((p[2] & 0x1f) << 7); + + f->x = ((p[1] & 0x7f) | ((p[4] & 0x0f) << 7)); + f->y = ((p[2] & 0x7f) | ((p[4] & 0xf0) << 3)); + f->z = (p[0] & 4) ? 
0 : p[5] & 0x7f; + + alps_decode_buttons_v3(f, p); +} + static void alps_process_touchpad_packet_v3(struct psmouse *psmouse) { struct alps_data *priv = psmouse->private; @@ -874,7 +897,8 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) } /* Bytes 2 - pktsize should have 0 in the highest bit */ - if (psmouse->pktcnt >= 2 && psmouse->pktcnt <= psmouse->pktsize && + if (priv->proto_version != ALPS_PROTO_V5 && + psmouse->pktcnt >= 2 && psmouse->pktcnt <= psmouse->pktsize && (psmouse->packet[psmouse->pktcnt - 1] & 0x80)) { psmouse_dbg(psmouse, "refusing packet[%i] = %x\n", psmouse->pktcnt - 1, @@ -994,8 +1018,7 @@ static int alps_rpt_cmd(struct psmouse *psmouse, int init_command, return 0; } -static int alps_enter_command_mode(struct psmouse *psmouse, - unsigned char *resp) +static int alps_enter_command_mode(struct psmouse *psmouse) { unsigned char param[4]; @@ -1004,14 +1027,12 @@ static int alps_enter_command_mode(struct psmouse *psmouse, return -1; } - if (param[0] != 0x88 || (param[1] != 0x07 && param[1] != 0x08)) { + if ((param[0] != 0x88 || (param[1] != 0x07 && param[1] != 0x08)) && + param[0] != 0x73) { psmouse_dbg(psmouse, "unknown response while entering command mode\n"); return -1; } - - if (resp) - *resp = param[2]; return 0; } @@ -1176,7 +1197,7 @@ static int alps_passthrough_mode_v3(struct psmouse *psmouse, { int reg_val, ret = -1; - if (alps_enter_command_mode(psmouse, NULL)) + if (alps_enter_command_mode(psmouse)) return -1; reg_val = alps_command_mode_read_reg(psmouse, reg_base + 0x0008); @@ -1216,7 +1237,7 @@ static int alps_probe_trackstick_v3(struct psmouse *psmouse, int reg_base) { int ret = -EIO, reg_val; - if (alps_enter_command_mode(psmouse, NULL)) + if (alps_enter_command_mode(psmouse)) goto error; reg_val = alps_command_mode_read_reg(psmouse, reg_base + 0x08); @@ -1279,7 +1300,7 @@ static int alps_setup_trackstick_v3(struct psmouse *psmouse, int reg_base) * supported by this driver. 
If bit 1 isn't set the packet * format is different. */ - if (alps_enter_command_mode(psmouse, NULL) || + if (alps_enter_command_mode(psmouse) || alps_command_mode_write_reg(psmouse, reg_base + 0x08, 0x82) || alps_exit_command_mode(psmouse)) @@ -1306,7 +1327,7 @@ static int alps_hw_init_v3(struct psmouse *psmouse) alps_setup_trackstick_v3(psmouse, ALPS_REG_BASE_PINNACLE) == -EIO) goto error; - if (alps_enter_command_mode(psmouse, NULL) || + if (alps_enter_command_mode(psmouse) || alps_absolute_mode_v3(psmouse)) { psmouse_err(psmouse, "Failed to enter absolute mode\n"); goto error; @@ -1381,7 +1402,7 @@ static int alps_hw_init_rushmore_v3(struct psmouse *psmouse) priv->flags &= ~ALPS_DUALPOINT; } - if (alps_enter_command_mode(psmouse, NULL) || + if (alps_enter_command_mode(psmouse) || alps_command_mode_read_reg(psmouse, 0xc2d9) == -1 || alps_command_mode_write_reg(psmouse, 0xc2cb, 0x00)) goto error; @@ -1431,7 +1452,7 @@ static int alps_hw_init_v4(struct psmouse *psmouse) struct ps2dev *ps2dev = &psmouse->ps2dev; unsigned char param[4]; - if (alps_enter_command_mode(psmouse, NULL)) + if (alps_enter_command_mode(psmouse)) goto error; if (alps_absolute_mode_v4(psmouse)) { @@ -1499,6 +1520,23 @@ error: return -1; } +static int alps_hw_init_dolphin_v1(struct psmouse *psmouse) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char param[2]; + + /* This is dolphin "v1" as empirically defined by florin9doi */ + param[0] = 0x64; + param[1] = 0x28; + + if (ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSTREAM) || + ps2_command(ps2dev, ¶m[0], PSMOUSE_CMD_SETRATE) || + ps2_command(ps2dev, ¶m[1], PSMOUSE_CMD_SETRATE)) + return -1; + + return 0; +} + static void alps_set_defaults(struct alps_data *priv) { priv->byte0 = 0x8f; @@ -1532,6 +1570,21 @@ static void alps_set_defaults(struct alps_data *priv) priv->nibble_commands = alps_v4_nibble_commands; priv->addr_command = PSMOUSE_CMD_DISABLE; break; + case ALPS_PROTO_V5: + priv->hw_init = alps_hw_init_dolphin_v1; + 
priv->process_packet = alps_process_packet_v3; + priv->decode_fields = alps_decode_dolphin; + priv->set_abs_params = alps_set_abs_params_mt; + priv->nibble_commands = alps_v3_nibble_commands; + priv->addr_command = PSMOUSE_CMD_RESET_WRAP; + priv->byte0 = 0xc8; + priv->mask0 = 0xc8; + priv->flags = 0; + priv->x_max = 1360; + priv->y_max = 660; + priv->x_bits = 23; + priv->y_bits = 12; + break; } } @@ -1592,6 +1645,12 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv) if (alps_match_table(psmouse, priv, e7, ec) == 0) { return 0; + } else if (e7[0] == 0x73 && e7[1] == 0x03 && e7[2] == 0x50 && + ec[0] == 0x73 && ec[1] == 0x01) { + priv->proto_version = ALPS_PROTO_V5; + alps_set_defaults(priv); + + return 0; } else if (ec[0] == 0x88 && ec[1] == 0x08) { priv->proto_version = ALPS_PROTO_V3; alps_set_defaults(priv); diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index 970480551b6e..eee59853b9ce 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -16,6 +16,7 @@ #define ALPS_PROTO_V2 2 #define ALPS_PROTO_V3 3 #define ALPS_PROTO_V4 4 +#define ALPS_PROTO_V5 5 /** * struct alps_model_info - touchpad ID table diff --git a/drivers/input/mouse/cypress_ps2.c b/drivers/input/mouse/cypress_ps2.c index 1673dc6c8092..f51765fff054 100644 --- a/drivers/input/mouse/cypress_ps2.c +++ b/drivers/input/mouse/cypress_ps2.c @@ -236,6 +236,13 @@ static int cypress_read_fw_version(struct psmouse *psmouse) cytp->fw_version = param[2] & FW_VERSION_MASX; cytp->tp_metrics_supported = (param[2] & TP_METRICS_MASK) ? 1 : 0; + /* + * Trackpad fw_version 11 (in Dell XPS12) yields a bogus response to + * CYTP_CMD_READ_TP_METRICS so do not try to use it. LP: #1103594. 
+ */ + if (cytp->fw_version >= 11) + cytp->tp_metrics_supported = 0; + psmouse_dbg(psmouse, "cytp->fw_version = %d\n", cytp->fw_version); psmouse_dbg(psmouse, "cytp->tp_metrics_supported = %d\n", cytp->tp_metrics_supported); @@ -258,6 +265,9 @@ static int cypress_read_tp_metrics(struct psmouse *psmouse) cytp->tp_res_x = cytp->tp_max_abs_x / cytp->tp_width; cytp->tp_res_y = cytp->tp_max_abs_y / cytp->tp_high; + if (!cytp->tp_metrics_supported) + return 0; + memset(param, 0, sizeof(param)); if (cypress_send_ext_cmd(psmouse, CYTP_CMD_READ_TP_METRICS, param) == 0) { /* Update trackpad parameters. */ @@ -315,18 +325,15 @@ static int cypress_read_tp_metrics(struct psmouse *psmouse) static int cypress_query_hardware(struct psmouse *psmouse) { - struct cytp_data *cytp = psmouse->private; int ret; ret = cypress_read_fw_version(psmouse); if (ret) return ret; - if (cytp->tp_metrics_supported) { - ret = cypress_read_tp_metrics(psmouse); - if (ret) - return ret; - } + ret = cypress_read_tp_metrics(psmouse); + if (ret) + return ret; return 0; } diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 41b6fbf60112..0bfd8cf25200 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -359,7 +359,7 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) case 0x802: /* Intuos4 General Pen */ case 0x804: /* Intuos4 Marker Pen */ case 0x40802: /* Intuos4 Classic Pen */ - case 0x18803: /* DTH2242 Grip Pen */ + case 0x18802: /* DTH2242 Grip Pen */ case 0x022: wacom->tool[idx] = BTN_TOOL_PEN; break; @@ -1912,7 +1912,7 @@ static const struct wacom_features wacom_features_0xBB = { "Wacom Intuos4 12x19", WACOM_PKGLEN_INTUOS, 97536, 60960, 2047, 63, INTUOS4L, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES }; static const struct wacom_features wacom_features_0xBC = - { "Wacom Intuos4 WL", WACOM_PKGLEN_INTUOS, 40840, 25400, 2047, + { "Wacom Intuos4 WL", WACOM_PKGLEN_INTUOS, 40640, 25400, 2047, 63, INTUOS4, WACOM_INTUOS3_RES, 
WACOM_INTUOS3_RES }; static const struct wacom_features wacom_features_0x26 = { "Wacom Intuos5 touch S", WACOM_PKGLEN_INTUOS, 31496, 19685, 2047, @@ -2017,6 +2017,9 @@ static const struct wacom_features wacom_features_0x100 = static const struct wacom_features wacom_features_0x101 = { "Wacom ISDv4 101", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x10D = + { "Wacom ISDv4 10D", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, + 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x4001 = { "Wacom ISDv4 4001", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2141,7 +2144,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x44) }, { USB_DEVICE_WACOM(0x45) }, { USB_DEVICE_WACOM(0x59) }, - { USB_DEVICE_WACOM(0x5D) }, + { USB_DEVICE_DETAILED(0x5D, USB_CLASS_HID, 0, 0) }, { USB_DEVICE_WACOM(0xB0) }, { USB_DEVICE_WACOM(0xB1) }, { USB_DEVICE_WACOM(0xB2) }, @@ -2201,11 +2204,12 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xEF) }, { USB_DEVICE_WACOM(0x100) }, { USB_DEVICE_WACOM(0x101) }, + { USB_DEVICE_WACOM(0x10D) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x47) }, { USB_DEVICE_WACOM(0xF4) }, { USB_DEVICE_WACOM(0xF8) }, - { USB_DEVICE_WACOM(0xF6) }, + { USB_DEVICE_DETAILED(0xF6, USB_CLASS_HID, 0, 0) }, { USB_DEVICE_WACOM(0xFA) }, { USB_DEVICE_LENOVO(0x6004) }, { } diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 4f702b3ec1a3..434c3df250ca 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -236,7 +236,12 @@ static void __ads7846_disable(struct ads7846 *ts) /* Must be called with ts->lock held */ static void __ads7846_enable(struct ads7846 *ts) { - regulator_enable(ts->reg); + int error; + + error = regulator_enable(ts->reg); + if (error != 0) + dev_err(&ts->spi->dev, "Failed 
to enable supply: %d\n", error); + ads7846_restart(ts); } diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index d04f810cb1dd..59aa24002c7b 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -176,11 +176,17 @@ /* Define for MXT_GEN_COMMAND_T6 */ #define MXT_BOOT_VALUE 0xa5 #define MXT_BACKUP_VALUE 0x55 -#define MXT_BACKUP_TIME 25 /* msec */ -#define MXT_RESET_TIME 65 /* msec */ +#define MXT_BACKUP_TIME 50 /* msec */ +#define MXT_RESET_TIME 200 /* msec */ #define MXT_FWRESET_TIME 175 /* msec */ +/* MXT_SPT_GPIOPWM_T19 field */ +#define MXT_GPIO0_MASK 0x04 +#define MXT_GPIO1_MASK 0x08 +#define MXT_GPIO2_MASK 0x10 +#define MXT_GPIO3_MASK 0x20 + /* Command to unlock bootloader */ #define MXT_UNLOCK_CMD_MSB 0xaa #define MXT_UNLOCK_CMD_LSB 0xdc @@ -212,6 +218,8 @@ /* Touchscreen absolute values */ #define MXT_MAX_AREA 0xff +#define MXT_PIXELS_PER_MM 20 + struct mxt_info { u8 family_id; u8 variant_id; @@ -243,6 +251,8 @@ struct mxt_data { const struct mxt_platform_data *pdata; struct mxt_object *object_table; struct mxt_info info; + bool is_tp; + unsigned int irq; unsigned int max_x; unsigned int max_y; @@ -251,6 +261,7 @@ struct mxt_data { u8 T6_reportid; u8 T9_reportid_min; u8 T9_reportid_max; + u8 T19_reportid; }; static bool mxt_object_readable(unsigned int type) @@ -502,6 +513,21 @@ static int mxt_write_object(struct mxt_data *data, return mxt_write_reg(data->client, reg + offset, val); } +static void mxt_input_button(struct mxt_data *data, struct mxt_message *message) +{ + struct input_dev *input = data->input_dev; + bool button; + int i; + + /* Active-low switch */ + for (i = 0; i < MXT_NUM_GPIO; i++) { + if (data->pdata->key_map[i] == KEY_RESERVED) + continue; + button = !(message->message[0] & MXT_GPIO0_MASK << i); + input_report_key(input, data->pdata->key_map[i], button); + } +} + static void mxt_input_touchevent(struct mxt_data *data, struct mxt_message 
*message, int id) { @@ -585,6 +611,9 @@ static irqreturn_t mxt_interrupt(int irq, void *dev_id) int id = reportid - data->T9_reportid_min; mxt_input_touchevent(data, &message, id); update_input = true; + } else if (message.reportid == data->T19_reportid) { + mxt_input_button(data, &message); + update_input = true; } else { mxt_dump_message(dev, &message); } @@ -764,6 +793,9 @@ static int mxt_get_object_table(struct mxt_data *data) data->T9_reportid_min = min_id; data->T9_reportid_max = max_id; break; + case MXT_SPT_GPIOPWM_T19: + data->T19_reportid = min_id; + break; } } @@ -777,7 +809,7 @@ static void mxt_free_object_table(struct mxt_data *data) data->T6_reportid = 0; data->T9_reportid_min = 0; data->T9_reportid_max = 0; - + data->T19_reportid = 0; } static int mxt_initialize(struct mxt_data *data) @@ -1115,9 +1147,13 @@ static int mxt_probe(struct i2c_client *client, goto err_free_mem; } - input_dev->name = "Atmel maXTouch Touchscreen"; + data->is_tp = pdata && pdata->is_tp; + + input_dev->name = (data->is_tp) ? 
"Atmel maXTouch Touchpad" : + "Atmel maXTouch Touchscreen"; snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0", client->adapter->nr, client->addr); + input_dev->phys = data->phys; input_dev->id.bustype = BUS_I2C; @@ -1140,6 +1176,29 @@ static int mxt_probe(struct i2c_client *client, __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_TOUCH, input_dev->keybit); + if (data->is_tp) { + int i; + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); + __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); + + for (i = 0; i < MXT_NUM_GPIO; i++) + if (pdata->key_map[i] != KEY_RESERVED) + __set_bit(pdata->key_map[i], input_dev->keybit); + + __set_bit(BTN_TOOL_FINGER, input_dev->keybit); + __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUINTTAP, input_dev->keybit); + + input_abs_set_res(input_dev, ABS_X, MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_Y, MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_MT_POSITION_X, + MXT_PIXELS_PER_MM); + input_abs_set_res(input_dev, ABS_MT_POSITION_Y, + MXT_PIXELS_PER_MM); + } + /* For single touch */ input_set_abs_params(input_dev, ABS_X, 0, data->max_x, 0, 0); @@ -1258,6 +1317,7 @@ static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume); static const struct i2c_device_id mxt_id[] = { { "qt602240_ts", 0 }, { "atmel_mxt_ts", 0 }, + { "atmel_mxt_tp", 0 }, { "mXT224", 0 }, { } }; diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 4a29ddf6bf1e..1443532fe6c4 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -314,15 +314,27 @@ static int mms114_start(struct mms114_data *data) struct i2c_client *client = data->client; int error; - if (data->core_reg) - regulator_enable(data->core_reg); - if (data->io_reg) - regulator_enable(data->io_reg); + error = regulator_enable(data->core_reg); + if (error) { + 
dev_err(&client->dev, "Failed to enable avdd: %d\n", error); + return error; + } + + error = regulator_enable(data->io_reg); + if (error) { + dev_err(&client->dev, "Failed to enable vdd: %d\n", error); + regulator_disable(data->core_reg); + return error; + } + mdelay(MMS114_POWERON_DELAY); error = mms114_setup_regs(data); - if (error < 0) + if (error < 0) { + regulator_disable(data->io_reg); + regulator_disable(data->core_reg); return error; + } if (data->pdata->cfg_pin) data->pdata->cfg_pin(true); @@ -335,16 +347,20 @@ static int mms114_start(struct mms114_data *data) static void mms114_stop(struct mms114_data *data) { struct i2c_client *client = data->client; + int error; disable_irq(client->irq); if (data->pdata->cfg_pin) data->pdata->cfg_pin(false); - if (data->io_reg) - regulator_disable(data->io_reg); - if (data->core_reg) - regulator_disable(data->core_reg); + error = regulator_disable(data->io_reg); + if (error) + dev_warn(&client->dev, "Failed to disable vdd: %d\n", error); + + error = regulator_disable(data->core_reg); + if (error) + dev_warn(&client->dev, "Failed to disable avdd: %d\n", error); } static int mms114_input_open(struct input_dev *dev) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 5c514d0711d1..c332fb98480d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -130,7 +130,7 @@ config IRQ_REMAP # OMAP IOMMU support config OMAP_IOMMU bool "OMAP IOMMU Support" - depends on ARCH_OMAP + depends on ARCH_OMAP2PLUS select IOMMU_API config OMAP_IOVMM diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98f555dafb55..b287ca33833d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2466,18 +2466,16 @@ static int device_change_notifier(struct notifier_block *nb, /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); - if (dma_domain) - goto out; - dma_domain = dma_ops_domain_alloc(); - if (!dma_domain) - goto out; - 
dma_domain->target_dev = devid; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - list_add_tail(&dma_domain->list, &iommu_pd_list); - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - dev_data = get_dev_data(dev); + if (!dma_domain) { + dma_domain = dma_ops_domain_alloc(); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + } dev->archdata.dma_ops = &amd_iommu_dma_ops; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index b6ecddb63cd0..e3c2d74b7684 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -980,7 +980,7 @@ static void __init free_iommu_all(void) * BIOS should disable L2B micellaneous clock gating by setting * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b */ -static void __init amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) +static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) { u32 value; diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index dc7e478b7e5f..e5cdaf87822c 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1083,6 +1083,7 @@ static const char *dma_remap_fault_reasons[] = "non-zero reserved fields in RTP", "non-zero reserved fields in CTP", "non-zero reserved fields in PTE", + "PCE for translation request specifies blocking", }; static const char *irq_remap_fault_reasons[] = diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index d56f8c17c5fe..7c11ff368d07 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -2,7 +2,6 @@ #include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/cpumask.h> #include <linux/errno.h> #include <linux/msi.h> #include <linux/irq.h> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 
644d72468423..fc6aebf1e4b2 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -236,7 +236,8 @@ static int gic_retrigger(struct irq_data *d) if (gic_arch_extn.irq_retrigger) return gic_arch_extn.irq_retrigger(d); - return -ENXIO; + /* the genirq layer expects 0 if we can't retrigger in hardware */ + return 0; } #ifdef CONFIG_SMP @@ -648,7 +649,7 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* Convert our logical CPU mask into a physical one. */ for_each_cpu(cpu, mask) - map |= 1 << cpu_logical_map(cpu); + map |= gic_cpu_map[cpu]; /* * Ensure that stores to Normal memory are visible to the diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index 832bc807ed20..cc9f1927a322 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -469,8 +469,7 @@ static int capidrv_add_ack(struct capidrv_ncci *nccip, { struct ncci_datahandle_queue *n, **pp; - n = (struct ncci_datahandle_queue *) - kmalloc(sizeof(struct ncci_datahandle_queue), GFP_ATOMIC); + n = kmalloc(sizeof(struct ncci_datahandle_queue), GFP_ATOMIC); if (!n) { printk(KERN_ERR "capidrv: kmalloc ncci_datahandle failed\n"); return -1; diff --git a/drivers/isdn/divert/isdn_divert.c b/drivers/isdn/divert/isdn_divert.c index db432e635496..50749a70c5ca 100644 --- a/drivers/isdn/divert/isdn_divert.c +++ b/drivers/isdn/divert/isdn_divert.c @@ -441,8 +441,7 @@ static int isdn_divert_icall(isdn_ctrl *ic) switch (dv->rule.action) { case DEFLECT_IGNORE: - return (0); - break; + return 0; case DEFLECT_ALERT: case DEFLECT_PROCEED: @@ -510,10 +509,9 @@ static int isdn_divert_icall(isdn_ctrl *ic) break; default: - return (0); /* ignore call */ - break; + return 0; /* ignore call */ } /* switch action */ - break; + break; /* will break the 'for' looping */ } /* scan_table */ if (cs) { diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig index 5313c9ea44dc..d9edcc94c2a8 100644 --- a/drivers/isdn/hisax/Kconfig +++ 
b/drivers/isdn/hisax/Kconfig @@ -237,7 +237,8 @@ config HISAX_MIC config HISAX_NETJET bool "NETjet card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on VIRT_TO_BUS help This enables HiSax support for the NetJet from Traverse Technologies. @@ -248,7 +249,8 @@ config HISAX_NETJET config HISAX_NETJET_U bool "NETspider U card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on VIRT_TO_BUS help This enables HiSax support for the Netspider U interface ISDN card from Traverse Technologies. diff --git a/drivers/isdn/hisax/fsm.c b/drivers/isdn/hisax/fsm.c index 1bb291021fdb..c7a94713e9ec 100644 --- a/drivers/isdn/hisax/fsm.c +++ b/drivers/isdn/hisax/fsm.c @@ -26,7 +26,7 @@ FsmNew(struct Fsm *fsm, struct FsmNode *fnlist, int fncount) { int i; - fsm->jumpmatrix = (FSMFNPTR *) + fsm->jumpmatrix = kzalloc(sizeof(FSMFNPTR) * fsm->state_count * fsm->event_count, GFP_KERNEL); if (!fsm->jumpmatrix) return -ENOMEM; diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c index 90f34ae2b80f..dc4574f735ef 100644 --- a/drivers/isdn/hisax/hfc_sx.c +++ b/drivers/isdn/hisax/hfc_sx.c @@ -1479,7 +1479,7 @@ int setup_hfcsx(struct IsdnCard *card) release_region(cs->hw.hfcsx.base, 2); return (0); } - if (!(cs->hw.hfcsx.extra = (void *) + if (!(cs->hw.hfcsx.extra = kmalloc(sizeof(struct hfcsx_extra), GFP_ATOMIC))) { release_region(cs->hw.hfcsx.base, 2); printk(KERN_WARNING "HFC-SX: unable to allocate memory\n"); diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index babc621a07fb..88d657dff474 100644 
--- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -1385,7 +1385,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev) if (memcmp(eth->h_dest, dev->dev_addr, ETH_ALEN)) skb->pkt_type = PACKET_OTHERHOST; } - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index d8a7d8323414..ebaebdf30f98 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -902,7 +902,9 @@ isdn_tty_send_msg(modem_info *info, atemu *m, char *msg) int j; int l; - l = strlen(msg); + l = min(strlen(msg), sizeof(cmd.parm) - sizeof(cmd.parm.cmsg) + + sizeof(cmd.parm.cmsg.para) - 2); + if (!l) { isdn_tty_modem_result(RESULT_ERROR, info); return; diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c index c45b3aedafba..d873cbae2fbb 100644 --- a/drivers/mailbox/pl320-ipc.c +++ b/drivers/mailbox/pl320-ipc.c @@ -138,8 +138,7 @@ int pl320_ipc_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(pl320_ipc_unregister_notifier); -static int __init pl320_probe(struct amba_device *adev, - const struct amba_id *id) +static int pl320_probe(struct amba_device *adev, const struct amba_id *id) { int ret; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 3c955e10a618..c6083132c4b8 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1025,6 +1025,8 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, { struct blk_plug plug; + BUG_ON(dm_bufio_in_request()); + blk_start_plug(&plug); dm_bufio_lock(c); diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index fbd3625f2748..83e995fece88 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -83,6 +83,8 @@ struct cache_disk_superblock { __le32 read_misses; __le32 write_hits; __le32 write_misses; + + __le32 policy_version[CACHE_POLICY_VERSION_SIZE]; } __packed; struct 
dm_cache_metadata { @@ -109,6 +111,7 @@ struct dm_cache_metadata { bool clean_when_opened:1; char policy_name[CACHE_POLICY_NAME_SIZE]; + unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; size_t policy_hint_size; struct dm_cache_statistics stats; }; @@ -268,7 +271,8 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); disk_super->version = cpu_to_le32(CACHE_VERSION); - memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE); + memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); + memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); disk_super->policy_hint_size = 0; r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, @@ -284,7 +288,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); disk_super->cache_blocks = cpu_to_le32(0); - memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); disk_super->read_hits = cpu_to_le32(0); disk_super->read_misses = cpu_to_le32(0); @@ -478,6 +481,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); + cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]); + cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]); + cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]); cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size); cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits); @@ -572,6 +578,9 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, 
disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); + disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); + disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); + disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); @@ -854,18 +863,43 @@ struct thunk { bool hints_valid; }; +static bool policy_unchanged(struct dm_cache_metadata *cmd, + struct dm_cache_policy *policy) +{ + const char *policy_name = dm_cache_policy_get_name(policy); + const unsigned *policy_version = dm_cache_policy_get_version(policy); + size_t policy_hint_size = dm_cache_policy_get_hint_size(policy); + + /* + * Ensure policy names match. + */ + if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name))) + return false; + + /* + * Ensure policy major versions match. + */ + if (cmd->policy_version[0] != policy_version[0]) + return false; + + /* + * Ensure policy hint sizes match. 
+ */ + if (cmd->policy_hint_size != policy_hint_size) + return false; + + return true; +} + static bool hints_array_initialized(struct dm_cache_metadata *cmd) { return cmd->hint_root && cmd->policy_hint_size; } static bool hints_array_available(struct dm_cache_metadata *cmd, - const char *policy_name) + struct dm_cache_policy *policy) { - bool policy_names_match = !strncmp(cmd->policy_name, policy_name, - sizeof(cmd->policy_name)); - - return cmd->clean_when_opened && policy_names_match && + return cmd->clean_when_opened && policy_unchanged(cmd, policy) && hints_array_initialized(cmd); } @@ -899,7 +933,8 @@ static int __load_mapping(void *context, uint64_t cblock, void *leaf) return r; } -static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, +static int __load_mappings(struct dm_cache_metadata *cmd, + struct dm_cache_policy *policy, load_mapping_fn fn, void *context) { struct thunk thunk; @@ -909,18 +944,19 @@ static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_nam thunk.cmd = cmd; thunk.respect_dirty_flags = cmd->clean_when_opened; - thunk.hints_valid = hints_array_available(cmd, policy_name); + thunk.hints_valid = hints_array_available(cmd, policy); return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); } -int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, +int dm_cache_load_mappings(struct dm_cache_metadata *cmd, + struct dm_cache_policy *policy, load_mapping_fn fn, void *context) { int r; down_read(&cmd->root_lock); - r = __load_mappings(cmd, policy_name, fn, context); + r = __load_mappings(cmd, policy, fn, context); up_read(&cmd->root_lock); return r; @@ -979,7 +1015,7 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty /* nothing to be done */ return 0; - value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0)); + value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? 
M_DIRTY : 0)); __dm_bless_for_disk(&value); r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), @@ -1070,13 +1106,15 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po __le32 value; size_t hint_size; const char *policy_name = dm_cache_policy_get_name(policy); + const unsigned *policy_version = dm_cache_policy_get_version(policy); if (!policy_name[0] || (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) return -EINVAL; - if (strcmp(cmd->policy_name, policy_name)) { + if (!policy_unchanged(cmd, policy)) { strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); + memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); hint_size = dm_cache_policy_get_hint_size(policy); if (!hint_size) diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index 135864ea0eee..f45cef21f3d0 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -89,7 +89,7 @@ typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock, dm_cblock_t cblock, bool dirty, uint32_t hint, bool hint_valid); int dm_cache_load_mappings(struct dm_cache_metadata *cmd, - const char *policy_name, + struct dm_cache_policy *policy, load_mapping_fn fn, void *context); diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c index cc05d70b3cb8..b04d1f904d07 100644 --- a/drivers/md/dm-cache-policy-cleaner.c +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -17,7 +17,6 @@ /*----------------------------------------------------------------*/ #define DM_MSG_PREFIX "cache cleaner" -#define CLEANER_VERSION "1.0.0" /* Cache entry struct. 
*/ struct wb_cache_entry { @@ -434,6 +433,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, static struct dm_cache_policy_type wb_policy_type = { .name = "cleaner", + .version = {1, 0, 0}, .hint_size = 0, .owner = THIS_MODULE, .create = wb_create @@ -446,7 +446,10 @@ static int __init wb_init(void) if (r < 0) DMERR("register failed %d", r); else - DMINFO("version " CLEANER_VERSION " loaded"); + DMINFO("version %u.%u.%u loaded", + wb_policy_type.version[0], + wb_policy_type.version[1], + wb_policy_type.version[2]); return r; } diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h index 52a75beeced5..0928abdc49f0 100644 --- a/drivers/md/dm-cache-policy-internal.h +++ b/drivers/md/dm-cache-policy-internal.h @@ -117,6 +117,8 @@ void dm_cache_policy_destroy(struct dm_cache_policy *p); */ const char *dm_cache_policy_get_name(struct dm_cache_policy *p); +const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p); + size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p); /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 964153255076..dc112a7137fe 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -14,7 +14,6 @@ #include <linux/vmalloc.h> #define DM_MSG_PREFIX "cache-policy-mq" -#define MQ_VERSION "1.0.0" static struct kmem_cache *mq_entry_cache; @@ -1133,6 +1132,7 @@ bad_cache_alloc: static struct dm_cache_policy_type mq_policy_type = { .name = "mq", + .version = {1, 0, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create @@ -1140,6 +1140,7 @@ static struct dm_cache_policy_type mq_policy_type = { static struct dm_cache_policy_type default_policy_type = { .name = "default", + .version = {1, 0, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create @@ -1164,7 +1165,10 @@ static int __init mq_init(void) r = 
dm_cache_policy_register(&default_policy_type); if (!r) { - DMINFO("version " MQ_VERSION " loaded"); + DMINFO("version %u.%u.%u loaded", + mq_policy_type.version[0], + mq_policy_type.version[1], + mq_policy_type.version[2]); return 0; } diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c index 2cbf5fdaac52..21c03c570c06 100644 --- a/drivers/md/dm-cache-policy.c +++ b/drivers/md/dm-cache-policy.c @@ -150,6 +150,14 @@ const char *dm_cache_policy_get_name(struct dm_cache_policy *p) } EXPORT_SYMBOL_GPL(dm_cache_policy_get_name); +const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + return t->version; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_get_version); + size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p) { struct dm_cache_policy_type *t = p->private; diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index f0f51b260544..558bdfdabf5f 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -196,6 +196,7 @@ struct dm_cache_policy { * We maintain a little register of the different policy types. */ #define CACHE_POLICY_NAME_SIZE 16 +#define CACHE_POLICY_VERSION_SIZE 3 struct dm_cache_policy_type { /* For use by the register code only. */ @@ -206,6 +207,7 @@ struct dm_cache_policy_type { * what gets passed on the target line to select your policy. */ char name[CACHE_POLICY_NAME_SIZE]; + unsigned version[CACHE_POLICY_VERSION_SIZE]; /* * Policies may store a hint for each each cache block. 
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 0f4e84b15c30..10744091e6ca 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -6,6 +6,7 @@ #include "dm.h" #include "dm-bio-prison.h" +#include "dm-bio-record.h" #include "dm-cache-metadata.h" #include <linux/dm-io.h> @@ -142,6 +143,7 @@ struct cache { spinlock_t lock; struct bio_list deferred_bios; struct bio_list deferred_flush_bios; + struct bio_list deferred_writethrough_bios; struct list_head quiesced_migrations; struct list_head completed_migrations; struct list_head need_commit_migrations; @@ -158,7 +160,7 @@ struct cache { /* * origin_blocks entries, discarded if set. */ - sector_t discard_block_size; /* a power of 2 times sectors per block */ + uint32_t discard_block_size; /* a power of 2 times sectors per block */ dm_dblock_t discard_nr_blocks; unsigned long *discard_bitset; @@ -199,6 +201,16 @@ struct per_bio_data { bool tick:1; unsigned req_nr:2; struct dm_deferred_entry *all_io_entry; + + /* + * writethrough fields. These MUST remain at the end of this + * structure and the 'cache' member must be the first as it + * is used to determine the offsetof the writethrough fields. 
+ */ + struct cache *cache; + dm_cblock_t cblock; + bio_end_io_t *saved_bi_end_io; + struct dm_bio_details bio_details; }; struct dm_cache_migration { @@ -412,17 +424,24 @@ static bool block_size_is_power_of_two(struct cache *cache) return cache->sectors_per_block_shift >= 0; } +static dm_block_t block_div(dm_block_t b, uint32_t n) +{ + do_div(b, n); + + return b; +} + static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) { - sector_t discard_blocks = cache->discard_block_size; + uint32_t discard_blocks = cache->discard_block_size; dm_block_t b = from_oblock(oblock); if (!block_size_is_power_of_two(cache)) - (void) sector_div(discard_blocks, cache->sectors_per_block); + discard_blocks = discard_blocks / cache->sectors_per_block; else discard_blocks >>= cache->sectors_per_block_shift; - (void) sector_div(b, discard_blocks); + b = block_div(b, discard_blocks); return to_dblock(b); } @@ -500,16 +519,28 @@ static void save_stats(struct cache *cache) /*---------------------------------------------------------------- * Per bio data *--------------------------------------------------------------*/ -static struct per_bio_data *get_per_bio_data(struct bio *bio) + +/* + * If using writeback, leave out struct per_bio_data's writethrough fields. + */ +#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) +#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) + +static size_t get_per_bio_data_size(struct cache *cache) { - struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + return cache->features.write_through ? 
PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; +} + +static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) +{ + struct per_bio_data *pb = dm_per_bio_data(bio, data_size); BUG_ON(!pb); return pb; } -static struct per_bio_data *init_per_bio_data(struct bio *bio) +static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) { - struct per_bio_data *pb = get_per_bio_data(bio); + struct per_bio_data *pb = get_per_bio_data(bio, data_size); pb->tick = false; pb->req_nr = dm_bio_get_target_bio_nr(bio); @@ -543,7 +574,8 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) { unsigned long flags; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); spin_lock_irqsave(&cache->lock, flags); if (cache->need_tick_bio && @@ -609,6 +641,58 @@ static void issue(struct cache *cache, struct bio *bio) spin_unlock_irqrestore(&cache->lock, flags); } +static void defer_writethrough_bio(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + bio_list_add(&cache->deferred_writethrough_bios, bio); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void writethrough_endio(struct bio *bio, int err) +{ + struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); + bio->bi_end_io = pb->saved_bi_end_io; + + if (err) { + bio_endio(bio, err); + return; + } + + dm_bio_restore(&pb->bio_details, bio); + remap_to_cache(pb->cache, bio, pb->cblock); + + /* + * We can't issue this bio directly, since we're in interrupt + * context. So it get's put on a bio list for processing by the + * worker thread. + */ + defer_writethrough_bio(pb->cache, bio); +} + +/* + * When running in writethrough mode we need to send writes to clean blocks + * to both the cache and origin devices. 
In future we'd like to clone the + * bio and send them in parallel, but for now we're doing them in + * series as this is easier. + */ +static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, dm_cblock_t cblock) +{ + struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); + + pb->cache = cache; + pb->cblock = cblock; + pb->saved_bi_end_io = bio->bi_end_io; + dm_bio_record(&pb->bio_details, bio); + bio->bi_end_io = writethrough_endio; + + remap_to_origin_clear_discard(pb->cache, bio, oblock); +} + /*---------------------------------------------------------------- * Migration processing * @@ -972,7 +1056,8 @@ static void defer_bio(struct cache *cache, struct bio *bio) static void process_flush_bio(struct cache *cache, struct bio *bio) { - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); BUG_ON(bio->bi_size); if (!pb->req_nr) @@ -1002,7 +1087,7 @@ static void process_discard_bio(struct cache *cache, struct bio *bio) dm_block_t end_block = bio->bi_sector + bio_sectors(bio); dm_block_t b; - (void) sector_div(end_block, cache->discard_block_size); + end_block = block_div(end_block, cache->discard_block_size); for (b = start_block; b < end_block; b++) set_discard(cache, to_dblock(b)); @@ -1044,7 +1129,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, dm_oblock_t block = get_bio_block(cache, bio); struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; struct policy_result lookup_result; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); bool discarded_block = is_discarded_oblock(cache, block); bool can_migrate = discarded_block || spare_migration_bandwidth(cache); @@ -1070,14 +1156,9 @@ static void process_bio(struct cache *cache, struct prealloc 
*structs, inc_hit_counter(cache, bio); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - if (is_writethrough_io(cache, bio, lookup_result.cblock)) { - /* - * No need to mark anything dirty in write through mode. - */ - pb->req_nr == 0 ? - remap_to_cache(cache, bio, lookup_result.cblock) : - remap_to_origin_clear_discard(cache, bio, block); - } else + if (is_writethrough_io(cache, bio, lookup_result.cblock)) + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); + else remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); issue(cache, bio); @@ -1086,17 +1167,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, case POLICY_MISS: inc_miss_counter(cache, bio); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - - if (pb->req_nr != 0) { - /* - * This is a duplicate writethrough io that is no - * longer needed because the block has been demoted. - */ - bio_endio(bio, 0); - } else { - remap_to_origin_clear_discard(cache, bio, block); - issue(cache, bio); - } + remap_to_origin_clear_discard(cache, bio, block); + issue(cache, bio); break; case POLICY_NEW: @@ -1217,6 +1289,23 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) submit_bios ? 
generic_make_request(bio) : bio_io_error(bio); } +static void process_deferred_writethrough_bios(struct cache *cache) +{ + unsigned long flags; + struct bio_list bios; + struct bio *bio; + + bio_list_init(&bios); + + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&bios, &cache->deferred_writethrough_bios); + bio_list_init(&cache->deferred_writethrough_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + while ((bio = bio_list_pop(&bios))) + generic_make_request(bio); +} + static void writeback_some_dirty_blocks(struct cache *cache) { int r = 0; @@ -1313,6 +1402,7 @@ static int more_work(struct cache *cache) else return !bio_list_empty(&cache->deferred_bios) || !bio_list_empty(&cache->deferred_flush_bios) || + !bio_list_empty(&cache->deferred_writethrough_bios) || !list_empty(&cache->quiesced_migrations) || !list_empty(&cache->completed_migrations) || !list_empty(&cache->need_commit_migrations); @@ -1331,6 +1421,8 @@ static void do_worker(struct work_struct *ws) writeback_some_dirty_blocks(cache); + process_deferred_writethrough_bios(cache); + if (commit_if_needed(cache)) { process_deferred_flush_bios(cache, false); @@ -1756,8 +1848,11 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, } r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); - if (r) + if (r) { + *error = "Error setting cache policy's config values"; dm_cache_policy_destroy(cache->policy); + cache->policy = NULL; + } return r; } @@ -1793,8 +1888,6 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size, #define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) -static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio); - static int cache_create(struct cache_args *ca, struct cache **result) { int r = 0; @@ -1811,7 +1904,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) cache->ti = ca->ti; ti->private = cache; - ti->per_bio_data_size = sizeof(struct per_bio_data); ti->num_flush_bios = 2; 
ti->flush_supported = true; @@ -1820,9 +1912,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->discard_zeroes_data_unsupported = true; memcpy(&cache->features, &ca->features, sizeof(cache->features)); - - if (cache->features.write_through) - ti->num_write_bios = cache_num_write_bios; + ti->per_bio_data_size = get_per_bio_data_size(cache); cache->callbacks.congested_fn = cache_is_congested; dm_table_add_target_callbacks(ti->table, &cache->callbacks); @@ -1835,7 +1925,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) /* FIXME: factor out this whole section */ origin_blocks = cache->origin_sectors = ca->origin_sectors; - (void) sector_div(origin_blocks, ca->block_size); + origin_blocks = block_div(origin_blocks, ca->block_size); cache->origin_blocks = to_oblock(origin_blocks); cache->sectors_per_block = ca->block_size; @@ -1848,7 +1938,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) dm_block_t cache_size = ca->cache_sectors; cache->sectors_per_block_shift = -1; - (void) sector_div(cache_size, ca->block_size); + cache_size = block_div(cache_size, ca->block_size); cache->cache_size = to_cblock(cache_size); } else { cache->sectors_per_block_shift = __ffs(ca->block_size); @@ -1873,6 +1963,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) spin_lock_init(&cache->lock); bio_list_init(&cache->deferred_bios); bio_list_init(&cache->deferred_flush_bios); + bio_list_init(&cache->deferred_writethrough_bios); INIT_LIST_HEAD(&cache->quiesced_migrations); INIT_LIST_HEAD(&cache->completed_migrations); INIT_LIST_HEAD(&cache->need_commit_migrations); @@ -2002,6 +2093,8 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out; r = cache_create(ca, &cache); + if (r) + goto out; r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); if (r) { @@ -2016,26 +2109,13 @@ out: return r; } -static unsigned cache_num_write_bios(struct dm_target *ti, struct bio 
*bio) -{ - int r; - struct cache *cache = ti->private; - dm_oblock_t block = get_bio_block(cache, bio); - dm_cblock_t cblock; - - r = policy_lookup(cache->policy, block, &cblock); - if (r < 0) - return 2; /* assume the worst */ - - return (!r && !is_dirty(cache, cblock)) ? 2 : 1; -} - static int cache_map(struct dm_target *ti, struct bio *bio) { struct cache *cache = ti->private; int r; dm_oblock_t block = get_bio_block(cache, bio); + size_t pb_data_size = get_per_bio_data_size(cache); bool can_migrate = false; bool discarded_block; struct dm_bio_prison_cell *cell; @@ -2052,7 +2132,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } - pb = init_per_bio_data(bio); + pb = init_per_bio_data(bio, pb_data_size); if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { defer_bio(cache, bio); @@ -2097,18 +2177,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio) inc_hit_counter(cache, bio); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - if (is_writethrough_io(cache, bio, lookup_result.cblock)) { - /* - * No need to mark anything dirty in write through mode. - */ - pb->req_nr == 0 ? 
- remap_to_cache(cache, bio, lookup_result.cblock) : - remap_to_origin_clear_discard(cache, bio, block); - cell_defer(cache, cell, false); - } else { + if (is_writethrough_io(cache, bio, lookup_result.cblock)) + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); + else remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); - cell_defer(cache, cell, false); - } + + cell_defer(cache, cell, false); break; case POLICY_MISS: @@ -2143,7 +2217,8 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) { struct cache *cache = ti->private; unsigned long flags; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); if (pb->tick) { policy_tick(cache->policy); @@ -2319,8 +2394,7 @@ static int cache_preresume(struct dm_target *ti) } if (!cache->loaded_mappings) { - r = dm_cache_load_mappings(cache->cmd, - dm_cache_policy_get_name(cache->policy), + r = dm_cache_load_mappings(cache->cmd, cache->policy, load_mapping, cache); if (r) { DMERR("could not load cache mappings"); @@ -2535,7 +2609,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 009339d62828..004ad1652b73 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1577,6 +1577,11 @@ static bool data_dev_supports_discard(struct pool_c *pt) return q && blk_queue_discard(q); } +static bool is_factor(sector_t block_size, uint32_t n) +{ + return !sector_div(block_size, n); +} + /* * If discard_passdown was enabled verify that the data device * supports discards. Disable discard_passdown if not. 
@@ -1602,7 +1607,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt) else if (data_limits->discard_granularity > block_size) reason = "discard granularity larger than a block"; - else if (block_size & (data_limits->discard_granularity - 1)) + else if (!is_factor(block_size, data_limits->discard_granularity)) reason = "discard granularity not a factor of block size"; if (reason) { @@ -2544,7 +2549,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 6, 1}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2831,7 +2836,7 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 7, 1}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 6ad538375c3c..a746f1d21c66 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -93,6 +93,13 @@ struct dm_verity_io { */ }; +struct dm_verity_prefetch_work { + struct work_struct work; + struct dm_verity *v; + sector_t block; + unsigned n_blocks; +}; + static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) { return (struct shash_desc *)(io + 1); @@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bio, int error) * The root buffer is not prefetched, it is assumed that it will be cached * all the time. 
*/ -static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io) +static void verity_prefetch_io(struct work_struct *work) { + struct dm_verity_prefetch_work *pw = + container_of(work, struct dm_verity_prefetch_work, work); + struct dm_verity *v = pw->v; int i; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; sector_t hash_block_end; - verity_hash_at_level(v, io->block, i, &hash_block_start, NULL); - verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL); + verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL); + verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL); if (!i) { unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); @@ -452,6 +462,25 @@ no_prefetch_cluster: dm_bufio_prefetch(v->bufio, hash_block_start, hash_block_end - hash_block_start + 1); } + + kfree(pw); +} + +static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io) +{ + struct dm_verity_prefetch_work *pw; + + pw = kmalloc(sizeof(struct dm_verity_prefetch_work), + GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!pw) + return; + + INIT_WORK(&pw->work, verity_prefetch_io); + pw->v = v; + pw->block = io->block; + pw->n_blocks = io->n_blocks; + queue_work(v->verify_wq, &pw->work); } /* @@ -498,7 +527,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) memcpy(io->io_vec, bio_iovec(bio), io->io_vec_size * sizeof(struct bio_vec)); - verity_prefetch_io(v, io); + verity_submit_prefetch(v, io); generic_make_request(bio); @@ -858,7 +887,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 1, 1}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7e469260fe5e..9a0bdad9ad8f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -611,6 +611,7 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done 
with normal IO or empty flush */ + trace_block_bio_complete(md->queue, bio, io_error); bio_endio(bio, io_error); } } diff --git a/drivers/md/md.c b/drivers/md/md.c index fcb878f88796..aeceedfc530b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7663,10 +7663,8 @@ static int remove_and_add_spares(struct mddev *mddev) removed++; } } - if (removed) - sysfs_notify(&mddev->kobj, NULL, - "degraded"); - + if (removed && mddev->kobj.sd) + sysfs_notify(&mddev->kobj, NULL, "degraded"); rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && diff --git a/drivers/md/md.h b/drivers/md/md.h index eca59c3074ef..d90fb1a879e1 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -506,7 +506,7 @@ static inline char * mdname (struct mddev * mddev) static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) { char nm[20]; - if (!test_bit(Replacement, &rdev->flags)) { + if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) { sprintf(nm, "rd%d", rdev->raid_disk); return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); } else @@ -516,7 +516,7 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) { char nm[20]; - if (!test_bit(Replacement, &rdev->flags)) { + if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) { sprintf(nm, "rd%d", rdev->raid_disk); sysfs_remove_link(&mddev->kobj, nm); } diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index c4f28133ef82..b88757cd0d1d 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -139,15 +139,8 @@ struct child { struct btree_node *n; }; -static struct dm_btree_value_type le64_type = { - .context = NULL, - .size = sizeof(__le64), - .inc = NULL, - .dec = NULL, - .equal = NULL -}; - -static int init_child(struct dm_btree_info *info, struct btree_node *parent, +static int init_child(struct 
dm_btree_info *info, struct dm_btree_value_type *vt, + struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -164,7 +157,7 @@ static int init_child(struct dm_btree_info *info, struct btree_node *parent, result->n = dm_block_data(result->block); if (inc) - inc_children(info->tm, result->n, &le64_type); + inc_children(info->tm, result->n, vt); *((__le64 *) value_ptr(parent, index)) = cpu_to_le64(dm_block_location(result->block)); @@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, } static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) + struct dm_btree_value_type *vt, unsigned left_index) { int r; struct btree_node *parent; @@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, parent = dm_block_data(shadow_current(s)); - r = init_child(info, parent, left_index, &left); + r = init_child(info, vt, parent, left_index, &left); if (r) return r; - r = init_child(info, parent, left_index + 1, &right); + r = init_child(info, vt, parent, left_index + 1, &right); if (r) { exit_child(info, &left); return r; @@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, } static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) + struct dm_btree_value_type *vt, unsigned left_index) { int r; struct btree_node *parent = dm_block_data(shadow_current(s)); @@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, /* * FIXME: fill out an array? 
*/ - r = init_child(info, parent, left_index, &left); + r = init_child(info, vt, parent, left_index, &left); if (r) return r; - r = init_child(info, parent, left_index + 1, ¢er); + r = init_child(info, vt, parent, left_index + 1, ¢er); if (r) { exit_child(info, &left); return r; } - r = init_child(info, parent, left_index + 2, &right); + r = init_child(info, vt, parent, left_index + 2, &right); if (r) { exit_child(info, &left); exit_child(info, ¢er); @@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_transaction_manager *tm, } static int rebalance_children(struct shadow_spine *s, - struct dm_btree_info *info, uint64_t key) + struct dm_btree_info *info, + struct dm_btree_value_type *vt, uint64_t key) { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; @@ -472,13 +466,13 @@ static int rebalance_children(struct shadow_spine *s, has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); if (!has_left_sibling) - r = rebalance2(s, info, i); + r = rebalance2(s, info, vt, i); else if (!has_right_sibling) - r = rebalance2(s, info, i - 1); + r = rebalance2(s, info, vt, i - 1); else - r = rebalance3(s, info, i - 1); + r = rebalance3(s, info, vt, i - 1); return r; } @@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, if (le32_to_cpu(n->header.flags) & LEAF_NODE) return do_leaf(n, key, index); - r = rebalance_children(s, info, key); + r = rebalance_children(s, info, vt, key); if (r) break; @@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, return r; } +static struct dm_btree_value_type le64_type = { + .context = NULL, + .size = sizeof(__le64), + .inc = NULL, + .dec = NULL, + .equal = NULL +}; + int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, dm_block_t *new_root) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3ee2912889e7..f4e87bfc7567 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -184,6 +184,8 @@ 
static void return_io(struct bio *return_bi) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; + trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), + bi, 0); bio_endio(bi, 0); bi = return_bi; } @@ -671,9 +673,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_next = NULL; if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); - trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), - bi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + + if (conf->mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), + bi, disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); generic_make_request(bi); } if (rrdev) { @@ -701,9 +705,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_size = STRIPE_SIZE; rbi->bi_next = NULL; - trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), - rbi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + if (conf->mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), + rbi, disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); generic_make_request(rbi); } if (!rdev && !rrdev) { @@ -2280,17 +2285,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, int level = conf->level; if (rcw) { - /* if we are not expanding this is a proper write request, and - * there will be bios with new data to be drained into the - * stripe cache - */ - if (!expand) { - sh->reconstruct_state = reconstruct_state_drain_run; - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - } else - sh->reconstruct_state = reconstruct_state_run; - - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -2303,6 +2297,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } + /* if we are not expanding this is a proper write request, and + * there will be bios with new data to be drained into the + * stripe 
cache + */ + if (!expand) { + if (!s->locked) + /* False alarm, nothing to do */ + return; + sh->reconstruct_state = reconstruct_state_drain_run; + set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); + } else + sh->reconstruct_state = reconstruct_state_run; + + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); + if (s->locked + conf->max_degraded == disks) if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) atomic_inc(&conf->pending_full_writes); @@ -2311,11 +2320,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); - sh->reconstruct_state = reconstruct_state_prexor_drain_run; - set_bit(STRIPE_OP_PREXOR, &s->ops_request); - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); - for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (i == pd_idx) @@ -2330,6 +2334,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } + if (!s->locked) + /* False alarm - nothing to do */ + return; + sh->reconstruct_state = reconstruct_state_prexor_drain_run; + set_bit(STRIPE_OP_PREXOR, &s->ops_request); + set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); } /* keep the parity disk(s) locked while asynchronous operations @@ -2564,6 +2575,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, int i; clear_bit(STRIPE_SYNCING, &sh->state); + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) + wake_up(&conf->wait_for_overlap); s->syncing = 0; s->replacing = 0; /* There is nothing more to do for sync/check/repair. 
@@ -2737,6 +2750,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, { int i; struct r5dev *dev; + int discard_pending = 0; for (i = disks; i--; ) if (sh->dev[i].written) { @@ -2765,9 +2779,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, STRIPE_SECTORS, !test_bit(STRIPE_DEGRADED, &sh->state), 0); - } - } else if (test_bit(R5_Discard, &sh->dev[i].flags)) - clear_bit(R5_Discard, &sh->dev[i].flags); + } else if (test_bit(R5_Discard, &dev->flags)) + discard_pending = 1; + } + if (!discard_pending && + test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { + clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); + clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); + if (sh->qd_idx >= 0) { + clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); + clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); + } + /* now that discard is done we can proceed with any sync */ + clear_bit(STRIPE_DISCARD, &sh->state); + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) + set_bit(STRIPE_HANDLE, &sh->state); + + } if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) if (atomic_dec_and_test(&conf->pending_full_writes)) @@ -2826,8 +2854,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, set_bit(STRIPE_HANDLE, &sh->state); if (rmw < rcw && rmw > 0) { /* prefer read-modify-write, but need to get some data */ - blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", - (unsigned long long)sh->sector, rmw); + if (conf->mddev->queue) + blk_add_trace_msg(conf->mddev->queue, + "raid5 rmw %llu %d", + (unsigned long long)sh->sector, rmw); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && @@ -2877,7 +2907,7 @@ static void handle_stripe_dirtying(struct r5conf *conf, } } } - if (rcw) + if (rcw && conf->mddev->queue) blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", (unsigned long long)sh->sector, rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); @@ -3417,9 +3447,15 @@ static void handle_stripe(struct 
stripe_head *sh) return; } - if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { - set_bit(STRIPE_SYNCING, &sh->state); - clear_bit(STRIPE_INSYNC, &sh->state); + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + spin_lock(&sh->stripe_lock); + /* Cannot process 'sync' concurrently with 'discard' */ + if (!test_bit(STRIPE_DISCARD, &sh->state) && + test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + set_bit(STRIPE_SYNCING, &sh->state); + clear_bit(STRIPE_INSYNC, &sh->state); + } + spin_unlock(&sh->stripe_lock); } clear_bit(STRIPE_DELAYED, &sh->state); @@ -3579,6 +3615,8 @@ static void handle_stripe(struct stripe_head *sh) test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) + wake_up(&conf->wait_for_overlap); } /* If the failed drives are just a ReadError, then we might need @@ -3878,6 +3916,8 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); if (!error && uptodate) { + trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), + raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); @@ -3982,9 +4022,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) atomic_inc(&conf->active_aligned_reads); spin_unlock_irq(&conf->device_lock); - trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), - align_bi, disk_devt(mddev->gendisk), - raid_bio->bi_sector); + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), + align_bi, disk_devt(mddev->gendisk), + raid_bio->bi_sector); generic_make_request(align_bi); return 1; } else { @@ -4078,7 +4119,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) } spin_unlock_irq(&conf->device_lock); } - trace_block_unplug(mddev->queue, cnt, !from_schedule); + if (mddev->queue) + 
trace_block_unplug(mddev->queue, cnt, !from_schedule); kfree(cb); } @@ -4141,6 +4183,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) sh = get_active_stripe(conf, logical_sector, 0, 0, 0); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); + set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); + if (test_bit(STRIPE_SYNCING, &sh->state)) { + release_stripe(sh); + schedule(); + goto again; + } + clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); spin_lock_irq(&sh->stripe_lock); for (d = 0; d < conf->raid_disks; d++) { if (d == sh->pd_idx || d == sh->qd_idx) @@ -4153,6 +4202,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) goto again; } } + set_bit(STRIPE_DISCARD, &sh->state); finish_wait(&conf->wait_for_overlap, &w); for (d = 0; d < conf->raid_disks; d++) { if (d == sh->pd_idx || d == sh->qd_idx) @@ -4336,6 +4386,8 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); + trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), + bi, 0); bio_endio(bi, 0); } } @@ -4712,8 +4764,11 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); - if (remaining == 0) + if (remaining == 0) { + trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), + raid_bio, 0); bio_endio(raid_bio, 0); + } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); return handled; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 18b2c4a8a1fd..b0b663b119a8 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -221,10 +221,6 @@ struct stripe_head { struct stripe_operations { int target, target2; enum sum_check_flags zero_sum_result; - #ifdef CONFIG_MULTICORE_RAID456 - unsigned long request; - wait_queue_head_t wait_for_ops; - #endif } ops; struct r5dev { /* rreq and rvec are used for the replacement device when @@ -323,6 +319,7 @@ enum { STRIPE_COMPUTE_RUN, 
STRIPE_OPS_REQ_PENDING, STRIPE_ON_UNPLUG_LIST, + STRIPE_DISCARD, }; /* diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 44225b186f6d..83a23afb13ab 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -185,7 +185,7 @@ static __be16 dvb_net_eth_type_trans(struct sk_buff *skb, skb->pkt_type=PACKET_MULTICAST; } - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; @@ -228,9 +228,9 @@ static int ule_test_sndu( struct dvb_net_priv *p ) static int ule_bridged_sndu( struct dvb_net_priv *p ) { struct ethhdr *hdr = (struct ethhdr*) p->ule_next_hdr; - if(ntohs(hdr->h_proto) < 1536) { + if(ntohs(hdr->h_proto) < ETH_P_802_3_MIN) { int framelen = p->ule_sndu_len - ((p->ule_next_hdr+sizeof(struct ethhdr)) - p->ule_skb->data); - /* A frame Type < 1536 for a bridged frame, introduces a LLC Length field. */ + /* A frame Type < ETH_P_802_3_MIN for a bridged frame, introduces a LLC Length field. */ if(framelen != ntohs(hdr->h_proto)) { return -1; } @@ -320,7 +320,7 @@ static int handle_ule_extensions( struct dvb_net_priv *p ) (int) p->ule_sndu_type, l, total_ext_len); #endif - } while (p->ule_sndu_type < 1536); + } while (p->ule_sndu_type < ETH_P_802_3_MIN); return total_ext_len; } @@ -712,7 +712,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) } /* Handle ULE Extension Headers. */ - if (priv->ule_sndu_type < 1536) { + if (priv->ule_sndu_type < ETH_P_802_3_MIN) { /* There is an extension header. Handle it accordingly. 
*/ int l = handle_ule_extensions(priv); if (l < 0) { diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c index d4e7567b367c..0b899cb6cda1 100644 --- a/drivers/media/i2c/m5mols/m5mols_core.c +++ b/drivers/media/i2c/m5mols/m5mols_core.c @@ -724,7 +724,7 @@ static int m5mols_s_stream(struct v4l2_subdev *sd, int enable) if (enable) { if (is_code(code, M5MOLS_RESTYPE_MONITOR)) ret = m5mols_start_monitor(info); - if (is_code(code, M5MOLS_RESTYPE_CAPTURE)) + else if (is_code(code, M5MOLS_RESTYPE_CAPTURE)) ret = m5mols_start_capture(info); else ret = -EINVAL; diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index ccd18e4ee789..54579e4c740b 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -250,17 +250,19 @@ static u8 SRAM_Table[][60] = vdelay start of active video in 2 * field lines relative to trailing edge of /VRESET pulse (VDELAY register). sheight height of active video in 2 * field lines. + extraheight Added to sheight for cropcap.bounds.height only videostart0 ITU-R frame line number of the line corresponding to vdelay in the first field. */ #define CROPCAP(minhdelayx1, hdelayx1, swidth, totalwidth, sqwidth, \ - vdelay, sheight, videostart0) \ + vdelay, sheight, extraheight, videostart0) \ .cropcap.bounds.left = minhdelayx1, \ /* * 2 because vertically we count field lines times two, */ \ /* e.g. 23 * 2 to 23 * 2 + 576 in PAL-BGHI defrect. */ \ .cropcap.bounds.top = (videostart0) * 2 - (vdelay) + MIN_VDELAY, \ /* 4 is a safety margin at the end of the line. 
*/ \ .cropcap.bounds.width = (totalwidth) - (minhdelayx1) - 4, \ - .cropcap.bounds.height = (sheight) + (vdelay) - MIN_VDELAY, \ + .cropcap.bounds.height = (sheight) + (extraheight) + (vdelay) - \ + MIN_VDELAY, \ .cropcap.defrect.left = hdelayx1, \ .cropcap.defrect.top = (videostart0) * 2, \ .cropcap.defrect.width = swidth, \ @@ -301,9 +303,10 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* totalwidth */ 1135, /* sqwidth */ 944, /* vdelay */ 0x20, - /* bt878 (and bt848?) can capture another - line below active video. */ - /* sheight */ (576 + 2) + 0x20 - 2, + /* sheight */ 576, + /* bt878 (and bt848?) can capture another + line below active video. */ + /* extraheight */ 2, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR, @@ -330,6 +333,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x1a, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_SECAM, @@ -355,6 +359,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 944, /* vdelay */ 0x20, /* sheight */ 576, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_PAL_Nc, @@ -380,6 +385,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x1a, /* sheight */ 576, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_PAL_M, @@ -405,6 +411,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x1a, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_PAL_N, @@ -430,6 +437,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 944, /* vdelay */ 0x20, /* sheight */ 576, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_NTSC_M_JP, @@ -455,6 +463,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x16, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) },{ /* that one hopefully works with the strange timing @@ -484,6 +493,7 @@ const 
struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 944, /* vdelay */ 0x1a, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) } }; diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 05d7b6333461..a0639e779973 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -204,7 +204,7 @@ config VIDEO_SAMSUNG_EXYNOS_GSC config VIDEO_SH_VEU tristate "SuperH VEU mem2mem video processing driver" - depends on VIDEO_DEV && VIDEO_V4L2 + depends on VIDEO_DEV && VIDEO_V4L2 && GENERIC_HARDIRQS select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV help diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index 82d9f6ac12f3..33b5ffc8d66d 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -1054,16 +1054,18 @@ static int gsc_m2m_suspend(struct gsc_dev *gsc) static int gsc_m2m_resume(struct gsc_dev *gsc) { + struct gsc_ctx *ctx; unsigned long flags; spin_lock_irqsave(&gsc->slock, flags); /* Clear for full H/W setup in first run after resume */ + ctx = gsc->m2m.ctx; gsc->m2m.ctx = NULL; spin_unlock_irqrestore(&gsc->slock, flags); if (test_and_clear_bit(ST_M2M_SUSPENDED, &gsc->state)) - gsc_m2m_job_finish(gsc->m2m.ctx, - VB2_BUF_STATE_ERROR); + gsc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); + return 0; } @@ -1204,7 +1206,7 @@ static int gsc_resume(struct device *dev) /* Do not resume if the device was idle before system suspend */ spin_lock_irqsave(&gsc->slock, flags); if (!test_and_clear_bit(ST_SUSPEND, &gsc->state) || - !gsc_m2m_active(gsc)) { + !gsc_m2m_opened(gsc)) { spin_unlock_irqrestore(&gsc->slock, flags); return 0; } diff --git a/drivers/media/platform/s5p-fimc/fimc-core.c b/drivers/media/platform/s5p-fimc/fimc-core.c index e3916bde45cf..0f513dd19f86 100644 --- a/drivers/media/platform/s5p-fimc/fimc-core.c +++ b/drivers/media/platform/s5p-fimc/fimc-core.c @@ -850,16 +850,18 @@ static int 
fimc_m2m_suspend(struct fimc_dev *fimc) static int fimc_m2m_resume(struct fimc_dev *fimc) { + struct fimc_ctx *ctx; unsigned long flags; spin_lock_irqsave(&fimc->slock, flags); /* Clear for full H/W setup in first run after resume */ + ctx = fimc->m2m.ctx; fimc->m2m.ctx = NULL; spin_unlock_irqrestore(&fimc->slock, flags); if (test_and_clear_bit(ST_M2M_SUSPENDED, &fimc->state)) - fimc_m2m_job_finish(fimc->m2m.ctx, - VB2_BUF_STATE_ERROR); + fimc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); + return 0; } diff --git a/drivers/media/platform/s5p-fimc/fimc-lite-reg.c b/drivers/media/platform/s5p-fimc/fimc-lite-reg.c index f0af0754a7b4..ac9663ce2a49 100644 --- a/drivers/media/platform/s5p-fimc/fimc-lite-reg.c +++ b/drivers/media/platform/s5p-fimc/fimc-lite-reg.c @@ -128,10 +128,10 @@ static const u32 src_pixfmt_map[8][3] = { void flite_hw_set_source_format(struct fimc_lite *dev, struct flite_frame *f) { enum v4l2_mbus_pixelcode pixelcode = dev->fmt->mbus_code; - unsigned int i = ARRAY_SIZE(src_pixfmt_map); + int i = ARRAY_SIZE(src_pixfmt_map); u32 cfg; - while (i-- >= 0) { + while (--i >= 0) { if (src_pixfmt_map[i][0] == pixelcode) break; } @@ -224,9 +224,9 @@ static void flite_hw_set_out_order(struct fimc_lite *dev, struct flite_frame *f) { V4L2_MBUS_FMT_VYUY8_2X8, FLITE_REG_CIODMAFMT_CRYCBY }, }; u32 cfg = readl(dev->regs + FLITE_REG_CIODMAFMT); - unsigned int i = ARRAY_SIZE(pixcode); + int i = ARRAY_SIZE(pixcode); - while (i-- >= 0) + while (--i >= 0) if (pixcode[i][0] == dev->fmt->mbus_code) break; cfg &= ~FLITE_REG_CIODMAFMT_YCBCR_ORDER_MASK; diff --git a/drivers/media/platform/s5p-fimc/fimc-lite.c b/drivers/media/platform/s5p-fimc/fimc-lite.c index bfc4206935c8..bbc35de7db27 100644 --- a/drivers/media/platform/s5p-fimc/fimc-lite.c +++ b/drivers/media/platform/s5p-fimc/fimc-lite.c @@ -1408,6 +1408,7 @@ static const struct v4l2_ctrl_config fimc_lite_ctrl = { .id = V4L2_CTRL_CLASS_USER | 0x1001, .type = V4L2_CTRL_TYPE_BOOLEAN, .name = "Test Pattern 640x480", + .step = 1, 
}; static int fimc_lite_create_capture_subdev(struct fimc_lite *fimc) diff --git a/drivers/media/platform/s5p-fimc/fimc-mdevice.c b/drivers/media/platform/s5p-fimc/fimc-mdevice.c index a17fcb2d5d41..cd38d708ab58 100644 --- a/drivers/media/platform/s5p-fimc/fimc-mdevice.c +++ b/drivers/media/platform/s5p-fimc/fimc-mdevice.c @@ -827,7 +827,7 @@ static int fimc_md_link_notify(struct media_pad *source, struct fimc_pipeline *pipeline; struct v4l2_subdev *sd; struct mutex *lock; - int ret = 0; + int i, ret = 0; int ref_count; if (media_entity_type(sink->entity) != MEDIA_ENT_T_V4L2_SUBDEV) @@ -854,29 +854,28 @@ static int fimc_md_link_notify(struct media_pad *source, return 0; } + mutex_lock(lock); + ref_count = fimc ? fimc->vid_cap.refcnt : fimc_lite->ref_count; + if (!(flags & MEDIA_LNK_FL_ENABLED)) { - int i; - mutex_lock(lock); - ret = __fimc_pipeline_close(pipeline); + if (ref_count > 0) { + ret = __fimc_pipeline_close(pipeline); + if (!ret && fimc) + fimc_ctrls_delete(fimc->vid_cap.ctx); + } for (i = 0; i < IDX_MAX; i++) pipeline->subdevs[i] = NULL; - if (fimc) - fimc_ctrls_delete(fimc->vid_cap.ctx); - mutex_unlock(lock); - return ret; + } else if (ref_count > 0) { + /* + * Link activation. Enable power of pipeline elements only if + * the pipeline is already in use, i.e. its video node is open. + * Recreate the controls destroyed during the link deactivation. + */ + ret = __fimc_pipeline_open(pipeline, + source->entity, true); + if (!ret && fimc) + ret = fimc_capture_ctrls_create(fimc); } - /* - * Link activation. Enable power of pipeline elements only if the - * pipeline is already in use, i.e. its video node is opened. - * Recreate the controls destroyed during the link deactivation. - */ - mutex_lock(lock); - - ref_count = fimc ? fimc->vid_cap.refcnt : fimc_lite->ref_count; - if (ref_count > 0) - ret = __fimc_pipeline_open(pipeline, source->entity, true); - if (!ret && fimc) - ret = fimc_capture_ctrls_create(fimc); mutex_unlock(lock); return ret ? 
-EPIPE : ret; diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index e84703c314ce..1cb6d57987c6 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -276,7 +276,7 @@ static void s5p_mfc_handle_frame_new(struct s5p_mfc_ctx *ctx, unsigned int err) unsigned int frame_type; dspl_y_addr = s5p_mfc_hw_call(dev->mfc_ops, get_dspl_y_adr, dev); - frame_type = s5p_mfc_hw_call(dev->mfc_ops, get_dec_frame_type, dev); + frame_type = s5p_mfc_hw_call(dev->mfc_ops, get_disp_frame_type, ctx); /* If frame is same as previous then skip and do not dequeue */ if (frame_type == S5P_FIMV_DECODE_FRAME_SKIPPED) { diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c index 2356fd52a169..4f6b553c4b2d 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c @@ -232,6 +232,7 @@ static struct mfc_control controls[] = { .minimum = 0, .maximum = 1, .default_value = 0, + .step = 1, .menu_skip_mask = 0, }, { diff --git a/drivers/media/radio/radio-ma901.c b/drivers/media/radio/radio-ma901.c index c61f590029ad..348dafc0318a 100644 --- a/drivers/media/radio/radio-ma901.c +++ b/drivers/media/radio/radio-ma901.c @@ -347,9 +347,20 @@ static void usb_ma901radio_release(struct v4l2_device *v4l2_dev) static int usb_ma901radio_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_device *dev = interface_to_usbdev(intf); struct ma901radio_device *radio; int retval = 0; + /* Masterkit MA901 usb radio has the same USB ID as many others + * Atmel V-USB devices. Let's make additional checks to be sure + * that this is our device. 
+ */ + + if (dev->product && dev->manufacturer && + (strncmp(dev->product, "MA901", 5) != 0 + || strncmp(dev->manufacturer, "www.masterkit.ru", 16) != 0)) + return -ENODEV; + radio = kzalloc(sizeof(struct ma901radio_device), GFP_KERNEL); if (!radio) { dev_err(&intf->dev, "kzalloc for ma901radio_device failed\n"); diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig index 19f3563c61da..5a79c333d45e 100644 --- a/drivers/media/rc/Kconfig +++ b/drivers/media/rc/Kconfig @@ -291,7 +291,7 @@ config IR_TTUSBIR config IR_RX51 tristate "Nokia N900 IR transmitter diode" - depends on OMAP_DM_TIMER && LIRC && !ARCH_MULTIPLATFORM + depends on OMAP_DM_TIMER && ARCH_OMAP2PLUS && LIRC && !ARCH_MULTIPLATFORM ---help--- Say Y or M here if you want to enable support for the IR transmitter diode built in the Nokia N900 (RX51) device. diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index a9d355230e8e..768aaf62d5dc 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -10,7 +10,7 @@ ifeq ($(CONFIG_COMPAT),y) videodev-objs += v4l2-compat-ioctl32.o endif -obj-$(CONFIG_VIDEO_DEV) += videodev.o +obj-$(CONFIG_VIDEO_V4L2) += videodev.o obj-$(CONFIG_VIDEO_V4L2_INT_DEVICE) += v4l2-int-device.o obj-$(CONFIG_VIDEO_V4L2) += v4l2-common.o diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 671f5b171c73..c346941a2515 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -858,6 +858,7 @@ config EZX_PCAP config AB8500_CORE bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU + select POWER_SUPPLY select MFD_CORE select IRQ_DOMAIN help diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index b1f3561b023f..5f341a50ee5a 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -594,9 +594,12 @@ static int ab8500_gpadc_runtime_suspend(struct device *dev) static int ab8500_gpadc_runtime_resume(struct device 
*dev) { struct ab8500_gpadc *gpadc = dev_get_drvdata(dev); + int ret; - regulator_enable(gpadc->regu); - return 0; + ret = regulator_enable(gpadc->regu); + if (ret) + dev_err(dev, "Failed to enable vtvout LDO: %d\n", ret); + return ret; } static int ab8500_gpadc_runtime_idle(struct device *dev) @@ -643,7 +646,7 @@ static int ab8500_gpadc_probe(struct platform_device *pdev) } /* VTVout LDO used to power up ab8500-GPADC */ - gpadc->regu = regulator_get(&pdev->dev, "vddadc"); + gpadc->regu = devm_regulator_get(&pdev->dev, "vddadc"); if (IS_ERR(gpadc->regu)) { ret = PTR_ERR(gpadc->regu); dev_err(gpadc->dev, "failed to get vtvout LDO\n"); @@ -652,7 +655,11 @@ static int ab8500_gpadc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, gpadc); - regulator_enable(gpadc->regu); + ret = regulator_enable(gpadc->regu); + if (ret) { + dev_err(gpadc->dev, "Failed to enable vtvout LDO: %d\n", ret); + goto fail_enable; + } pm_runtime_set_autosuspend_delay(gpadc->dev, GPADC_AUDOSUSPEND_DELAY); pm_runtime_use_autosuspend(gpadc->dev); @@ -663,6 +670,8 @@ static int ab8500_gpadc_probe(struct platform_device *pdev) list_add_tail(&gpadc->node, &ab8500_gpadc_list); dev_dbg(gpadc->dev, "probe success\n"); return 0; + +fail_enable: fail_irq: free_irq(gpadc->irq, gpadc); fail: diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 6b5edf64de2b..4febc5c7fdee 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -460,15 +460,15 @@ static void omap_usbhs_init(struct device *dev) switch (omap->usbhs_rev) { case OMAP_USBHS_REV1: - omap_usbhs_rev1_hostconfig(omap, reg); + reg = omap_usbhs_rev1_hostconfig(omap, reg); break; case OMAP_USBHS_REV2: - omap_usbhs_rev2_hostconfig(omap, reg); + reg = omap_usbhs_rev2_hostconfig(omap, reg); break; default: /* newer revisions */ - omap_usbhs_rev2_hostconfig(omap, reg); + reg = omap_usbhs_rev2_hostconfig(omap, reg); break; } diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index 
bbdbc50a3cca..73bf76df1044 100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -257,9 +257,24 @@ static struct regmap_irq_chip palmas_irq_chip = { PALMAS_INT1_MASK), }; -static void palmas_dt_to_pdata(struct device_node *node, +static int palmas_set_pdata_irq_flag(struct i2c_client *i2c, struct palmas_platform_data *pdata) { + struct irq_data *irq_data = irq_get_irq_data(i2c->irq); + if (!irq_data) { + dev_err(&i2c->dev, "Invalid IRQ: %d\n", i2c->irq); + return -EINVAL; + } + + pdata->irq_flags = irqd_get_trigger_type(irq_data); + dev_info(&i2c->dev, "Irq flag is 0x%08x\n", pdata->irq_flags); + return 0; +} + +static void palmas_dt_to_pdata(struct i2c_client *i2c, + struct palmas_platform_data *pdata) +{ + struct device_node *node = i2c->dev.of_node; int ret; u32 prop; @@ -283,6 +298,8 @@ static void palmas_dt_to_pdata(struct device_node *node, pdata->power_ctrl = PALMAS_POWER_CTRL_NSLEEP_MASK | PALMAS_POWER_CTRL_ENABLE1_MASK | PALMAS_POWER_CTRL_ENABLE2_MASK; + if (i2c->irq) + palmas_set_pdata_irq_flag(i2c, pdata); } static int palmas_i2c_probe(struct i2c_client *i2c, @@ -304,7 +321,7 @@ static int palmas_i2c_probe(struct i2c_client *i2c, if (!pdata) return -ENOMEM; - palmas_dt_to_pdata(node, pdata); + palmas_dt_to_pdata(i2c, pdata); } if (!pdata) @@ -344,6 +361,19 @@ static int palmas_i2c_probe(struct i2c_client *i2c, } } + /* Change interrupt line output polarity */ + if (pdata->irq_flags & IRQ_TYPE_LEVEL_HIGH) + reg = PALMAS_POLARITY_CTRL_INT_POLARITY; + else + reg = 0; + ret = palmas_update_bits(palmas, PALMAS_PU_PD_OD_BASE, + PALMAS_POLARITY_CTRL, PALMAS_POLARITY_CTRL_INT_POLARITY, + reg); + if (ret < 0) { + dev_err(palmas->dev, "POLARITY_CTRL updat failed: %d\n", ret); + goto err; + } + /* Change IRQ into clear on read mode for efficiency */ slave = PALMAS_BASE_TO_SLAVE(PALMAS_INTERRUPT_BASE); addr = PALMAS_BASE_TO_REG(PALMAS_INTERRUPT_BASE, PALMAS_INT_CTRL); @@ -352,7 +382,7 @@ static int palmas_i2c_probe(struct i2c_client *i2c, 
regmap_write(palmas->regmap[slave], addr, reg); ret = regmap_add_irq_chip(palmas->regmap[slave], palmas->irq, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, 0, &palmas_irq_chip, + IRQF_ONESHOT | pdata->irq_flags, 0, &palmas_irq_chip, &palmas->irq_data); if (ret < 0) goto err; diff --git a/drivers/mfd/tps65912-core.c b/drivers/mfd/tps65912-core.c index 4658b5bdcd84..aeb8e40ab424 100644 --- a/drivers/mfd/tps65912-core.c +++ b/drivers/mfd/tps65912-core.c @@ -169,6 +169,7 @@ err: void tps65912_device_exit(struct tps65912 *tps65912) { mfd_remove_devices(tps65912->dev); + tps65912_irq_exit(tps65912); kfree(tps65912); } diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c index e16edca92670..d2ab222138c2 100644 --- a/drivers/mfd/twl4030-audio.c +++ b/drivers/mfd/twl4030-audio.c @@ -118,7 +118,7 @@ EXPORT_SYMBOL_GPL(twl4030_audio_enable_resource); * Disable the resource. * The function returns with error or the content of the register */ -int twl4030_audio_disable_resource(unsigned id) +int twl4030_audio_disable_resource(enum twl4030_audio_res id) { struct twl4030_audio *audio = platform_get_drvdata(twl4030_audio_dev); int val; diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c index 88ff9dc83305..942b666a2a07 100644 --- a/drivers/mfd/twl4030-madc.c +++ b/drivers/mfd/twl4030-madc.c @@ -800,7 +800,7 @@ static int twl4030_madc_remove(struct platform_device *pdev) static struct platform_driver twl4030_madc_driver = { .probe = twl4030_madc_probe, - .remove = __exit_p(twl4030_madc_remove), + .remove = twl4030_madc_remove, .driver = { .name = "twl4030_madc", .owner = THIS_MODULE, diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 6673e578b3e9..ce5b75616b45 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -110,6 +110,7 @@ static struct file_system_type ibmasmfs_type = { .mount = ibmasmfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("ibmasmfs"); static int ibmasmfs_fill_super 
(struct super_block *sb, void *data, int silent) { diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 45ea7185c003..642c6223fa6c 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -152,6 +152,20 @@ static void mei_me_intr_disable(struct mei_device *dev) } /** + * mei_me_hw_reset_release - release device from the reset + * + * @dev: the device structure + */ +static void mei_me_hw_reset_release(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 hcsr = mei_hcsr_read(hw); + + hcsr |= H_IG; + hcsr &= ~H_RST; + mei_hcsr_set(hw, hcsr); +} +/** * mei_me_hw_reset - resets fw via mei csr register. * * @dev: the device structure @@ -169,18 +183,14 @@ static void mei_me_hw_reset(struct mei_device *dev, bool intr_enable) if (intr_enable) hcsr |= H_IE; else - hcsr &= ~H_IE; - - mei_hcsr_set(hw, hcsr); - - hcsr = mei_hcsr_read(hw) | H_IG; - hcsr &= ~H_RST; + hcsr |= ~H_IE; mei_hcsr_set(hw, hcsr); - hcsr = mei_hcsr_read(hw); + if (dev->dev_state == MEI_DEV_POWER_DOWN) + mei_me_hw_reset_release(dev); - dev_dbg(&dev->pdev->dev, "current HCSR = 0x%08x.\n", hcsr); + dev_dbg(&dev->pdev->dev, "current HCSR = 0x%08x.\n", mei_hcsr_read(hw)); } /** @@ -466,7 +476,8 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) mutex_unlock(&dev->device_lock); return IRQ_HANDLED; } else { - dev_dbg(&dev->pdev->dev, "FW not ready.\n"); + dev_dbg(&dev->pdev->dev, "Reset Completed.\n"); + mei_me_hw_reset_release(dev); mutex_unlock(&dev->device_lock); return IRQ_HANDLED; } diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 6ec530168afb..356179991a2e 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -183,6 +183,24 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled) mei_cl_all_write_clear(dev); } +void mei_stop(struct mei_device *dev) +{ + dev_dbg(&dev->pdev->dev, "stopping the device.\n"); + + mutex_lock(&dev->device_lock); + + cancel_delayed_work(&dev->timer_work); + + 
mei_wd_stop(dev); + + dev->dev_state = MEI_DEV_POWER_DOWN; + mei_reset(dev, 0); + + mutex_unlock(&dev->device_lock); + + flush_scheduled_work(); +} + diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index cb80166161f0..97873812e33b 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -381,6 +381,7 @@ static inline unsigned long mei_secs_to_jiffies(unsigned long sec) void mei_device_init(struct mei_device *dev); void mei_reset(struct mei_device *dev, int interrupts); int mei_hw_init(struct mei_device *dev); +void mei_stop(struct mei_device *dev); /* * MEI interrupt functions prototype diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index b40ec0601ab0..b8b5c9c3ad03 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -247,44 +247,14 @@ static void mei_remove(struct pci_dev *pdev) hw = to_me_hw(dev); - mutex_lock(&dev->device_lock); - - cancel_delayed_work(&dev->timer_work); - mei_wd_stop(dev); + dev_err(&pdev->dev, "stop\n"); + mei_stop(dev); mei_pdev = NULL; - if (dev->iamthif_cl.state == MEI_FILE_CONNECTED) { - dev->iamthif_cl.state = MEI_FILE_DISCONNECTING; - mei_cl_disconnect(&dev->iamthif_cl); - } - if (dev->wd_cl.state == MEI_FILE_CONNECTED) { - dev->wd_cl.state = MEI_FILE_DISCONNECTING; - mei_cl_disconnect(&dev->wd_cl); - } - - /* Unregistering watchdog device */ mei_watchdog_unregister(dev); - /* remove entry if already in list */ - dev_dbg(&pdev->dev, "list del iamthif and wd file list.\n"); - - if (dev->open_handle_count > 0) - dev->open_handle_count--; - mei_cl_unlink(&dev->wd_cl); - - if (dev->open_handle_count > 0) - dev->open_handle_count--; - mei_cl_unlink(&dev->iamthif_cl); - - dev->iamthif_current_cb = NULL; - dev->me_clients_num = 0; - - mutex_unlock(&dev->device_lock); - - flush_scheduled_work(); - /* disable interrupts */ mei_disable_interrupts(dev); @@ -308,28 +278,20 @@ static int mei_pci_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); 
struct mei_device *dev = pci_get_drvdata(pdev); - int err; if (!dev) return -ENODEV; - mutex_lock(&dev->device_lock); - cancel_delayed_work(&dev->timer_work); + dev_err(&pdev->dev, "suspend\n"); - /* Stop watchdog if exists */ - err = mei_wd_stop(dev); - /* Set new mei state */ - if (dev->dev_state == MEI_DEV_ENABLED || - dev->dev_state == MEI_DEV_RECOVERING_FROM_RESET) { - dev->dev_state = MEI_DEV_POWER_DOWN; - mei_reset(dev, 0); - } - mutex_unlock(&dev->device_lock); + mei_stop(dev); + + mei_disable_interrupts(dev); free_irq(pdev->irq, dev); pci_disable_msi(pdev); - return err; + return 0; } static int mei_pci_resume(struct device *device) diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig index 39c2ecadb273..ea98f7e9ccd1 100644 --- a/drivers/misc/vmw_vmci/Kconfig +++ b/drivers/misc/vmw_vmci/Kconfig @@ -4,7 +4,7 @@ config VMWARE_VMCI tristate "VMware VMCI Driver" - depends on X86 && PCI + depends on X86 && PCI && NET help This is VMware's Virtual Machine Communication Interface. It enables high-speed communication between host and guest in a virtual diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index ed5c433cd493..f3cdd904fe4d 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -42,9 +42,11 @@ struct datagram_entry { struct delayed_datagram_info { struct datagram_entry *entry; - struct vmci_datagram msg; struct work_struct work; bool in_dg_host_queue; + /* msg and msg_payload must be together. */ + struct vmci_datagram msg; + u8 msg_payload[]; }; /* Number of in-flight host->host datagrams */ diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 63feb75cc8e0..9279a9174f84 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -19,6 +19,12 @@ /* 10 parts were found on sflash on Netgear WNDR4500 */ #define BCM47XXPART_MAX_PARTS 12 +/* + * Amount of bytes we read when analyzing each block of flash memory. 
+ * Set it big enough to allow detecting partition and reading important data. + */ +#define BCM47XXPART_BYTES_TO_READ 0x404 + /* Magics */ #define BOARD_DATA_MAGIC 0x5246504D /* MPFR */ #define POT_MAGIC1 0x54544f50 /* POTT */ @@ -57,17 +63,15 @@ static int bcm47xxpart_parse(struct mtd_info *master, struct trx_header *trx; int trx_part = -1; int last_trx_part = -1; - int max_bytes_to_read = 0x8004; + int possible_nvram_sizes[] = { 0x8000, 0xF000, 0x10000, }; if (blocksize <= 0x10000) blocksize = 0x10000; - if (blocksize == 0x20000) - max_bytes_to_read = 0x18004; /* Alloc */ parts = kzalloc(sizeof(struct mtd_partition) * BCM47XXPART_MAX_PARTS, GFP_KERNEL); - buf = kzalloc(max_bytes_to_read, GFP_KERNEL); + buf = kzalloc(BCM47XXPART_BYTES_TO_READ, GFP_KERNEL); /* Parse block by block looking for magics */ for (offset = 0; offset <= master->size - blocksize; @@ -82,7 +86,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Read beginning of the block */ - if (mtd_read(master, offset, max_bytes_to_read, + if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ, &bytes_read, (uint8_t *)buf) < 0) { pr_err("mtd_read error while parsing (offset: 0x%X)!\n", offset); @@ -96,20 +100,6 @@ static int bcm47xxpart_parse(struct mtd_info *master, continue; } - /* Standard NVRAM */ - if (buf[0x000 / 4] == NVRAM_HEADER || - buf[0x1000 / 4] == NVRAM_HEADER || - buf[0x8000 / 4] == NVRAM_HEADER || - (blocksize == 0x20000 && ( - buf[0x10000 / 4] == NVRAM_HEADER || - buf[0x11000 / 4] == NVRAM_HEADER || - buf[0x18000 / 4] == NVRAM_HEADER))) { - bcm47xxpart_add_part(&parts[curr_part++], "nvram", - offset, 0); - offset = rounddown(offset, blocksize); - continue; - } - /* * board_data starts with board_id which differs across boards, * but we can use 'MPFR' (hopefully) magic at 0x100 @@ -178,6 +168,30 @@ static int bcm47xxpart_parse(struct mtd_info *master, continue; } } + + /* Look for NVRAM at the end of the last block. 
*/ + for (i = 0; i < ARRAY_SIZE(possible_nvram_sizes); i++) { + if (curr_part > BCM47XXPART_MAX_PARTS) { + pr_warn("Reached maximum number of partitions, scanning stopped!\n"); + break; + } + + offset = master->size - possible_nvram_sizes[i]; + if (mtd_read(master, offset, 0x4, &bytes_read, + (uint8_t *)buf) < 0) { + pr_err("mtd_read error while reading at offset 0x%X!\n", + offset); + continue; + } + + /* Standard NVRAM */ + if (buf[0] == NVRAM_HEADER) { + bcm47xxpart_add_part(&parts[curr_part++], "nvram", + master->size - blocksize, 0); + break; + } + } + kfree(buf); /* diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 82c06165d3d2..dc571ebc1aa0 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1123,33 +1123,6 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file, } #endif -static inline unsigned long get_vm_size(struct vm_area_struct *vma) -{ - return vma->vm_end - vma->vm_start; -} - -static inline resource_size_t get_vm_offset(struct vm_area_struct *vma) -{ - return (resource_size_t) vma->vm_pgoff << PAGE_SHIFT; -} - -/* - * Set a new vm offset. - * - * Verify that the incoming offset really works as a page offset, - * and that the offset and size fit in a resource_size_t. - */ -static inline int set_vm_offset(struct vm_area_struct *vma, resource_size_t off) -{ - pgoff_t pgoff = off >> PAGE_SHIFT; - if (off != (resource_size_t) pgoff << PAGE_SHIFT) - return -EINVAL; - if (off + get_vm_size(vma) - 1 < off) - return -EINVAL; - vma->vm_pgoff = pgoff; - return 0; -} - /* * set up a mapping for shared memory segments */ @@ -1159,45 +1132,17 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma) struct mtd_file_info *mfi = file->private_data; struct mtd_info *mtd = mfi->mtd; struct map_info *map = mtd->priv; - resource_size_t start, off; - unsigned long len, vma_len; /* This is broken because it assumes the MTD device is map-based and that mtd->priv is a valid struct map_info. 
It should be replaced with something that uses the mtd_get_unmapped_area() operation properly. */ if (0 /*mtd->type == MTD_RAM || mtd->type == MTD_ROM*/) { - off = get_vm_offset(vma); - start = map->phys; - len = PAGE_ALIGN((start & ~PAGE_MASK) + map->size); - start &= PAGE_MASK; - vma_len = get_vm_size(vma); - - /* Overflow in off+len? */ - if (vma_len + off < off) - return -EINVAL; - /* Does it fit in the mapping? */ - if (vma_len + off > len) - return -EINVAL; - - off += start; - /* Did that overflow? */ - if (off < start) - return -EINVAL; - if (set_vm_offset(vma, off) < 0) - return -EINVAL; - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; - #ifdef pgprot_noncached - if (file->f_flags & O_DSYNC || off >= __pa(high_memory)) + if (file->f_flags & O_DSYNC || map->phys >= __pa(high_memory)) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); #endif - if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - - return 0; + return vm_iomap_memory(vma, map->phys, map->size); } return -ENOSYS; #else @@ -1238,6 +1183,7 @@ static struct file_system_type mtd_inodefs_type = { .mount = mtd_inodefs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("mtd_inodefs"); static int __init init_mtdchar(void) { diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 43214151b882..42c63927609d 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1523,6 +1523,14 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, oobreadlen -= toread; } } + + if (chip->options & NAND_NEED_READRDY) { + /* Apply delay or wait for ready/busy pin */ + if (!chip->dev_ready) + udelay(chip->chip_delay); + else + nand_wait_ready(mtd); + } } else { memcpy(buf, chip->buffers->databuf + col, bytes); buf += bytes; @@ -1787,6 +1795,14 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, len = min(len, readlen); buf = nand_transfer_oob(chip, buf, 
ops, len); + if (chip->options & NAND_NEED_READRDY) { + /* Apply delay or wait for ready/busy pin */ + if (!chip->dev_ready) + udelay(chip->chip_delay); + else + nand_wait_ready(mtd); + } + readlen -= len; if (!readlen) break; diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index e3aa2748a6e7..9c612388e5de 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -22,49 +22,51 @@ * 512 512 Byte page size */ struct nand_flash_dev nand_flash_ids[] = { +#define SP_OPTIONS NAND_NEED_READRDY +#define SP_OPTIONS16 (SP_OPTIONS | NAND_BUSWIDTH_16) #ifdef CONFIG_MTD_NAND_MUSEUM_IDS - {"NAND 1MiB 5V 8-bit", 0x6e, 256, 1, 0x1000, 0}, - {"NAND 2MiB 5V 8-bit", 0x64, 256, 2, 0x1000, 0}, - {"NAND 4MiB 5V 8-bit", 0x6b, 512, 4, 0x2000, 0}, - {"NAND 1MiB 3,3V 8-bit", 0xe8, 256, 1, 0x1000, 0}, - {"NAND 1MiB 3,3V 8-bit", 0xec, 256, 1, 0x1000, 0}, - {"NAND 2MiB 3,3V 8-bit", 0xea, 256, 2, 0x1000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xd5, 512, 4, 0x2000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xe3, 512, 4, 0x2000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xe5, 512, 4, 0x2000, 0}, - {"NAND 8MiB 3,3V 8-bit", 0xd6, 512, 8, 0x2000, 0}, - - {"NAND 8MiB 1,8V 8-bit", 0x39, 512, 8, 0x2000, 0}, - {"NAND 8MiB 3,3V 8-bit", 0xe6, 512, 8, 0x2000, 0}, - {"NAND 8MiB 1,8V 16-bit", 0x49, 512, 8, 0x2000, NAND_BUSWIDTH_16}, - {"NAND 8MiB 3,3V 16-bit", 0x59, 512, 8, 0x2000, NAND_BUSWIDTH_16}, + {"NAND 1MiB 5V 8-bit", 0x6e, 256, 1, 0x1000, SP_OPTIONS}, + {"NAND 2MiB 5V 8-bit", 0x64, 256, 2, 0x1000, SP_OPTIONS}, + {"NAND 4MiB 5V 8-bit", 0x6b, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 1MiB 3,3V 8-bit", 0xe8, 256, 1, 0x1000, SP_OPTIONS}, + {"NAND 1MiB 3,3V 8-bit", 0xec, 256, 1, 0x1000, SP_OPTIONS}, + {"NAND 2MiB 3,3V 8-bit", 0xea, 256, 2, 0x1000, SP_OPTIONS}, + {"NAND 4MiB 3,3V 8-bit", 0xd5, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 4MiB 3,3V 8-bit", 0xe3, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 4MiB 3,3V 8-bit", 0xe5, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 8MiB 3,3V 8-bit", 0xd6, 512, 8, 0x2000, 
SP_OPTIONS}, + + {"NAND 8MiB 1,8V 8-bit", 0x39, 512, 8, 0x2000, SP_OPTIONS}, + {"NAND 8MiB 3,3V 8-bit", 0xe6, 512, 8, 0x2000, SP_OPTIONS}, + {"NAND 8MiB 1,8V 16-bit", 0x49, 512, 8, 0x2000, SP_OPTIONS16}, + {"NAND 8MiB 3,3V 16-bit", 0x59, 512, 8, 0x2000, SP_OPTIONS16}, #endif - {"NAND 16MiB 1,8V 8-bit", 0x33, 512, 16, 0x4000, 0}, - {"NAND 16MiB 3,3V 8-bit", 0x73, 512, 16, 0x4000, 0}, - {"NAND 16MiB 1,8V 16-bit", 0x43, 512, 16, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 16MiB 3,3V 16-bit", 0x53, 512, 16, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 32MiB 1,8V 8-bit", 0x35, 512, 32, 0x4000, 0}, - {"NAND 32MiB 3,3V 8-bit", 0x75, 512, 32, 0x4000, 0}, - {"NAND 32MiB 1,8V 16-bit", 0x45, 512, 32, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 32MiB 3,3V 16-bit", 0x55, 512, 32, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 64MiB 1,8V 8-bit", 0x36, 512, 64, 0x4000, 0}, - {"NAND 64MiB 3,3V 8-bit", 0x76, 512, 64, 0x4000, 0}, - {"NAND 64MiB 1,8V 16-bit", 0x46, 512, 64, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 64MiB 3,3V 16-bit", 0x56, 512, 64, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 128MiB 1,8V 8-bit", 0x78, 512, 128, 0x4000, 0}, - {"NAND 128MiB 1,8V 8-bit", 0x39, 512, 128, 0x4000, 0}, - {"NAND 128MiB 3,3V 8-bit", 0x79, 512, 128, 0x4000, 0}, - {"NAND 128MiB 1,8V 16-bit", 0x72, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 1,8V 16-bit", 0x49, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 3,3V 16-bit", 0x74, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 3,3V 16-bit", 0x59, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 256MiB 3,3V 8-bit", 0x71, 512, 256, 0x4000, 0}, + {"NAND 16MiB 1,8V 8-bit", 0x33, 512, 16, 0x4000, SP_OPTIONS}, + {"NAND 16MiB 3,3V 8-bit", 0x73, 512, 16, 0x4000, SP_OPTIONS}, + {"NAND 16MiB 1,8V 16-bit", 0x43, 512, 16, 0x4000, SP_OPTIONS16}, + {"NAND 16MiB 3,3V 16-bit", 0x53, 512, 16, 0x4000, SP_OPTIONS16}, + + {"NAND 32MiB 1,8V 8-bit", 0x35, 512, 32, 0x4000, SP_OPTIONS}, + {"NAND 32MiB 3,3V 8-bit", 0x75, 512, 32, 0x4000, SP_OPTIONS}, + {"NAND 32MiB 1,8V 16-bit", 0x45, 512, 32, 
0x4000, SP_OPTIONS16}, + {"NAND 32MiB 3,3V 16-bit", 0x55, 512, 32, 0x4000, SP_OPTIONS16}, + + {"NAND 64MiB 1,8V 8-bit", 0x36, 512, 64, 0x4000, SP_OPTIONS}, + {"NAND 64MiB 3,3V 8-bit", 0x76, 512, 64, 0x4000, SP_OPTIONS}, + {"NAND 64MiB 1,8V 16-bit", 0x46, 512, 64, 0x4000, SP_OPTIONS16}, + {"NAND 64MiB 3,3V 16-bit", 0x56, 512, 64, 0x4000, SP_OPTIONS16}, + + {"NAND 128MiB 1,8V 8-bit", 0x78, 512, 128, 0x4000, SP_OPTIONS}, + {"NAND 128MiB 1,8V 8-bit", 0x39, 512, 128, 0x4000, SP_OPTIONS}, + {"NAND 128MiB 3,3V 8-bit", 0x79, 512, 128, 0x4000, SP_OPTIONS}, + {"NAND 128MiB 1,8V 16-bit", 0x72, 512, 128, 0x4000, SP_OPTIONS16}, + {"NAND 128MiB 1,8V 16-bit", 0x49, 512, 128, 0x4000, SP_OPTIONS16}, + {"NAND 128MiB 3,3V 16-bit", 0x74, 512, 128, 0x4000, SP_OPTIONS16}, + {"NAND 128MiB 3,3V 16-bit", 0x59, 512, 128, 0x4000, SP_OPTIONS16}, + + {"NAND 256MiB 3,3V 8-bit", 0x71, 512, 256, 0x4000, SP_OPTIONS}, /* * These are the new chips with large page size. The pagesize and the diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 87f1d39ca551..3835321b8cf3 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -151,6 +151,7 @@ config MACVTAP config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" depends on INET + select NET_IP_TUNNEL ---help--- This allows one to create vxlan virtual interfaces that provide Layer 2 Networks over Layer 3 Networks. VXLAN is often used diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index f5a89164e779..4ce6ca5f3d36 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -106,20 +106,4 @@ config IPDDP_ENCAP IP packets inside AppleTalk frames; this is useful if your Linux box is stuck on an AppleTalk network (which hopefully contains a decapsulator somewhere). Please see - <file:Documentation/networking/ipddp.txt> for more information. 
If - you said Y to "AppleTalk-IP driver support" above and you say Y - here, then you cannot say Y to "AppleTalk-IP to IP Decapsulation - support", below. - -config IPDDP_DECAP - bool "Appletalk-IP to IP Decapsulation support" - depends on IPDDP - help - If you say Y here, the AppleTalk-IP code will be able to decapsulate - AppleTalk-IP frames to IP packets; this is useful if you want your - Linux box to act as an Internet gateway for an AppleTalk network. - Please see <file:Documentation/networking/ipddp.txt> for more - information. If you said Y to "AppleTalk-IP driver support" above - and you say Y here, then you cannot say Y to "IP to AppleTalk-IP - Encapsulation support", above. - + <file:Documentation/networking/ipddp.txt> for more information. diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index f5e052723029..e02cc265723a 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -514,7 +514,7 @@ static void rlb_update_client(struct rlb_client_info *client_info) skb->dev = client_info->slave->dev; if (client_info->tag) { - skb = vlan_put_tag(skb, client_info->vlan_id); + skb = vlan_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id); if (!skb) { pr_err("%s: Error: failed to insert VLAN tag\n", client_info->slave->bond->dev->name); @@ -1014,7 +1014,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) continue; } - skb = vlan_put_tag(skb, vlan->vlan_id); + skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id); if (!skb) { pr_err("%s: Error: failed to insert VLAN tag\n", bond->dev->name); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7bd068a6056a..532153db1f9c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -428,14 +428,15 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, * @bond_dev: bonding net device that got called * @vid: vlan id being added */ -static int 
bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) +static int bond_vlan_rx_add_vid(struct net_device *bond_dev, + __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *stop_at; int i, res; bond_for_each_slave(bond, slave, i) { - res = vlan_vid_add(slave->dev, vid); + res = vlan_vid_add(slave->dev, proto, vid); if (res) goto unwind; } @@ -453,7 +454,7 @@ unwind: /* unwind from head to the slave that failed */ stop_at = slave; bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) - vlan_vid_del(slave->dev, vid); + vlan_vid_del(slave->dev, proto, vid); return res; } @@ -463,14 +464,15 @@ unwind: * @bond_dev: bonding net device that got called * @vid: vlan id being removed */ -static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) +static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, + __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; int i, res; bond_for_each_slave(bond, slave, i) - vlan_vid_del(slave->dev, vid); + vlan_vid_del(slave->dev, proto, vid); res = bond_del_vlan(bond, vid); if (res) { @@ -488,7 +490,8 @@ static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *sla int res; list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - res = vlan_vid_add(slave_dev, vlan->vlan_id); + res = vlan_vid_add(slave_dev, htons(ETH_P_8021Q), + vlan->vlan_id); if (res) pr_warning("%s: Failed to add vlan id %d to device %s\n", bond->dev->name, vlan->vlan_id, @@ -504,7 +507,7 @@ static void bond_del_vlans_from_slave(struct bonding *bond, list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { if (!vlan->vlan_id) continue; - vlan_vid_del(slave_dev, vlan->vlan_id); + vlan_vid_del(slave_dev, htons(ETH_P_8021Q), vlan->vlan_id); } } @@ -779,7 +782,7 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) /* rejoin all groups on vlan devices */ list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { - 
vlan_dev = __vlan_find_dev_deep(bond_dev, + vlan_dev = __vlan_find_dev_deep(bond_dev, htons(ETH_P_8021Q), vlan->vlan_id); if (vlan_dev) __bond_resend_igmp_join_requests(vlan_dev); @@ -796,9 +799,8 @@ static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mcast_work.work); - rcu_read_lock(); + bond_resend_igmp_join_requests(bond); - rcu_read_unlock(); } /* @@ -846,8 +848,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(old_active->dev, -1); + netif_addr_lock_bh(bond->dev); netdev_for_each_mc_addr(ha, bond->dev) dev_mc_del(old_active->dev, ha->addr); + netif_addr_unlock_bh(bond->dev); } if (new_active) { @@ -858,8 +862,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(new_active->dev, 1); + netif_addr_lock_bh(bond->dev); netdev_for_each_mc_addr(ha, bond->dev) dev_mc_add(new_active->dev, ha->addr); + netif_addr_unlock_bh(bond->dev); } } @@ -1746,6 +1752,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_compute_features(bond); + bond_update_speed_duplex(new_slave); + read_lock(&bond->lock); new_slave->last_arp_rx = jiffies - @@ -1798,8 +1806,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->link == BOND_LINK_DOWN ? "DOWN" : (new_slave->link == BOND_LINK_UP ? 
"UP" : "BACK")); - bond_update_speed_duplex(new_slave); - if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { /* if there is a primary slave, remember it */ if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { @@ -1901,11 +1907,29 @@ err_dest_symlinks: bond_destroy_slave_symlinks(bond_dev, slave_dev); err_detach: + if (!USES_PRIMARY(bond->params.mode)) { + netif_addr_lock_bh(bond_dev); + bond_mc_list_flush(bond_dev, slave_dev); + netif_addr_unlock_bh(bond_dev); + } + bond_del_vlans_from_slave(bond, slave_dev); write_lock_bh(&bond->lock); bond_detach_slave(bond, new_slave); + if (bond->primary_slave == new_slave) + bond->primary_slave = NULL; write_unlock_bh(&bond->lock); + if (bond->curr_active_slave == new_slave) { + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + bond_change_active_slave(bond, NULL); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + } + slave_disable_netpoll(new_slave); err_close: + slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); err_unset_master: @@ -1964,7 +1988,6 @@ static int __bond_release_one(struct net_device *bond_dev, } block_netpoll_tx(); - call_netdevice_notifiers(NETDEV_RELEASE, bond_dev); write_lock_bh(&bond->lock); slave = bond_get_slave_by_dev(bond, slave_dev); @@ -1977,12 +2000,11 @@ static int __bond_release_one(struct net_device *bond_dev, return -EINVAL; } + write_unlock_bh(&bond->lock); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. 
*/ netdev_rx_handler_unregister(slave_dev); - write_unlock_bh(&bond->lock); - synchronize_net(); write_lock_bh(&bond->lock); if (!all && !bond->params.fail_over_mac) { @@ -2066,8 +2088,10 @@ static int __bond_release_one(struct net_device *bond_dev, write_unlock_bh(&bond->lock); unblock_netpoll_tx(); - if (bond->slave_cnt == 0) + if (bond->slave_cnt == 0) { call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); + call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); + } bond_compute_features(bond); if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && @@ -2373,8 +2397,6 @@ static void bond_miimon_commit(struct bonding *bond) bond_set_backup_slave(slave); } - bond_update_speed_duplex(slave); - pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex.\n", bond->dev->name, slave->dev->name, slave->speed, slave->duplex ? "full" : "half"); @@ -2512,7 +2534,8 @@ static int bond_has_this_ip(struct bonding *bond, __be32 ip) list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { rcu_read_lock(); - vlan_dev = __vlan_find_dev_deep(bond->dev, vlan->vlan_id); + vlan_dev = __vlan_find_dev_deep(bond->dev, htons(ETH_P_8021Q), + vlan->vlan_id); rcu_read_unlock(); if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip)) return 1; @@ -2541,7 +2564,7 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ return; } if (vlan_id) { - skb = vlan_put_tag(skb, vlan_id); + skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (!skb) { pr_err("failed to insert VLAN tag\n"); return; @@ -2603,6 +2626,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { rcu_read_lock(); vlan_dev = __vlan_find_dev_deep(bond->dev, + htons(ETH_P_8021Q), vlan->vlan_id); rcu_read_unlock(); if (vlan_dev == rt->dst.dev) { @@ -3170,11 +3194,20 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) { struct slave *slave = bond_slave_get_rtnl(slave_dev); 
- struct bonding *bond = slave->bond; - struct net_device *bond_dev = slave->bond->dev; + struct bonding *bond; + struct net_device *bond_dev; u32 old_speed; u8 old_duplex; + /* A netdev event can be generated while enslaving a device + * before netdev_rx_handler_register is called in which case + * slave will be NULL + */ + if (!slave) + return NOTIFY_DONE; + bond_dev = slave->bond->dev; + bond = slave->bond; + switch (event) { case NETDEV_UNREGISTER: if (bond->setup_by_slave) @@ -3288,20 +3321,22 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) */ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) { - struct ethhdr *data = (struct ethhdr *)skb->data; - struct iphdr *iph; - struct ipv6hdr *ipv6h; + const struct ethhdr *data; + const struct iphdr *iph; + const struct ipv6hdr *ipv6h; u32 v6hash; - __be32 *s, *d; + const __be32 *s, *d; if (skb->protocol == htons(ETH_P_IP) && - skb_network_header_len(skb) >= sizeof(*iph)) { + pskb_network_may_pull(skb, sizeof(*iph))) { iph = ip_hdr(skb); + data = (struct ethhdr *)skb->data; return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ (data->h_dest[5] ^ data->h_source[5])) % count; } else if (skb->protocol == htons(ETH_P_IPV6) && - skb_network_header_len(skb) >= sizeof(*ipv6h)) { + pskb_network_may_pull(skb, sizeof(*ipv6h))) { ipv6h = ipv6_hdr(skb); + data = (struct ethhdr *)skb->data; s = &ipv6h->saddr.s6_addr32[0]; d = &ipv6h->daddr.s6_addr32[0]; v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); @@ -3320,33 +3355,36 @@ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) { u32 layer4_xor = 0; - struct iphdr *iph; - struct ipv6hdr *ipv6h; - __be32 *s, *d; - __be16 *layer4hdr; + const struct iphdr *iph; + const struct ipv6hdr *ipv6h; + const __be32 *s, *d; + const __be16 *l4 = NULL; + __be16 _l4[2]; + int noff = skb_network_offset(skb); + int poff; if (skb->protocol == htons(ETH_P_IP) && - 
skb_network_header_len(skb) >= sizeof(*iph)) { + pskb_may_pull(skb, noff + sizeof(*iph))) { iph = ip_hdr(skb); - if (!ip_is_fragment(iph) && - (iph->protocol == IPPROTO_TCP || - iph->protocol == IPPROTO_UDP) && - (skb_headlen(skb) - skb_network_offset(skb) >= - iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) { - layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); - layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); + poff = proto_ports_offset(iph->protocol); + + if (!ip_is_fragment(iph) && poff >= 0) { + l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff, + sizeof(_l4), &_l4); + if (l4) + layer4_xor = ntohs(l4[0] ^ l4[1]); } return (layer4_xor ^ ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; } else if (skb->protocol == htons(ETH_P_IPV6) && - skb_network_header_len(skb) >= sizeof(*ipv6h)) { + pskb_may_pull(skb, noff + sizeof(*ipv6h))) { ipv6h = ipv6_hdr(skb); - if ((ipv6h->nexthdr == IPPROTO_TCP || - ipv6h->nexthdr == IPPROTO_UDP) && - (skb_headlen(skb) - skb_network_offset(skb) >= - sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) { - layer4hdr = (__be16 *)(ipv6h + 1); - layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); + poff = proto_ports_offset(ipv6h->nexthdr); + if (poff >= 0) { + l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff, + sizeof(_l4), &_l4); + if (l4) + layer4_xor = ntohs(l4[0] ^ l4[1]); } s = &ipv6h->saddr.s6_addr32[0]; d = &ipv6h->daddr.s6_addr32[0]; @@ -4224,6 +4262,37 @@ void bond_set_mode_ops(struct bonding *bond, int mode) } } +static int bond_ethtool_get_settings(struct net_device *bond_dev, + struct ethtool_cmd *ecmd) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + int i; + unsigned long speed = 0; + + ecmd->duplex = DUPLEX_UNKNOWN; + ecmd->port = PORT_OTHER; + + /* Since SLAVE_IS_OK returns false for all inactive or down slaves, we + * do not need to check mode. 
Though link speed might not represent + * the true receive or transmit bandwidth (not all modes are symmetric) + * this is an accurate maximum. + */ + read_lock(&bond->lock); + bond_for_each_slave(bond, slave, i) { + if (SLAVE_IS_OK(slave)) { + if (slave->speed != SPEED_UNKNOWN) + speed += slave->speed; + if (ecmd->duplex == DUPLEX_UNKNOWN && + slave->duplex != DUPLEX_UNKNOWN) + ecmd->duplex = slave->duplex; + } + } + ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN); + read_unlock(&bond->lock); + return 0; +} + static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, struct ethtool_drvinfo *drvinfo) { @@ -4235,6 +4304,7 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, static const struct ethtool_ops bond_ethtool_ops = { .get_drvinfo = bond_ethtool_get_drvinfo, + .get_settings = bond_ethtool_get_settings, .get_link = ethtool_op_get_link, }; @@ -4325,9 +4395,9 @@ static void bond_setup(struct net_device *bond_dev) */ bond_dev->hw_features = BOND_VLAN_FEATURES | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); bond_dev->features |= bond_dev->hw_features; @@ -4848,9 +4918,18 @@ static int __net_init bond_net_init(struct net *net) static void __net_exit bond_net_exit(struct net *net) { struct bond_net *bn = net_generic(net, bond_net_id); + struct bonding *bond, *tmp_bond; + LIST_HEAD(list); bond_destroy_sysfs(bn); bond_destroy_proc_dir(bn); + + /* Kill off any bonds created after unregistering bond rtnl ops */ + rtnl_lock(); + list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) + unregister_netdevice_queue(bond->dev, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); } static struct pernet_operations bond_net_ops = { diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 1c9e09fbdff8..ea7a388f4843 100644 --- 
a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -183,6 +183,11 @@ int bond_create_slave_symlinks(struct net_device *master, sprintf(linkname, "slave_%s", slave->name); ret = sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj), linkname); + + /* free the master link created earlier in case of error */ + if (ret) + sysfs_remove_link(&(slave->dev.kobj), "master"); + return ret; } @@ -522,7 +527,7 @@ static ssize_t bonding_store_arp_interval(struct device *d, goto out; } if (new_value < 0) { - pr_err("%s: Invalid arp_interval value %d not in range 1-%d; rejected.\n", + pr_err("%s: Invalid arp_interval value %d not in range 0-%d; rejected.\n", bond->dev->name, new_value, INT_MAX); ret = -EINVAL; goto out; @@ -537,14 +542,15 @@ static ssize_t bonding_store_arp_interval(struct device *d, pr_info("%s: Setting ARP monitoring interval to %d.\n", bond->dev->name, new_value); bond->params.arp_interval = new_value; - if (bond->params.miimon) { - pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring.\n", - bond->dev->name, bond->dev->name); - bond->params.miimon = 0; - } - if (!bond->params.arp_targets[0]) { - pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified.\n", - bond->dev->name); + if (new_value) { + if (bond->params.miimon) { + pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring.\n", + bond->dev->name, bond->dev->name); + bond->params.miimon = 0; + } + if (!bond->params.arp_targets[0]) + pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified.\n", + bond->dev->name); } if (bond->dev->flags & IFF_UP) { /* If the interface is up, we may need to fire off @@ -552,10 +558,13 @@ static ssize_t bonding_store_arp_interval(struct device *d, * timer will get fired off when the open function * is called. 
*/ - cancel_delayed_work_sync(&bond->mii_work); - queue_delayed_work(bond->wq, &bond->arp_work, 0); + if (!new_value) { + cancel_delayed_work_sync(&bond->arp_work); + } else { + cancel_delayed_work_sync(&bond->mii_work); + queue_delayed_work(bond->wq, &bond->arp_work, 0); + } } - out: rtnl_unlock(); return ret; @@ -697,7 +706,7 @@ static ssize_t bonding_store_downdelay(struct device *d, } if (new_value < 0) { pr_err("%s: Invalid down delay value %d not in range %d-%d; rejected.\n", - bond->dev->name, new_value, 1, INT_MAX); + bond->dev->name, new_value, 0, INT_MAX); ret = -EINVAL; goto out; } else { @@ -752,8 +761,8 @@ static ssize_t bonding_store_updelay(struct device *d, goto out; } if (new_value < 0) { - pr_err("%s: Invalid down delay value %d not in range %d-%d; rejected.\n", - bond->dev->name, new_value, 1, INT_MAX); + pr_err("%s: Invalid up delay value %d not in range %d-%d; rejected.\n", + bond->dev->name, new_value, 0, INT_MAX); ret = -EINVAL; goto out; } else { @@ -963,37 +972,37 @@ static ssize_t bonding_store_miimon(struct device *d, } if (new_value < 0) { pr_err("%s: Invalid miimon value %d not in range %d-%d; rejected.\n", - bond->dev->name, new_value, 1, INT_MAX); + bond->dev->name, new_value, 0, INT_MAX); ret = -EINVAL; goto out; - } else { - pr_info("%s: Setting MII monitoring interval to %d.\n", - bond->dev->name, new_value); - bond->params.miimon = new_value; - if (bond->params.updelay) - pr_info("%s: Note: Updating updelay (to %d) since it is a multiple of the miimon value.\n", - bond->dev->name, - bond->params.updelay * bond->params.miimon); - if (bond->params.downdelay) - pr_info("%s: Note: Updating downdelay (to %d) since it is a multiple of the miimon value.\n", - bond->dev->name, - bond->params.downdelay * bond->params.miimon); - if (bond->params.arp_interval) { - pr_info("%s: MII monitoring cannot be used with ARP monitoring. 
Disabling ARP monitoring...\n", - bond->dev->name); - bond->params.arp_interval = 0; - if (bond->params.arp_validate) { - bond->params.arp_validate = - BOND_ARP_VALIDATE_NONE; - } - } - - if (bond->dev->flags & IFF_UP) { - /* If the interface is up, we may need to fire off - * the MII timer. If the interface is down, the - * timer will get fired off when the open function - * is called. - */ + } + pr_info("%s: Setting MII monitoring interval to %d.\n", + bond->dev->name, new_value); + bond->params.miimon = new_value; + if (bond->params.updelay) + pr_info("%s: Note: Updating updelay (to %d) since it is a multiple of the miimon value.\n", + bond->dev->name, + bond->params.updelay * bond->params.miimon); + if (bond->params.downdelay) + pr_info("%s: Note: Updating downdelay (to %d) since it is a multiple of the miimon value.\n", + bond->dev->name, + bond->params.downdelay * bond->params.miimon); + if (new_value && bond->params.arp_interval) { + pr_info("%s: MII monitoring cannot be used with ARP monitoring. Disabling ARP monitoring...\n", + bond->dev->name); + bond->params.arp_interval = 0; + if (bond->params.arp_validate) + bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; + } + if (bond->dev->flags & IFF_UP) { + /* If the interface is up, we may need to fire off + * the MII timer. If the interface is down, the + * timer will get fired off when the open function + * is called. + */ + if (!new_value) { + cancel_delayed_work_sync(&bond->mii_work); + } else { cancel_delayed_work_sync(&bond->arp_work); queue_delayed_work(bond->wq, &bond->mii_work, 0); } diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 60c2142373c9..a966128c2a7a 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -32,13 +32,6 @@ config CAIF_SPI_SYNC help to synchronize to the next transfer in case of over or under-runs. This option also needs to be enabled on the modem. 
-config CAIF_SHM - tristate "CAIF shared memory protocol driver" - depends on CAIF && U5500_MBOX - default n - ---help--- - The CAIF shared memory protocol driver for the STE UX5500 platform. - config CAIF_HSI tristate "CAIF HSI transport driver" depends on CAIF diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile index 91dff861560f..15a9d2fc753d 100644 --- a/drivers/net/caif/Makefile +++ b/drivers/net/caif/Makefile @@ -7,9 +7,5 @@ obj-$(CONFIG_CAIF_TTY) += caif_serial.o cfspi_slave-objs := caif_spi.o caif_spi_slave.o obj-$(CONFIG_CAIF_SPI_SLAVE) += cfspi_slave.o -# Shared memory -caif_shm-objs := caif_shmcore.o caif_shm_u5500.o -obj-$(CONFIG_CAIF_SHM) += caif_shm.o - # HSI interface obj-$(CONFIG_CAIF_HSI) += caif_hsi.o diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 0def8b3106f4..5e40a8b68cbe 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1,8 +1,7 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Daniel Martensson / daniel.martensson@stericsson.com - * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com + * Author: Daniel Martensson + * Dmitry.Tarnyagin / dmitry.tarnyagin@lockless.no * License terms: GNU General Public License (GPL) version 2. */ @@ -25,7 +24,7 @@ #include <net/caif/caif_hsi.h> MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Daniel Martensson<daniel.martensson@stericsson.com>"); +MODULE_AUTHOR("Daniel Martensson"); MODULE_DESCRIPTION("CAIF HSI driver"); /* Returns the number of padding bytes for alignment. 
*/ diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 666891a9a248..e56b56c08b27 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -21,7 +21,7 @@ #include <linux/debugfs.h> MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sjur Brendeland<sjur.brandeland@stericsson.com>"); +MODULE_AUTHOR("Sjur Brendeland"); MODULE_DESCRIPTION("CAIF serial device TTY line discipline"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_CAIF); diff --git a/drivers/net/caif/caif_shm_u5500.c b/drivers/net/caif/caif_shm_u5500.c deleted file mode 100644 index 89d76b7b325a..000000000000 --- a/drivers/net/caif/caif_shm_u5500.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":" fmt - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <mach/mbox-db5500.h> -#include <net/caif/caif_shm.h> - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("CAIF Shared Memory protocol driver"); - -#define MAX_SHM_INSTANCES 1 - -enum { - MBX_ACC0, - MBX_ACC1, - MBX_DSP -}; - -static struct shmdev_layer shmdev_lyr[MAX_SHM_INSTANCES]; - -static unsigned int shm_start; -static unsigned int shm_size; - -module_param(shm_size, uint , 0440); -MODULE_PARM_DESC(shm_total_size, "Start of SHM shared memory"); - -module_param(shm_start, uint , 0440); -MODULE_PARM_DESC(shm_total_start, "Total Size of SHM shared memory"); - -static int shmdev_send_msg(u32 dev_id, u32 mbx_msg) -{ - /* Always block until msg is written successfully */ - mbox_send(shmdev_lyr[dev_id].hmbx, mbx_msg, 
true); - return 0; -} - -static int shmdev_mbx_setup(void *pshmdrv_cb, struct shmdev_layer *pshm_dev, - void *pshm_drv) -{ - /* - * For UX5500, we have only 1 SHM instance which uses MBX0 - * for communication with the peer modem - */ - pshm_dev->hmbx = mbox_setup(MBX_ACC0, pshmdrv_cb, pshm_drv); - - if (!pshm_dev->hmbx) - return -ENODEV; - else - return 0; -} - -static int __init caif_shmdev_init(void) -{ - int i, result; - - /* Loop is currently overkill, there is only one instance */ - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - - shmdev_lyr[i].shm_base_addr = shm_start; - shmdev_lyr[i].shm_total_sz = shm_size; - - if (((char *)shmdev_lyr[i].shm_base_addr == NULL) - || (shmdev_lyr[i].shm_total_sz <= 0)) { - pr_warn("ERROR," - "Shared memory Address and/or Size incorrect" - ", Bailing out ...\n"); - result = -EINVAL; - goto clean; - } - - pr_info("SHM AREA (instance %d) STARTS" - " AT %p\n", i, (char *)shmdev_lyr[i].shm_base_addr); - - shmdev_lyr[i].shm_id = i; - shmdev_lyr[i].pshmdev_mbxsend = shmdev_send_msg; - shmdev_lyr[i].pshmdev_mbxsetup = shmdev_mbx_setup; - - /* - * Finally, CAIF core module is called with details in place: - * 1. SHM base address - * 2. SHM size - * 3. MBX handle - */ - result = caif_shmcore_probe(&shmdev_lyr[i]); - if (result) { - pr_warn("ERROR[%d]," - "Could not probe SHM core (instance %d)" - " Bailing out ...\n", result, i); - goto clean; - } - } - - return 0; - -clean: - /* - * For now, we assume that even if one instance of SHM fails, we bail - * out of the driver support completely. For this, we need to release - * any memory allocated and unregister any instance of SHM net device. 
- */ - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - if (shmdev_lyr[i].pshm_netdev) - unregister_netdev(shmdev_lyr[i].pshm_netdev); - } - return result; -} - -static void __exit caif_shmdev_exit(void) -{ - int i; - - for (i = 0; i < MAX_SHM_INSTANCES; i++) { - caif_shmcore_remove(shmdev_lyr[i].pshm_netdev); - kfree((void *)shmdev_lyr[i].shm_base_addr); - } - -} - -module_init(caif_shmdev_init); -module_exit(caif_shmdev_exit); diff --git a/drivers/net/caif/caif_shmcore.c b/drivers/net/caif/caif_shmcore.c deleted file mode 100644 index bce8bac311c9..000000000000 --- a/drivers/net/caif/caif_shmcore.c +++ /dev/null @@ -1,747 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Authors: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com, - * Daniel Martensson / daniel.martensson@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ":" fmt - -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/io.h> - -#include <net/caif/caif_device.h> -#include <net/caif/caif_shm.h> - -#define NR_TX_BUF 6 -#define NR_RX_BUF 6 -#define TX_BUF_SZ 0x2000 -#define RX_BUF_SZ 0x2000 - -#define CAIF_NEEDED_HEADROOM 32 - -#define CAIF_FLOW_ON 1 -#define CAIF_FLOW_OFF 0 - -#define LOW_WATERMARK 3 -#define HIGH_WATERMARK 4 - -/* Maximum number of CAIF buffers per shared memory buffer. */ -#define SHM_MAX_FRMS_PER_BUF 10 - -/* - * Size in bytes of the descriptor area - * (With end of descriptor signalling) - */ -#define SHM_CAIF_DESC_SIZE ((SHM_MAX_FRMS_PER_BUF + 1) * \ - sizeof(struct shm_pck_desc)) - -/* - * Offset to the first CAIF frame within a shared memory buffer. - * Aligned on 32 bytes. - */ -#define SHM_CAIF_FRM_OFS (SHM_CAIF_DESC_SIZE + (SHM_CAIF_DESC_SIZE % 32)) - -/* Number of bytes for CAIF shared memory header. 
*/ -#define SHM_HDR_LEN 1 - -/* Number of padding bytes for the complete CAIF frame. */ -#define SHM_FRM_PAD_LEN 4 - -#define CAIF_MAX_MTU 4096 - -#define SHM_SET_FULL(x) (((x+1) & 0x0F) << 0) -#define SHM_GET_FULL(x) (((x >> 0) & 0x0F) - 1) - -#define SHM_SET_EMPTY(x) (((x+1) & 0x0F) << 4) -#define SHM_GET_EMPTY(x) (((x >> 4) & 0x0F) - 1) - -#define SHM_FULL_MASK (0x0F << 0) -#define SHM_EMPTY_MASK (0x0F << 4) - -struct shm_pck_desc { - /* - * Offset from start of shared memory area to start of - * shared memory CAIF frame. - */ - u32 frm_ofs; - u32 frm_len; -}; - -struct buf_list { - unsigned char *desc_vptr; - u32 phy_addr; - u32 index; - u32 len; - u32 frames; - u32 frm_ofs; - struct list_head list; -}; - -struct shm_caif_frm { - /* Number of bytes of padding before the CAIF frame. */ - u8 hdr_ofs; -}; - -struct shmdrv_layer { - /* caif_dev_common must always be first in the structure*/ - struct caif_dev_common cfdev; - - u32 shm_tx_addr; - u32 shm_rx_addr; - u32 shm_base_addr; - u32 tx_empty_available; - spinlock_t lock; - - struct list_head tx_empty_list; - struct list_head tx_pend_list; - struct list_head tx_full_list; - struct list_head rx_empty_list; - struct list_head rx_pend_list; - struct list_head rx_full_list; - - struct workqueue_struct *pshm_tx_workqueue; - struct workqueue_struct *pshm_rx_workqueue; - - struct work_struct shm_tx_work; - struct work_struct shm_rx_work; - - struct sk_buff_head sk_qhead; - struct shmdev_layer *pshm_dev; -}; - -static int shm_netdev_open(struct net_device *shm_netdev) -{ - netif_wake_queue(shm_netdev); - return 0; -} - -static int shm_netdev_close(struct net_device *shm_netdev) -{ - netif_stop_queue(shm_netdev); - return 0; -} - -int caif_shmdrv_rx_cb(u32 mbx_msg, void *priv) -{ - struct buf_list *pbuf; - struct shmdrv_layer *pshm_drv; - struct list_head *pos; - u32 avail_emptybuff = 0; - unsigned long flags = 0; - - pshm_drv = priv; - - /* Check for received buffers. 
*/ - if (mbx_msg & SHM_FULL_MASK) { - int idx; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check whether we have any outstanding buffers. */ - if (list_empty(&pshm_drv->rx_empty_list)) { - - /* Release spin lock. */ - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* We print even in IRQ context... */ - pr_warn("No empty Rx buffers to fill: " - "mbx_msg:%x\n", mbx_msg); - - /* Bail out. */ - goto err_sync; - } - - pbuf = - list_entry(pshm_drv->rx_empty_list.next, - struct buf_list, list); - idx = pbuf->index; - - /* Check buffer synchronization. */ - if (idx != SHM_GET_FULL(mbx_msg)) { - - /* We print even in IRQ context... */ - pr_warn( - "phyif_shm_mbx_msg_cb: RX full out of sync:" - " idx:%d, msg:%x SHM_GET_FULL(mbx_msg):%x\n", - idx, mbx_msg, SHM_GET_FULL(mbx_msg)); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Bail out. */ - goto err_sync; - } - - list_del_init(&pbuf->list); - list_add_tail(&pbuf->list, &pshm_drv->rx_full_list); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Schedule RX work queue. */ - if (!work_pending(&pshm_drv->shm_rx_work)) - queue_work(pshm_drv->pshm_rx_workqueue, - &pshm_drv->shm_rx_work); - } - - /* Check for emptied buffers. */ - if (mbx_msg & SHM_EMPTY_MASK) { - int idx; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check whether we have any outstanding buffers. */ - if (list_empty(&pshm_drv->tx_full_list)) { - - /* We print even in IRQ context... */ - pr_warn("No TX to empty: msg:%x\n", mbx_msg); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Bail out. */ - goto err_sync; - } - - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - idx = pbuf->index; - - /* Check buffer synchronization. */ - if (idx != SHM_GET_EMPTY(mbx_msg)) { - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* We print even in IRQ context... */ - pr_warn("TX empty " - "out of sync:idx:%d, msg:%x\n", idx, mbx_msg); - - /* Bail out. 
*/ - goto err_sync; - } - list_del_init(&pbuf->list); - - /* Reset buffer parameters. */ - pbuf->frames = 0; - pbuf->frm_ofs = SHM_CAIF_FRM_OFS; - - list_add_tail(&pbuf->list, &pshm_drv->tx_empty_list); - - /* Check the available no. of buffers in the empty list */ - list_for_each(pos, &pshm_drv->tx_empty_list) - avail_emptybuff++; - - /* Check whether we have to wake up the transmitter. */ - if ((avail_emptybuff > HIGH_WATERMARK) && - (!pshm_drv->tx_empty_available)) { - pshm_drv->tx_empty_available = 1; - spin_unlock_irqrestore(&pshm_drv->lock, flags); - pshm_drv->cfdev.flowctrl - (pshm_drv->pshm_dev->pshm_netdev, - CAIF_FLOW_ON); - - - /* Schedule the work queue. if required */ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, - &pshm_drv->shm_tx_work); - } else - spin_unlock_irqrestore(&pshm_drv->lock, flags); - } - - return 0; - -err_sync: - return -EIO; -} - -static void shm_rx_work_func(struct work_struct *rx_work) -{ - struct shmdrv_layer *pshm_drv; - struct buf_list *pbuf; - unsigned long flags = 0; - struct sk_buff *skb; - char *p; - int ret; - - pshm_drv = container_of(rx_work, struct shmdrv_layer, shm_rx_work); - - while (1) { - - struct shm_pck_desc *pck_desc; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check for received buffers. */ - if (list_empty(&pshm_drv->rx_full_list)) { - spin_unlock_irqrestore(&pshm_drv->lock, flags); - break; - } - - pbuf = - list_entry(pshm_drv->rx_full_list.next, struct buf_list, - list); - list_del_init(&pbuf->list); - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - /* Retrieve pointer to start of the packet descriptor area. */ - pck_desc = (struct shm_pck_desc *) pbuf->desc_vptr; - - /* - * Check whether descriptor contains a CAIF shared memory - * frame. - */ - while (pck_desc->frm_ofs) { - unsigned int frm_buf_ofs; - unsigned int frm_pck_ofs; - unsigned int frm_pck_len; - /* - * Check whether offset is within buffer limits - * (lower). 
- */ - if (pck_desc->frm_ofs < - (pbuf->phy_addr - pshm_drv->shm_base_addr)) - break; - /* - * Check whether offset is within buffer limits - * (higher). - */ - if (pck_desc->frm_ofs > - ((pbuf->phy_addr - pshm_drv->shm_base_addr) + - pbuf->len)) - break; - - /* Calculate offset from start of buffer. */ - frm_buf_ofs = - pck_desc->frm_ofs - (pbuf->phy_addr - - pshm_drv->shm_base_addr); - - /* - * Calculate offset and length of CAIF packet while - * taking care of the shared memory header. - */ - frm_pck_ofs = - frm_buf_ofs + SHM_HDR_LEN + - (*(pbuf->desc_vptr + frm_buf_ofs)); - frm_pck_len = - (pck_desc->frm_len - SHM_HDR_LEN - - (*(pbuf->desc_vptr + frm_buf_ofs))); - - /* Check whether CAIF packet is within buffer limits */ - if ((frm_pck_ofs + pck_desc->frm_len) > pbuf->len) - break; - - /* Get a suitable CAIF packet and copy in data. */ - skb = netdev_alloc_skb(pshm_drv->pshm_dev->pshm_netdev, - frm_pck_len + 1); - - if (skb == NULL) { - pr_info("OOM: Try next frame in descriptor\n"); - break; - } - - p = skb_put(skb, frm_pck_len); - memcpy(p, pbuf->desc_vptr + frm_pck_ofs, frm_pck_len); - - skb->protocol = htons(ETH_P_CAIF); - skb_reset_mac_header(skb); - skb->dev = pshm_drv->pshm_dev->pshm_netdev; - - /* Push received packet up the stack. */ - ret = netif_rx_ni(skb); - - if (!ret) { - pshm_drv->pshm_dev->pshm_netdev->stats. - rx_packets++; - pshm_drv->pshm_dev->pshm_netdev->stats. - rx_bytes += pck_desc->frm_len; - } else - ++pshm_drv->pshm_dev->pshm_netdev->stats. - rx_dropped; - /* Move to next packet descriptor. */ - pck_desc++; - } - - spin_lock_irqsave(&pshm_drv->lock, flags); - list_add_tail(&pbuf->list, &pshm_drv->rx_pend_list); - - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - } - - /* Schedule the work queue. 
if required */ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work); - -} - -static void shm_tx_work_func(struct work_struct *tx_work) -{ - u32 mbox_msg; - unsigned int frmlen, avail_emptybuff, append = 0; - unsigned long flags = 0; - struct buf_list *pbuf = NULL; - struct shmdrv_layer *pshm_drv; - struct shm_caif_frm *frm; - struct sk_buff *skb; - struct shm_pck_desc *pck_desc; - struct list_head *pos; - - pshm_drv = container_of(tx_work, struct shmdrv_layer, shm_tx_work); - - do { - /* Initialize mailbox message. */ - mbox_msg = 0x00; - avail_emptybuff = 0; - - spin_lock_irqsave(&pshm_drv->lock, flags); - - /* Check for pending receive buffers. */ - if (!list_empty(&pshm_drv->rx_pend_list)) { - - pbuf = list_entry(pshm_drv->rx_pend_list.next, - struct buf_list, list); - - list_del_init(&pbuf->list); - list_add_tail(&pbuf->list, &pshm_drv->rx_empty_list); - /* - * Value index is never changed, - * so read access should be safe. - */ - mbox_msg |= SHM_SET_EMPTY(pbuf->index); - } - - skb = skb_peek(&pshm_drv->sk_qhead); - - if (skb == NULL) - goto send_msg; - /* Check the available no. of buffers in the empty list */ - list_for_each(pos, &pshm_drv->tx_empty_list) - avail_emptybuff++; - - if ((avail_emptybuff < LOW_WATERMARK) && - pshm_drv->tx_empty_available) { - /* Update blocking condition. */ - pshm_drv->tx_empty_available = 0; - spin_unlock_irqrestore(&pshm_drv->lock, flags); - pshm_drv->cfdev.flowctrl - (pshm_drv->pshm_dev->pshm_netdev, - CAIF_FLOW_OFF); - spin_lock_irqsave(&pshm_drv->lock, flags); - } - /* - * We simply return back to the caller if we do not have space - * either in Tx pending list or Tx empty list. In this case, - * we hold the received skb in the skb list, waiting to - * be transmitted once Tx buffers become available - */ - if (list_empty(&pshm_drv->tx_empty_list)) - goto send_msg; - - /* Get the first free Tx buffer. 
*/ - pbuf = list_entry(pshm_drv->tx_empty_list.next, - struct buf_list, list); - do { - if (append) { - skb = skb_peek(&pshm_drv->sk_qhead); - if (skb == NULL) - break; - } - - frm = (struct shm_caif_frm *) - (pbuf->desc_vptr + pbuf->frm_ofs); - - frm->hdr_ofs = 0; - frmlen = 0; - frmlen += SHM_HDR_LEN + frm->hdr_ofs + skb->len; - - /* Add tail padding if needed. */ - if (frmlen % SHM_FRM_PAD_LEN) - frmlen += SHM_FRM_PAD_LEN - - (frmlen % SHM_FRM_PAD_LEN); - - /* - * Verify that packet, header and additional padding - * can fit within the buffer frame area. - */ - if (frmlen >= (pbuf->len - pbuf->frm_ofs)) - break; - - if (!append) { - list_del_init(&pbuf->list); - append = 1; - } - - skb = skb_dequeue(&pshm_drv->sk_qhead); - if (skb == NULL) - break; - /* Copy in CAIF frame. */ - skb_copy_bits(skb, 0, pbuf->desc_vptr + - pbuf->frm_ofs + SHM_HDR_LEN + - frm->hdr_ofs, skb->len); - - pshm_drv->pshm_dev->pshm_netdev->stats.tx_packets++; - pshm_drv->pshm_dev->pshm_netdev->stats.tx_bytes += - frmlen; - dev_kfree_skb_irq(skb); - - /* Fill in the shared memory packet descriptor area. */ - pck_desc = (struct shm_pck_desc *) (pbuf->desc_vptr); - /* Forward to current frame. */ - pck_desc += pbuf->frames; - pck_desc->frm_ofs = (pbuf->phy_addr - - pshm_drv->shm_base_addr) + - pbuf->frm_ofs; - pck_desc->frm_len = frmlen; - /* Terminate packet descriptor area. */ - pck_desc++; - pck_desc->frm_ofs = 0; - /* Update buffer parameters. */ - pbuf->frames++; - pbuf->frm_ofs += frmlen + (frmlen % 32); - - } while (pbuf->frames < SHM_MAX_FRMS_PER_BUF); - - /* Assign buffer as full. 
*/ - list_add_tail(&pbuf->list, &pshm_drv->tx_full_list); - append = 0; - mbox_msg |= SHM_SET_FULL(pbuf->index); -send_msg: - spin_unlock_irqrestore(&pshm_drv->lock, flags); - - if (mbox_msg) - pshm_drv->pshm_dev->pshmdev_mbxsend - (pshm_drv->pshm_dev->shm_id, mbox_msg); - } while (mbox_msg); -} - -static int shm_netdev_tx(struct sk_buff *skb, struct net_device *shm_netdev) -{ - struct shmdrv_layer *pshm_drv; - - pshm_drv = netdev_priv(shm_netdev); - - skb_queue_tail(&pshm_drv->sk_qhead, skb); - - /* Schedule Tx work queue. for deferred processing of skbs*/ - if (!work_pending(&pshm_drv->shm_tx_work)) - queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work); - - return 0; -} - -static const struct net_device_ops netdev_ops = { - .ndo_open = shm_netdev_open, - .ndo_stop = shm_netdev_close, - .ndo_start_xmit = shm_netdev_tx, -}; - -static void shm_netdev_setup(struct net_device *pshm_netdev) -{ - struct shmdrv_layer *pshm_drv; - pshm_netdev->netdev_ops = &netdev_ops; - - pshm_netdev->mtu = CAIF_MAX_MTU; - pshm_netdev->type = ARPHRD_CAIF; - pshm_netdev->hard_header_len = CAIF_NEEDED_HEADROOM; - pshm_netdev->tx_queue_len = 0; - pshm_netdev->destructor = free_netdev; - - pshm_drv = netdev_priv(pshm_netdev); - - /* Initialize structures in a clean state. */ - memset(pshm_drv, 0, sizeof(struct shmdrv_layer)); - - pshm_drv->cfdev.link_select = CAIF_LINK_LOW_LATENCY; -} - -int caif_shmcore_probe(struct shmdev_layer *pshm_dev) -{ - int result, j; - struct shmdrv_layer *pshm_drv = NULL; - - pshm_dev->pshm_netdev = alloc_netdev(sizeof(struct shmdrv_layer), - "cfshm%d", shm_netdev_setup); - if (!pshm_dev->pshm_netdev) - return -ENOMEM; - - pshm_drv = netdev_priv(pshm_dev->pshm_netdev); - pshm_drv->pshm_dev = pshm_dev; - - /* - * Initialization starts with the verification of the - * availability of MBX driver by calling its setup function. - * MBX driver must be available by this time for proper - * functioning of SHM driver. 
- */ - if ((pshm_dev->pshmdev_mbxsetup - (caif_shmdrv_rx_cb, pshm_dev, pshm_drv)) != 0) { - pr_warn("Could not config. SHM Mailbox," - " Bailing out.....\n"); - free_netdev(pshm_dev->pshm_netdev); - return -ENODEV; - } - - skb_queue_head_init(&pshm_drv->sk_qhead); - - pr_info("SHM DEVICE[%d] PROBED BY DRIVER, NEW SHM DRIVER" - " INSTANCE AT pshm_drv =0x%p\n", - pshm_drv->pshm_dev->shm_id, pshm_drv); - - if (pshm_dev->shm_total_sz < - (NR_TX_BUF * TX_BUF_SZ + NR_RX_BUF * RX_BUF_SZ)) { - - pr_warn("ERROR, Amount of available" - " Phys. SHM cannot accommodate current SHM " - "driver configuration, Bailing out ...\n"); - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - - pshm_drv->shm_base_addr = pshm_dev->shm_base_addr; - pshm_drv->shm_tx_addr = pshm_drv->shm_base_addr; - - if (pshm_dev->shm_loopback) - pshm_drv->shm_rx_addr = pshm_drv->shm_tx_addr; - else - pshm_drv->shm_rx_addr = pshm_dev->shm_base_addr + - (NR_TX_BUF * TX_BUF_SZ); - - spin_lock_init(&pshm_drv->lock); - INIT_LIST_HEAD(&pshm_drv->tx_empty_list); - INIT_LIST_HEAD(&pshm_drv->tx_pend_list); - INIT_LIST_HEAD(&pshm_drv->tx_full_list); - - INIT_LIST_HEAD(&pshm_drv->rx_empty_list); - INIT_LIST_HEAD(&pshm_drv->rx_pend_list); - INIT_LIST_HEAD(&pshm_drv->rx_full_list); - - INIT_WORK(&pshm_drv->shm_tx_work, shm_tx_work_func); - INIT_WORK(&pshm_drv->shm_rx_work, shm_rx_work_func); - - pshm_drv->pshm_tx_workqueue = - create_singlethread_workqueue("shm_tx_work"); - pshm_drv->pshm_rx_workqueue = - create_singlethread_workqueue("shm_rx_work"); - - for (j = 0; j < NR_TX_BUF; j++) { - struct buf_list *tx_buf = - kmalloc(sizeof(struct buf_list), GFP_KERNEL); - - if (tx_buf == NULL) { - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - tx_buf->index = j; - tx_buf->phy_addr = pshm_drv->shm_tx_addr + (TX_BUF_SZ * j); - tx_buf->len = TX_BUF_SZ; - tx_buf->frames = 0; - tx_buf->frm_ofs = SHM_CAIF_FRM_OFS; - - if (pshm_dev->shm_loopback) - tx_buf->desc_vptr = (unsigned char *)tx_buf->phy_addr; - else - /* 
- * FIXME: the result of ioremap is not a pointer - arnd - */ - tx_buf->desc_vptr = - ioremap(tx_buf->phy_addr, TX_BUF_SZ); - - list_add_tail(&tx_buf->list, &pshm_drv->tx_empty_list); - } - - for (j = 0; j < NR_RX_BUF; j++) { - struct buf_list *rx_buf = - kmalloc(sizeof(struct buf_list), GFP_KERNEL); - - if (rx_buf == NULL) { - free_netdev(pshm_dev->pshm_netdev); - return -ENOMEM; - } - rx_buf->index = j; - rx_buf->phy_addr = pshm_drv->shm_rx_addr + (RX_BUF_SZ * j); - rx_buf->len = RX_BUF_SZ; - - if (pshm_dev->shm_loopback) - rx_buf->desc_vptr = (unsigned char *)rx_buf->phy_addr; - else - rx_buf->desc_vptr = - ioremap(rx_buf->phy_addr, RX_BUF_SZ); - list_add_tail(&rx_buf->list, &pshm_drv->rx_empty_list); - } - - pshm_drv->tx_empty_available = 1; - result = register_netdev(pshm_dev->pshm_netdev); - if (result) - pr_warn("ERROR[%d], SHM could not, " - "register with NW FRMWK Bailing out ...\n", result); - - return result; -} - -void caif_shmcore_remove(struct net_device *pshm_netdev) -{ - struct buf_list *pbuf; - struct shmdrv_layer *pshm_drv = NULL; - - pshm_drv = netdev_priv(pshm_netdev); - - while (!(list_empty(&pshm_drv->tx_pend_list))) { - pbuf = - list_entry(pshm_drv->tx_pend_list.next, - struct buf_list, list); - - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->tx_full_list))) { - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->tx_empty_list))) { - pbuf = - list_entry(pshm_drv->tx_empty_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_full_list))) { - pbuf = - list_entry(pshm_drv->tx_full_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while (!(list_empty(&pshm_drv->rx_pend_list))) { - pbuf = - list_entry(pshm_drv->tx_pend_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - while 
(!(list_empty(&pshm_drv->rx_empty_list))) { - pbuf = - list_entry(pshm_drv->rx_empty_list.next, - struct buf_list, list); - list_del(&pbuf->list); - kfree(pbuf); - } - - /* Destroy work queues. */ - destroy_workqueue(pshm_drv->pshm_tx_workqueue); - destroy_workqueue(pshm_drv->pshm_rx_workqueue); - - unregister_netdev(pshm_netdev); -} diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index b71ce9bf0afb..2fb279a63c50 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -1,7 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Daniel Martensson / Daniel.Martensson@stericsson.com + * Author: Daniel Martensson * License terms: GNU General Public License (GPL) version 2. */ @@ -29,7 +28,7 @@ #endif /* CONFIG_CAIF_SPI_SYNC */ MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Daniel Martensson<daniel.martensson@stericsson.com>"); +MODULE_AUTHOR("Daniel Martensson"); MODULE_DESCRIPTION("CAIF SPI driver"); /* Returns the number of padding bytes for alignment. */ diff --git a/drivers/net/caif/caif_spi_slave.c b/drivers/net/caif/caif_spi_slave.c index e139e133fc79..ee92ad5a6cf8 100644 --- a/drivers/net/caif/caif_spi_slave.c +++ b/drivers/net/caif/caif_spi_slave.c @@ -1,7 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Daniel Martensson / Daniel.Martensson@stericsson.com + * Author: Daniel Martensson * License terms: GNU General Public License (GPL) version 2. 
*/ #include <linux/init.h> diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 9862b2e07644..e456b70933c2 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -65,7 +65,7 @@ config CAN_LEDS config CAN_AT91 tristate "Atmel AT91 onchip CAN controller" - depends on ARCH_AT91SAM9263 || ARCH_AT91SAM9X5 + depends on ARM ---help--- This is a driver for the SoC CAN controller in Atmel's AT91SAM9263 and AT91SAM9X5 processors. diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 44f363792b59..db52f4414def 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -27,6 +27,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> @@ -155,19 +156,20 @@ struct at91_priv { canid_t mb0_id; }; -static const struct at91_devtype_data at91_devtype_data[] = { - [AT91_DEVTYPE_SAM9263] = { - .rx_first = 1, - .rx_split = 8, - .rx_last = 11, - .tx_shift = 2, - }, - [AT91_DEVTYPE_SAM9X5] = { - .rx_first = 0, - .rx_split = 4, - .rx_last = 5, - .tx_shift = 1, - }, +static const struct at91_devtype_data at91_at91sam9263_data = { + .rx_first = 1, + .rx_split = 8, + .rx_last = 11, + .tx_shift = 2, + .type = AT91_DEVTYPE_SAM9263, +}; + +static const struct at91_devtype_data at91_at91sam9x5_data = { + .rx_first = 0, + .rx_split = 4, + .rx_last = 5, + .tx_shift = 1, + .type = AT91_DEVTYPE_SAM9X5, }; static const struct can_bittiming_const at91_bittiming_const = { @@ -1249,10 +1251,42 @@ static struct attribute_group at91_sysfs_attr_group = { .attrs = at91_sysfs_attrs, }; +#if defined(CONFIG_OF) +static const struct of_device_id at91_can_dt_ids[] = { + { + .compatible = "atmel,at91sam9x5-can", + .data = &at91_at91sam9x5_data, + }, { + .compatible = "atmel,at91sam9263-can", + .data = &at91_at91sam9263_data, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, at91_can_dt_ids); +#else 
+#define at91_can_dt_ids NULL +#endif + +static const struct at91_devtype_data *at91_can_get_driver_data(struct platform_device *pdev) +{ + if (pdev->dev.of_node) { + const struct of_device_id *match; + + match = of_match_node(at91_can_dt_ids, pdev->dev.of_node); + if (!match) { + dev_err(&pdev->dev, "no matching node found in dtb\n"); + return NULL; + } + return (const struct at91_devtype_data *)match->data; + } + return (const struct at91_devtype_data *) + platform_get_device_id(pdev)->driver_data; +} + static int at91_can_probe(struct platform_device *pdev) { const struct at91_devtype_data *devtype_data; - enum at91_devtype devtype; struct net_device *dev; struct at91_priv *priv; struct resource *res; @@ -1260,8 +1294,12 @@ static int at91_can_probe(struct platform_device *pdev) void __iomem *addr; int err, irq; - devtype = pdev->id_entry->driver_data; - devtype_data = &at91_devtype_data[devtype]; + devtype_data = at91_can_get_driver_data(pdev); + if (!devtype_data) { + dev_err(&pdev->dev, "no driver data\n"); + err = -ENODEV; + goto exit; + } clk = clk_get(&pdev->dev, "can_clk"); if (IS_ERR(clk)) { @@ -1310,7 +1348,6 @@ static int at91_can_probe(struct platform_device *pdev) priv->dev = dev; priv->reg_base = addr; priv->devtype_data = *devtype_data; - priv->devtype_data.type = devtype; priv->clk = clk; priv->pdata = pdev->dev.platform_data; priv->mb0_id = 0x7ff; @@ -1373,10 +1410,10 @@ static int at91_can_remove(struct platform_device *pdev) static const struct platform_device_id at91_can_id_table[] = { { .name = "at91_can", - .driver_data = AT91_DEVTYPE_SAM9263, + .driver_data = (kernel_ulong_t)&at91_at91sam9x5_data, }, { .name = "at91sam9x5_can", - .driver_data = AT91_DEVTYPE_SAM9X5, + .driver_data = (kernel_ulong_t)&at91_at91sam9263_data, }, { /* sentinel */ } @@ -1389,6 +1426,7 @@ static struct platform_driver at91_can_driver = { .driver = { .name = KBUILD_MODNAME, .owner = THIS_MODULE, + .of_match_table = at91_can_dt_ids, }, .id_table = at91_can_id_table, 
}; diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 6a0532176b69..d4a15e82bfc0 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -412,7 +412,7 @@ static int bfin_can_err(struct net_device *dev, u16 isrc, u16 status) return 0; } -irqreturn_t bfin_can_interrupt(int irq, void *dev_id) +static irqreturn_t bfin_can_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; struct bfin_can_priv *priv = netdev_priv(dev); @@ -504,7 +504,7 @@ static int bfin_can_close(struct net_device *dev) return 0; } -struct net_device *alloc_bfin_candev(void) +static struct net_device *alloc_bfin_candev(void) { struct net_device *dev; struct bfin_can_priv *priv; diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index f32b9fc6a983..8cda23bf0614 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -269,7 +269,7 @@ struct mcp251x_priv { #define MCP251X_IS(_model) \ static inline int mcp251x_is_##_model(struct spi_device *spi) \ { \ - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); \ + struct mcp251x_priv *priv = spi_get_drvdata(spi); \ return priv->model == CAN_MCP251X_MCP##_model; \ } @@ -305,7 +305,7 @@ static void mcp251x_clean(struct net_device *net) */ static int mcp251x_spi_trans(struct spi_device *spi, int len) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); struct spi_transfer t = { .tx_buf = priv->spi_tx_buf, .rx_buf = priv->spi_rx_buf, @@ -333,7 +333,7 @@ static int mcp251x_spi_trans(struct spi_device *spi, int len) static u8 mcp251x_read_reg(struct spi_device *spi, uint8_t reg) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); u8 val = 0; priv->spi_tx_buf[0] = INSTRUCTION_READ; @@ -348,7 +348,7 @@ static u8 mcp251x_read_reg(struct spi_device *spi, uint8_t reg) static void mcp251x_read_2regs(struct spi_device *spi, uint8_t reg, uint8_t *v1, uint8_t 
*v2) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); priv->spi_tx_buf[0] = INSTRUCTION_READ; priv->spi_tx_buf[1] = reg; @@ -361,7 +361,7 @@ static void mcp251x_read_2regs(struct spi_device *spi, uint8_t reg, static void mcp251x_write_reg(struct spi_device *spi, u8 reg, uint8_t val) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); priv->spi_tx_buf[0] = INSTRUCTION_WRITE; priv->spi_tx_buf[1] = reg; @@ -373,7 +373,7 @@ static void mcp251x_write_reg(struct spi_device *spi, u8 reg, uint8_t val) static void mcp251x_write_bits(struct spi_device *spi, u8 reg, u8 mask, uint8_t val) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); priv->spi_tx_buf[0] = INSTRUCTION_BIT_MODIFY; priv->spi_tx_buf[1] = reg; @@ -386,7 +386,7 @@ static void mcp251x_write_bits(struct spi_device *spi, u8 reg, static void mcp251x_hw_tx_frame(struct spi_device *spi, u8 *buf, int len, int tx_buf_idx) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); if (mcp251x_is_2510(spi)) { int i; @@ -403,7 +403,7 @@ static void mcp251x_hw_tx_frame(struct spi_device *spi, u8 *buf, static void mcp251x_hw_tx(struct spi_device *spi, struct can_frame *frame, int tx_buf_idx) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); u32 sid, eid, exide, rtr; u8 buf[SPI_TRANSFER_BUF_LEN]; @@ -434,7 +434,7 @@ static void mcp251x_hw_tx(struct spi_device *spi, struct can_frame *frame, static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf, int buf_idx) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); if (mcp251x_is_2510(spi)) { int i, len; @@ -454,7 +454,7 @@ static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf, static void mcp251x_hw_rx(struct 
spi_device *spi, int buf_idx) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); struct sk_buff *skb; struct can_frame *frame; u8 buf[SPI_TRANSFER_BUF_LEN]; @@ -550,7 +550,7 @@ static int mcp251x_do_set_mode(struct net_device *net, enum can_mode mode) static int mcp251x_set_normal_mode(struct spi_device *spi) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); unsigned long timeout; /* Enable interrupts */ @@ -620,7 +620,7 @@ static int mcp251x_setup(struct net_device *net, struct mcp251x_priv *priv, static int mcp251x_hw_reset(struct spi_device *spi) { - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); int ret; unsigned long timeout; @@ -929,6 +929,7 @@ static int mcp251x_open(struct net_device *net) struct mcp251x_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; struct mcp251x_platform_data *pdata = spi->dev.platform_data; + unsigned long flags; int ret; ret = open_candev(net); @@ -945,9 +946,14 @@ static int mcp251x_open(struct net_device *net) priv->tx_skb = NULL; priv->tx_len = 0; + flags = IRQF_ONESHOT; + if (pdata->irq_flags) + flags |= pdata->irq_flags; + else + flags |= IRQF_TRIGGER_FALLING; + ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist, - pdata->irq_flags ? 
pdata->irq_flags : IRQF_TRIGGER_FALLING, - DEVICE_NAME, priv); + flags, DEVICE_NAME, priv); if (ret) { dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq); if (pdata->transceiver_enable) @@ -1020,7 +1026,7 @@ static int mcp251x_can_probe(struct spi_device *spi) CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY; priv->model = spi_get_device_id(spi)->driver_data; priv->net = net; - dev_set_drvdata(&spi->dev, priv); + spi_set_drvdata(spi, priv); priv->spi = spi; mutex_init(&priv->mcp_lock); @@ -1118,7 +1124,7 @@ error_out: static int mcp251x_can_remove(struct spi_device *spi) { struct mcp251x_platform_data *pdata = spi->dev.platform_data; - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); struct net_device *net = priv->net; unregister_candev(net); @@ -1138,11 +1144,13 @@ static int mcp251x_can_remove(struct spi_device *spi) return 0; } -#ifdef CONFIG_PM -static int mcp251x_can_suspend(struct spi_device *spi, pm_message_t state) +#ifdef CONFIG_PM_SLEEP + +static int mcp251x_can_suspend(struct device *dev) { + struct spi_device *spi = to_spi_device(dev); struct mcp251x_platform_data *pdata = spi->dev.platform_data; - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); struct net_device *net = priv->net; priv->force_quit = 1; @@ -1170,10 +1178,11 @@ static int mcp251x_can_suspend(struct spi_device *spi, pm_message_t state) return 0; } -static int mcp251x_can_resume(struct spi_device *spi) +static int mcp251x_can_resume(struct device *dev) { + struct spi_device *spi = to_spi_device(dev); struct mcp251x_platform_data *pdata = spi->dev.platform_data; - struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + struct mcp251x_priv *priv = spi_get_drvdata(spi); if (priv->after_suspend & AFTER_SUSPEND_POWER) { pdata->power_enable(1); @@ -1191,11 +1200,11 @@ static int mcp251x_can_resume(struct spi_device *spi) enable_irq(spi->irq); return 0; } -#else -#define 
mcp251x_can_suspend NULL -#define mcp251x_can_resume NULL #endif +static SIMPLE_DEV_PM_OPS(mcp251x_can_pm_ops, mcp251x_can_suspend, + mcp251x_can_resume); + static const struct spi_device_id mcp251x_id_table[] = { { "mcp2510", CAN_MCP251X_MCP2510 }, { "mcp2515", CAN_MCP251X_MCP2515 }, @@ -1207,29 +1216,15 @@ MODULE_DEVICE_TABLE(spi, mcp251x_id_table); static struct spi_driver mcp251x_can_driver = { .driver = { .name = DEVICE_NAME, - .bus = &spi_bus_type, .owner = THIS_MODULE, + .pm = &mcp251x_can_pm_ops, }, .id_table = mcp251x_id_table, .probe = mcp251x_can_probe, .remove = mcp251x_can_remove, - .suspend = mcp251x_can_suspend, - .resume = mcp251x_can_resume, }; - -static int __init mcp251x_can_init(void) -{ - return spi_register_driver(&mcp251x_can_driver); -} - -static void __exit mcp251x_can_exit(void) -{ - spi_unregister_driver(&mcp251x_can_driver); -} - -module_init(mcp251x_can_init); -module_exit(mcp251x_can_exit); +module_spi_driver(mcp251x_can_driver); MODULE_AUTHOR("Chris Elston <celston@katalix.com>, " "Christian Pellegrin <chripell@evolware.org>"); diff --git a/drivers/net/can/sja1000/Kconfig b/drivers/net/can/sja1000/Kconfig index b39ca5b3ea7f..ff2ba86cd4a4 100644 --- a/drivers/net/can/sja1000/Kconfig +++ b/drivers/net/can/sja1000/Kconfig @@ -46,6 +46,7 @@ config CAN_EMS_PCI config CAN_PEAK_PCMCIA tristate "PEAK PCAN-PC Card" depends on PCMCIA + depends on HAS_IOPORT ---help--- This driver is for the PCAN-PC Card PCMCIA adapter (1 or 2 channels) from PEAK-System (http://www.peak-system.com). 
To compile this diff --git a/drivers/net/can/sja1000/ems_pci.c b/drivers/net/can/sja1000/ems_pci.c index 36d298da2af6..3752342a678a 100644 --- a/drivers/net/can/sja1000/ems_pci.c +++ b/drivers/net/can/sja1000/ems_pci.c @@ -168,12 +168,12 @@ static inline int ems_pci_check_chan(const struct sja1000_priv *priv) unsigned char res; /* Make sure SJA1000 is in reset mode */ - priv->write_reg(priv, REG_MOD, 1); + priv->write_reg(priv, SJA1000_MOD, 1); - priv->write_reg(priv, REG_CDR, CDR_PELICAN); + priv->write_reg(priv, SJA1000_CDR, CDR_PELICAN); /* read reset-values */ - res = priv->read_reg(priv, REG_CDR); + res = priv->read_reg(priv, SJA1000_CDR); if (res == CDR_PELICAN) return 1; diff --git a/drivers/net/can/sja1000/ems_pcmcia.c b/drivers/net/can/sja1000/ems_pcmcia.c index 5c2f3fbbf5ae..a3aa6817b515 100644 --- a/drivers/net/can/sja1000/ems_pcmcia.c +++ b/drivers/net/can/sja1000/ems_pcmcia.c @@ -126,11 +126,11 @@ static irqreturn_t ems_pcmcia_interrupt(int irq, void *dev_id) static inline int ems_pcmcia_check_chan(struct sja1000_priv *priv) { /* Make sure SJA1000 is in reset mode */ - ems_pcmcia_write_reg(priv, REG_MOD, 1); - ems_pcmcia_write_reg(priv, REG_CDR, CDR_PELICAN); + ems_pcmcia_write_reg(priv, SJA1000_MOD, 1); + ems_pcmcia_write_reg(priv, SJA1000_CDR, CDR_PELICAN); /* read reset-values */ - if (ems_pcmcia_read_reg(priv, REG_CDR) == CDR_PELICAN) + if (ems_pcmcia_read_reg(priv, SJA1000_CDR) == CDR_PELICAN) return 1; return 0; diff --git a/drivers/net/can/sja1000/kvaser_pci.c b/drivers/net/can/sja1000/kvaser_pci.c index 37b0381f532e..217585b97cd3 100644 --- a/drivers/net/can/sja1000/kvaser_pci.c +++ b/drivers/net/can/sja1000/kvaser_pci.c @@ -159,9 +159,9 @@ static int number_of_sja1000_chip(void __iomem *base_addr) for (i = 0; i < MAX_NO_OF_CHANNELS; i++) { /* reset chip */ iowrite8(MOD_RM, base_addr + - (i * KVASER_PCI_PORT_BYTES) + REG_MOD); + (i * KVASER_PCI_PORT_BYTES) + SJA1000_MOD); status = ioread8(base_addr + - (i * KVASER_PCI_PORT_BYTES) + REG_MOD); + 
(i * KVASER_PCI_PORT_BYTES) + SJA1000_MOD); /* check reset bit */ if (!(status & MOD_RM)) break; diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index d1e7f1006ddd..6b6f0ad75090 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -402,7 +402,7 @@ static void peak_pciec_write_reg(const struct sja1000_priv *priv, int c = (priv->reg_base - card->reg_base) / PEAK_PCI_CHAN_SIZE; /* sja1000 register changes control the leds state */ - if (port == REG_MOD) + if (port == SJA1000_MOD) switch (val) { case MOD_RM: /* Reset Mode: set led on */ diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c index 1a7020ba37f5..977901a0214a 100644 --- a/drivers/net/can/sja1000/peak_pcmcia.c +++ b/drivers/net/can/sja1000/peak_pcmcia.c @@ -196,7 +196,7 @@ static void pcan_write_canreg(const struct sja1000_priv *priv, int port, u8 v) int c = (priv->reg_base - card->ioport_addr) / PCC_CHAN_SIZE; /* sja1000 register changes control the leds state */ - if (port == REG_MOD) + if (port == SJA1000_MOD) switch (v) { case MOD_RM: /* Reset Mode: set led on */ @@ -509,11 +509,11 @@ static void pcan_free_channels(struct pcan_pccard *card) static inline int pcan_channel_present(struct sja1000_priv *priv) { /* make sure SJA1000 is in reset mode */ - pcan_write_canreg(priv, REG_MOD, 1); - pcan_write_canreg(priv, REG_CDR, CDR_PELICAN); + pcan_write_canreg(priv, SJA1000_MOD, 1); + pcan_write_canreg(priv, SJA1000_CDR, CDR_PELICAN); /* read reset-values */ - if (pcan_read_canreg(priv, REG_CDR) == CDR_PELICAN) + if (pcan_read_canreg(priv, SJA1000_CDR) == CDR_PELICAN) return 1; return 0; diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c index a042cdc260dc..c52c1e96bf90 100644 --- a/drivers/net/can/sja1000/plx_pci.c +++ b/drivers/net/can/sja1000/plx_pci.c @@ -348,20 +348,20 @@ static inline int plx_pci_check_sja1000(const struct sja1000_priv *priv) */ if 
((priv->read_reg(priv, REG_CR) & REG_CR_BASICCAN_INITIAL_MASK) == REG_CR_BASICCAN_INITIAL && - (priv->read_reg(priv, REG_SR) == REG_SR_BASICCAN_INITIAL) && - (priv->read_reg(priv, REG_IR) == REG_IR_BASICCAN_INITIAL)) + (priv->read_reg(priv, SJA1000_SR) == REG_SR_BASICCAN_INITIAL) && + (priv->read_reg(priv, SJA1000_IR) == REG_IR_BASICCAN_INITIAL)) flag = 1; /* Bring the SJA1000 into the PeliCAN mode*/ - priv->write_reg(priv, REG_CDR, CDR_PELICAN); + priv->write_reg(priv, SJA1000_CDR, CDR_PELICAN); /* * Check registers after reset in the PeliCAN mode. * See states on p. 23 of the Datasheet. */ - if (priv->read_reg(priv, REG_MOD) == REG_MOD_PELICAN_INITIAL && - priv->read_reg(priv, REG_SR) == REG_SR_PELICAN_INITIAL && - priv->read_reg(priv, REG_IR) == REG_IR_PELICAN_INITIAL) + if (priv->read_reg(priv, SJA1000_MOD) == REG_MOD_PELICAN_INITIAL && + priv->read_reg(priv, SJA1000_SR) == REG_SR_PELICAN_INITIAL && + priv->read_reg(priv, SJA1000_IR) == REG_IR_PELICAN_INITIAL) return flag; return 0; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index daf4013a8fc7..7164a999f50f 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -91,14 +91,14 @@ static void sja1000_write_cmdreg(struct sja1000_priv *priv, u8 val) * the write_reg() operation - especially on SMP systems. 
*/ spin_lock_irqsave(&priv->cmdreg_lock, flags); - priv->write_reg(priv, REG_CMR, val); - priv->read_reg(priv, REG_SR); + priv->write_reg(priv, SJA1000_CMR, val); + priv->read_reg(priv, SJA1000_SR); spin_unlock_irqrestore(&priv->cmdreg_lock, flags); } static int sja1000_is_absent(struct sja1000_priv *priv) { - return (priv->read_reg(priv, REG_MOD) == 0xFF); + return (priv->read_reg(priv, SJA1000_MOD) == 0xFF); } static int sja1000_probe_chip(struct net_device *dev) @@ -116,11 +116,11 @@ static int sja1000_probe_chip(struct net_device *dev) static void set_reset_mode(struct net_device *dev) { struct sja1000_priv *priv = netdev_priv(dev); - unsigned char status = priv->read_reg(priv, REG_MOD); + unsigned char status = priv->read_reg(priv, SJA1000_MOD); int i; /* disable interrupts */ - priv->write_reg(priv, REG_IER, IRQ_OFF); + priv->write_reg(priv, SJA1000_IER, IRQ_OFF); for (i = 0; i < 100; i++) { /* check reset bit */ @@ -129,9 +129,10 @@ static void set_reset_mode(struct net_device *dev) return; } - priv->write_reg(priv, REG_MOD, MOD_RM); /* reset chip */ + /* reset chip */ + priv->write_reg(priv, SJA1000_MOD, MOD_RM); udelay(10); - status = priv->read_reg(priv, REG_MOD); + status = priv->read_reg(priv, SJA1000_MOD); } netdev_err(dev, "setting SJA1000 into reset mode failed!\n"); @@ -140,7 +141,7 @@ static void set_reset_mode(struct net_device *dev) static void set_normal_mode(struct net_device *dev) { struct sja1000_priv *priv = netdev_priv(dev); - unsigned char status = priv->read_reg(priv, REG_MOD); + unsigned char status = priv->read_reg(priv, SJA1000_MOD); int i; for (i = 0; i < 100; i++) { @@ -149,22 +150,22 @@ static void set_normal_mode(struct net_device *dev) priv->can.state = CAN_STATE_ERROR_ACTIVE; /* enable interrupts */ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) - priv->write_reg(priv, REG_IER, IRQ_ALL); + priv->write_reg(priv, SJA1000_IER, IRQ_ALL); else - priv->write_reg(priv, REG_IER, + priv->write_reg(priv, SJA1000_IER, IRQ_ALL & 
~IRQ_BEI); return; } /* set chip to normal mode */ if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) - priv->write_reg(priv, REG_MOD, MOD_LOM); + priv->write_reg(priv, SJA1000_MOD, MOD_LOM); else - priv->write_reg(priv, REG_MOD, 0x00); + priv->write_reg(priv, SJA1000_MOD, 0x00); udelay(10); - status = priv->read_reg(priv, REG_MOD); + status = priv->read_reg(priv, SJA1000_MOD); } netdev_err(dev, "setting SJA1000 into normal mode failed!\n"); @@ -179,9 +180,9 @@ static void sja1000_start(struct net_device *dev) set_reset_mode(dev); /* Clear error counters and error code capture */ - priv->write_reg(priv, REG_TXERR, 0x0); - priv->write_reg(priv, REG_RXERR, 0x0); - priv->read_reg(priv, REG_ECC); + priv->write_reg(priv, SJA1000_TXERR, 0x0); + priv->write_reg(priv, SJA1000_RXERR, 0x0); + priv->read_reg(priv, SJA1000_ECC); /* leave reset mode */ set_normal_mode(dev); @@ -217,8 +218,8 @@ static int sja1000_set_bittiming(struct net_device *dev) netdev_info(dev, "setting BTR0=0x%02x BTR1=0x%02x\n", btr0, btr1); - priv->write_reg(priv, REG_BTR0, btr0); - priv->write_reg(priv, REG_BTR1, btr1); + priv->write_reg(priv, SJA1000_BTR0, btr0); + priv->write_reg(priv, SJA1000_BTR1, btr1); return 0; } @@ -228,8 +229,8 @@ static int sja1000_get_berr_counter(const struct net_device *dev, { struct sja1000_priv *priv = netdev_priv(dev); - bec->txerr = priv->read_reg(priv, REG_TXERR); - bec->rxerr = priv->read_reg(priv, REG_RXERR); + bec->txerr = priv->read_reg(priv, SJA1000_TXERR); + bec->rxerr = priv->read_reg(priv, SJA1000_RXERR); return 0; } @@ -247,20 +248,20 @@ static void chipset_init(struct net_device *dev) struct sja1000_priv *priv = netdev_priv(dev); /* set clock divider and output control register */ - priv->write_reg(priv, REG_CDR, priv->cdr | CDR_PELICAN); + priv->write_reg(priv, SJA1000_CDR, priv->cdr | CDR_PELICAN); /* set acceptance filter (accept all) */ - priv->write_reg(priv, REG_ACCC0, 0x00); - priv->write_reg(priv, REG_ACCC1, 0x00); - priv->write_reg(priv, REG_ACCC2, 
0x00); - priv->write_reg(priv, REG_ACCC3, 0x00); + priv->write_reg(priv, SJA1000_ACCC0, 0x00); + priv->write_reg(priv, SJA1000_ACCC1, 0x00); + priv->write_reg(priv, SJA1000_ACCC2, 0x00); + priv->write_reg(priv, SJA1000_ACCC3, 0x00); - priv->write_reg(priv, REG_ACCM0, 0xFF); - priv->write_reg(priv, REG_ACCM1, 0xFF); - priv->write_reg(priv, REG_ACCM2, 0xFF); - priv->write_reg(priv, REG_ACCM3, 0xFF); + priv->write_reg(priv, SJA1000_ACCM0, 0xFF); + priv->write_reg(priv, SJA1000_ACCM1, 0xFF); + priv->write_reg(priv, SJA1000_ACCM2, 0xFF); + priv->write_reg(priv, SJA1000_ACCM3, 0xFF); - priv->write_reg(priv, REG_OCR, priv->ocr | OCR_MODE_NORMAL); + priv->write_reg(priv, SJA1000_OCR, priv->ocr | OCR_MODE_NORMAL); } /* @@ -289,21 +290,21 @@ static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb, id = cf->can_id; if (id & CAN_RTR_FLAG) - fi |= FI_RTR; + fi |= SJA1000_FI_RTR; if (id & CAN_EFF_FLAG) { - fi |= FI_FF; - dreg = EFF_BUF; - priv->write_reg(priv, REG_FI, fi); - priv->write_reg(priv, REG_ID1, (id & 0x1fe00000) >> (5 + 16)); - priv->write_reg(priv, REG_ID2, (id & 0x001fe000) >> (5 + 8)); - priv->write_reg(priv, REG_ID3, (id & 0x00001fe0) >> 5); - priv->write_reg(priv, REG_ID4, (id & 0x0000001f) << 3); + fi |= SJA1000_FI_FF; + dreg = SJA1000_EFF_BUF; + priv->write_reg(priv, SJA1000_FI, fi); + priv->write_reg(priv, SJA1000_ID1, (id & 0x1fe00000) >> 21); + priv->write_reg(priv, SJA1000_ID2, (id & 0x001fe000) >> 13); + priv->write_reg(priv, SJA1000_ID3, (id & 0x00001fe0) >> 5); + priv->write_reg(priv, SJA1000_ID4, (id & 0x0000001f) << 3); } else { - dreg = SFF_BUF; - priv->write_reg(priv, REG_FI, fi); - priv->write_reg(priv, REG_ID1, (id & 0x000007f8) >> 3); - priv->write_reg(priv, REG_ID2, (id & 0x00000007) << 5); + dreg = SJA1000_SFF_BUF; + priv->write_reg(priv, SJA1000_FI, fi); + priv->write_reg(priv, SJA1000_ID1, (id & 0x000007f8) >> 3); + priv->write_reg(priv, SJA1000_ID2, (id & 0x00000007) << 5); } for (i = 0; i < dlc; i++) @@ -335,25 +336,25 @@ static void 
sja1000_rx(struct net_device *dev) if (skb == NULL) return; - fi = priv->read_reg(priv, REG_FI); + fi = priv->read_reg(priv, SJA1000_FI); - if (fi & FI_FF) { + if (fi & SJA1000_FI_FF) { /* extended frame format (EFF) */ - dreg = EFF_BUF; - id = (priv->read_reg(priv, REG_ID1) << (5 + 16)) - | (priv->read_reg(priv, REG_ID2) << (5 + 8)) - | (priv->read_reg(priv, REG_ID3) << 5) - | (priv->read_reg(priv, REG_ID4) >> 3); + dreg = SJA1000_EFF_BUF; + id = (priv->read_reg(priv, SJA1000_ID1) << 21) + | (priv->read_reg(priv, SJA1000_ID2) << 13) + | (priv->read_reg(priv, SJA1000_ID3) << 5) + | (priv->read_reg(priv, SJA1000_ID4) >> 3); id |= CAN_EFF_FLAG; } else { /* standard frame format (SFF) */ - dreg = SFF_BUF; - id = (priv->read_reg(priv, REG_ID1) << 3) - | (priv->read_reg(priv, REG_ID2) >> 5); + dreg = SJA1000_SFF_BUF; + id = (priv->read_reg(priv, SJA1000_ID1) << 3) + | (priv->read_reg(priv, SJA1000_ID2) >> 5); } cf->can_dlc = get_can_dlc(fi & 0x0F); - if (fi & FI_RTR) { + if (fi & SJA1000_FI_RTR) { id |= CAN_RTR_FLAG; } else { for (i = 0; i < cf->can_dlc; i++) @@ -414,7 +415,7 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) priv->can.can_stats.bus_error++; stats->rx_errors++; - ecc = priv->read_reg(priv, REG_ECC); + ecc = priv->read_reg(priv, SJA1000_ECC); cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; @@ -448,7 +449,7 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) if (isrc & IRQ_ALI) { /* arbitration lost interrupt */ netdev_dbg(dev, "arbitration lost interrupt\n"); - alc = priv->read_reg(priv, REG_ALC); + alc = priv->read_reg(priv, SJA1000_ALC); priv->can.can_stats.arbitration_lost++; stats->tx_errors++; cf->can_id |= CAN_ERR_LOSTARB; @@ -457,8 +458,8 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) if (state != priv->can.state && (state == CAN_STATE_ERROR_WARNING || state == CAN_STATE_ERROR_PASSIVE)) { - uint8_t rxerr = priv->read_reg(priv, REG_RXERR); - uint8_t txerr = 
priv->read_reg(priv, REG_TXERR); + uint8_t rxerr = priv->read_reg(priv, SJA1000_RXERR); + uint8_t txerr = priv->read_reg(priv, SJA1000_TXERR); cf->can_id |= CAN_ERR_CRTL; if (state == CAN_STATE_ERROR_WARNING) { priv->can.can_stats.error_warning++; @@ -494,15 +495,16 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) int n = 0; /* Shared interrupts and IRQ off? */ - if (priv->read_reg(priv, REG_IER) == IRQ_OFF) + if (priv->read_reg(priv, SJA1000_IER) == IRQ_OFF) return IRQ_NONE; if (priv->pre_irq) priv->pre_irq(priv); - while ((isrc = priv->read_reg(priv, REG_IR)) && (n < SJA1000_MAX_IRQ)) { + while ((isrc = priv->read_reg(priv, SJA1000_IR)) && + (n < SJA1000_MAX_IRQ)) { n++; - status = priv->read_reg(priv, REG_SR); + status = priv->read_reg(priv, SJA1000_SR); /* check for absent controller due to hw unplug */ if (status == 0xFF && sja1000_is_absent(priv)) return IRQ_NONE; @@ -519,7 +521,7 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) } else { /* transmission complete */ stats->tx_bytes += - priv->read_reg(priv, REG_FI) & 0xf; + priv->read_reg(priv, SJA1000_FI) & 0xf; stats->tx_packets++; can_get_echo_skb(dev, 0); } @@ -530,7 +532,7 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) /* receive interrupt */ while (status & SR_RBS) { sja1000_rx(dev); - status = priv->read_reg(priv, REG_SR); + status = priv->read_reg(priv, SJA1000_SR); /* check for absent controller */ if (status == 0xFF && sja1000_is_absent(priv)) return IRQ_NONE; diff --git a/drivers/net/can/sja1000/sja1000.h b/drivers/net/can/sja1000/sja1000.h index afa99847a510..9d46398f8154 100644 --- a/drivers/net/can/sja1000/sja1000.h +++ b/drivers/net/can/sja1000/sja1000.h @@ -54,46 +54,46 @@ #define SJA1000_MAX_IRQ 20 /* max. 
number of interrupts handled in ISR */ /* SJA1000 registers - manual section 6.4 (Pelican Mode) */ -#define REG_MOD 0x00 -#define REG_CMR 0x01 -#define REG_SR 0x02 -#define REG_IR 0x03 -#define REG_IER 0x04 -#define REG_ALC 0x0B -#define REG_ECC 0x0C -#define REG_EWL 0x0D -#define REG_RXERR 0x0E -#define REG_TXERR 0x0F -#define REG_ACCC0 0x10 -#define REG_ACCC1 0x11 -#define REG_ACCC2 0x12 -#define REG_ACCC3 0x13 -#define REG_ACCM0 0x14 -#define REG_ACCM1 0x15 -#define REG_ACCM2 0x16 -#define REG_ACCM3 0x17 -#define REG_RMC 0x1D -#define REG_RBSA 0x1E +#define SJA1000_MOD 0x00 +#define SJA1000_CMR 0x01 +#define SJA1000_SR 0x02 +#define SJA1000_IR 0x03 +#define SJA1000_IER 0x04 +#define SJA1000_ALC 0x0B +#define SJA1000_ECC 0x0C +#define SJA1000_EWL 0x0D +#define SJA1000_RXERR 0x0E +#define SJA1000_TXERR 0x0F +#define SJA1000_ACCC0 0x10 +#define SJA1000_ACCC1 0x11 +#define SJA1000_ACCC2 0x12 +#define SJA1000_ACCC3 0x13 +#define SJA1000_ACCM0 0x14 +#define SJA1000_ACCM1 0x15 +#define SJA1000_ACCM2 0x16 +#define SJA1000_ACCM3 0x17 +#define SJA1000_RMC 0x1D +#define SJA1000_RBSA 0x1E /* Common registers - manual section 6.5 */ -#define REG_BTR0 0x06 -#define REG_BTR1 0x07 -#define REG_OCR 0x08 -#define REG_CDR 0x1F +#define SJA1000_BTR0 0x06 +#define SJA1000_BTR1 0x07 +#define SJA1000_OCR 0x08 +#define SJA1000_CDR 0x1F -#define REG_FI 0x10 -#define SFF_BUF 0x13 -#define EFF_BUF 0x15 +#define SJA1000_FI 0x10 +#define SJA1000_SFF_BUF 0x13 +#define SJA1000_EFF_BUF 0x15 -#define FI_FF 0x80 -#define FI_RTR 0x40 +#define SJA1000_FI_FF 0x80 +#define SJA1000_FI_RTR 0x40 -#define REG_ID1 0x11 -#define REG_ID2 0x12 -#define REG_ID3 0x13 -#define REG_ID4 0x14 +#define SJA1000_ID1 0x11 +#define SJA1000_ID2 0x12 +#define SJA1000_ID3 0x13 +#define SJA1000_ID4 0x14 -#define CAN_RAM 0x20 +#define SJA1000_CAN_RAM 0x20 /* mode register */ #define MOD_RM 0x01 diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c index 
6433b81256cd..8e0c4a001939 100644 --- a/drivers/net/can/sja1000/sja1000_of_platform.c +++ b/drivers/net/can/sja1000/sja1000_of_platform.c @@ -96,8 +96,8 @@ static int sja1000_ofp_probe(struct platform_device *ofdev) struct net_device *dev; struct sja1000_priv *priv; struct resource res; - const u32 *prop; - int err, irq, res_size, prop_size; + u32 prop; + int err, irq, res_size; void __iomem *base; err = of_address_to_resource(np, 0, &res); @@ -138,27 +138,27 @@ static int sja1000_ofp_probe(struct platform_device *ofdev) priv->read_reg = sja1000_ofp_read_reg; priv->write_reg = sja1000_ofp_write_reg; - prop = of_get_property(np, "nxp,external-clock-frequency", &prop_size); - if (prop && (prop_size == sizeof(u32))) - priv->can.clock.freq = *prop / 2; + err = of_property_read_u32(np, "nxp,external-clock-frequency", &prop); + if (!err) + priv->can.clock.freq = prop / 2; else priv->can.clock.freq = SJA1000_OFP_CAN_CLOCK; /* default */ - prop = of_get_property(np, "nxp,tx-output-mode", &prop_size); - if (prop && (prop_size == sizeof(u32))) - priv->ocr |= *prop & OCR_MODE_MASK; + err = of_property_read_u32(np, "nxp,tx-output-mode", &prop); + if (!err) + priv->ocr |= prop & OCR_MODE_MASK; else priv->ocr |= OCR_MODE_NORMAL; /* default */ - prop = of_get_property(np, "nxp,tx-output-config", &prop_size); - if (prop && (prop_size == sizeof(u32))) - priv->ocr |= (*prop << OCR_TX_SHIFT) & OCR_TX_MASK; + err = of_property_read_u32(np, "nxp,tx-output-config", &prop); + if (!err) + priv->ocr |= (prop << OCR_TX_SHIFT) & OCR_TX_MASK; else priv->ocr |= OCR_TX0_PULLDOWN; /* default */ - prop = of_get_property(np, "nxp,clock-out-frequency", &prop_size); - if (prop && (prop_size == sizeof(u32)) && *prop) { - u32 divider = priv->can.clock.freq * 2 / *prop; + err = of_property_read_u32(np, "nxp,clock-out-frequency", &prop); + if (!err && prop) { + u32 divider = priv->can.clock.freq * 2 / prop; if (divider > 1) priv->cdr |= divider / 2 - 1; @@ -168,8 +168,7 @@ static int 
sja1000_ofp_probe(struct platform_device *ofdev) priv->cdr |= CDR_CLK_OFF; /* default */ } - prop = of_get_property(np, "nxp,no-comparator-bypass", NULL); - if (!prop) + if (!of_property_read_bool(np, "nxp,no-comparator-bypass")) priv->cdr |= CDR_CBP; /* default */ priv->irq_flags = IRQF_SHARED; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 27aaaf99e73e..144942f6372b 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -1690,7 +1690,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read skb_checksum_none_assert(new_skb); if (rx->rxStatus & TYPHOON_RX_VLAN) - __vlan_hwaccel_put_tag(new_skb, + __vlan_hwaccel_put_tag(new_skb, htons(ETH_P_8021Q), ntohl(rx->vlanTag) & 0xffff); netif_receive_skb(new_skb); @@ -2445,9 +2445,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * settings -- so we only allow the user to toggle the TX processing. */ dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; dev->features = dev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM; if(register_netdev(dev) < 0) { err_msg = "unable to register netdev"; diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index cab306a9888e..e1d26433d619 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -828,7 +828,7 @@ static int ax_probe(struct platform_device *pdev) struct ei_device *ei_local; struct ax_device *ax; struct resource *irq, *mem, *mem2; - resource_size_t mem_size, mem2_size = 0; + unsigned long mem_size, mem2_size = 0; int ret = 0; dev = ax__alloc_ei_netdev(sizeof(struct ax_device)); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 549b77500579..8b04bfc20cfb 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ 
b/drivers/net/ethernet/adaptec/starfire.c @@ -594,7 +594,8 @@ static const struct ethtool_ops ethtool_ops; #ifdef VLAN_SUPPORT -static int netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int netdev_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct netdev_private *np = netdev_priv(dev); @@ -608,7 +609,8 @@ static int netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int netdev_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct netdev_private *np = netdev_priv(dev); @@ -702,7 +704,7 @@ static int starfire_init_one(struct pci_dev *pdev, #endif /* ZEROCOPY */ #ifdef VLAN_SUPPORT - dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; #endif /* VLAN_RX_KILL_VID */ #ifdef ADDR_64BITS dev->features |= NETIF_F_HIGHDMA; @@ -1496,7 +1498,7 @@ static int __netdev_rx(struct net_device *dev, int *quota) printk(KERN_DEBUG " netdev_rx() vlanid = %d\n", vlid); } - __vlan_hwaccel_put_tag(skb, vlid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlid); } #endif /* VLAN_SUPPORT */ netif_receive_skb(skb); diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index a175d0be1ae1..ee705771bd2c 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -188,10 +188,9 @@ static int desc_list_init(struct net_device *dev) /* allocate a new skb for next time receive */ new_skb = netdev_alloc_skb(dev, PKT_BUF_SZ + NET_IP_ALIGN); - if (!new_skb) { - pr_notice("init: low on mem - packet dropped\n"); + if (!new_skb) goto init_error; - } + skb_reserve(new_skb, NET_IP_ALIGN); /* Invidate the data cache of skb->data range when it is write back * cache. 
It will prevent overwritting the new data from DMA @@ -1236,7 +1235,6 @@ static void bfin_mac_rx(struct net_device *dev) new_skb = netdev_alloc_skb(dev, PKT_BUF_SZ + NET_IP_ALIGN); if (!new_skb) { - netdev_notice(dev, "rx: low on mem - packet dropped\n"); dev->stats.rx_dropped++; goto out; } diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 0be2195e5034..269295403fc4 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1464,35 +1464,23 @@ static int greth_of_probe(struct platform_device *ofdev) } /* Allocate TX descriptor ring in coherent memory */ - greth->tx_bd_base = (struct greth_bd *) dma_alloc_coherent(greth->dev, - 1024, - &greth->tx_bd_base_phys, - GFP_KERNEL); - + greth->tx_bd_base = dma_alloc_coherent(greth->dev, 1024, + &greth->tx_bd_base_phys, + GFP_KERNEL | __GFP_ZERO); if (!greth->tx_bd_base) { - if (netif_msg_probe(greth)) - dev_err(&dev->dev, "could not allocate descriptor memory.\n"); err = -ENOMEM; goto error3; } - memset(greth->tx_bd_base, 0, 1024); - /* Allocate RX descriptor ring in coherent memory */ - greth->rx_bd_base = (struct greth_bd *) dma_alloc_coherent(greth->dev, - 1024, - &greth->rx_bd_base_phys, - GFP_KERNEL); - + greth->rx_bd_base = dma_alloc_coherent(greth->dev, 1024, + &greth->rx_bd_base_phys, + GFP_KERNEL | __GFP_ZERO); if (!greth->rx_bd_base) { - if (netif_msg_probe(greth)) - dev_err(greth->dev, "could not allocate descriptor memory.\n"); err = -ENOMEM; goto error4; } - memset(greth->rx_bd_base, 0, 1024); - /* Get MAC address from: module param, OF property or ID prom */ for (i = 0; i < 6; i++) { if (macaddr[i] != 0) diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index c0bc41a784ca..b7894f8af9d1 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -472,7 +472,7 @@ static int acenic_probe_one(struct pci_dev *pdev, ap->name = pci_name(pdev); dev->features 
|= NETIF_F_SG | NETIF_F_IP_CSUM; - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->watchdog_timeo = 5*HZ; @@ -2019,7 +2019,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) /* send it up */ if ((bd_flags & BD_FLG_VLAN_TAG)) - __vlan_hwaccel_put_tag(skb, retdesc->vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), retdesc->vlan); netif_rx(skb); dev->stats.rx_packets++; diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index 6e722dc37db7..65926a956575 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -318,8 +318,6 @@ static int lance_rx (struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, len + 2); if (!skb) { - printk ("%s: Memory squeeze, deferring packet.\n", - dev->name); dev->stats.rx_dropped++; rd->mblength = 0; rd->rmd1_bits = LE_R1_OWN; diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 3789affbc0e5..0866e7627433 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -293,7 +293,6 @@ static int lance_rx(struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, len + 2); if (!skb) { - netdev_warn(dev, "Memory squeeze, deferring packet\n"); dev->stats.rx_dropped++; rd->mblength = 0; rd->rmd1_bits = LE_R1_OWN; diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 60e2b701afe7..9793767996a2 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -528,7 +528,6 @@ am79c961_rx(struct net_device *dev, struct dev_priv *priv) dev->stats.rx_packets++; } else { am_writeword (dev, hdraddr + 2, RMD_OWN); - printk (KERN_WARNING "%s: memory squeeze, dropping packet.\n", dev->name); dev->stats.rx_dropped++; break; } diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 
42d4e6ad58a5..8e6b665a6726 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -793,7 +793,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) #if AMD8111E_VLAN_TAG_USED if (vtag == TT_VLAN_TAGGED){ u16 vlan_tag = le16_to_cpu(lp->rx_ring[rx_index].tag_ctrl_info); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } #endif netif_receive_skb(skb); @@ -1869,7 +1869,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, SET_NETDEV_DEV(dev, &pdev->dev); #if AMD8111E_VLAN_TAG_USED - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX ; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX ; #endif lp = netdev_priv(dev); @@ -1907,7 +1907,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32); #if AMD8111E_VLAN_TAG_USED - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; #endif /* Probe the external PHY */ amd8111e_probe_ext_phy(dev); diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index 98f4522fd17b..c178eb4c8166 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -193,7 +193,6 @@ static int ariadne_rx(struct net_device *dev) skb = netdev_alloc_skb(dev, pkt_len + 2); if (skb == NULL) { - netdev_warn(dev, "Memory squeeze, deferring packet\n"); for (i = 0; i < RX_RING_SIZE; i++) if (lowb(priv->rx_ring[(entry + i) % RX_RING_SIZE]->RMD1) & RF_OWN) break; diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index 84219df72f51..e8d0ef508f48 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -996,8 +996,6 @@ static int lance_rx( struct net_device *dev ) else { skb = netdev_alloc_skb(dev, pkt_len + 2); if (skb == NULL) { - DPRINTK( 1, ( "%s: Memory squeeze, 
deferring packet.\n", - dev->name )); for( i = 0; i < RX_RING_SIZE; i++ ) if (MEM->rx_head[(entry+i) & RX_RING_MOD_MASK].flag & RMD1_OWN_CHIP) @@ -1149,9 +1147,7 @@ static struct net_device *atarilance_dev; static int __init atarilance_module_init(void) { atarilance_dev = atarilance_probe(-1); - if (IS_ERR(atarilance_dev)) - return PTR_ERR(atarilance_dev); - return 0; + return PTR_RET(atarilance_dev); } static void __exit atarilance_module_exit(void) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index de774d419144..688aede742c7 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -727,7 +727,6 @@ static int au1000_rx(struct net_device *dev) frmlen -= 4; /* Remove FCS */ skb = netdev_alloc_skb(dev, frmlen + 2); if (skb == NULL) { - netdev_err(dev, "Memory squeeze, dropping packet.\n"); dev->stats.rx_dropped++; continue; } diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index baca0bd1b393..3d86ffeb4e15 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -607,8 +607,6 @@ static int lance_rx(struct net_device *dev) skb = netdev_alloc_skb(dev, len + 2); if (skb == 0) { - printk("%s: Memory squeeze, deferring packet.\n", - dev->name); dev->stats.rx_dropped++; *rds_ptr(rd, mblength, lp->type) = 0; *rds_ptr(rd, rmd1, lp->type) = diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c index 9af3c307862c..a51497c9d2af 100644 --- a/drivers/net/ethernet/amd/mvme147.c +++ b/drivers/net/ethernet/amd/mvme147.c @@ -188,9 +188,7 @@ static struct net_device *dev_mvme147_lance; int __init init_module(void) { dev_mvme147_lance = mvme147lance_probe(-1); - if (IS_ERR(dev_mvme147_lance)) - return PTR_ERR(dev_mvme147_lance); - return 0; + return PTR_RET(dev_mvme147_lance); } void __exit cleanup_module(void) diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index 
013b65108536..26fc0ce0faa3 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -1238,7 +1238,7 @@ MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)"); int __init init_module(void) { dev_ni65 = ni65_probe(-1); - return IS_ERR(dev_ni65) ? PTR_ERR(dev_ni65) : 0; + return PTR_RET(dev_ni65); } void __exit cleanup_module(void) diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 797f847edf13..ed2130727643 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1166,7 +1166,6 @@ static void pcnet32_rx_entry(struct net_device *dev, skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN); if (skb == NULL) { - netif_err(lp, drv, dev, "Memory squeeze, dropping packet\n"); dev->stats.rx_dropped++; return; } diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index 74b3891b6483..4375abe61da1 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -812,9 +812,6 @@ static int lance_rx( struct net_device *dev ) else { skb = netdev_alloc_skb(dev, pkt_len + 2); if (skb == NULL) { - DPRINTK( 1, ( "%s: Memory squeeze, deferring packet.\n", - dev->name )); - dev->stats.rx_dropped++; head->msg_length = 0; head->flag |= RMD1_OWN_CHIP; @@ -943,9 +940,7 @@ static struct net_device *sun3lance_dev; int __init init_module(void) { sun3lance_dev = sun3lance_probe(-1); - if (IS_ERR(sun3lance_dev)) - return PTR_ERR(sun3lance_dev); - return 0; + return PTR_RET(sun3lance_dev); } void __exit cleanup_module(void) diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 6a40290d3727..f47b780892e9 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -536,8 +536,6 @@ static void lance_rx_dvma(struct net_device *dev) skb = netdev_alloc_skb(dev, len + 2); if (skb == NULL) { - printk(KERN_INFO "%s: Memory squeeze, deferring 
packet.\n", - dev->name); dev->stats.rx_dropped++; rd->mblength = 0; rd->rmd1_bits = LE_R1_OWN; @@ -708,8 +706,6 @@ static void lance_rx_pio(struct net_device *dev) skb = netdev_alloc_skb(dev, len + 2); if (skb == NULL) { - printk(KERN_INFO "%s: Memory squeeze, deferring packet.\n", - dev->name); dev->stats.rx_dropped++; sbus_writew(0, &rd->mblength); sbus_writeb(LE_R1_OWN, &rd->rmd1_bits); @@ -1377,10 +1373,9 @@ static int sparc_lance_probe_one(struct platform_device *op, dma_alloc_coherent(&op->dev, sizeof(struct lance_init_block), &lp->init_block_dvma, GFP_ATOMIC); - if (!lp->init_block_mem) { - printk(KERN_ERR "SunLance: Cannot allocate consistent DMA memory.\n"); + if (!lp->init_block_mem) goto fail; - } + lp->pio_buffer = 0; lp->init_ring = lance_init_ring_dvma; lp->rx = lance_rx_dvma; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index a206779c68cf..4ce8ceb62205 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -386,20 +386,16 @@ static int mace_open(struct net_device *dev) /* Allocate the DMA ring buffers */ mp->tx_ring = dma_alloc_coherent(mp->device, - N_TX_RING * MACE_BUFF_SIZE, - &mp->tx_ring_phys, GFP_KERNEL); - if (mp->tx_ring == NULL) { - printk(KERN_ERR "%s: unable to allocate DMA tx buffers\n", dev->name); + N_TX_RING * MACE_BUFF_SIZE, + &mp->tx_ring_phys, GFP_KERNEL); + if (mp->tx_ring == NULL) goto out1; - } mp->rx_ring = dma_alloc_coherent(mp->device, - N_RX_RING * MACE_BUFF_SIZE, - &mp->rx_ring_phys, GFP_KERNEL); - if (mp->rx_ring == NULL) { - printk(KERN_ERR "%s: unable to allocate DMA rx buffers\n", dev->name); + N_RX_RING * MACE_BUFF_SIZE, + &mp->rx_ring_phys, GFP_KERNEL); + if (mp->rx_ring == NULL) goto out2; - } mace_dma_off(dev); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 1f07fc633ab9..0ba900762b13 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ 
b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -417,7 +417,7 @@ static void atl1c_set_multi(struct net_device *netdev) static void __atl1c_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *mac_ctrl_data |= MAC_CTRL_RMV_VLAN; } else { @@ -494,10 +494,10 @@ static netdev_features_t atl1c_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; if (netdev->mtu > MAX_TSO_FRAME_SIZE) features &= ~(NETIF_F_TSO | NETIF_F_TSO6); @@ -510,7 +510,7 @@ static int atl1c_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atl1c_vlan_mode(netdev, features); return 0; @@ -1809,7 +1809,7 @@ rrs_checked: AT_TAG_TO_VLAN(rrs->vlan_tag, vlan); vlan = le16_to_cpu(vlan); - __vlan_hwaccel_put_tag(skb, vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); } netif_receive_skb(skb); @@ -2475,13 +2475,13 @@ static int atl1c_init_netdev(struct net_device *netdev, struct pci_dev *pdev) atl1c_set_ethtool_ops(netdev); /* TODO: add when ready */ - netdev->hw_features = NETIF_F_SG | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_RX | - NETIF_F_TSO | + netdev->hw_features = NETIF_F_SG | + NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_TSO | NETIF_F_TSO6; - netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_TX; + netdev->features = netdev->hw_features | + NETIF_F_HW_VLAN_CTAG_TX; return 0; } diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e.h b/drivers/net/ethernet/atheros/atl1e/atl1e.h 
index 829b5ad71d0d..b5fd934585e9 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e.h +++ b/drivers/net/ethernet/atheros/atl1e/atl1e.h @@ -186,7 +186,7 @@ struct atl1e_tpd_desc { /* how about 0x2000 */ #define MAX_TX_BUF_LEN 0x2000 #define MAX_TX_BUF_SHIFT 13 -/*#define MAX_TX_BUF_LEN 0x3000 */ +#define MAX_TSO_SEG_SIZE 0x3c00 /* rrs word 1 bit 0:31 */ #define RRS_RX_CSUM_MASK 0xFFFF @@ -438,7 +438,6 @@ struct atl1e_adapter { struct atl1e_hw hw; struct atl1e_hw_stats hw_stats; - bool have_msi; u32 wol; u16 link_speed; u16 link_duplex; diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 92f4734f860d..0688bb82b442 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -315,7 +315,7 @@ static void atl1e_set_multi(struct net_device *netdev) static void __atl1e_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *mac_ctrl_data |= MAC_CTRL_RMV_VLAN; } else { @@ -378,10 +378,10 @@ static netdev_features_t atl1e_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -391,7 +391,7 @@ static int atl1e_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atl1e_vlan_mode(netdev, features); return 0; @@ -1420,11 +1420,9 @@ static void atl1e_clean_rx_irq(struct atl1e_adapter *adapter, u8 que, packet_size = ((prrs->word1 >> RRS_PKT_SIZE_SHIFT) & RRS_PKT_SIZE_MASK) - 4; /* CRC */ skb = netdev_alloc_skb_ip_align(netdev, packet_size); - if (skb == NULL) { - netdev_warn(netdev, - "Memory squeeze, deferring packet\n"); + if (skb == NULL) goto skip_pkt; - } + memcpy(skb->data, (u8 *)(prrs + 1), packet_size); skb_put(skb, packet_size); skb->protocol = eth_type_trans(skb, netdev); @@ -1437,7 +1435,7 @@ static void atl1e_clean_rx_irq(struct atl1e_adapter *adapter, u8 que, netdev_dbg(netdev, "RXD VLAN TAG<RRD>=0x%04x\n", prrs->vtag); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } netif_receive_skb(skb); @@ -1849,34 +1847,19 @@ static void atl1e_free_irq(struct atl1e_adapter *adapter) struct net_device *netdev = adapter->netdev; free_irq(adapter->pdev->irq, netdev); - - if (adapter->have_msi) - pci_disable_msi(adapter->pdev); } static int atl1e_request_irq(struct atl1e_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct net_device *netdev = adapter->netdev; - int flags = 0; int err = 0; - adapter->have_msi = true; - err = pci_enable_msi(pdev); - if (err) { - netdev_dbg(netdev, - "Unable to allocate MSI interrupt Error: %d\n", err); - adapter->have_msi = false; - } - - if (!adapter->have_msi) - flags |= IRQF_SHARED; - err = request_irq(pdev->irq, atl1e_intr, flags, netdev->name, netdev); + err = 
request_irq(pdev->irq, atl1e_intr, IRQF_SHARED, netdev->name, + netdev); if (err) { netdev_dbg(adapter->netdev, "Unable to allocate interrupt Error: %d\n", err); - if (adapter->have_msi) - pci_disable_msi(pdev); return err; } netdev_dbg(netdev, "atl1e_request_irq OK\n"); @@ -2215,9 +2198,9 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev) atl1e_set_ethtool_ops(netdev); netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_RX; netdev->features = netdev->hw_features | NETIF_F_LLTX | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; return 0; } @@ -2344,6 +2327,7 @@ static int atl1e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->reset_task, atl1e_reset_task); INIT_WORK(&adapter->link_chg_task, atl1e_link_chg_task); + netif_set_gso_max_size(netdev, MAX_TSO_SEG_SIZE); err = register_netdev(netdev); if (err) { netdev_err(netdev, "register netdevice failed\n"); diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 5b0d9931c720..fa0915f3999b 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2024,7 +2024,7 @@ rrd_ok: ((rrd->vlan_tag & 7) << 13) | ((rrd->vlan_tag & 8) << 9); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } netif_receive_skb(skb); @@ -2774,7 +2774,7 @@ static int atl1_close(struct net_device *netdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int atl1_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2876,23 +2876,18 @@ static int atl1_resume(struct device *dev) return 0; } +#endif static SIMPLE_DEV_PM_OPS(atl1_pm_ops, atl1_suspend, atl1_resume); -#define ATL1_PM_OPS (&atl1_pm_ops) - -#else - -static int atl1_suspend(struct device *dev) { return 0; } - -#define ATL1_PM_OPS NULL -#endif static void atl1_shutdown(struct pci_dev *pdev) { struct 
net_device *netdev = pci_get_drvdata(pdev); struct atl1_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_PM_SLEEP atl1_suspend(&pdev->dev); +#endif pci_wake_from_d3(pdev, adapter->wol); pci_set_power_state(pdev, PCI_D3hot); } @@ -3023,10 +3018,10 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features = NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SG; - netdev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX); + netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); netdev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_TSO | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_RX; /* is this valid? see atl1_setup_mac_ctrl() */ netdev->features |= NETIF_F_RXCSUM; @@ -3147,7 +3142,7 @@ static struct pci_driver atl1_driver = { .probe = atl1_probe, .remove = atl1_remove, .shutdown = atl1_shutdown, - .driver.pm = ATL1_PM_OPS, + .driver.pm = &atl1_pm_ops, }; /** diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 1278b47022e0..265ce1b752ed 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -363,7 +363,7 @@ static inline void atl2_irq_disable(struct atl2_adapter *adapter) static void __atl2_vlan_mode(netdev_features_t features, u32 *ctrl) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *ctrl |= MAC_CTRL_RMV_VLAN; } else { @@ -399,10 +399,10 @@ static netdev_features_t atl2_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -412,7 +412,7 @@ static int atl2_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atl2_vlan_mode(netdev, features); return 0; @@ -437,9 +437,6 @@ static void atl2_intr_rx(struct atl2_adapter *adapter) /* alloc new buffer */ skb = netdev_alloc_skb_ip_align(netdev, rx_size); if (NULL == skb) { - printk(KERN_WARNING - "%s: Mem squeeze, deferring packet.\n", - netdev->name); /* * Check that some rx space is free. If not, * free one and mark stats->rx_dropped++. @@ -455,7 +452,7 @@ static void atl2_intr_rx(struct atl2_adapter *adapter) ((rxd->status.vtag&7) << 13) | ((rxd->status.vtag&8) << 9); - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } netif_rx(skb); netdev->stats.rx_bytes += rx_size; @@ -890,7 +887,7 @@ static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, skb->len-copy_len); offset = ((u32)(skb->len-copy_len + 3) & ~3); } -#ifdef NETIF_F_HW_VLAN_TX +#ifdef NETIF_F_HW_VLAN_CTAG_TX if (vlan_tx_tag_present(skb)) { u16 vlan_tag = vlan_tx_tag_get(skb); vlan_tag = (vlan_tag << 4) | @@ -1416,8 +1413,8 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -EIO; - netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_RX; - netdev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX); + netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX; + netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); /* Init PHY as early as possible due to power saving issue */ atl2_phy_init(&adapter->hw); diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c index 
f82eb1699464..46a622cceee4 100644 --- a/drivers/net/ethernet/atheros/atlx/atlx.c +++ b/drivers/net/ethernet/atheros/atlx/atlx.c @@ -220,7 +220,7 @@ static void atlx_link_chg_task(struct work_struct *work) static void __atlx_vlan_mode(netdev_features_t features, u32 *ctrl) { - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ *ctrl |= MAC_CTRL_RMV_VLAN; } else { @@ -257,10 +257,10 @@ static netdev_features_t atlx_fix_features(struct net_device *netdev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -270,7 +270,7 @@ static int atlx_set_features(struct net_device *netdev, { netdev_features_t changed = netdev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) atlx_vlan_mode(netdev, features); return 0; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 7d81e059e811..0b3e23ec37f7 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -862,27 +862,25 @@ static int bcm_enet_open(struct net_device *dev) /* allocate rx dma ring */ size = priv->rx_ring_size * sizeof(struct bcm_enet_desc); - p = dma_alloc_coherent(kdev, size, &priv->rx_desc_dma, GFP_KERNEL); + p = dma_alloc_coherent(kdev, size, &priv->rx_desc_dma, + GFP_KERNEL | __GFP_ZERO); if (!p) { - dev_err(kdev, "cannot allocate rx ring %u\n", size); ret = -ENOMEM; goto out_freeirq_tx; } - memset(p, 0, size); priv->rx_desc_alloc_size = size; priv->rx_desc_cpu = p; /* allocate tx dma ring */ size = priv->tx_ring_size * sizeof(struct bcm_enet_desc); - p = 
dma_alloc_coherent(kdev, size, &priv->tx_desc_dma, GFP_KERNEL); + p = dma_alloc_coherent(kdev, size, &priv->tx_desc_dma, + GFP_KERNEL | __GFP_ZERO); if (!p) { - dev_err(kdev, "cannot allocate tx ring\n"); ret = -ENOMEM; goto out_free_rx_ring; } - memset(p, 0, size); priv->tx_desc_alloc_size = size; priv->tx_desc_cpu = p; @@ -1619,7 +1617,6 @@ static int bcm_enet_probe(struct platform_device *pdev) struct resource *res_mem, *res_irq, *res_irq_rx, *res_irq_tx; struct mii_bus *bus; const char *clk_name; - unsigned int iomem_size; int i, ret; /* stop if shared driver failed, assume driver->probe will be @@ -1644,17 +1641,12 @@ static int bcm_enet_probe(struct platform_device *pdev) if (ret) goto out; - iomem_size = resource_size(res_mem); - if (!request_mem_region(res_mem->start, iomem_size, "bcm63xx_enet")) { - ret = -EBUSY; - goto out; - } - - priv->base = ioremap(res_mem->start, iomem_size); + priv->base = devm_request_and_ioremap(&pdev->dev, res_mem); if (priv->base == NULL) { ret = -ENOMEM; - goto out_release_mem; + goto out; } + dev->irq = priv->irq = res_irq->start; priv->irq_rx = res_irq_rx->start; priv->irq_tx = res_irq_tx->start; @@ -1674,9 +1666,9 @@ static int bcm_enet_probe(struct platform_device *pdev) priv->mac_clk = clk_get(&pdev->dev, clk_name); if (IS_ERR(priv->mac_clk)) { ret = PTR_ERR(priv->mac_clk); - goto out_unmap; + goto out; } - clk_enable(priv->mac_clk); + clk_prepare_enable(priv->mac_clk); /* initialize default and fetch platform data */ priv->rx_ring_size = BCMENET_DEF_RX_DESC; @@ -1705,7 +1697,7 @@ static int bcm_enet_probe(struct platform_device *pdev) priv->phy_clk = NULL; goto out_put_clk_mac; } - clk_enable(priv->phy_clk); + clk_prepare_enable(priv->phy_clk); } /* do minimal hardware init to be able to probe mii bus */ @@ -1733,7 +1725,8 @@ static int bcm_enet_probe(struct platform_device *pdev) * if a slave is not present on hw */ bus->phy_mask = ~(1 << priv->phy_id); - bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); + 
bus->irq = devm_kzalloc(&pdev->dev, sizeof(int) * PHY_MAX_ADDR, + GFP_KERNEL); if (!bus->irq) { ret = -ENOMEM; goto out_free_mdio; @@ -1794,10 +1787,8 @@ static int bcm_enet_probe(struct platform_device *pdev) return 0; out_unregister_mdio: - if (priv->mii_bus) { + if (priv->mii_bus) mdiobus_unregister(priv->mii_bus); - kfree(priv->mii_bus->irq); - } out_free_mdio: if (priv->mii_bus) @@ -1807,19 +1798,13 @@ out_uninit_hw: /* turn off mdc clock */ enet_writel(priv, 0, ENET_MIISC_REG); if (priv->phy_clk) { - clk_disable(priv->phy_clk); + clk_disable_unprepare(priv->phy_clk); clk_put(priv->phy_clk); } out_put_clk_mac: - clk_disable(priv->mac_clk); + clk_disable_unprepare(priv->mac_clk); clk_put(priv->mac_clk); - -out_unmap: - iounmap(priv->base); - -out_release_mem: - release_mem_region(res_mem->start, iomem_size); out: free_netdev(dev); return ret; @@ -1833,7 +1818,6 @@ static int bcm_enet_remove(struct platform_device *pdev) { struct bcm_enet_priv *priv; struct net_device *dev; - struct resource *res; /* stop netdevice */ dev = platform_get_drvdata(pdev); @@ -1845,7 +1829,6 @@ static int bcm_enet_remove(struct platform_device *pdev) if (priv->has_phy) { mdiobus_unregister(priv->mii_bus); - kfree(priv->mii_bus->irq); mdiobus_free(priv->mii_bus); } else { struct bcm63xx_enet_platform_data *pd; @@ -1856,17 +1839,12 @@ static int bcm_enet_remove(struct platform_device *pdev) bcm_enet_mdio_write_mii); } - /* release device resources */ - iounmap(priv->base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - /* disable hw block clocks */ if (priv->phy_clk) { - clk_disable(priv->phy_clk); + clk_disable_unprepare(priv->phy_clk); clk_put(priv->phy_clk); } - clk_disable(priv->mac_clk); + clk_disable_unprepare(priv->mac_clk); clk_put(priv->mac_clk); platform_set_drvdata(pdev, NULL); @@ -1889,31 +1867,20 @@ struct platform_driver bcm63xx_enet_driver = { static int bcm_enet_shared_probe(struct platform_device *pdev) { 
struct resource *res; - unsigned int iomem_size; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENODEV; - iomem_size = resource_size(res); - if (!request_mem_region(res->start, iomem_size, "bcm63xx_enet_dma")) - return -EBUSY; - - bcm_enet_shared_base = ioremap(res->start, iomem_size); - if (!bcm_enet_shared_base) { - release_mem_region(res->start, iomem_size); + bcm_enet_shared_base = devm_request_and_ioremap(&pdev->dev, res); + if (!bcm_enet_shared_base) return -ENOMEM; - } + return 0; } static int bcm_enet_shared_remove(struct platform_device *pdev) { - struct resource *res; - - iounmap(bcm_enet_shared_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); return 0; } diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index da5f4397f87c..eec0af45b859 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -13,6 +13,7 @@ #include <linux/delay.h> #include <linux/etherdevice.h> #include <linux/mii.h> +#include <linux/phy.h> #include <linux/interrupt.h> #include <linux/dma-mapping.h> #include <bcm47xx_nvram.h> @@ -244,10 +245,8 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac, /* Alloc skb */ slot->skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE); - if (!slot->skb) { - bgmac_err(bgmac, "Allocation of skb failed!\n"); + if (!slot->skb) return -ENOMEM; - } /* Poison - if everything goes fine, hardware will overwrite it */ rx = (struct bgmac_rx_header *)slot->skb->data; @@ -1313,6 +1312,73 @@ static const struct ethtool_ops bgmac_ethtool_ops = { }; /************************************************** + * MII + **************************************************/ + +static int bgmac_mii_read(struct mii_bus *bus, int mii_id, int regnum) +{ + return bgmac_phy_read(bus->priv, mii_id, regnum); +} + +static int bgmac_mii_write(struct mii_bus *bus, int mii_id, int regnum, + u16 value) +{ + 
return bgmac_phy_write(bus->priv, mii_id, regnum, value); +} + +static int bgmac_mii_register(struct bgmac *bgmac) +{ + struct mii_bus *mii_bus; + int i, err = 0; + + mii_bus = mdiobus_alloc(); + if (!mii_bus) + return -ENOMEM; + + mii_bus->name = "bgmac mii bus"; + sprintf(mii_bus->id, "%s-%d-%d", "bgmac", bgmac->core->bus->num, + bgmac->core->core_unit); + mii_bus->priv = bgmac; + mii_bus->read = bgmac_mii_read; + mii_bus->write = bgmac_mii_write; + mii_bus->parent = &bgmac->core->dev; + mii_bus->phy_mask = ~(1 << bgmac->phyaddr); + + mii_bus->irq = kmalloc_array(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); + if (!mii_bus->irq) { + err = -ENOMEM; + goto err_free_bus; + } + for (i = 0; i < PHY_MAX_ADDR; i++) + mii_bus->irq[i] = PHY_POLL; + + err = mdiobus_register(mii_bus); + if (err) { + bgmac_err(bgmac, "Registration of mii bus failed\n"); + goto err_free_irq; + } + + bgmac->mii_bus = mii_bus; + + return err; + +err_free_irq: + kfree(mii_bus->irq); +err_free_bus: + mdiobus_free(mii_bus); + return err; +} + +static void bgmac_mii_unregister(struct bgmac *bgmac) +{ + struct mii_bus *mii_bus = bgmac->mii_bus; + + mdiobus_unregister(mii_bus); + kfree(mii_bus->irq); + mdiobus_free(mii_bus); +} + +/************************************************** * BCMA bus ops **************************************************/ @@ -1404,11 +1470,18 @@ static int bgmac_probe(struct bcma_device *core) if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM) bgmac_warn(bgmac, "Support for ADMtek ethernet switch not implemented\n"); + err = bgmac_mii_register(bgmac); + if (err) { + bgmac_err(bgmac, "Cannot register MDIO\n"); + err = -ENOTSUPP; + goto err_dma_free; + } + err = register_netdev(bgmac->net_dev); if (err) { bgmac_err(bgmac, "Cannot register net device\n"); err = -ENOTSUPP; - goto err_dma_free; + goto err_mii_unregister; } netif_carrier_off(net_dev); @@ -1417,6 +1490,8 @@ static int bgmac_probe(struct bcma_device *core) return 0; +err_mii_unregister: + bgmac_mii_unregister(bgmac); 
err_dma_free: bgmac_dma_free(bgmac); @@ -1433,6 +1508,7 @@ static void bgmac_remove(struct bcma_device *core) netif_napi_del(&bgmac->napi); unregister_netdev(bgmac->net_dev); + bgmac_mii_unregister(bgmac); bgmac_dma_free(bgmac); bcma_set_drvdata(core, NULL); free_netdev(bgmac->net_dev); diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 4ede614c81f8..98d4b5fcc070 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -399,6 +399,7 @@ struct bgmac { struct bcma_device *cmn; /* Reference to CMN core for BCM4706 */ struct net_device *net_dev; struct napi_struct napi; + struct mii_bus *mii_bus; /* DMA */ struct bgmac_dma_ring tx_ring[BGMAC_MAX_TX_RINGS]; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 2f0ba8f2fd6c..5d204492c603 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -416,7 +416,7 @@ static int bnx2_unregister_cnic(struct net_device *dev) return 0; } -struct cnic_eth_dev *bnx2_cnic_probe(struct net_device *dev) +static struct cnic_eth_dev *bnx2_cnic_probe(struct net_device *dev) { struct bnx2 *bp = netdev_priv(dev); struct cnic_eth_dev *cp = &bp->cnic_eth_dev; @@ -854,12 +854,11 @@ bnx2_alloc_mem(struct bnx2 *bp) sizeof(struct statistics_block); status_blk = dma_alloc_coherent(&bp->pdev->dev, bp->status_stats_size, - &bp->status_blk_mapping, GFP_KERNEL); + &bp->status_blk_mapping, + GFP_KERNEL | __GFP_ZERO); if (status_blk == NULL) goto alloc_mem_err; - memset(status_blk, 0, bp->status_stats_size); - bnapi = &bp->bnx2_napi[0]; bnapi->status_blk.msi = status_blk; bnapi->hw_tx_cons_ptr = @@ -3212,7 +3211,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) } if ((status & L2_FHDR_STATUS_L2_VLAN_TAG) && !(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG)) - __vlan_hwaccel_put_tag(skb, rx_hdr->l2_fhdr_vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 
rx_hdr->l2_fhdr_vlan_tag); skb->protocol = eth_type_trans(skb, bp->dev); @@ -3554,7 +3553,7 @@ bnx2_set_rx_mode(struct net_device *dev) rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS | BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG); sort_mode = 1 | BNX2_RPM_SORT_USER0_BC_EN; - if (!(dev->features & NETIF_F_HW_VLAN_RX) && + if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX) && (bp->flags & BNX2_FLAG_CAN_KEEP_VLAN)) rx_mode |= BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG; if (dev->flags & IFF_PROMISC) { @@ -7696,7 +7695,7 @@ bnx2_fix_features(struct net_device *dev, netdev_features_t features) struct bnx2 *bp = netdev_priv(dev); if (!(bp->flags & BNX2_FLAG_CAN_KEEP_VLAN)) - features |= NETIF_F_HW_VLAN_RX; + features |= NETIF_F_HW_VLAN_CTAG_RX; return features; } @@ -7707,12 +7706,12 @@ bnx2_set_features(struct net_device *dev, netdev_features_t features) struct bnx2 *bp = netdev_priv(dev); /* TSO with VLAN tag won't work with current firmware */ - if (features & NETIF_F_HW_VLAN_TX) + if (features & NETIF_F_HW_VLAN_CTAG_TX) dev->vlan_features |= (dev->hw_features & NETIF_F_ALL_TSO); else dev->vlan_features &= ~NETIF_F_ALL_TSO; - if ((!!(features & NETIF_F_HW_VLAN_RX) != + if ((!!(features & NETIF_F_HW_VLAN_CTAG_RX) != !!(bp->rx_mode & BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG)) && netif_running(dev)) { bnx2_netif_stop(bp, false); @@ -8552,7 +8551,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; dev->vlan_features = dev->hw_features; - dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= dev->hw_features; dev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index e4605a965084..3dba2a70a00e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -26,8 +26,8 @@ * (you will need to reboot 
afterwards) */ /* #define BNX2X_STOP_ON_ERROR */ -#define DRV_MODULE_VERSION "1.78.02-0" -#define DRV_MODULE_RELDATE "2013/01/14" +#define DRV_MODULE_VERSION "1.78.17-0" +#define DRV_MODULE_RELDATE "2013/04/11" #define BNX2X_BC_VER 0x040200 #if defined(CONFIG_DCB) @@ -492,7 +492,6 @@ enum bnx2x_tpa_mode_t { struct bnx2x_fastpath { struct bnx2x *bp; /* parent */ -#define BNX2X_NAPI_WEIGHT 128 struct napi_struct napi; union host_hc_status_block status_blk; /* chip independed shortcuts into sb structure */ @@ -613,9 +612,10 @@ struct bnx2x_fastpath { * START_BD - describes packed * START_BD(splitted) - includes unpaged data segment for GSO * PARSING_BD - for TSO and CSUM data + * PARSING_BD2 - for encapsulation data * Frag BDs - decribes pages for frags */ -#define BDS_PER_TX_PKT 3 +#define BDS_PER_TX_PKT 4 #define MAX_BDS_PER_TX_PKT (MAX_SKB_FRAGS + BDS_PER_TX_PKT) /* max BDs per tx packet including next pages */ #define MAX_DESC_PER_TX_PKT (MAX_BDS_PER_TX_PKT + \ @@ -730,18 +730,24 @@ struct bnx2x_fastpath { #define SKB_CS(skb) (*(u16 *)(skb_transport_header(skb) + \ skb->csum_offset)) -#define pbd_tcp_flags(skb) (ntohl(tcp_flag_word(tcp_hdr(skb)))>>16 & 0xff) +#define pbd_tcp_flags(tcp_hdr) (ntohl(tcp_flag_word(tcp_hdr))>>16 & 0xff) -#define XMIT_PLAIN 0 -#define XMIT_CSUM_V4 0x1 -#define XMIT_CSUM_V6 0x2 -#define XMIT_CSUM_TCP 0x4 -#define XMIT_GSO_V4 0x8 -#define XMIT_GSO_V6 0x10 +#define XMIT_PLAIN 0 +#define XMIT_CSUM_V4 (1 << 0) +#define XMIT_CSUM_V6 (1 << 1) +#define XMIT_CSUM_TCP (1 << 2) +#define XMIT_GSO_V4 (1 << 3) +#define XMIT_GSO_V6 (1 << 4) +#define XMIT_CSUM_ENC_V4 (1 << 5) +#define XMIT_CSUM_ENC_V6 (1 << 6) +#define XMIT_GSO_ENC_V4 (1 << 7) +#define XMIT_GSO_ENC_V6 (1 << 8) -#define XMIT_CSUM (XMIT_CSUM_V4 | XMIT_CSUM_V6) -#define XMIT_GSO (XMIT_GSO_V4 | XMIT_GSO_V6) +#define XMIT_CSUM_ENC (XMIT_CSUM_ENC_V4 | XMIT_CSUM_ENC_V6) +#define XMIT_GSO_ENC (XMIT_GSO_ENC_V4 | XMIT_GSO_ENC_V6) +#define XMIT_CSUM (XMIT_CSUM_V4 | XMIT_CSUM_V6 | XMIT_CSUM_ENC) 
+#define XMIT_GSO (XMIT_GSO_V4 | XMIT_GSO_V6 | XMIT_GSO_ENC) /* stuff added to make the code fit 80Col */ #define CQE_TYPE(cqe_fp_flags) ((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE) @@ -844,6 +850,9 @@ struct bnx2x_common { #define CHIP_IS_57840_VF(bp) (CHIP_NUM(bp) == CHIP_NUM_57840_VF) #define CHIP_IS_E1H(bp) (CHIP_IS_57711(bp) || \ CHIP_IS_57711E(bp)) +#define CHIP_IS_57811xx(bp) (CHIP_IS_57811(bp) || \ + CHIP_IS_57811_MF(bp) || \ + CHIP_IS_57811_VF(bp)) #define CHIP_IS_E2(bp) (CHIP_IS_57712(bp) || \ CHIP_IS_57712_MF(bp) || \ CHIP_IS_57712_VF(bp)) @@ -853,9 +862,7 @@ struct bnx2x_common { CHIP_IS_57810(bp) || \ CHIP_IS_57810_MF(bp) || \ CHIP_IS_57810_VF(bp) || \ - CHIP_IS_57811(bp) || \ - CHIP_IS_57811_MF(bp) || \ - CHIP_IS_57811_VF(bp) || \ + CHIP_IS_57811xx(bp) || \ CHIP_IS_57840(bp) || \ CHIP_IS_57840_MF(bp) || \ CHIP_IS_57840_VF(bp)) @@ -1215,14 +1222,16 @@ enum { BNX2X_SP_RTNL_ENABLE_SRIOV, BNX2X_SP_RTNL_VFPF_MCAST, BNX2X_SP_RTNL_VFPF_STORM_RX_MODE, + BNX2X_SP_RTNL_HYPERVISOR_VLAN, }; struct bnx2x_prev_path_list { + struct list_head list; u8 bus; u8 slot; u8 path; - struct list_head list; + u8 aer; u8 undi; }; @@ -1269,6 +1278,8 @@ struct bnx2x { #define BP_FW_MB_IDX(bp) BP_FW_MB_IDX_VN(bp, BP_VN(bp)) #ifdef CONFIG_BNX2X_SRIOV + /* protects vf2pf mailbox from simultaneous access */ + struct mutex vf2pf_mutex; /* vf pf channel mailbox contains request and response buffers */ struct bnx2x_vf_mbx_msg *vf2pf_mbox; dma_addr_t vf2pf_mbox_mapping; @@ -1281,6 +1292,8 @@ struct bnx2x { dma_addr_t pf2vf_bulletin_mapping; struct pf_vf_bulletin_content old_bulletin; + + u16 requested_nr_virtfn; #endif /* CONFIG_BNX2X_SRIOV */ struct net_device *dev; @@ -1944,12 +1957,9 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, bool is_pf); -#define BNX2X_ILT_ZALLOC(x, y, size) \ - do { \ - x = dma_alloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \ - if (x) \ - memset(x, 0, 
size); \ - } while (0) +#define BNX2X_ILT_ZALLOC(x, y, size) \ + x = dma_alloc_coherent(&bp->pdev->dev, size, y, \ + GFP_KERNEL | __GFP_ZERO) #define BNX2X_ILT_FREE(x, y, size) \ do { \ @@ -2286,7 +2296,7 @@ static const u32 dmae_reg_go_c[] = { DMAE_REG_GO_C12, DMAE_REG_GO_C13, DMAE_REG_GO_C14, DMAE_REG_GO_C15 }; -void bnx2x_set_ethtool_ops(struct net_device *netdev); +void bnx2x_set_ethtool_ops(struct bnx2x *bp, struct net_device *netdev); void bnx2x_notify_link_changed(struct bnx2x *bp); #define BNX2X_MF_SD_PROTOCOL(bp) \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index a923bc4d5a1f..d72bd8c40aa1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -451,7 +451,8 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue, * Compute number of aggregated segments, and gso_type. */ static void bnx2x_set_gro_params(struct sk_buff *skb, u16 parsing_flags, - u16 len_on_bd, unsigned int pkt_len) + u16 len_on_bd, unsigned int pkt_len, + u16 num_of_coalesced_segs) { /* TPA aggregation won't have either IP options or TCP options * other than timestamp or IPv6 extension headers. 
@@ -480,8 +481,7 @@ static void bnx2x_set_gro_params(struct sk_buff *skb, u16 parsing_flags, /* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count * to skb_shinfo(skb)->gso_segs */ - NAPI_GRO_CB(skb)->count = DIV_ROUND_UP(pkt_len - hdrs_len, - skb_shinfo(skb)->gso_size); + NAPI_GRO_CB(skb)->count = num_of_coalesced_segs; } static int bnx2x_alloc_rx_sge(struct bnx2x *bp, @@ -537,7 +537,8 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, /* This is needed in order to enable forwarding support */ if (frag_size) bnx2x_set_gro_params(skb, tpa_info->parsing_flags, len_on_bd, - le16_to_cpu(cqe->pkt_len)); + le16_to_cpu(cqe->pkt_len), + le16_to_cpu(cqe->num_of_coalesced_segs)); #ifdef BNX2X_STOP_ON_ERROR if (pages > min_t(u32, 8, MAX_SKB_FRAGS) * SGE_PAGES) { @@ -718,7 +719,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, if (!bnx2x_fill_frag_skb(bp, fp, tpa_info, pages, skb, cqe, cqe_idx)) { if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN) - __vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tpa_info->vlan_tag); bnx2x_gro_receive(bp, fp, skb); } else { DP(NETIF_MSG_RX_STATUS, @@ -993,7 +994,7 @@ reuse_rx: if (le16_to_cpu(cqe_fp->pars_flags.flags) & PARSING_FLAGS_VLAN) - __vlan_hwaccel_put_tag(skb, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); napi_gro_receive(&fp->napi, skb); @@ -2009,7 +2010,7 @@ static int bnx2x_init_hw(struct bnx2x *bp, u32 load_code) * Cleans the object that have internal lists without sending * ramrods. Should be run when interrutps are disabled. 
*/ -static void bnx2x_squeeze_objects(struct bnx2x *bp) +void bnx2x_squeeze_objects(struct bnx2x *bp) { int rc; unsigned long ramrod_flags = 0, vlan_mac_flags = 0; @@ -2614,6 +2615,9 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } } + /* initialize FW coalescing state machines in RAM */ + bnx2x_update_coalesce(bp); + /* setup the leading queue */ rc = bnx2x_setup_leading(bp); if (rc) { @@ -2760,6 +2764,7 @@ load_error2: bp->port.pmf = 0; load_error1: bnx2x_napi_disable(bp); + bnx2x_del_all_napi(bp); /* clear pf_load status, as it was already set */ if (IS_PF(bp)) @@ -2773,7 +2778,7 @@ load_error0: #endif /* ! BNX2X_STOP_ON_ERROR */ } -static int bnx2x_drain_tx_queues(struct bnx2x *bp) +int bnx2x_drain_tx_queues(struct bnx2x *bp) { u8 rc = 0, cos, i; @@ -3085,11 +3090,11 @@ int bnx2x_poll(struct napi_struct *napi, int budget) * to ease the pain of our fellow microcode engineers * we use one mapping for both BDs */ -static noinline u16 bnx2x_tx_split(struct bnx2x *bp, - struct bnx2x_fp_txdata *txdata, - struct sw_tx_bd *tx_buf, - struct eth_tx_start_bd **tx_bd, u16 hlen, - u16 bd_prod, int nbd) +static u16 bnx2x_tx_split(struct bnx2x *bp, + struct bnx2x_fp_txdata *txdata, + struct sw_tx_bd *tx_buf, + struct eth_tx_start_bd **tx_bd, u16 hlen, + u16 bd_prod) { struct eth_tx_start_bd *h_tx_bd = *tx_bd; struct eth_tx_bd *d_tx_bd; @@ -3097,11 +3102,10 @@ static noinline u16 bnx2x_tx_split(struct bnx2x *bp, int old_len = le16_to_cpu(h_tx_bd->nbytes); /* first fix first BD */ - h_tx_bd->nbd = cpu_to_le16(nbd); h_tx_bd->nbytes = cpu_to_le16(hlen); - DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d (%x:%x) nbd %d\n", - h_tx_bd->nbytes, h_tx_bd->addr_hi, h_tx_bd->addr_lo, h_tx_bd->nbd); + DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d (%x:%x)\n", + h_tx_bd->nbytes, h_tx_bd->addr_hi, h_tx_bd->addr_lo); /* now get a new data BD * (after the pbd) and fill it */ @@ -3130,7 +3134,7 @@ static noinline u16 bnx2x_tx_split(struct bnx2x *bp, #define bswab32(b32) 
((__force __le32) swab32((__force __u32) (b32))) #define bswab16(b16) ((__force __le16) swab16((__force __u16) (b16))) -static inline __le16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix) +static __le16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix) { __sum16 tsum = (__force __sum16) csum; @@ -3145,30 +3149,47 @@ static inline __le16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix) return bswab16(tsum); } -static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb) +static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb) { u32 rc; + __u8 prot = 0; + __be16 protocol; if (skb->ip_summed != CHECKSUM_PARTIAL) - rc = XMIT_PLAIN; + return XMIT_PLAIN; - else { - if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) { - rc = XMIT_CSUM_V6; - if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) - rc |= XMIT_CSUM_TCP; + protocol = vlan_get_protocol(skb); + if (protocol == htons(ETH_P_IPV6)) { + rc = XMIT_CSUM_V6; + prot = ipv6_hdr(skb)->nexthdr; + } else { + rc = XMIT_CSUM_V4; + prot = ip_hdr(skb)->protocol; + } + if (!CHIP_IS_E1x(bp) && skb->encapsulation) { + if (inner_ip_hdr(skb)->version == 6) { + rc |= XMIT_CSUM_ENC_V6; + if (inner_ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) + rc |= XMIT_CSUM_TCP; } else { - rc = XMIT_CSUM_V4; - if (ip_hdr(skb)->protocol == IPPROTO_TCP) + rc |= XMIT_CSUM_ENC_V4; + if (inner_ip_hdr(skb)->protocol == IPPROTO_TCP) rc |= XMIT_CSUM_TCP; } } + if (prot == IPPROTO_TCP) + rc |= XMIT_CSUM_TCP; - if (skb_is_gso_v6(skb)) - rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6; - else if (skb_is_gso(skb)) - rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP; + if (skb_is_gso_v6(skb)) { + rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6); + if (rc & XMIT_CSUM_ENC) + rc |= XMIT_GSO_ENC_V6; + } else if (skb_is_gso(skb)) { + rc |= (XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP); + if (rc & XMIT_CSUM_ENC) + rc |= XMIT_GSO_ENC_V4; + } return rc; } @@ -3253,14 +3274,23 @@ exit_lbl: } #endif -static inline void 
bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data, - u32 xmit_type) +static void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data, + u32 xmit_type) { + struct ipv6hdr *ipv6; + *parsing_data |= (skb_shinfo(skb)->gso_size << ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) & ETH_TX_PARSE_BD_E2_LSO_MSS; - if ((xmit_type & XMIT_GSO_V6) && - (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6)) + + if (xmit_type & XMIT_GSO_ENC_V6) + ipv6 = inner_ipv6_hdr(skb); + else if (xmit_type & XMIT_GSO_V6) + ipv6 = ipv6_hdr(skb); + else + ipv6 = NULL; + + if (ipv6 && ipv6->nexthdr == NEXTHDR_IPV6) *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR; } @@ -3271,13 +3301,13 @@ static inline void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data, * @pbd: parse BD * @xmit_type: xmit flags */ -static inline void bnx2x_set_pbd_gso(struct sk_buff *skb, - struct eth_tx_parse_bd_e1x *pbd, - u32 xmit_type) +static void bnx2x_set_pbd_gso(struct sk_buff *skb, + struct eth_tx_parse_bd_e1x *pbd, + u32 xmit_type) { pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size); pbd->tcp_send_seq = bswab32(tcp_hdr(skb)->seq); - pbd->tcp_flags = pbd_tcp_flags(skb); + pbd->tcp_flags = pbd_tcp_flags(tcp_hdr(skb)); if (xmit_type & XMIT_GSO_V4) { pbd->ip_id = bswab16(ip_hdr(skb)->id); @@ -3297,6 +3327,40 @@ static inline void bnx2x_set_pbd_gso(struct sk_buff *skb, } /** + * bnx2x_set_pbd_csum_enc - update PBD with checksum and return header length + * + * @bp: driver handle + * @skb: packet skb + * @parsing_data: data to be updated + * @xmit_type: xmit flags + * + * 57712/578xx related, when skb has encapsulation + */ +static u8 bnx2x_set_pbd_csum_enc(struct bnx2x *bp, struct sk_buff *skb, + u32 *parsing_data, u32 xmit_type) +{ + *parsing_data |= + ((((u8 *)skb_inner_transport_header(skb) - skb->data) >> 1) << + ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & + ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; + + if (xmit_type & XMIT_CSUM_TCP) { + *parsing_data |= ((inner_tcp_hdrlen(skb) / 4) << + 
ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) & + ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW; + + return skb_inner_transport_header(skb) + + inner_tcp_hdrlen(skb) - skb->data; + } + + /* We support checksum offload for TCP and UDP only. + * No need to pass the UDP header length - it's a constant. + */ + return skb_inner_transport_header(skb) + + sizeof(struct udphdr) - skb->data; +} + +/** * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length * * @bp: driver handle @@ -3304,15 +3368,15 @@ static inline void bnx2x_set_pbd_gso(struct sk_buff *skb, * @parsing_data: data to be updated * @xmit_type: xmit flags * - * 57712 related + * 57712/578xx related */ -static inline u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb, - u32 *parsing_data, u32 xmit_type) +static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb, + u32 *parsing_data, u32 xmit_type) { *parsing_data |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) << - ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT) & - ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W; + ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & + ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; if (xmit_type & XMIT_CSUM_TCP) { *parsing_data |= ((tcp_hdrlen(skb) / 4) << @@ -3327,17 +3391,15 @@ static inline u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb, return skb_transport_header(skb) + sizeof(struct udphdr) - skb->data; } -static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb, - struct eth_tx_start_bd *tx_start_bd, u32 xmit_type) +/* set FW indication according to inner or outer protocols if tunneled */ +static void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb, + struct eth_tx_start_bd *tx_start_bd, + u32 xmit_type) { tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM; - if (xmit_type & XMIT_CSUM_V4) - tx_start_bd->bd_flags.as_bitfield |= - ETH_TX_BD_FLAGS_IP_CSUM; - else - tx_start_bd->bd_flags.as_bitfield |= - ETH_TX_BD_FLAGS_IPV6; + if (xmit_type 
& (XMIT_CSUM_ENC_V6 | XMIT_CSUM_V6)) + tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IPV6; if (!(xmit_type & XMIT_CSUM_TCP)) tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IS_UDP; @@ -3351,9 +3413,9 @@ static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb, * @pbd: parse BD to be updated * @xmit_type: xmit flags */ -static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, - struct eth_tx_parse_bd_e1x *pbd, - u32 xmit_type) +static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, + struct eth_tx_parse_bd_e1x *pbd, + u32 xmit_type) { u8 hlen = (skb_network_header(skb) - skb->data) >> 1; @@ -3399,6 +3461,70 @@ static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, return hlen; } +static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb, + struct eth_tx_parse_bd_e2 *pbd_e2, + struct eth_tx_parse_2nd_bd *pbd2, + u16 *global_data, + u32 xmit_type) +{ + u16 hlen_w = 0; + u8 outerip_off, outerip_len = 0; + /* from outer IP to transport */ + hlen_w = (skb_inner_transport_header(skb) - + skb_network_header(skb)) >> 1; + + /* transport len */ + if (xmit_type & XMIT_CSUM_TCP) + hlen_w += inner_tcp_hdrlen(skb) >> 1; + else + hlen_w += sizeof(struct udphdr) >> 1; + + pbd2->fw_ip_hdr_to_payload_w = hlen_w; + + if (xmit_type & XMIT_CSUM_ENC_V4) { + struct iphdr *iph = ip_hdr(skb); + pbd2->fw_ip_csum_wo_len_flags_frag = + bswab16(csum_fold((~iph->check) - + iph->tot_len - iph->frag_off)); + } else { + pbd2->fw_ip_hdr_to_payload_w = + hlen_w - ((sizeof(struct ipv6hdr)) >> 1); + } + + pbd2->tcp_send_seq = bswab32(inner_tcp_hdr(skb)->seq); + + pbd2->tcp_flags = pbd_tcp_flags(inner_tcp_hdr(skb)); + + if (xmit_type & XMIT_GSO_V4) { + pbd2->hw_ip_id = bswab16(inner_ip_hdr(skb)->id); + + pbd_e2->data.tunnel_data.pseudo_csum = + bswab16(~csum_tcpudp_magic( + inner_ip_hdr(skb)->saddr, + inner_ip_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0)); + + outerip_len = ip_hdr(skb)->ihl << 1; + } else { + 
pbd_e2->data.tunnel_data.pseudo_csum = + bswab16(~csum_ipv6_magic( + &inner_ipv6_hdr(skb)->saddr, + &inner_ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0)); + } + + outerip_off = (skb_network_header(skb) - skb->data) >> 1; + + *global_data |= + outerip_off | + (!!(xmit_type & XMIT_CSUM_V6) << + ETH_TX_PARSE_2ND_BD_IP_HDR_TYPE_OUTER_SHIFT) | + (outerip_len << + ETH_TX_PARSE_2ND_BD_IP_HDR_LEN_OUTER_W_SHIFT) | + ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) << + ETH_TX_PARSE_2ND_BD_LLC_SNAP_EN_SHIFT); +} + /* called with netif_tx_lock * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call * netif_wake_queue() @@ -3414,6 +3540,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL; struct eth_tx_parse_bd_e1x *pbd_e1x = NULL; struct eth_tx_parse_bd_e2 *pbd_e2 = NULL; + struct eth_tx_parse_2nd_bd *pbd2 = NULL; u32 pbd_e2_parsing_data = 0; u16 pkt_prod, bd_prod; int nbd, txq_index; @@ -3481,7 +3608,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) mac_type = MULTICAST_ADDRESS; } -#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3) +#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - BDS_PER_TX_PKT) /* First, check if we need to linearize the skb (due to FW restrictions). No need to check fragmentation if page size > 8K (there will be no violation to FW restrictions) */ @@ -3529,12 +3656,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) first_bd = tx_start_bd; tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; - SET_FLAG(tx_start_bd->general_data, - ETH_TX_START_BD_PARSE_NBDS, - 0); - /* header nbd */ - SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1); + /* header nbd: indirectly zero other flags! 
*/ + tx_start_bd->general_data = 1 << ETH_TX_START_BD_HDR_NBDS_SHIFT; /* remember the first BD of the packet */ tx_buf->first_bd = txdata->tx_bd_prod; @@ -3554,19 +3678,16 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* when transmitting in a vf, start bd must hold the ethertype * for fw to enforce it */ -#ifndef BNX2X_STOP_ON_ERROR - if (IS_VF(bp)) { -#endif + if (IS_VF(bp)) tx_start_bd->vlan_or_ethertype = cpu_to_le16(ntohs(eth->h_proto)); -#ifndef BNX2X_STOP_ON_ERROR - } else { + else /* used by FW for packet accounting */ tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod); - } -#endif } + nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */ + /* turn on parsing and get a BD */ bd_prod = TX_BD(NEXT_TX_IDX(bd_prod)); @@ -3576,23 +3697,58 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!CHIP_IS_E1x(bp)) { pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2; memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2)); - /* Set PBD in checksum offload case */ - if (xmit_type & XMIT_CSUM) + + if (xmit_type & XMIT_CSUM_ENC) { + u16 global_data = 0; + + /* Set PBD in enc checksum offload case */ + hlen = bnx2x_set_pbd_csum_enc(bp, skb, + &pbd_e2_parsing_data, + xmit_type); + + /* turn on 2nd parsing and get a BD */ + bd_prod = TX_BD(NEXT_TX_IDX(bd_prod)); + + pbd2 = &txdata->tx_desc_ring[bd_prod].parse_2nd_bd; + + memset(pbd2, 0, sizeof(*pbd2)); + + pbd_e2->data.tunnel_data.ip_hdr_start_inner_w = + (skb_inner_network_header(skb) - + skb->data) >> 1; + + if (xmit_type & XMIT_GSO_ENC) + bnx2x_update_pbds_gso_enc(skb, pbd_e2, pbd2, + &global_data, + xmit_type); + + pbd2->global_data = cpu_to_le16(global_data); + + /* add addition parse BD indication to start BD */ + SET_FLAG(tx_start_bd->general_data, + ETH_TX_START_BD_PARSE_NBDS, 1); + /* set encapsulation flag in start BD */ + SET_FLAG(tx_start_bd->general_data, + ETH_TX_START_BD_TUNNEL_EXIST, 1); + nbd++; + } else if (xmit_type & XMIT_CSUM) 
{ + /* Set PBD in checksum offload case w/o encapsulation */ hlen = bnx2x_set_pbd_csum_e2(bp, skb, &pbd_e2_parsing_data, xmit_type); + } - if (IS_MF_SI(bp) || IS_VF(bp)) { - /* fill in the MAC addresses in the PBD - for local - * switching - */ - bnx2x_set_fw_mac_addr(&pbd_e2->src_mac_addr_hi, - &pbd_e2->src_mac_addr_mid, - &pbd_e2->src_mac_addr_lo, + /* Add the macs to the parsing BD this is a vf */ + if (IS_VF(bp)) { + /* override GRE parameters in BD */ + bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi, + &pbd_e2->data.mac_addr.src_mid, + &pbd_e2->data.mac_addr.src_lo, eth->h_source); - bnx2x_set_fw_mac_addr(&pbd_e2->dst_mac_addr_hi, - &pbd_e2->dst_mac_addr_mid, - &pbd_e2->dst_mac_addr_lo, + + bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.dst_hi, + &pbd_e2->data.mac_addr.dst_mid, + &pbd_e2->data.mac_addr.dst_lo, eth->h_dest); } @@ -3614,14 +3770,13 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Setup the data pointer of the first BD of the packet */ tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping)); tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping)); - nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */ tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb)); pkt_size = tx_start_bd->nbytes; DP(NETIF_MSG_TX_QUEUED, - "first bd @%p addr (%x:%x) nbd %d nbytes %d flags %x vlan %x\n", + "first bd @%p addr (%x:%x) nbytes %d flags %x vlan %x\n", tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo, - le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes), + le16_to_cpu(tx_start_bd->nbytes), tx_start_bd->bd_flags.as_bitfield, le16_to_cpu(tx_start_bd->vlan_or_ethertype)); @@ -3634,10 +3789,12 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO; - if (unlikely(skb_headlen(skb) > hlen)) + if (unlikely(skb_headlen(skb) > hlen)) { + nbd++; bd_prod = bnx2x_tx_split(bp, txdata, tx_buf, &tx_start_bd, hlen, - bd_prod, ++nbd); + 
bd_prod); + } if (!CHIP_IS_E1x(bp)) bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data, xmit_type); @@ -3727,9 +3884,13 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) if (pbd_e2) DP(NETIF_MSG_TX_QUEUED, "PBD (E2) @%p dst %x %x %x src %x %x %x parsing_data %x\n", - pbd_e2, pbd_e2->dst_mac_addr_hi, pbd_e2->dst_mac_addr_mid, - pbd_e2->dst_mac_addr_lo, pbd_e2->src_mac_addr_hi, - pbd_e2->src_mac_addr_mid, pbd_e2->src_mac_addr_lo, + pbd_e2, + pbd_e2->data.mac_addr.dst_hi, + pbd_e2->data.mac_addr.dst_mid, + pbd_e2->data.mac_addr.dst_lo, + pbd_e2->data.mac_addr.src_hi, + pbd_e2->data.mac_addr.src_mid, + pbd_e2->data.mac_addr.src_lo, pbd_e2->parsing_data); DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod); @@ -4579,11 +4740,11 @@ static void storm_memset_hc_disable(struct bnx2x *bp, u8 port, u32 enable_flag = disable ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT); u32 addr = BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index); - u16 flags = REG_RD16(bp, addr); + u8 flags = REG_RD8(bp, addr); /* clear and set */ flags &= ~HC_INDEX_DATA_HC_ENABLED; flags |= enable_flag; - REG_WR16(bp, addr, flags); + REG_WR8(bp, addr, flags); DP(NETIF_MSG_IFUP, "port %x fw_sb_id %d sb_index %d disable %d\n", port, fw_sb_id, sb_index, disable); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index aee7671ff4c1..54e1b149acb3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -50,13 +50,13 @@ extern int int_mode; } \ } while (0) -#define BNX2X_PCI_ALLOC(x, y, size) \ - do { \ - x = dma_alloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \ - if (x == NULL) \ - goto alloc_mem_err; \ - memset((void *)x, 0, size); \ - } while (0) +#define BNX2X_PCI_ALLOC(x, y, size) \ +do { \ + x = dma_alloc_coherent(&bp->pdev->dev, size, y, \ + GFP_KERNEL | __GFP_ZERO); \ + if (x == NULL) \ + goto alloc_mem_err; \ +} 
while (0) #define BNX2X_ALLOC(x, size) \ do { \ @@ -496,7 +496,10 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev); /* setup_tc callback */ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc); +int bnx2x_get_vf_config(struct net_device *dev, int vf, + struct ifla_vf_info *ivi); int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac); +int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); /* select_queue callback */ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb); @@ -834,7 +837,7 @@ static inline void bnx2x_add_all_napi_cnic(struct bnx2x *bp) /* Add NAPI objects */ for_each_rx_queue_cnic(bp, i) netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), - bnx2x_poll, BNX2X_NAPI_WEIGHT); + bnx2x_poll, NAPI_POLL_WEIGHT); } static inline void bnx2x_add_all_napi(struct bnx2x *bp) @@ -844,7 +847,7 @@ static inline void bnx2x_add_all_napi(struct bnx2x *bp) /* Add NAPI objects */ for_each_eth_queue(bp, i) netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), - bnx2x_poll, BNX2X_NAPI_WEIGHT); + bnx2x_poll, NAPI_POLL_WEIGHT); } static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp) @@ -970,6 +973,9 @@ static inline int bnx2x_func_start(struct bnx2x *bp) else /* CHIP_IS_E1X */ start_params->network_cos_mode = FW_WRR; + start_params->gre_tunnel_mode = IPGRE_TUNNEL; + start_params->gre_tunnel_rss = GRE_INNER_HEADERS_RSS; + return bnx2x_func_state_change(bp, &func_params); } @@ -1396,4 +1402,8 @@ static inline bool bnx2x_is_valid_ether_addr(struct bnx2x *bp, u8 *addr) * */ void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len); + +int bnx2x_drain_tx_queues(struct bnx2x *bp); +void bnx2x_squeeze_objects(struct bnx2x *bp); + #endif /* BNX2X_CMN_H */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c index 568205436a15..91ecd6a00d05 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c +++ 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c @@ -2139,12 +2139,12 @@ static u8 bnx2x_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) break; default: BNX2X_ERR("Non valid capability ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "DCB disabled\n"); - rval = -EINVAL; + rval = 1; } DP(BNX2X_MSG_DCB, "capid %d:%x\n", capid, *cap); @@ -2170,12 +2170,12 @@ static int bnx2x_dcbnl_get_numtcs(struct net_device *netdev, int tcid, u8 *num) break; default: BNX2X_ERR("Non valid TC-ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "DCB disabled\n"); - rval = -EINVAL; + rval = 1; } return rval; @@ -2188,7 +2188,7 @@ static int bnx2x_dcbnl_set_numtcs(struct net_device *netdev, int tcid, u8 num) return -EINVAL; } -static u8 bnx2x_dcbnl_get_pfc_state(struct net_device *netdev) +static u8 bnx2x_dcbnl_get_pfc_state(struct net_device *netdev) { struct bnx2x *bp = netdev_priv(netdev); DP(BNX2X_MSG_DCB, "state = %d\n", bp->dcbx_local_feat.pfc.enabled); @@ -2390,12 +2390,12 @@ static u8 bnx2x_dcbnl_get_featcfg(struct net_device *netdev, int featid, break; default: BNX2X_ERR("Non valid featrue-ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "DCB disabled\n"); - rval = -EINVAL; + rval = 1; } return rval; @@ -2431,12 +2431,12 @@ static u8 bnx2x_dcbnl_set_featcfg(struct net_device *netdev, int featid, break; default: BNX2X_ERR("Non valid featrue-ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "dcbnl call not valid\n"); - rval = -EINVAL; + rval = 1; } return rval; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index edfa67adf2f9..88e9b47a3eb6 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1364,11 +1364,27 @@ static int bnx2x_nvram_read(struct bnx2x *bp, u32 offset, u8 *ret_buf, return rc; } +static int 
bnx2x_nvram_read32(struct bnx2x *bp, u32 offset, u32 *buf, + int buf_size) +{ + int rc; + + rc = bnx2x_nvram_read(bp, offset, (u8 *)buf, buf_size); + + if (!rc) { + __be32 *be = (__be32 *)buf; + + while ((buf_size -= 4) >= 0) + *buf++ = be32_to_cpu(*be++); + } + + return rc; +} + static int bnx2x_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *eebuf) { struct bnx2x *bp = netdev_priv(dev); - int rc; if (!netif_running(dev)) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, @@ -1383,9 +1399,7 @@ static int bnx2x_get_eeprom(struct net_device *dev, /* parameters already validated in ethtool_get_eeprom */ - rc = bnx2x_nvram_read(bp, eeprom->offset, eebuf, eeprom->len); - - return rc; + return bnx2x_nvram_read(bp, eeprom->offset, eebuf, eeprom->len); } static int bnx2x_get_module_eeprom(struct net_device *dev, @@ -1393,10 +1407,9 @@ static int bnx2x_get_module_eeprom(struct net_device *dev, u8 *data) { struct bnx2x *bp = netdev_priv(dev); - int rc = 0, phy_idx; + int rc = -EINVAL, phy_idx; u8 *user_data = data; - int remaining_len = ee->len, xfer_size; - unsigned int page_off = ee->offset; + unsigned int start_addr = ee->offset, xfer_size = 0; if (!netif_running(dev)) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, @@ -1405,21 +1418,52 @@ static int bnx2x_get_module_eeprom(struct net_device *dev, } phy_idx = bnx2x_get_cur_phy_idx(bp); - bnx2x_acquire_phy_lock(bp); - while (!rc && remaining_len > 0) { - xfer_size = (remaining_len > SFP_EEPROM_PAGE_SIZE) ? 
- SFP_EEPROM_PAGE_SIZE : remaining_len; + + /* Read A0 section */ + if (start_addr < ETH_MODULE_SFF_8079_LEN) { + /* Limit transfer size to the A0 section boundary */ + if (start_addr + ee->len > ETH_MODULE_SFF_8079_LEN) + xfer_size = ETH_MODULE_SFF_8079_LEN - start_addr; + else + xfer_size = ee->len; + bnx2x_acquire_phy_lock(bp); rc = bnx2x_read_sfp_module_eeprom(&bp->link_params.phy[phy_idx], &bp->link_params, - page_off, + I2C_DEV_ADDR_A0, + start_addr, xfer_size, user_data); - remaining_len -= xfer_size; + bnx2x_release_phy_lock(bp); + if (rc) { + DP(BNX2X_MSG_ETHTOOL, "Failed reading A0 section\n"); + + return -EINVAL; + } user_data += xfer_size; - page_off += xfer_size; + start_addr += xfer_size; } - bnx2x_release_phy_lock(bp); + /* Read A2 section */ + if ((start_addr >= ETH_MODULE_SFF_8079_LEN) && + (start_addr < ETH_MODULE_SFF_8472_LEN)) { + xfer_size = ee->len - xfer_size; + /* Limit transfer size to the A2 section boundary */ + if (start_addr + xfer_size > ETH_MODULE_SFF_8472_LEN) + xfer_size = ETH_MODULE_SFF_8472_LEN - start_addr; + start_addr -= ETH_MODULE_SFF_8079_LEN; + bnx2x_acquire_phy_lock(bp); + rc = bnx2x_read_sfp_module_eeprom(&bp->link_params.phy[phy_idx], + &bp->link_params, + I2C_DEV_ADDR_A2, + start_addr, + xfer_size, + user_data); + bnx2x_release_phy_lock(bp); + if (rc) { + DP(BNX2X_MSG_ETHTOOL, "Failed reading A2 section\n"); + return -EINVAL; + } + } return rc; } @@ -1427,24 +1471,50 @@ static int bnx2x_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { struct bnx2x *bp = netdev_priv(dev); - int phy_idx; + int phy_idx, rc; + u8 sff8472_comp, diag_type; + if (!netif_running(dev)) { - DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, + DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, "cannot access eeprom when the interface is down\n"); return -EAGAIN; } - phy_idx = bnx2x_get_cur_phy_idx(bp); - switch (bp->link_params.phy[phy_idx].media_type) { - case ETH_PHY_SFPP_10G_FIBER: - case ETH_PHY_SFP_1G_FIBER: - case ETH_PHY_DA_TWINAX: + 
bnx2x_acquire_phy_lock(bp); + rc = bnx2x_read_sfp_module_eeprom(&bp->link_params.phy[phy_idx], + &bp->link_params, + I2C_DEV_ADDR_A0, + SFP_EEPROM_SFF_8472_COMP_ADDR, + SFP_EEPROM_SFF_8472_COMP_SIZE, + &sff8472_comp); + bnx2x_release_phy_lock(bp); + if (rc) { + DP(BNX2X_MSG_ETHTOOL, "Failed reading SFF-8472 comp field\n"); + return -EINVAL; + } + + bnx2x_acquire_phy_lock(bp); + rc = bnx2x_read_sfp_module_eeprom(&bp->link_params.phy[phy_idx], + &bp->link_params, + I2C_DEV_ADDR_A0, + SFP_EEPROM_DIAG_TYPE_ADDR, + SFP_EEPROM_DIAG_TYPE_SIZE, + &diag_type); + bnx2x_release_phy_lock(bp); + if (rc) { + DP(BNX2X_MSG_ETHTOOL, "Failed reading Diag Type field\n"); + return -EINVAL; + } + + if (!sff8472_comp || + (diag_type & SFP_EEPROM_DIAG_ADDR_CHANGE_REQ)) { modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - return 0; - default: - return -EOPNOTSUPP; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; } + return 0; } static int bnx2x_nvram_write_dword(struct bnx2x *bp, u32 offset, u32 val, @@ -1496,9 +1566,8 @@ static int bnx2x_nvram_write1(struct bnx2x *bp, u32 offset, u8 *data_buf, int buf_size) { int rc; - u32 cmd_flags; - u32 align_offset; - __be32 val; + u32 cmd_flags, align_offset, val; + __be32 val_be; if (offset + buf_size > bp->common.flash_size) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, @@ -1517,16 +1586,16 @@ static int bnx2x_nvram_write1(struct bnx2x *bp, u32 offset, u8 *data_buf, cmd_flags = (MCPR_NVM_COMMAND_FIRST | MCPR_NVM_COMMAND_LAST); align_offset = (offset & ~0x03); - rc = bnx2x_nvram_read_dword(bp, align_offset, &val, cmd_flags); + rc = bnx2x_nvram_read_dword(bp, align_offset, &val_be, cmd_flags); if (rc == 0) { - val &= ~(0xff << BYTE_OFFSET(offset)); - val |= (*data_buf << BYTE_OFFSET(offset)); - /* nvram data is returned as an array of bytes * convert it back to cpu order */ - val = be32_to_cpu(val); + val = be32_to_cpu(val_be); + + val &= ~le32_to_cpu(0xff << 
BYTE_OFFSET(offset)); + val |= le32_to_cpu(*data_buf << BYTE_OFFSET(offset)); rc = bnx2x_nvram_write_dword(bp, align_offset, val, cmd_flags); @@ -2526,14 +2595,168 @@ static int bnx2x_test_ext_loopback(struct bnx2x *bp) return rc; } +struct code_entry { + u32 sram_start_addr; + u32 code_attribute; +#define CODE_IMAGE_TYPE_MASK 0xf0800003 +#define CODE_IMAGE_VNTAG_PROFILES_DATA 0xd0000003 +#define CODE_IMAGE_LENGTH_MASK 0x007ffffc +#define CODE_IMAGE_TYPE_EXTENDED_DIR 0xe0000000 + u32 nvm_start_addr; +}; + +#define CODE_ENTRY_MAX 16 +#define CODE_ENTRY_EXTENDED_DIR_IDX 15 +#define MAX_IMAGES_IN_EXTENDED_DIR 64 +#define NVRAM_DIR_OFFSET 0x14 + +#define EXTENDED_DIR_EXISTS(code) \ + ((code & CODE_IMAGE_TYPE_MASK) == CODE_IMAGE_TYPE_EXTENDED_DIR && \ + (code & CODE_IMAGE_LENGTH_MASK) != 0) + #define CRC32_RESIDUAL 0xdebb20e3 +#define CRC_BUFF_SIZE 256 + +static int bnx2x_nvram_crc(struct bnx2x *bp, + int offset, + int size, + u8 *buff) +{ + u32 crc = ~0; + int rc = 0, done = 0; + + DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, + "NVRAM CRC from 0x%08x to 0x%08x\n", offset, offset + size); + + while (done < size) { + int count = min_t(int, size - done, CRC_BUFF_SIZE); + + rc = bnx2x_nvram_read(bp, offset + done, buff, count); + + if (rc) + return rc; + + crc = crc32_le(crc, buff, count); + done += count; + } + + if (crc != CRC32_RESIDUAL) + rc = -EINVAL; + + return rc; +} + +static int bnx2x_test_nvram_dir(struct bnx2x *bp, + struct code_entry *entry, + u8 *buff) +{ + size_t size = entry->code_attribute & CODE_IMAGE_LENGTH_MASK; + u32 type = entry->code_attribute & CODE_IMAGE_TYPE_MASK; + int rc; + + /* Zero-length images and AFEX profiles do not have CRC */ + if (size == 0 || type == CODE_IMAGE_VNTAG_PROFILES_DATA) + return 0; + + rc = bnx2x_nvram_crc(bp, entry->nvm_start_addr, size, buff); + if (rc) + DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, + "image %x has failed crc test (rc %d)\n", type, rc); + + return rc; +} + +static int bnx2x_test_dir_entry(struct bnx2x *bp, u32 addr, u8 
*buff) +{ + int rc; + struct code_entry entry; + + rc = bnx2x_nvram_read32(bp, addr, (u32 *)&entry, sizeof(entry)); + if (rc) + return rc; + + return bnx2x_test_nvram_dir(bp, &entry, buff); +} + +static int bnx2x_test_nvram_ext_dirs(struct bnx2x *bp, u8 *buff) +{ + u32 rc, cnt, dir_offset = NVRAM_DIR_OFFSET; + struct code_entry entry; + int i; + + rc = bnx2x_nvram_read32(bp, + dir_offset + + sizeof(entry) * CODE_ENTRY_EXTENDED_DIR_IDX, + (u32 *)&entry, sizeof(entry)); + if (rc) + return rc; + + if (!EXTENDED_DIR_EXISTS(entry.code_attribute)) + return 0; + + rc = bnx2x_nvram_read32(bp, entry.nvm_start_addr, + &cnt, sizeof(u32)); + if (rc) + return rc; + + dir_offset = entry.nvm_start_addr + 8; + + for (i = 0; i < cnt && i < MAX_IMAGES_IN_EXTENDED_DIR; i++) { + rc = bnx2x_test_dir_entry(bp, dir_offset + + sizeof(struct code_entry) * i, + buff); + if (rc) + return rc; + } + + return 0; +} + +static int bnx2x_test_nvram_dirs(struct bnx2x *bp, u8 *buff) +{ + u32 rc, dir_offset = NVRAM_DIR_OFFSET; + int i; + + DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, "NVRAM DIRS CRC test-set\n"); + + for (i = 0; i < CODE_ENTRY_EXTENDED_DIR_IDX; i++) { + rc = bnx2x_test_dir_entry(bp, dir_offset + + sizeof(struct code_entry) * i, + buff); + if (rc) + return rc; + } + + return bnx2x_test_nvram_ext_dirs(bp, buff); +} + +struct crc_pair { + int offset; + int size; +}; + +static int bnx2x_test_nvram_tbl(struct bnx2x *bp, + const struct crc_pair *nvram_tbl, u8 *buf) +{ + int i; + + for (i = 0; nvram_tbl[i].size; i++) { + int rc = bnx2x_nvram_crc(bp, nvram_tbl[i].offset, + nvram_tbl[i].size, buf); + if (rc) { + DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, + "nvram_tbl[%d] has failed crc test (rc %d)\n", + i, rc); + return rc; + } + } + + return 0; +} static int bnx2x_test_nvram(struct bnx2x *bp) { - static const struct { - int offset; - int size; - } nvram_tbl[] = { + const struct crc_pair nvram_tbl[] = { { 0, 0x14 }, /* bootstrap */ { 0x14, 0xec }, /* dir */ { 0x100, 0x350 }, /* manuf_info */ @@ -2542,30 
+2765,33 @@ static int bnx2x_test_nvram(struct bnx2x *bp) { 0x708, 0x70 }, /* manuf_key_info */ { 0, 0 } }; - __be32 *buf; - u8 *data; - int i, rc; - u32 magic, crc; + const struct crc_pair nvram_tbl2[] = { + { 0x7e8, 0x350 }, /* manuf_info2 */ + { 0xb38, 0xf0 }, /* feature_info */ + { 0, 0 } + }; + + u8 *buf; + int rc; + u32 magic; if (BP_NOMCP(bp)) return 0; - buf = kmalloc(0x350, GFP_KERNEL); + buf = kmalloc(CRC_BUFF_SIZE, GFP_KERNEL); if (!buf) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, "kmalloc failed\n"); rc = -ENOMEM; goto test_nvram_exit; } - data = (u8 *)buf; - rc = bnx2x_nvram_read(bp, 0, data, 4); + rc = bnx2x_nvram_read32(bp, 0, &magic, sizeof(magic)); if (rc) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, "magic value read (rc %d)\n", rc); goto test_nvram_exit; } - magic = be32_to_cpu(buf[0]); if (magic != 0x669955aa) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, "wrong magic value (0x%08x)\n", magic); @@ -2573,25 +2799,26 @@ static int bnx2x_test_nvram(struct bnx2x *bp) goto test_nvram_exit; } - for (i = 0; nvram_tbl[i].size; i++) { + DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, "Port 0 CRC test-set\n"); + rc = bnx2x_test_nvram_tbl(bp, nvram_tbl, buf); + if (rc) + goto test_nvram_exit; - rc = bnx2x_nvram_read(bp, nvram_tbl[i].offset, data, - nvram_tbl[i].size); - if (rc) { - DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, - "nvram_tbl[%d] read data (rc %d)\n", i, rc); - goto test_nvram_exit; - } + if (!CHIP_IS_E1x(bp) && !CHIP_IS_57811xx(bp)) { + u32 hide = SHMEM_RD(bp, dev_info.shared_hw_config.config2) & + SHARED_HW_CFG_HIDE_PORT1; - crc = ether_crc_le(nvram_tbl[i].size, data); - if (crc != CRC32_RESIDUAL) { + if (!hide) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, - "nvram_tbl[%d] wrong crc value (0x%08x)\n", i, crc); - rc = -ENODEV; - goto test_nvram_exit; + "Port 1 CRC test-set\n"); + rc = bnx2x_test_nvram_tbl(bp, nvram_tbl2, buf); + if (rc) + goto test_nvram_exit; } } + rc = bnx2x_test_nvram_dirs(bp, buf); + test_nvram_exit: kfree(buf); return rc; @@ -2637,9 +2864,16 @@ static void 
bnx2x_self_test(struct net_device *dev, memset(buf, 0, sizeof(u64) * BNX2X_NUM_TESTS(bp)); + if (bnx2x_test_nvram(bp) != 0) { + if (!IS_MF(bp)) + buf[4] = 1; + else + buf[0] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } + if (!netif_running(dev)) { - DP(BNX2X_MSG_ETHTOOL, - "Can't perform self-test when interface is down\n"); + DP(BNX2X_MSG_ETHTOOL, "Interface is down\n"); return; } @@ -2701,13 +2935,7 @@ static void bnx2x_self_test(struct net_device *dev, /* wait until link state is restored */ bnx2x_wait_for_link(bp, link_up, is_serdes); } - if (bnx2x_test_nvram(bp) != 0) { - if (!IS_MF(bp)) - buf[4] = 1; - else - buf[0] = 1; - etest->flags |= ETH_TEST_FL_FAILED; - } + if (bnx2x_test_intr(bp) != 0) { if (!IS_MF(bp)) buf[5] = 1; @@ -3232,7 +3460,32 @@ static const struct ethtool_ops bnx2x_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; -void bnx2x_set_ethtool_ops(struct net_device *netdev) +static const struct ethtool_ops bnx2x_vf_ethtool_ops = { + .get_settings = bnx2x_get_settings, + .set_settings = bnx2x_set_settings, + .get_drvinfo = bnx2x_get_drvinfo, + .get_msglevel = bnx2x_get_msglevel, + .set_msglevel = bnx2x_set_msglevel, + .get_link = bnx2x_get_link, + .get_coalesce = bnx2x_get_coalesce, + .get_ringparam = bnx2x_get_ringparam, + .set_ringparam = bnx2x_set_ringparam, + .get_sset_count = bnx2x_get_sset_count, + .get_strings = bnx2x_get_strings, + .get_ethtool_stats = bnx2x_get_ethtool_stats, + .get_rxnfc = bnx2x_get_rxnfc, + .set_rxnfc = bnx2x_set_rxnfc, + .get_rxfh_indir_size = bnx2x_get_rxfh_indir_size, + .get_rxfh_indir = bnx2x_get_rxfh_indir, + .set_rxfh_indir = bnx2x_set_rxfh_indir, + .get_channels = bnx2x_get_channels, + .set_channels = bnx2x_set_channels, +}; + +void bnx2x_set_ethtool_ops(struct bnx2x *bp, struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &bnx2x_ethtool_ops); + if (IS_PF(bp)) + SET_ETHTOOL_OPS(netdev, &bnx2x_ethtool_ops); + else /* vf */ + SET_ETHTOOL_OPS(netdev, &bnx2x_vf_ethtool_ops); } diff --git 
a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h index e5f808377c91..40f22c6794cd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h @@ -30,31 +30,31 @@ * IRO[138].m2) + ((sbId) * IRO[138].m3)) #define CSTORM_IGU_MODE_OFFSET (IRO[157].base) #define CSTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \ - (IRO[316].base + ((pfId) * IRO[316].m1)) -#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ (IRO[317].base + ((pfId) * IRO[317].m1)) +#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ + (IRO[318].base + ((pfId) * IRO[318].m1)) #define CSTORM_ISCSI_EQ_CONS_OFFSET(pfId, iscsiEqId) \ - (IRO[309].base + ((pfId) * IRO[309].m1) + ((iscsiEqId) * IRO[309].m2)) + (IRO[310].base + ((pfId) * IRO[310].m1) + ((iscsiEqId) * IRO[310].m2)) #define CSTORM_ISCSI_EQ_NEXT_EQE_ADDR_OFFSET(pfId, iscsiEqId) \ - (IRO[311].base + ((pfId) * IRO[311].m1) + ((iscsiEqId) * IRO[311].m2)) + (IRO[312].base + ((pfId) * IRO[312].m1) + ((iscsiEqId) * IRO[312].m2)) #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_OFFSET(pfId, iscsiEqId) \ - (IRO[310].base + ((pfId) * IRO[310].m1) + ((iscsiEqId) * IRO[310].m2)) + (IRO[311].base + ((pfId) * IRO[311].m1) + ((iscsiEqId) * IRO[311].m2)) #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_VALID_OFFSET(pfId, iscsiEqId) \ - (IRO[312].base + ((pfId) * IRO[312].m1) + ((iscsiEqId) * IRO[312].m2)) + (IRO[313].base + ((pfId) * IRO[313].m1) + ((iscsiEqId) * IRO[313].m2)) #define CSTORM_ISCSI_EQ_PROD_OFFSET(pfId, iscsiEqId) \ - (IRO[308].base + ((pfId) * IRO[308].m1) + ((iscsiEqId) * IRO[308].m2)) + (IRO[309].base + ((pfId) * IRO[309].m1) + ((iscsiEqId) * IRO[309].m2)) #define CSTORM_ISCSI_EQ_SB_INDEX_OFFSET(pfId, iscsiEqId) \ - (IRO[314].base + ((pfId) * IRO[314].m1) + ((iscsiEqId) * IRO[314].m2)) + (IRO[315].base + ((pfId) * IRO[315].m1) + ((iscsiEqId) * IRO[315].m2)) #define CSTORM_ISCSI_EQ_SB_NUM_OFFSET(pfId, iscsiEqId) \ - (IRO[313].base + ((pfId) * IRO[313].m1) + ((iscsiEqId) 
* IRO[313].m2)) + (IRO[314].base + ((pfId) * IRO[314].m1) + ((iscsiEqId) * IRO[314].m2)) #define CSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \ - (IRO[315].base + ((pfId) * IRO[315].m1)) + (IRO[316].base + ((pfId) * IRO[316].m1)) #define CSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \ - (IRO[307].base + ((pfId) * IRO[307].m1)) + (IRO[308].base + ((pfId) * IRO[308].m1)) #define CSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \ - (IRO[306].base + ((pfId) * IRO[306].m1)) + (IRO[307].base + ((pfId) * IRO[307].m1)) #define CSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \ - (IRO[305].base + ((pfId) * IRO[305].m1)) + (IRO[306].base + ((pfId) * IRO[306].m1)) #define CSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \ (IRO[151].base + ((funcId) * IRO[151].m1)) #define CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(pfId) \ @@ -114,7 +114,7 @@ #define TSTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \ (IRO[268].base + ((pfId) * IRO[268].m1)) #define TSTORM_ISCSI_TCP_LOCAL_ADV_WND_OFFSET(pfId) \ - (IRO[277].base + ((pfId) * IRO[277].m1)) + (IRO[278].base + ((pfId) * IRO[278].m1)) #define TSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \ (IRO[264].base + ((pfId) * IRO[264].m1)) #define TSTORM_ISCSI_TCP_VARS_LSB_LOCAL_MAC_ADDR_OFFSET(pfId) \ @@ -136,35 +136,32 @@ #define USTORM_ASSERT_LIST_INDEX_OFFSET (IRO[177].base) #define USTORM_ASSERT_LIST_OFFSET(assertListEntry) \ (IRO[176].base + ((assertListEntry) * IRO[176].m1)) -#define USTORM_CQE_PAGE_NEXT_OFFSET(portId, clientId) \ - (IRO[205].base + ((portId) * IRO[205].m1) + ((clientId) * \ - IRO[205].m2)) #define USTORM_ETH_PAUSE_ENABLED_OFFSET(portId) \ (IRO[183].base + ((portId) * IRO[183].m1)) #define USTORM_FCOE_EQ_PROD_OFFSET(pfId) \ - (IRO[318].base + ((pfId) * IRO[318].m1)) + (IRO[319].base + ((pfId) * IRO[319].m1)) #define USTORM_FUNC_EN_OFFSET(funcId) \ (IRO[178].base + ((funcId) * IRO[178].m1)) #define USTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \ - (IRO[282].base + ((pfId) * IRO[282].m1)) -#define USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ (IRO[283].base + ((pfId) * IRO[283].m1)) +#define 
USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ + (IRO[284].base + ((pfId) * IRO[284].m1)) #define USTORM_ISCSI_ERROR_BITMAP_OFFSET(pfId) \ - (IRO[287].base + ((pfId) * IRO[287].m1)) + (IRO[288].base + ((pfId) * IRO[288].m1)) #define USTORM_ISCSI_GLOBAL_BUF_PHYS_ADDR_OFFSET(pfId) \ - (IRO[284].base + ((pfId) * IRO[284].m1)) + (IRO[285].base + ((pfId) * IRO[285].m1)) #define USTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \ - (IRO[280].base + ((pfId) * IRO[280].m1)) + (IRO[281].base + ((pfId) * IRO[281].m1)) #define USTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \ - (IRO[279].base + ((pfId) * IRO[279].m1)) + (IRO[280].base + ((pfId) * IRO[280].m1)) #define USTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \ - (IRO[278].base + ((pfId) * IRO[278].m1)) + (IRO[279].base + ((pfId) * IRO[279].m1)) #define USTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \ - (IRO[281].base + ((pfId) * IRO[281].m1)) + (IRO[282].base + ((pfId) * IRO[282].m1)) #define USTORM_ISCSI_RQ_BUFFER_SIZE_OFFSET(pfId) \ - (IRO[285].base + ((pfId) * IRO[285].m1)) -#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \ (IRO[286].base + ((pfId) * IRO[286].m1)) +#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \ + (IRO[287].base + ((pfId) * IRO[287].m1)) #define USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(pfId) \ (IRO[182].base + ((pfId) * IRO[182].m1)) #define USTORM_RECORD_SLOW_PATH_OFFSET(funcId) \ @@ -190,39 +187,39 @@ #define XSTORM_FUNC_EN_OFFSET(funcId) \ (IRO[47].base + ((funcId) * IRO[47].m1)) #define XSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \ - (IRO[295].base + ((pfId) * IRO[295].m1)) + (IRO[296].base + ((pfId) * IRO[296].m1)) #define XSTORM_ISCSI_LOCAL_MAC_ADDR0_OFFSET(pfId) \ - (IRO[298].base + ((pfId) * IRO[298].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \ (IRO[299].base + ((pfId) * IRO[299].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \ (IRO[300].base + ((pfId) * IRO[300].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \ (IRO[301].base + 
((pfId) * IRO[301].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \ (IRO[302].base + ((pfId) * IRO[302].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \ (IRO[303].base + ((pfId) * IRO[303].m1)) -#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \ (IRO[304].base + ((pfId) * IRO[304].m1)) +#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \ + (IRO[305].base + ((pfId) * IRO[305].m1)) #define XSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \ - (IRO[294].base + ((pfId) * IRO[294].m1)) + (IRO[295].base + ((pfId) * IRO[295].m1)) #define XSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \ - (IRO[293].base + ((pfId) * IRO[293].m1)) + (IRO[294].base + ((pfId) * IRO[294].m1)) #define XSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \ - (IRO[292].base + ((pfId) * IRO[292].m1)) + (IRO[293].base + ((pfId) * IRO[293].m1)) #define XSTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \ - (IRO[297].base + ((pfId) * IRO[297].m1)) + (IRO[298].base + ((pfId) * IRO[298].m1)) #define XSTORM_ISCSI_SQ_SIZE_OFFSET(pfId) \ - (IRO[296].base + ((pfId) * IRO[296].m1)) + (IRO[297].base + ((pfId) * IRO[297].m1)) #define XSTORM_ISCSI_TCP_VARS_ADV_WND_SCL_OFFSET(pfId) \ - (IRO[291].base + ((pfId) * IRO[291].m1)) + (IRO[292].base + ((pfId) * IRO[292].m1)) #define XSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \ - (IRO[290].base + ((pfId) * IRO[290].m1)) + (IRO[291].base + ((pfId) * IRO[291].m1)) #define XSTORM_ISCSI_TCP_VARS_TOS_OFFSET(pfId) \ - (IRO[289].base + ((pfId) * IRO[289].m1)) + (IRO[290].base + ((pfId) * IRO[290].m1)) #define XSTORM_ISCSI_TCP_VARS_TTL_OFFSET(pfId) \ - (IRO[288].base + ((pfId) * IRO[288].m1)) + (IRO[289].base + ((pfId) * IRO[289].m1)) #define XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(pfId) \ (IRO[44].base + ((pfId) * IRO[44].m1)) #define XSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index 037860ecc343..12f00a40cdf0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -114,6 +114,10 @@ struct license_key { #define EPIO_CFG_EPIO30 0x0000001f #define EPIO_CFG_EPIO31 0x00000020 +struct mac_addr { + u32 upper; + u32 lower; +}; struct shared_hw_cfg { /* NVRAM Offset */ /* Up to 16 bytes of NULL-terminated string */ @@ -508,7 +512,22 @@ struct port_hw_cfg { /* port 0: 0x12c port 1: 0x2bc */ #define PORT_HW_CFG_PAUSE_ON_HOST_RING_DISABLED 0x00000000 #define PORT_HW_CFG_PAUSE_ON_HOST_RING_ENABLED 0x00000001 - u32 reserved0[6]; /* 0x178 */ + /* SFP+ Tx Equalization: NIC recommended and tested value is 0xBEB2 + * LOM recommended and tested value is 0xBEB2. Using a different + * value means using a value not tested by BRCM + */ + u32 sfi_tap_values; /* 0x178 */ + #define PORT_HW_CFG_TX_EQUALIZATION_MASK 0x0000FFFF + #define PORT_HW_CFG_TX_EQUALIZATION_SHIFT 0 + + /* SFP+ Tx driver broadcast IDRIVER: NIC recommended and tested + * value is 0x2. LOM recommended and tested value is 0x2. 
Using a + * different value means using a value not tested by BRCM + */ + #define PORT_HW_CFG_TX_DRV_BROADCAST_MASK 0x000F0000 + #define PORT_HW_CFG_TX_DRV_BROADCAST_SHIFT 16 + + u32 reserved0[5]; /* 0x17c */ u32 aeu_int_mask; /* 0x190 */ @@ -2821,8 +2840,8 @@ struct afex_stats { #define BCM_5710_FW_MAJOR_VERSION 7 #define BCM_5710_FW_MINOR_VERSION 8 -#define BCM_5710_FW_REVISION_VERSION 2 -#define BCM_5710_FW_ENGINEERING_VERSION 0 +#define BCM_5710_FW_REVISION_VERSION 17 +#define BCM_5710_FW_ENGINEERING_VERSION 0 #define BCM_5710_FW_COMPILE_FLAGS 1 @@ -3513,11 +3532,14 @@ struct client_init_tx_data { #define CLIENT_INIT_TX_DATA_BCAST_ACCEPT_ALL_SHIFT 2 #define CLIENT_INIT_TX_DATA_ACCEPT_ANY_VLAN (0x1<<3) #define CLIENT_INIT_TX_DATA_ACCEPT_ANY_VLAN_SHIFT 3 -#define CLIENT_INIT_TX_DATA_RESERVED1 (0xFFF<<4) -#define CLIENT_INIT_TX_DATA_RESERVED1_SHIFT 4 +#define CLIENT_INIT_TX_DATA_RESERVED0 (0xFFF<<4) +#define CLIENT_INIT_TX_DATA_RESERVED0_SHIFT 4 u8 default_vlan_flg; u8 force_default_pri_flg; - __le32 reserved3; + u8 tunnel_lso_inc_ip_id; + u8 refuse_outband_vlan_flg; + u8 tunnel_non_lso_pcsum_location; + u8 reserved1; }; /* @@ -3551,6 +3573,11 @@ struct client_update_ramrod_data { __le16 silent_vlan_mask; u8 silent_vlan_removal_flg; u8 silent_vlan_change_flg; + u8 refuse_outband_vlan_flg; + u8 refuse_outband_vlan_change_flg; + u8 tx_switching_flg; + u8 tx_switching_change_flg; + __le32 reserved1; __le32 echo; }; @@ -3620,7 +3647,8 @@ struct eth_classify_header { */ struct eth_classify_mac_cmd { struct eth_classify_cmd_header header; - __le32 reserved0; + __le16 reserved0; + __le16 inner_mac; __le16 mac_lsb; __le16 mac_mid; __le16 mac_msb; @@ -3633,7 +3661,8 @@ struct eth_classify_mac_cmd { */ struct eth_classify_pair_cmd { struct eth_classify_cmd_header header; - __le32 reserved0; + __le16 reserved0; + __le16 inner_mac; __le16 mac_lsb; __le16 mac_mid; __le16 mac_msb; @@ -3855,8 +3884,68 @@ struct eth_halt_ramrod_data { /* - * Command for setting multicast 
classification for a client + * destination and source mac address. + */ +struct eth_mac_addresses { +#if defined(__BIG_ENDIAN) + __le16 dst_mid; + __le16 dst_lo; +#elif defined(__LITTLE_ENDIAN) + __le16 dst_lo; + __le16 dst_mid; +#endif +#if defined(__BIG_ENDIAN) + __le16 src_lo; + __le16 dst_hi; +#elif defined(__LITTLE_ENDIAN) + __le16 dst_hi; + __le16 src_lo; +#endif +#if defined(__BIG_ENDIAN) + __le16 src_hi; + __le16 src_mid; +#elif defined(__LITTLE_ENDIAN) + __le16 src_mid; + __le16 src_hi; +#endif +}; + +/* tunneling related data */ +struct eth_tunnel_data { +#if defined(__BIG_ENDIAN) + __le16 dst_mid; + __le16 dst_lo; +#elif defined(__LITTLE_ENDIAN) + __le16 dst_lo; + __le16 dst_mid; +#endif +#if defined(__BIG_ENDIAN) + __le16 reserved0; + __le16 dst_hi; +#elif defined(__LITTLE_ENDIAN) + __le16 dst_hi; + __le16 reserved0; +#endif +#if defined(__BIG_ENDIAN) + u8 reserved1; + u8 ip_hdr_start_inner_w; + __le16 pseudo_csum; +#elif defined(__LITTLE_ENDIAN) + __le16 pseudo_csum; + u8 ip_hdr_start_inner_w; + u8 reserved1; +#endif +}; + +/* union for mac addresses and for tunneling data. + * considered as tunneling data only if (tunnel_exist == 1). 
*/ +union eth_mac_addr_or_tunnel_data { + struct eth_mac_addresses mac_addr; + struct eth_tunnel_data tunnel_data; +}; + +/*Command for setting multicast classification for a client */ struct eth_multicast_rules_cmd { u8 cmd_general_data; #define ETH_MULTICAST_RULES_CMD_RX_CMD (0x1<<0) @@ -3874,7 +3963,6 @@ struct eth_multicast_rules_cmd { struct regpair reserved3; }; - /* * parameters for multicast classification ramrod */ @@ -3883,7 +3971,6 @@ struct eth_multicast_rules_ramrod_data { struct eth_multicast_rules_cmd rules[MULTICAST_RULES_COUNT]; }; - /* * Place holder for ramrods protocol specific data */ @@ -3947,11 +4034,14 @@ struct eth_rss_update_ramrod_data { #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_TCP_CAPABILITY_SHIFT 4 #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_UDP_CAPABILITY (0x1<<5) #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_UDP_CAPABILITY_SHIFT 5 +#define ETH_RSS_UPDATE_RAMROD_DATA_EN_5_TUPLE_CAPABILITY (0x1<<6) +#define ETH_RSS_UPDATE_RAMROD_DATA_EN_5_TUPLE_CAPABILITY_SHIFT 6 #define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY (0x1<<7) #define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY_SHIFT 7 u8 rss_result_mask; u8 rss_mode; - __le32 __reserved2; + __le16 udp_4tuple_dst_port_mask; + __le16 udp_4tuple_dst_port_value; u8 indirection_table[T_ETH_INDIRECTION_TABLE_SIZE]; __le32 rss_key[T_ETH_RSS_KEY]; __le32 echo; @@ -4115,6 +4205,23 @@ enum eth_tpa_update_command { MAX_ETH_TPA_UPDATE_COMMAND }; +/* In case of LSO over IPv4 tunnel, whether to increment + * IP ID on external IP header or internal IP header + */ +enum eth_tunnel_lso_inc_ip_id { + EXT_HEADER, + INT_HEADER, + MAX_ETH_TUNNEL_LSO_INC_IP_ID +}; + +/* In case tunnel exist and L4 checksum offload, + * the pseudo checksum location, on packet or on BD. 
+ */ +enum eth_tunnel_non_lso_pcsum_location { + PCSUM_ON_PKT, + PCSUM_ON_BD, + MAX_ETH_TUNNEL_NON_LSO_PCSUM_LOCATION +}; /* * Tx regular BD structure @@ -4166,8 +4273,8 @@ struct eth_tx_start_bd { #define ETH_TX_START_BD_FORCE_VLAN_MODE_SHIFT 4 #define ETH_TX_START_BD_PARSE_NBDS (0x3<<5) #define ETH_TX_START_BD_PARSE_NBDS_SHIFT 5 -#define ETH_TX_START_BD_RESREVED (0x1<<7) -#define ETH_TX_START_BD_RESREVED_SHIFT 7 +#define ETH_TX_START_BD_TUNNEL_EXIST (0x1<<7) +#define ETH_TX_START_BD_TUNNEL_EXIST_SHIFT 7 }; /* @@ -4216,15 +4323,10 @@ struct eth_tx_parse_bd_e1x { * Tx parsing BD structure for ETH E2 */ struct eth_tx_parse_bd_e2 { - __le16 dst_mac_addr_lo; - __le16 dst_mac_addr_mid; - __le16 dst_mac_addr_hi; - __le16 src_mac_addr_lo; - __le16 src_mac_addr_mid; - __le16 src_mac_addr_hi; + union eth_mac_addr_or_tunnel_data data; __le32 parsing_data; -#define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W (0x7FF<<0) -#define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT 0 +#define ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W (0x7FF<<0) +#define ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT 0 #define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW (0xF<<11) #define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT 11 #define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR (0x1<<15) @@ -4236,8 +4338,51 @@ struct eth_tx_parse_bd_e2 { }; /* - * The last BD in the BD memory will hold a pointer to the next BD memory + * Tx 2nd parsing BD structure for ETH packet */ +struct eth_tx_parse_2nd_bd { + __le16 global_data; +#define ETH_TX_PARSE_2ND_BD_IP_HDR_START_OUTER_W (0xF<<0) +#define ETH_TX_PARSE_2ND_BD_IP_HDR_START_OUTER_W_SHIFT 0 +#define ETH_TX_PARSE_2ND_BD_IP_HDR_TYPE_OUTER (0x1<<4) +#define ETH_TX_PARSE_2ND_BD_IP_HDR_TYPE_OUTER_SHIFT 4 +#define ETH_TX_PARSE_2ND_BD_LLC_SNAP_EN (0x1<<5) +#define ETH_TX_PARSE_2ND_BD_LLC_SNAP_EN_SHIFT 5 +#define ETH_TX_PARSE_2ND_BD_NS_FLG (0x1<<6) +#define ETH_TX_PARSE_2ND_BD_NS_FLG_SHIFT 6 +#define ETH_TX_PARSE_2ND_BD_TUNNEL_UDP_EXIST (0x1<<7) +#define 
ETH_TX_PARSE_2ND_BD_TUNNEL_UDP_EXIST_SHIFT 7 +#define ETH_TX_PARSE_2ND_BD_IP_HDR_LEN_OUTER_W (0x1F<<8) +#define ETH_TX_PARSE_2ND_BD_IP_HDR_LEN_OUTER_W_SHIFT 8 +#define ETH_TX_PARSE_2ND_BD_RESERVED0 (0x7<<13) +#define ETH_TX_PARSE_2ND_BD_RESERVED0_SHIFT 13 + __le16 reserved1; + u8 tcp_flags; +#define ETH_TX_PARSE_2ND_BD_FIN_FLG (0x1<<0) +#define ETH_TX_PARSE_2ND_BD_FIN_FLG_SHIFT 0 +#define ETH_TX_PARSE_2ND_BD_SYN_FLG (0x1<<1) +#define ETH_TX_PARSE_2ND_BD_SYN_FLG_SHIFT 1 +#define ETH_TX_PARSE_2ND_BD_RST_FLG (0x1<<2) +#define ETH_TX_PARSE_2ND_BD_RST_FLG_SHIFT 2 +#define ETH_TX_PARSE_2ND_BD_PSH_FLG (0x1<<3) +#define ETH_TX_PARSE_2ND_BD_PSH_FLG_SHIFT 3 +#define ETH_TX_PARSE_2ND_BD_ACK_FLG (0x1<<4) +#define ETH_TX_PARSE_2ND_BD_ACK_FLG_SHIFT 4 +#define ETH_TX_PARSE_2ND_BD_URG_FLG (0x1<<5) +#define ETH_TX_PARSE_2ND_BD_URG_FLG_SHIFT 5 +#define ETH_TX_PARSE_2ND_BD_ECE_FLG (0x1<<6) +#define ETH_TX_PARSE_2ND_BD_ECE_FLG_SHIFT 6 +#define ETH_TX_PARSE_2ND_BD_CWR_FLG (0x1<<7) +#define ETH_TX_PARSE_2ND_BD_CWR_FLG_SHIFT 7 + u8 reserved2; + u8 tunnel_udp_hdr_start_w; + u8 fw_ip_hdr_to_payload_w; + __le16 fw_ip_csum_wo_len_flags_frag; + __le16 hw_ip_id; + __le32 tcp_send_seq; +}; + +/* The last BD in the BD memory will hold a pointer to the next BD memory */ struct eth_tx_next_bd { __le32 addr_lo; __le32 addr_hi; @@ -4252,6 +4397,7 @@ union eth_tx_bd_types { struct eth_tx_bd reg_bd; struct eth_tx_parse_bd_e1x parse_bd_e1x; struct eth_tx_parse_bd_e2 parse_bd_e2; + struct eth_tx_parse_2nd_bd parse_2nd_bd; struct eth_tx_next_bd next_bd; }; @@ -4663,10 +4809,10 @@ enum common_spqe_cmd_id { RAMROD_CMD_ID_COMMON_STOP_TRAFFIC, RAMROD_CMD_ID_COMMON_START_TRAFFIC, RAMROD_CMD_ID_COMMON_AFEX_VIF_LISTS, + RAMROD_CMD_ID_COMMON_SET_TIMESYNC, MAX_COMMON_SPQE_CMD_ID }; - /* * Per-protocol connection types */ @@ -4863,7 +5009,7 @@ struct vf_flr_event_data { */ struct malicious_vf_event_data { u8 vf_id; - u8 reserved0; + u8 err_id; u16 reserved1; u32 reserved2; u32 reserved3; @@ -4969,10 +5115,10 @@ 
enum event_ring_opcode { EVENT_RING_OPCODE_CLASSIFICATION_RULES, EVENT_RING_OPCODE_FILTERS_RULES, EVENT_RING_OPCODE_MULTICAST_RULES, + EVENT_RING_OPCODE_SET_TIMESYNC, MAX_EVENT_RING_OPCODE }; - /* * Modes for fairness algorithm */ @@ -5010,14 +5156,18 @@ struct flow_control_configuration { */ struct function_start_data { u8 function_mode; - u8 reserved; + u8 allow_npar_tx_switching; __le16 sd_vlan_tag; __le16 vif_id; u8 path_id; u8 network_cos_mode; + u8 dmae_cmd_id; + u8 gre_tunnel_mode; + u8 gre_tunnel_rss; + u8 nvgre_clss_en; + __le16 reserved1[2]; }; - struct function_update_data { u8 vif_id_change_flg; u8 afex_default_vlan_change_flg; @@ -5027,14 +5177,19 @@ struct function_update_data { __le16 afex_default_vlan; u8 allowed_priorities; u8 network_cos_mode; + u8 lb_mode_en_change_flg; u8 lb_mode_en; u8 tx_switch_suspend_change_flg; u8 tx_switch_suspend; u8 echo; - __le16 reserved1; + u8 reserved1; + u8 update_gre_cfg_flg; + u8 gre_tunnel_mode; + u8 gre_tunnel_rss; + u8 nvgre_clss_en; + u32 reserved3; }; - /* * FW version stored in the Xstorm RAM */ @@ -5061,6 +5216,22 @@ struct fw_version { #define __FW_VERSION_RESERVED_SHIFT 4 }; +/* GRE RSS Mode */ +enum gre_rss_mode { + GRE_OUTER_HEADERS_RSS, + GRE_INNER_HEADERS_RSS, + NVGRE_KEY_ENTROPY_RSS, + MAX_GRE_RSS_MODE +}; + +/* GRE Tunnel Mode */ +enum gre_tunnel_type { + NO_GRE_TUNNEL, + NVGRE_TUNNEL, + L2GRE_TUNNEL, + IPGRE_TUNNEL, + MAX_GRE_TUNNEL_TYPE +}; /* * Dynamic Host-Coalescing - Driver(host) counters @@ -5224,6 +5395,26 @@ enum ip_ver { MAX_IP_VER }; +/* + * Malicious VF error ID + */ +enum malicious_vf_error_id { + VF_PF_CHANNEL_NOT_READY, + ETH_ILLEGAL_BD_LENGTHS, + ETH_PACKET_TOO_SHORT, + ETH_PAYLOAD_TOO_BIG, + ETH_ILLEGAL_ETH_TYPE, + ETH_ILLEGAL_LSO_HDR_LEN, + ETH_TOO_MANY_BDS, + ETH_ZERO_HDR_NBDS, + ETH_START_BD_NOT_SET, + ETH_ILLEGAL_PARSE_NBDS, + ETH_IPV6_AND_CHECKSUM, + ETH_VLAN_FLG_INCORRECT, + ETH_ILLEGAL_LSO_MSS, + ETH_TUNNEL_NOT_SUPPORTED, + MAX_MALICIOUS_VF_ERROR_ID +}; /* * Multi-function 
modes @@ -5368,7 +5559,6 @@ struct protocol_common_spe { union protocol_common_specific_data data; }; - /* * The send queue element */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 31c5787970db..40f58d73de78 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -27,6 +27,10 @@ #include "bnx2x.h" #include "bnx2x_cmn.h" +typedef int (*read_sfp_module_eeprom_func_p)(struct bnx2x_phy *phy, + struct link_params *params, + u8 dev_addr, u16 addr, u8 byte_cnt, + u8 *o_buf, u8); /********************************************************/ #define ETH_HLEN 14 /* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */ @@ -152,6 +156,7 @@ #define SFP_EEPROM_CON_TYPE_ADDR 0x2 #define SFP_EEPROM_CON_TYPE_VAL_LC 0x7 #define SFP_EEPROM_CON_TYPE_VAL_COPPER 0x21 + #define SFP_EEPROM_CON_TYPE_VAL_RJ45 0x22 #define SFP_EEPROM_COMP_CODE_ADDR 0x3 @@ -3127,11 +3132,6 @@ static int bnx2x_bsc_read(struct link_params *params, int rc = 0; struct bnx2x *bp = params->bp; - if ((sl_devid != 0xa0) && (sl_devid != 0xa2)) { - DP(NETIF_MSG_LINK, "invalid sl_devid 0x%x\n", sl_devid); - return -EINVAL; - } - if (xfer_cnt > 16) { DP(NETIF_MSG_LINK, "invalid xfer_cnt %d. Max is 16 bytes\n", xfer_cnt); @@ -3629,6 +3629,16 @@ static u8 bnx2x_ext_phy_resolve_fc(struct bnx2x_phy *phy, * init configuration, and set/clear SGMII flag. Internal * phy init is done purely in phy_init stage. 
*/ +#define WC_TX_DRIVER(post2, idriver, ipre) \ + ((post2 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | \ + (idriver << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | \ + (ipre << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET)) + +#define WC_TX_FIR(post, main, pre) \ + ((post << MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET) | \ + (main << MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET) | \ + (pre << MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET)) + static void bnx2x_warpcore_enable_AN_KR2(struct bnx2x_phy *phy, struct link_params *params, struct link_vars *vars) @@ -3728,7 +3738,7 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, if (((vars->line_speed == SPEED_AUTO_NEG) && (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) || (vars->line_speed == SPEED_1000)) { - u32 addr = MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2; + u16 addr = MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2; an_adv |= (1<<5); /* Enable CL37 1G Parallel Detect */ @@ -3753,20 +3763,13 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, /* Set Transmit PMD settings */ lane = bnx2x_get_warpcore_lane(phy, params); bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, - MDIO_WC_REG_TX0_TX_DRIVER + 0x10*lane, - ((0x02 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | - (0x06 << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | - (0x09 << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET))); + MDIO_WC_REG_TX0_TX_DRIVER + 0x10*lane, + WC_TX_DRIVER(0x02, 0x06, 0x09)); /* Configure the next lane if dual mode */ if (phy->flags & FLAGS_WC_DUAL_MODE) bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_TX0_TX_DRIVER + 0x10*(lane+1), - ((0x02 << - MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | - (0x06 << - MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | - (0x09 << - MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET))); + WC_TX_DRIVER(0x02, 0x06, 0x09)); bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_CL72_USERB0_CL72_OS_DEF_CTRL, 0x03f0); @@ -3909,6 +3912,8 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy, { 
struct bnx2x *bp = params->bp; u16 misc1_val, tap_val, tx_driver_val, lane, val; + u32 cfg_tap_val, tx_drv_brdct, tx_equal; + /* Hold rxSeqStart */ bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_DSC2B0_DSC_MISC_CTRL0, 0x8000); @@ -3952,23 +3957,33 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy, if (is_xfi) { misc1_val |= 0x5; - tap_val = ((0x08 << MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET) | - (0x37 << MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET) | - (0x00 << MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET)); - tx_driver_val = - ((0x00 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | - (0x02 << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | - (0x03 << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET)); - + tap_val = WC_TX_FIR(0x08, 0x37, 0x00); + tx_driver_val = WC_TX_DRIVER(0x00, 0x02, 0x03); } else { + cfg_tap_val = REG_RD(bp, params->shmem_base + + offsetof(struct shmem_region, dev_info. + port_hw_config[params->port]. + sfi_tap_values)); + + tx_equal = cfg_tap_val & PORT_HW_CFG_TX_EQUALIZATION_MASK; + + tx_drv_brdct = (cfg_tap_val & + PORT_HW_CFG_TX_DRV_BROADCAST_MASK) >> + PORT_HW_CFG_TX_DRV_BROADCAST_SHIFT; + misc1_val |= 0x9; - tap_val = ((0x0f << MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET) | - (0x2b << MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET) | - (0x02 << MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET)); - tx_driver_val = - ((0x03 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | - (0x02 << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | - (0x06 << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET)); + + /* TAP values are controlled by nvram, if value there isn't 0 */ + if (tx_equal) + tap_val = (u16)tx_equal; + else + tap_val = WC_TX_FIR(0x0f, 0x2b, 0x02); + + if (tx_drv_brdct) + tx_driver_val = WC_TX_DRIVER(0x03, (u16)tx_drv_brdct, + 0x06); + else + tx_driver_val = WC_TX_DRIVER(0x03, 0x02, 0x06); } bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_MISC1, misc1_val); @@ -4105,15 +4120,11 @@ static void bnx2x_warpcore_set_20G_DXGXS(struct bnx2x *bp, /* 
Set Transmit PMD settings */ bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_TX_FIR_TAP, - ((0x12 << MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET) | - (0x2d << MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET) | - (0x00 << MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET) | - MDIO_WC_REG_TX_FIR_TAP_ENABLE)); + (WC_TX_FIR(0x12, 0x2d, 0x00) | + MDIO_WC_REG_TX_FIR_TAP_ENABLE)); bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, - MDIO_WC_REG_TX0_TX_DRIVER + 0x10*lane, - ((0x02 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | - (0x02 << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | - (0x02 << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET))); + MDIO_WC_REG_TX0_TX_DRIVER + 0x10*lane, + WC_TX_DRIVER(0x02, 0x02, 0x02)); } static void bnx2x_warpcore_set_sgmii_speed(struct bnx2x_phy *phy, @@ -4750,8 +4761,8 @@ void bnx2x_link_status_update(struct link_params *params, port_mb[port].link_status)); /* Force link UP in non LOOPBACK_EXT loopback mode(s) */ - if (bp->link_params.loopback_mode != LOOPBACK_NONE && - bp->link_params.loopback_mode != LOOPBACK_EXT) + if (params->loopback_mode != LOOPBACK_NONE && + params->loopback_mode != LOOPBACK_EXT) vars->link_status |= LINK_STATUS_LINK_UP; if (bnx2x_eee_has_cap(params)) @@ -7758,7 +7769,8 @@ static void bnx2x_sfp_set_transmitter(struct link_params *params, static int bnx2x_8726_read_sfp_module_eeprom(struct bnx2x_phy *phy, struct link_params *params, - u16 addr, u8 byte_cnt, u8 *o_buf) + u8 dev_addr, u16 addr, u8 byte_cnt, + u8 *o_buf, u8 is_init) { struct bnx2x *bp = params->bp; u16 val = 0; @@ -7771,7 +7783,7 @@ static int bnx2x_8726_read_sfp_module_eeprom(struct bnx2x_phy *phy, /* Set the read command byte count */ bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_SFP_TWO_WIRE_BYTE_CNT, - (byte_cnt | 0xa000)); + (byte_cnt | (dev_addr << 8))); /* Set the read command address */ bnx2x_cl45_write(bp, phy, @@ -7845,6 +7857,7 @@ static void bnx2x_warpcore_power_module(struct link_params *params, } static int bnx2x_warpcore_read_sfp_module_eeprom(struct 
bnx2x_phy *phy, struct link_params *params, + u8 dev_addr, u16 addr, u8 byte_cnt, u8 *o_buf, u8 is_init) { @@ -7869,7 +7882,7 @@ static int bnx2x_warpcore_read_sfp_module_eeprom(struct bnx2x_phy *phy, usleep_range(1000, 2000); bnx2x_warpcore_power_module(params, 1); } - rc = bnx2x_bsc_read(params, phy, 0xa0, addr32, 0, byte_cnt, + rc = bnx2x_bsc_read(params, phy, dev_addr, addr32, 0, byte_cnt, data_array); } while ((rc != 0) && (++cnt < I2C_WA_RETRY_CNT)); @@ -7885,7 +7898,8 @@ static int bnx2x_warpcore_read_sfp_module_eeprom(struct bnx2x_phy *phy, static int bnx2x_8727_read_sfp_module_eeprom(struct bnx2x_phy *phy, struct link_params *params, - u16 addr, u8 byte_cnt, u8 *o_buf) + u8 dev_addr, u16 addr, u8 byte_cnt, + u8 *o_buf, u8 is_init) { struct bnx2x *bp = params->bp; u16 val, i; @@ -7896,6 +7910,15 @@ static int bnx2x_8727_read_sfp_module_eeprom(struct bnx2x_phy *phy, return -EINVAL; } + /* Set 2-wire transfer rate of SFP+ module EEPROM + * to 100Khz since some DACs(direct attached cables) do + * not work at 400Khz. 
+ */ + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, + MDIO_PMA_REG_8727_TWO_WIRE_SLAVE_ADDR, + ((dev_addr << 8) | 1)); + /* Need to read from 1.8000 to clear it */ bnx2x_cl45_read(bp, phy, MDIO_PMA_DEVAD, @@ -7968,26 +7991,44 @@ static int bnx2x_8727_read_sfp_module_eeprom(struct bnx2x_phy *phy, return -EINVAL; } - int bnx2x_read_sfp_module_eeprom(struct bnx2x_phy *phy, - struct link_params *params, u16 addr, - u8 byte_cnt, u8 *o_buf) + struct link_params *params, u8 dev_addr, + u16 addr, u16 byte_cnt, u8 *o_buf) { - int rc = -EOPNOTSUPP; + int rc = 0; + struct bnx2x *bp = params->bp; + u8 xfer_size; + u8 *user_data = o_buf; + read_sfp_module_eeprom_func_p read_func; + + if ((dev_addr != 0xa0) && (dev_addr != 0xa2)) { + DP(NETIF_MSG_LINK, "invalid dev_addr 0x%x\n", dev_addr); + return -EINVAL; + } + switch (phy->type) { case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8726: - rc = bnx2x_8726_read_sfp_module_eeprom(phy, params, addr, - byte_cnt, o_buf); - break; + read_func = bnx2x_8726_read_sfp_module_eeprom; + break; case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727: case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8722: - rc = bnx2x_8727_read_sfp_module_eeprom(phy, params, addr, - byte_cnt, o_buf); - break; + read_func = bnx2x_8727_read_sfp_module_eeprom; + break; case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT: - rc = bnx2x_warpcore_read_sfp_module_eeprom(phy, params, addr, - byte_cnt, o_buf, 0); - break; + read_func = bnx2x_warpcore_read_sfp_module_eeprom; + break; + default: + return -EOPNOTSUPP; + } + + while (!rc && (byte_cnt > 0)) { + xfer_size = (byte_cnt > SFP_EEPROM_PAGE_SIZE) ? 
+ SFP_EEPROM_PAGE_SIZE : byte_cnt; + rc = read_func(phy, params, dev_addr, addr, xfer_size, + user_data, 0); + byte_cnt -= xfer_size; + user_data += xfer_size; + addr += xfer_size; } return rc; } @@ -8004,6 +8045,7 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy, /* First check for copper cable */ if (bnx2x_read_sfp_module_eeprom(phy, params, + I2C_DEV_ADDR_A0, SFP_EEPROM_CON_TYPE_ADDR, 2, (u8 *)val) != 0) { @@ -8021,6 +8063,7 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy, */ if (bnx2x_read_sfp_module_eeprom(phy, params, + I2C_DEV_ADDR_A0, SFP_EEPROM_FC_TX_TECH_ADDR, 1, &copper_module_type) != 0) { @@ -8049,20 +8092,24 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy, break; } case SFP_EEPROM_CON_TYPE_VAL_LC: + case SFP_EEPROM_CON_TYPE_VAL_RJ45: check_limiting_mode = 1; if ((val[1] & (SFP_EEPROM_COMP_CODE_SR_MASK | SFP_EEPROM_COMP_CODE_LR_MASK | SFP_EEPROM_COMP_CODE_LRM_MASK)) == 0) { - DP(NETIF_MSG_LINK, "1G Optic module detected\n"); + DP(NETIF_MSG_LINK, "1G SFP module detected\n"); gport = params->port; phy->media_type = ETH_PHY_SFP_1G_FIBER; - phy->req_line_speed = SPEED_1000; - if (!CHIP_IS_E1x(bp)) - gport = BP_PATH(bp) + (params->port << 1); - netdev_err(bp->dev, "Warning: Link speed was forced to 1000Mbps." - " Current SFP module in port %d is not" - " compliant with 10G Ethernet\n", - gport); + if (phy->req_line_speed != SPEED_1000) { + phy->req_line_speed = SPEED_1000; + if (!CHIP_IS_E1x(bp)) { + gport = BP_PATH(bp) + + (params->port << 1); + } + netdev_err(bp->dev, + "Warning: Link speed was forced to 1000Mbps. 
Current SFP module in port %d is not compliant with 10G Ethernet\n", + gport); + } } else { int idx, cfg_idx = 0; DP(NETIF_MSG_LINK, "10G Optic module detected\n"); @@ -8101,6 +8148,7 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy, u8 options[SFP_EEPROM_OPTIONS_SIZE]; if (bnx2x_read_sfp_module_eeprom(phy, params, + I2C_DEV_ADDR_A0, SFP_EEPROM_OPTIONS_ADDR, SFP_EEPROM_OPTIONS_SIZE, options) != 0) { @@ -8167,6 +8215,7 @@ static int bnx2x_verify_sfp_module(struct bnx2x_phy *phy, /* Format the warning message */ if (bnx2x_read_sfp_module_eeprom(phy, params, + I2C_DEV_ADDR_A0, SFP_EEPROM_VENDOR_NAME_ADDR, SFP_EEPROM_VENDOR_NAME_SIZE, (u8 *)vendor_name)) @@ -8175,6 +8224,7 @@ static int bnx2x_verify_sfp_module(struct bnx2x_phy *phy, vendor_name[SFP_EEPROM_VENDOR_NAME_SIZE] = '\0'; if (bnx2x_read_sfp_module_eeprom(phy, params, + I2C_DEV_ADDR_A0, SFP_EEPROM_PART_NO_ADDR, SFP_EEPROM_PART_NO_SIZE, (u8 *)vendor_pn)) @@ -8205,12 +8255,13 @@ static int bnx2x_wait_for_sfp_module_initialized(struct bnx2x_phy *phy, for (timeout = 0; timeout < 60; timeout++) { if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) - rc = bnx2x_warpcore_read_sfp_module_eeprom(phy, - params, 1, - 1, &val, 1); + rc = bnx2x_warpcore_read_sfp_module_eeprom( + phy, params, I2C_DEV_ADDR_A0, 1, 1, &val, + 1); else - rc = bnx2x_read_sfp_module_eeprom(phy, params, 1, 1, - &val); + rc = bnx2x_read_sfp_module_eeprom(phy, params, + I2C_DEV_ADDR_A0, + 1, 1, &val); if (rc == 0) { DP(NETIF_MSG_LINK, "SFP+ module initialization took %d ms\n", @@ -8219,7 +8270,8 @@ static int bnx2x_wait_for_sfp_module_initialized(struct bnx2x_phy *phy, } usleep_range(5000, 10000); } - rc = bnx2x_read_sfp_module_eeprom(phy, params, 1, 1, &val); + rc = bnx2x_read_sfp_module_eeprom(phy, params, I2C_DEV_ADDR_A0, + 1, 1, &val); return rc; } @@ -8376,15 +8428,6 @@ static void bnx2x_8727_specific_func(struct bnx2x_phy *phy, bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL, val); - - /* Set 2-wire transfer 
rate of SFP+ module EEPROM - * to 100Khz since some DACs(direct attached cables) do - * not work at 400Khz. - */ - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, - MDIO_PMA_REG_8727_TWO_WIRE_SLAVE_ADDR, - 0xa001); break; default: DP(NETIF_MSG_LINK, "Function 0x%x not supported by 8727\n", @@ -8647,7 +8690,9 @@ void bnx2x_handle_module_detect_int(struct link_params *params) MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL5_MISC6, &rx_tx_in_reset); - if (!rx_tx_in_reset) { + if ((!rx_tx_in_reset) && + (params->link_flags & + PHY_INITIALIZED)) { bnx2x_warpcore_reset_lane(bp, phy, 1); bnx2x_warpcore_config_sfi(phy, params); bnx2x_warpcore_reset_lane(bp, phy, 0); @@ -9526,8 +9571,7 @@ static void bnx2x_save_848xx_spirom_version(struct bnx2x_phy *phy, } else { /* For 32-bit registers in 848xx, access via MDIO2ARM i/f. */ /* (1) set reg 0xc200_0014(SPI_BRIDGE_CTRL_2) to 0x03000000 */ - for (i = 0; i < ARRAY_SIZE(reg_set); - i++) + for (i = 0; i < ARRAY_SIZE(reg_set); i++) bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg, reg_set[i].val); @@ -10279,7 +10323,8 @@ static u8 bnx2x_848xx_read_status(struct bnx2x_phy *phy, LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE; /* Determine if EEE was negotiated */ - if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) + if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) || + (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) bnx2x_eee_an_resolve(phy, params, vars); } @@ -12240,7 +12285,7 @@ static void bnx2x_init_bmac_loopback(struct link_params *params, bnx2x_xgxs_deassert(params); - /* set bmac loopback */ + /* Set bmac loopback */ bnx2x_bmac_enable(params, vars, 1, 1); REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); @@ -12259,7 +12304,7 @@ static void bnx2x_init_emac_loopback(struct link_params *params, vars->phy_flags = PHY_XGXS_FLAG; bnx2x_xgxs_deassert(params); - /* set bmac loopback */ + /* Set bmac loopback */ bnx2x_emac_enable(params, vars, 1); bnx2x_emac_program(params, vars); REG_WR(bp, 
NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); @@ -12519,6 +12564,7 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars) params->req_line_speed[0], params->req_flow_ctrl[0]); DP(NETIF_MSG_LINK, "(2) req_speed %d, req_flowctrl %d\n", params->req_line_speed[1], params->req_flow_ctrl[1]); + DP(NETIF_MSG_LINK, "req_adv_flow_ctrl 0x%x\n", params->req_fc_auto_adv); vars->link_status = 0; vars->phy_link_up = 0; vars->link_up = 0; @@ -12527,6 +12573,8 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars) vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; vars->mac_type = MAC_TYPE_NONE; vars->phy_flags = 0; + vars->check_kr2_recovery_cnt = 0; + params->link_flags = PHY_INITIALIZED; /* Driver opens NIG-BRB filters */ bnx2x_set_rx_filter(params, 1); /* Check if link flap can be avoided */ @@ -12691,6 +12739,7 @@ int bnx2x_lfa_reset(struct link_params *params, struct bnx2x *bp = params->bp; vars->link_up = 0; vars->phy_flags = 0; + params->link_flags &= ~PHY_INITIALIZED; if (!params->lfa_base) return bnx2x_link_reset(params, vars, 1); /* @@ -13411,6 +13460,7 @@ static void bnx2x_disable_kr2(struct link_params *params, vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE; bnx2x_update_link_attr(params, vars->link_attr_sync); + vars->check_kr2_recovery_cnt = CHECK_KR2_RECOVERY_CNT; /* Restart AN on leading lane */ bnx2x_warpcore_restart_AN_KR(phy, params); } @@ -13431,11 +13481,24 @@ static void bnx2x_check_kr2_wa(struct link_params *params, { struct bnx2x *bp = params->bp; u16 base_page, next_page, not_kr2_device, lane; - int sigdet = bnx2x_warpcore_get_sigdet(phy, params); + int sigdet; + /* Once KR2 was disabled, wait 5 seconds before checking KR2 recovery + * Since some switches tend to reinit the AN process and clear the + * the advertised BP/NP after ~2 seconds causing the KR2 to be disabled + * and recovered many times + */ + if (vars->check_kr2_recovery_cnt > 0) { + vars->check_kr2_recovery_cnt--; + return; + } + + sigdet = 
bnx2x_warpcore_get_sigdet(phy, params); if (!sigdet) { - if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) + if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) { bnx2x_kr2_recovery(params, vars, phy); + DP(NETIF_MSG_LINK, "No sigdet\n"); + } return; } @@ -13450,8 +13513,10 @@ static void bnx2x_check_kr2_wa(struct link_params *params, /* CL73 has not begun yet */ if (base_page == 0) { - if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) + if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) { bnx2x_kr2_recovery(params, vars, phy); + DP(NETIF_MSG_LINK, "No BP\n"); + } return; } @@ -13467,7 +13532,7 @@ static void bnx2x_check_kr2_wa(struct link_params *params, if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) { if (!not_kr2_device) { DP(NETIF_MSG_LINK, "BP=0x%x, NP=0x%x\n", base_page, - next_page); + next_page); bnx2x_kr2_recovery(params, vars, phy); } return; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h index be5c195d03dd..4df45234fdc0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h @@ -41,6 +41,9 @@ #define SPEED_AUTO_NEG 0 #define SPEED_20000 20000 +#define I2C_DEV_ADDR_A0 0xa0 +#define I2C_DEV_ADDR_A2 0xa2 + #define SFP_EEPROM_PAGE_SIZE 16 #define SFP_EEPROM_VENDOR_NAME_ADDR 0x14 #define SFP_EEPROM_VENDOR_NAME_SIZE 16 @@ -54,6 +57,15 @@ #define SFP_EEPROM_SERIAL_SIZE 16 #define SFP_EEPROM_DATE_ADDR 0x54 /* ASCII YYMMDD */ #define SFP_EEPROM_DATE_SIZE 6 +#define SFP_EEPROM_DIAG_TYPE_ADDR 0x5c +#define SFP_EEPROM_DIAG_TYPE_SIZE 1 +#define SFP_EEPROM_DIAG_ADDR_CHANGE_REQ (1<<2) +#define SFP_EEPROM_SFF_8472_COMP_ADDR 0x5e +#define SFP_EEPROM_SFF_8472_COMP_SIZE 1 + +#define SFP_EEPROM_A2_CHECKSUM_RANGE 0x5e +#define SFP_EEPROM_A2_CC_DMI_ADDR 0x5f + #define PWR_FLT_ERR_MSG_LEN 250 #define XGXS_EXT_PHY_TYPE(ext_phy_config) \ @@ -309,6 +321,7 @@ struct link_params { req_flow_ctrl is set to AUTO */ u16 
link_flags; #define LINK_FLAGS_INT_DISABLED (1<<0) +#define PHY_INITIALIZED (1<<1) u32 lfa_base; }; @@ -342,7 +355,8 @@ struct link_vars { u32 link_status; u32 eee_status; u8 fault_detected; - u8 rsrv1; + u8 check_kr2_recovery_cnt; +#define CHECK_KR2_RECOVERY_CNT 5 u16 periodic_flags; #define PERIODIC_FLAGS_LINK_EVENT 0x0001 @@ -418,8 +432,8 @@ void bnx2x_sfx7101_sp_sw_reset(struct bnx2x *bp, struct bnx2x_phy *phy); /* Read "byte_cnt" bytes from address "addr" from the SFP+ EEPROM */ int bnx2x_read_sfp_module_eeprom(struct bnx2x_phy *phy, - struct link_params *params, u16 addr, - u8 byte_cnt, u8 *o_buf); + struct link_params *params, u8 dev_addr, + u16 addr, u16 byte_cnt, u8 *o_buf); void bnx2x_hw_reset_phy(struct link_params *params); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e81a747ea8ce..86d13870399e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -75,8 +75,6 @@ #define FW_FILE_NAME_E1H "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw" #define FW_FILE_NAME_E2 "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw" -#define MAC_LEADING_ZERO_CNT (ALIGN(ETH_ALEN, sizeof(u32)) - ETH_ALEN) - /* Time in jiffies before concluding the transmitter is hung */ #define TX_TIMEOUT (5*HZ) @@ -2955,14 +2953,16 @@ static unsigned long bnx2x_get_common_flags(struct bnx2x *bp, __set_bit(BNX2X_Q_FLG_ACTIVE, &flags); /* tx only connections collect statistics (on the same index as the - * parent connection). The statistics are zeroed when the parent - * connection is initialized. + * parent connection). The statistics are zeroed when the parent + * connection is initialized. 
*/ __set_bit(BNX2X_Q_FLG_STATS, &flags); if (zero_stats) __set_bit(BNX2X_Q_FLG_ZERO_STATS, &flags); + __set_bit(BNX2X_Q_FLG_PCSUM_ON_PKT, &flags); + __set_bit(BNX2X_Q_FLG_TUN_INC_INNER_IP_ID, &flags); #ifdef BNX2X_STOP_ON_ERROR __set_bit(BNX2X_Q_FLG_TX_SEC, &flags); @@ -3227,16 +3227,29 @@ static void bnx2x_drv_info_ether_stat(struct bnx2x *bp) { struct eth_stats_info *ether_stat = &bp->slowpath->drv_info_to_mcp.ether_stat; + struct bnx2x_vlan_mac_obj *mac_obj = + &bp->sp_objs->mac_obj; + int i; strlcpy(ether_stat->version, DRV_MODULE_VERSION, ETH_STAT_INFO_VERSION_LEN); - bp->sp_objs[0].mac_obj.get_n_elements(bp, &bp->sp_objs[0].mac_obj, - DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, - ether_stat->mac_local); - + /* get DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED macs, placing them in the + * mac_local field in ether_stat struct. The base address is offset by 2 + * bytes to account for the field being 8 bytes but a mac address is + * only 6 bytes. Likewise, the stride for the get_n_elements function is + * 2 bytes to compensate from the 6 bytes of a mac to the 8 bytes + * allocated by the ether_stat struct, so the macs will land in their + * proper positions. 
+ */ + for (i = 0; i < DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED; i++) + memset(ether_stat->mac_local + i, 0, + sizeof(ether_stat->mac_local[0])); + mac_obj->get_n_elements(bp, &bp->sp_objs[0].mac_obj, + DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, + ether_stat->mac_local + MAC_PAD, MAC_PAD, + ETH_ALEN); ether_stat->mtu_size = bp->dev->mtu; - if (bp->dev->features & NETIF_F_RXCSUM) ether_stat->feature_flags |= FEATURE_ETH_CHKSUM_OFFLOAD_MASK; if (bp->dev->features & NETIF_F_TSO) @@ -3258,8 +3271,7 @@ static void bnx2x_drv_info_fcoe_stat(struct bnx2x *bp) if (!CNIC_LOADED(bp)) return; - memcpy(fcoe_stat->mac_local + MAC_LEADING_ZERO_CNT, - bp->fip_mac, ETH_ALEN); + memcpy(fcoe_stat->mac_local + MAC_PAD, bp->fip_mac, ETH_ALEN); fcoe_stat->qos_priority = app->traffic_type_priority[LLFC_TRAFFIC_TYPE_FCOE]; @@ -3361,8 +3373,8 @@ static void bnx2x_drv_info_iscsi_stat(struct bnx2x *bp) if (!CNIC_LOADED(bp)) return; - memcpy(iscsi_stat->mac_local + MAC_LEADING_ZERO_CNT, - bp->cnic_eth_dev.iscsi_mac, ETH_ALEN); + memcpy(iscsi_stat->mac_local + MAC_PAD, bp->cnic_eth_dev.iscsi_mac, + ETH_ALEN); iscsi_stat->qos_priority = app->traffic_type_priority[LLFC_TRAFFIC_TYPE_ISCSI]; @@ -4947,7 +4959,7 @@ static void bnx2x_after_function_update(struct bnx2x *bp) q); } - if (!NO_FCOE(bp)) { + if (!NO_FCOE(bp) && CNIC_ENABLED(bp)) { fp = &bp->fp[FCOE_IDX(bp)]; queue_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; @@ -6029,9 +6041,10 @@ void bnx2x_nic_init(struct bnx2x *bp, u32 load_code) rmb(); bnx2x_init_rx_rings(bp); bnx2x_init_tx_rings(bp); - - if (IS_VF(bp)) + if (IS_VF(bp)) { + bnx2x_memset_stats(bp); return; + } /* Initialize MOD_ABS interrupts */ bnx2x_init_mod_abs_int(bp, &bp->link_vars, bp->common.chip_id, @@ -9525,6 +9538,10 @@ sp_rtnl_not_reset: bnx2x_vfpf_storm_rx_mode(bp); } + if (test_and_clear_bit(BNX2X_SP_RTNL_HYPERVISOR_VLAN, + &bp->sp_rtnl_state)) + bnx2x_pf_set_vfs_vlan(bp); + /* work which needs rtnl lock not-taken (as it takes the lock itself and * can be called from other contexts as 
well) */ @@ -9532,8 +9549,10 @@ sp_rtnl_not_reset: /* enable SR-IOV if applicable */ if (IS_SRIOV(bp) && test_and_clear_bit(BNX2X_SP_RTNL_ENABLE_SRIOV, - &bp->sp_rtnl_state)) + &bp->sp_rtnl_state)) { + bnx2x_disable_sriov(bp); bnx2x_enable_sriov(bp); + } } static void bnx2x_period_task(struct work_struct *work) @@ -9701,6 +9720,31 @@ static struct bnx2x_prev_path_list * return NULL; } +static int bnx2x_prev_path_mark_eeh(struct bnx2x *bp) +{ + struct bnx2x_prev_path_list *tmp_list; + int rc; + + rc = down_interruptible(&bnx2x_prev_sem); + if (rc) { + BNX2X_ERR("Received %d when tried to take lock\n", rc); + return rc; + } + + tmp_list = bnx2x_prev_path_get_entry(bp); + if (tmp_list) { + tmp_list->aer = 1; + rc = 0; + } else { + BNX2X_ERR("path %d: Entry does not exist for eeh; Flow occurs before initial insmod is over ?\n", + BP_PATH(bp)); + } + + up(&bnx2x_prev_sem); + + return rc; +} + static bool bnx2x_prev_is_path_marked(struct bnx2x *bp) { struct bnx2x_prev_path_list *tmp_list; @@ -9709,14 +9753,15 @@ static bool bnx2x_prev_is_path_marked(struct bnx2x *bp) if (down_trylock(&bnx2x_prev_sem)) return false; - list_for_each_entry(tmp_list, &bnx2x_prev_list, list) { - if (PCI_SLOT(bp->pdev->devfn) == tmp_list->slot && - bp->pdev->bus->number == tmp_list->bus && - BP_PATH(bp) == tmp_list->path) { + tmp_list = bnx2x_prev_path_get_entry(bp); + if (tmp_list) { + if (tmp_list->aer) { + DP(NETIF_MSG_HW, "Path %d was marked by AER\n", + BP_PATH(bp)); + } else { rc = true; BNX2X_DEV_INFO("Path %d was already cleaned from previous drivers\n", BP_PATH(bp)); - break; } } @@ -9730,6 +9775,28 @@ static int bnx2x_prev_mark_path(struct bnx2x *bp, bool after_undi) struct bnx2x_prev_path_list *tmp_list; int rc; + rc = down_interruptible(&bnx2x_prev_sem); + if (rc) { + BNX2X_ERR("Received %d when tried to take lock\n", rc); + return rc; + } + + /* Check whether the entry for this path already exists */ + tmp_list = bnx2x_prev_path_get_entry(bp); + if (tmp_list) { + if 
(!tmp_list->aer) { + BNX2X_ERR("Re-Marking the path.\n"); + } else { + DP(NETIF_MSG_HW, "Removing AER indication from path %d\n", + BP_PATH(bp)); + tmp_list->aer = 0; + } + up(&bnx2x_prev_sem); + return 0; + } + up(&bnx2x_prev_sem); + + /* Create an entry for this path and add it */ tmp_list = kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL); if (!tmp_list) { BNX2X_ERR("Failed to allocate 'bnx2x_prev_path_list'\n"); @@ -9739,6 +9806,7 @@ static int bnx2x_prev_mark_path(struct bnx2x *bp, bool after_undi) tmp_list->bus = bp->pdev->bus->number; tmp_list->slot = PCI_SLOT(bp->pdev->devfn); tmp_list->path = BP_PATH(bp); + tmp_list->aer = 0; tmp_list->undi = after_undi ? (1 << BP_PORT(bp)) : 0; rc = down_interruptible(&bnx2x_prev_sem); @@ -9746,8 +9814,8 @@ static int bnx2x_prev_mark_path(struct bnx2x *bp, bool after_undi) BNX2X_ERR("Received %d when tried to take lock\n", rc); kfree(tmp_list); } else { - BNX2X_DEV_INFO("Marked path [%d] - finished previous unload\n", - BP_PATH(bp)); + DP(NETIF_MSG_HW, "Marked path [%d] - finished previous unload\n", + BP_PATH(bp)); list_add(&tmp_list->list, &bnx2x_prev_list); up(&bnx2x_prev_sem); } @@ -9878,6 +9946,10 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) REG_RD(bp, NIG_REG_NIG_INT_STS_CLR_0); } } + if (!CHIP_IS_E1x(bp)) + /* block FW from writing to host */ + REG_WR(bp, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); + /* wait until BRB is empty */ tmp_reg = REG_RD(bp, BRB1_REG_NUM_OF_FULL_BLOCKS); while (timer_count) { @@ -9986,6 +10058,7 @@ static int bnx2x_prev_unload(struct bnx2x *bp) } do { + int aer = 0; /* Lock MCP using an unload request */ fw = bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS, 0); if (!fw) { @@ -9994,7 +10067,18 @@ static int bnx2x_prev_unload(struct bnx2x *bp) break; } - if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON) { + rc = down_interruptible(&bnx2x_prev_sem); + if (rc) { + BNX2X_ERR("Cannot check for AER; Received %d when tried to take lock\n", + rc); + } else { + /* If Path is marked 
by EEH, ignore unload status */ + aer = !!(bnx2x_prev_path_get_entry(bp) && + bnx2x_prev_path_get_entry(bp)->aer); + up(&bnx2x_prev_sem); + } + + if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON || aer) { rc = bnx2x_prev_unload_common(bp); break; } @@ -10034,8 +10118,12 @@ static void bnx2x_get_common_hwinfo(struct bnx2x *bp) id = ((val & 0xffff) << 16); val = REG_RD(bp, MISC_REG_CHIP_REV); id |= ((val & 0xf) << 12); - val = REG_RD(bp, MISC_REG_CHIP_METAL); - id |= ((val & 0xff) << 4); + + /* Metal is read from PCI regs, but we can't access >=0x400 from + * the configuration space (so we need to reg_rd) + */ + val = REG_RD(bp, PCICFG_OFFSET + PCI_ID_VAL3); + id |= (((val >> 24) & 0xf) << 4); val = REG_RD(bp, MISC_REG_BOND_ID); id |= (val & 0xf); bp->common.chip_id = id; @@ -10812,14 +10900,12 @@ static void bnx2x_get_cnic_mac_hwinfo(struct bnx2x *bp) } } - if (IS_MF_STORAGE_SD(bp)) - /* Zero primary MAC configuration */ - memset(bp->dev->dev_addr, 0, ETH_ALEN); - - if (IS_MF_FCOE_AFEX(bp) || IS_MF_FCOE_SD(bp)) - /* use FIP MAC as primary MAC */ + /* If this is a storage-only interface, use SAN mac as + * primary MAC. Notice that for SD this is already the case, + * as the SAN mac was copied from the primary MAC. + */ + if (IS_MF_FCOE_AFEX(bp)) memcpy(bp->dev->dev_addr, fip_mac, ETH_ALEN); - } else { val2 = SHMEM_RD(bp, dev_info.port_hw_config[port]. iscsi_mac_upper); @@ -11056,6 +11142,9 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp) } else BNX2X_DEV_INFO("illegal OV for SD\n"); break; + case SHARED_FEAT_CFG_FORCE_SF_MODE_FORCED_SF: + bp->mf_config[vn] = 0; + break; default: /* Unknown configuration: reset mf_config */ bp->mf_config[vn] = 0; @@ -11402,26 +11491,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) * net_device service functions */ -static int bnx2x_open_epilog(struct bnx2x *bp) -{ - /* Enable sriov via delayed work. 
This must be done via delayed work - * because it causes the probe of the vf devices to be run, which invoke - * register_netdevice which must have rtnl lock taken. As we are holding - * the lock right now, that could only work if the probe would not take - * the lock. However, as the probe of the vf may be called from other - * contexts as well (such as passthrough to vm failes) it can't assume - * the lock is being held for it. Using delayed work here allows the - * probe code to simply take the lock (i.e. wait for it to be released - * if it is being held). - */ - smp_mb__before_clear_bit(); - set_bit(BNX2X_SP_RTNL_ENABLE_SRIOV, &bp->sp_rtnl_state); - smp_mb__after_clear_bit(); - schedule_delayed_work(&bp->sp_rtnl_task, 0); - - return 0; -} - /* called with rtnl_lock */ static int bnx2x_open(struct net_device *dev) { @@ -11791,6 +11860,8 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_setup_tc = bnx2x_setup_tc, #ifdef CONFIG_BNX2X_SRIOV .ndo_set_vf_mac = bnx2x_set_vf_mac, + .ndo_set_vf_vlan = bnx2x_set_vf_vlan, + .ndo_get_vf_config = bnx2x_get_vf_config, #endif #ifdef NETDEV_FCOE_WWNN .ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn, @@ -11953,19 +12024,26 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->watchdog_timeo = TX_TIMEOUT; dev->netdev_ops = &bnx2x_netdev_ops; - bnx2x_set_ethtool_ops(dev); + bnx2x_set_ethtool_ops(bp, dev); dev->priv_flags |= IFF_UNICAST_FLT; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO | - NETIF_F_RXHASH | NETIF_F_HW_VLAN_TX; + NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX; + if (!CHIP_IS_E1x(bp)) { + dev->hw_features |= NETIF_F_GSO_GRE; + dev->hw_enc_features = + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | + NETIF_F_GSO_GRE; + } dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | 
NETIF_F_TSO6 | NETIF_F_HIGHDMA; - dev->features |= dev->hw_features | NETIF_F_HW_VLAN_RX; + dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; if (bp->flags & USING_DAC_FLAG) dev->features |= NETIF_F_HIGHDMA; @@ -12447,7 +12525,7 @@ static int bnx2x_init_one(struct pci_dev *pdev, * l2 connections. */ if (IS_VF(bp)) { - bnx2x_vf_map_doorbells(bp); + bp->doorbells = bnx2x_vf_doorbells(bp); rc = bnx2x_vf_pci_alloc(bp); if (rc) goto init_one_exit; @@ -12475,13 +12553,8 @@ static int bnx2x_init_one(struct pci_dev *pdev, goto init_one_exit; } - /* Enable SRIOV if capability found in configuration space. - * Once the generic SR-IOV framework makes it in from the - * pci tree this will be revised, to allow dynamic control - * over the number of VFs. Right now, change the num of vfs - * param below to enable SR-IOV. - */ - rc = bnx2x_iov_init_one(bp, int_mode, 0/*num vfs*/); + /* Enable SRIOV if capability found in configuration space */ + rc = bnx2x_iov_init_one(bp, int_mode, BNX2X_MAX_NUM_OF_VFS); if (rc) goto init_one_exit; @@ -12493,16 +12566,6 @@ static int bnx2x_init_one(struct pci_dev *pdev, if (CHIP_IS_E1x(bp)) bp->flags |= NO_FCOE_FLAG; - /* disable FCOE for 57840 device, until FW supports it */ - switch (ent->driver_data) { - case BCM57840_O: - case BCM57840_4_10: - case BCM57840_2_20: - case BCM57840_MFO: - case BCM57840_MF: - bp->flags |= NO_FCOE_FLAG; - } - /* Set bp->num_queues for MSI-X mode*/ bnx2x_set_num_queues(bp); @@ -12636,9 +12699,7 @@ static void bnx2x_remove_one(struct pci_dev *pdev) static int bnx2x_eeh_nic_unload(struct bnx2x *bp) { - int i; - - bp->state = BNX2X_STATE_ERROR; + bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT; bp->rx_mode = BNX2X_RX_MODE_NONE; @@ -12647,29 +12708,21 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) /* Stop Tx */ bnx2x_tx_disable(bp); - - bnx2x_netif_stop(bp, 0); /* Delete all NAPI objects */ bnx2x_del_all_napi(bp); if (CNIC_LOADED(bp)) bnx2x_del_all_napi_cnic(bp); + netdev_reset_tc(bp->dev); 
del_timer_sync(&bp->timer); + cancel_delayed_work(&bp->sp_task); + cancel_delayed_work(&bp->period_task); - bnx2x_stats_handle(bp, STATS_EVENT_STOP); - - /* Release IRQs */ - bnx2x_free_irq(bp); - - /* Free SKBs, SGEs, TPA pool and driver internals */ - bnx2x_free_skbs(bp); - - for_each_rx_queue(bp, i) - bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE); - - bnx2x_free_mem(bp); + spin_lock_bh(&bp->stats_lock); + bp->stats_state = STATS_STATE_DISABLED; + spin_unlock_bh(&bp->stats_lock); - bp->state = BNX2X_STATE_CLOSED; + bnx2x_save_statistics(bp); netif_carrier_off(bp->dev); @@ -12705,6 +12758,8 @@ static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev, rtnl_lock(); + BNX2X_ERR("IO error detected\n"); + netif_device_detach(dev); if (state == pci_channel_io_perm_failure) { @@ -12715,6 +12770,8 @@ static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev, if (netif_running(dev)) bnx2x_eeh_nic_unload(bp); + bnx2x_prev_path_mark_eeh(bp); + pci_disable_device(pdev); rtnl_unlock(); @@ -12733,9 +12790,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct bnx2x *bp = netdev_priv(dev); + int i; rtnl_lock(); - + BNX2X_ERR("IO slot reset initializing...\n"); if (pci_enable_device(pdev)) { dev_err(&pdev->dev, "Cannot re-enable PCI device after reset\n"); @@ -12749,6 +12807,42 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) if (netif_running(dev)) bnx2x_set_power_state(bp, PCI_D0); + if (netif_running(dev)) { + BNX2X_ERR("IO slot reset --> driver unload\n"); + if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { + u32 v; + + v = SHMEM2_RD(bp, + drv_capabilities_flag[BP_FW_MB_IDX(bp)]); + SHMEM2_WR(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)], + v & ~DRV_FLAGS_CAPABILITIES_LOADED_L2); + } + bnx2x_drain_tx_queues(bp); + bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); + bnx2x_netif_stop(bp, 1); + bnx2x_free_irq(bp); + + /* Report UNLOAD_DONE to MCP */ + 
bnx2x_send_unload_done(bp, true); + + bp->sp_state = 0; + bp->port.pmf = 0; + + bnx2x_prev_unload(bp); + + /* We should have resetted the engine, so It's fair to + * assume the FW will no longer write to the bnx2x driver. + */ + bnx2x_squeeze_objects(bp); + bnx2x_free_skbs(bp); + for_each_rx_queue(bp, i) + bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE); + bnx2x_free_fp_mem(bp); + bnx2x_free_mem(bp); + + bp->state = BNX2X_STATE_CLOSED; + } + rtnl_unlock(); return PCI_ERS_RESULT_RECOVERED; @@ -12775,6 +12869,9 @@ static void bnx2x_io_resume(struct pci_dev *pdev) bnx2x_eeh_recover(bp); + bp->fw_seq = SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & + DRV_MSG_SEQ_NUMBER_MASK; + if (netif_running(dev)) bnx2x_nic_load(bp, LOAD_NORMAL); @@ -12797,6 +12894,9 @@ static struct pci_driver bnx2x_pci_driver = { .suspend = bnx2x_suspend, .resume = bnx2x_resume, .err_handler = &bnx2x_err_handler, +#ifdef CONFIG_BNX2X_SRIOV + .sriov_configure = bnx2x_sriov_configure, +#endif }; static int __init bnx2x_init(void) @@ -13354,6 +13454,7 @@ static int bnx2x_unregister_cnic(struct net_device *dev) RCU_INIT_POINTER(bp->cnic_ops, NULL); mutex_unlock(&bp->cnic_mutex); synchronize_rcu(); + bp->cnic_enabled = false; kfree(bp->cnic_kwq); bp->cnic_kwq = NULL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index 791eb2d53011..d22bc40091ec 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -1491,10 +1491,6 @@ /* [R 4] This field indicates the type of the device. '0' - 2 Ports; '1' - 1 Port. */ #define MISC_REG_BOND_ID 0xa400 -/* [R 8] These bits indicate the metal revision of the chip. This value - starts at 0x00 for each all-layer tape-out and increments by one for each - tape-out. */ -#define MISC_REG_CHIP_METAL 0xa404 /* [R 16] These bits indicate the part number for the chip. 
*/ #define MISC_REG_CHIP_NUM 0xa408 /* [R 4] These bits indicate the base revision of the chip. This value @@ -6331,6 +6327,8 @@ #define PCI_PM_DATA_B 0x414 #define PCI_ID_VAL1 0x434 #define PCI_ID_VAL2 0x438 +#define PCI_ID_VAL3 0x43c + #define GRC_CONFIG_REG_PF_INIT_VF 0x624 #define GRC_CR_PF_INIT_VF_PF_FIRST_VF_NUM_MASK 0xf /* First VF_NUM for PF is encoded in this register. diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 7306416bc90d..32a9609cc98b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -30,8 +30,6 @@ #define BNX2X_MAX_EMUL_MULTI 16 -#define MAC_LEADING_ZERO_CNT (ALIGN(ETH_ALEN, sizeof(u32)) - ETH_ALEN) - /**** Exe Queue interfaces ****/ /** @@ -444,30 +442,21 @@ static bool bnx2x_put_credit_vlan_mac(struct bnx2x_vlan_mac_obj *o) } static int bnx2x_get_n_elements(struct bnx2x *bp, struct bnx2x_vlan_mac_obj *o, - int n, u8 *buf) + int n, u8 *base, u8 stride, u8 size) { struct bnx2x_vlan_mac_registry_elem *pos; - u8 *next = buf; + u8 *next = base; int counter = 0; /* traverse list */ list_for_each_entry(pos, &o->head, link) { if (counter < n) { - /* place leading zeroes in buffer */ - memset(next, 0, MAC_LEADING_ZERO_CNT); - - /* place mac after leading zeroes*/ - memcpy(next + MAC_LEADING_ZERO_CNT, pos->u.mac.mac, - ETH_ALEN); - - /* calculate address of next element and - * advance counter - */ + memcpy(next, &pos->u, size); counter++; - next = buf + counter * ALIGN(ETH_ALEN, sizeof(u32)); + DP(BNX2X_MSG_SP, "copied element number %d to address %p element was:\n", + counter, next); + next += stride + size; - DP(BNX2X_MSG_SP, "copied element number %d to address %p element was %pM\n", - counter, next, pos->u.mac.mac); } } return counter * ETH_ALEN; @@ -487,7 +476,8 @@ static int bnx2x_check_mac_add(struct bnx2x *bp, /* Check if a requested MAC already exists */ list_for_each_entry(pos, &o->head, link) - if 
(!memcmp(data->mac.mac, pos->u.mac.mac, ETH_ALEN)) + if (!memcmp(data->mac.mac, pos->u.mac.mac, ETH_ALEN) && + (data->mac.is_inner_mac == pos->u.mac.is_inner_mac)) return -EEXIST; return 0; @@ -520,7 +510,9 @@ static int bnx2x_check_vlan_mac_add(struct bnx2x *bp, list_for_each_entry(pos, &o->head, link) if ((data->vlan_mac.vlan == pos->u.vlan_mac.vlan) && (!memcmp(data->vlan_mac.mac, pos->u.vlan_mac.mac, - ETH_ALEN))) + ETH_ALEN)) && + (data->vlan_mac.is_inner_mac == + pos->u.vlan_mac.is_inner_mac)) return -EEXIST; return 0; @@ -538,7 +530,8 @@ static struct bnx2x_vlan_mac_registry_elem * DP(BNX2X_MSG_SP, "Checking MAC %pM for DEL command\n", data->mac.mac); list_for_each_entry(pos, &o->head, link) - if (!memcmp(data->mac.mac, pos->u.mac.mac, ETH_ALEN)) + if ((!memcmp(data->mac.mac, pos->u.mac.mac, ETH_ALEN)) && + (data->mac.is_inner_mac == pos->u.mac.is_inner_mac)) return pos; return NULL; @@ -573,7 +566,9 @@ static struct bnx2x_vlan_mac_registry_elem * list_for_each_entry(pos, &o->head, link) if ((data->vlan_mac.vlan == pos->u.vlan_mac.vlan) && (!memcmp(data->vlan_mac.mac, pos->u.vlan_mac.mac, - ETH_ALEN))) + ETH_ALEN)) && + (data->vlan_mac.is_inner_mac == + pos->u.vlan_mac.is_inner_mac)) return pos; return NULL; @@ -770,6 +765,8 @@ static void bnx2x_set_one_mac_e2(struct bnx2x *bp, bnx2x_set_fw_mac_addr(&rule_entry->mac.mac_msb, &rule_entry->mac.mac_mid, &rule_entry->mac.mac_lsb, mac); + rule_entry->mac.inner_mac = + cpu_to_le16(elem->cmd_data.vlan_mac.u.mac.is_inner_mac); /* MOVE: Add a rule that will add this MAC to the target Queue */ if (cmd == BNX2X_VLAN_MAC_MOVE) { @@ -786,6 +783,9 @@ static void bnx2x_set_one_mac_e2(struct bnx2x *bp, bnx2x_set_fw_mac_addr(&rule_entry->mac.mac_msb, &rule_entry->mac.mac_mid, &rule_entry->mac.mac_lsb, mac); + rule_entry->mac.inner_mac = + cpu_to_le16(elem->cmd_data.vlan_mac. 
+ u.mac.is_inner_mac); } /* Set the ramrod data header */ @@ -974,7 +974,8 @@ static void bnx2x_set_one_vlan_mac_e2(struct bnx2x *bp, bnx2x_set_fw_mac_addr(&rule_entry->pair.mac_msb, &rule_entry->pair.mac_mid, &rule_entry->pair.mac_lsb, mac); - + rule_entry->pair.inner_mac = + cpu_to_le16(elem->cmd_data.vlan_mac.u.vlan_mac.is_inner_mac); /* MOVE: Add a rule that will add this MAC to the target Queue */ if (cmd == BNX2X_VLAN_MAC_MOVE) { rule_entry++; @@ -991,6 +992,9 @@ static void bnx2x_set_one_vlan_mac_e2(struct bnx2x *bp, bnx2x_set_fw_mac_addr(&rule_entry->pair.mac_msb, &rule_entry->pair.mac_mid, &rule_entry->pair.mac_lsb, mac); + rule_entry->pair.inner_mac = + cpu_to_le16(elem->cmd_data.vlan_mac.u. + vlan_mac.is_inner_mac); } /* Set the ramrod data header */ @@ -1854,6 +1858,7 @@ static int bnx2x_vlan_mac_del_all(struct bnx2x *bp, return rc; } list_del(&exeq_pos->link); + bnx2x_exe_queue_free_elem(bp, exeq_pos); } } @@ -2012,6 +2017,7 @@ void bnx2x_init_vlan_obj(struct bnx2x *bp, vlan_obj->check_move = bnx2x_check_move; vlan_obj->ramrod_cmd = RAMROD_CMD_ID_ETH_CLASSIFICATION_RULES; + vlan_obj->get_n_elements = bnx2x_get_n_elements; /* Exe Queue */ bnx2x_exe_queue_init(bp, @@ -4426,6 +4432,12 @@ static void bnx2x_q_fill_init_tx_data(struct bnx2x_queue_sp_obj *o, tx_data->force_default_pri_flg = test_bit(BNX2X_Q_FLG_FORCE_DEFAULT_PRI, flags); + tx_data->tunnel_lso_inc_ip_id = + test_bit(BNX2X_Q_FLG_TUN_INC_INNER_IP_ID, flags); + tx_data->tunnel_non_lso_pcsum_location = + test_bit(BNX2X_Q_FLG_PCSUM_ON_PKT, flags) ? 
PCSUM_ON_PKT : + PCSUM_ON_BD; + tx_data->tx_status_block_id = params->fw_sb_id; tx_data->tx_sb_index_number = params->sb_cq_index; tx_data->tss_leading_client_id = params->tss_leading_cl_id; @@ -5669,17 +5681,18 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp, memset(rdata, 0, sizeof(*rdata)); /* Fill the ramrod data with provided parameters */ - rdata->function_mode = (u8)start_params->mf_mode; - rdata->sd_vlan_tag = cpu_to_le16(start_params->sd_vlan_tag); - rdata->path_id = BP_PATH(bp); - rdata->network_cos_mode = start_params->network_cos_mode; - - /* - * No need for an explicit memory barrier here as long we would - * need to ensure the ordering of writing to the SPQ element - * and updating of the SPQ producer which involves a memory - * read and we will have to put a full memory barrier there - * (inside bnx2x_sp_post()). + rdata->function_mode = (u8)start_params->mf_mode; + rdata->sd_vlan_tag = cpu_to_le16(start_params->sd_vlan_tag); + rdata->path_id = BP_PATH(bp); + rdata->network_cos_mode = start_params->network_cos_mode; + rdata->gre_tunnel_mode = start_params->gre_tunnel_mode; + rdata->gre_tunnel_rss = start_params->gre_tunnel_rss; + + /* No need for an explicit memory barrier here as long we would + * need to ensure the ordering of writing to the SPQ element + * and updating of the SPQ producer which involves a memory + * read and we will have to put a full memory barrier there + * (inside bnx2x_sp_post()). 
*/ return bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_FUNCTION_START, 0, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index ff907609b9fc..43c00bc84a08 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -100,6 +100,7 @@ struct bnx2x_raw_obj { /************************* VLAN-MAC commands related parameters ***************/ struct bnx2x_mac_ramrod_data { u8 mac[ETH_ALEN]; + u8 is_inner_mac; }; struct bnx2x_vlan_ramrod_data { @@ -108,6 +109,7 @@ struct bnx2x_vlan_ramrod_data { struct bnx2x_vlan_mac_ramrod_data { u8 mac[ETH_ALEN]; + u8 is_inner_mac; u16 vlan; }; @@ -313,8 +315,9 @@ struct bnx2x_vlan_mac_obj { * * @return number of copied bytes */ - int (*get_n_elements)(struct bnx2x *bp, struct bnx2x_vlan_mac_obj *o, - int n, u8 *buf); + int (*get_n_elements)(struct bnx2x *bp, + struct bnx2x_vlan_mac_obj *o, int n, u8 *base, + u8 stride, u8 size); /** * Checks if ADD-ramrod with the given params may be performed. @@ -824,7 +827,9 @@ enum { BNX2X_Q_FLG_TX_SEC, BNX2X_Q_FLG_ANTI_SPOOF, BNX2X_Q_FLG_SILENT_VLAN_REM, - BNX2X_Q_FLG_FORCE_DEFAULT_PRI + BNX2X_Q_FLG_FORCE_DEFAULT_PRI, + BNX2X_Q_FLG_PCSUM_ON_PKT, + BNX2X_Q_FLG_TUN_INC_INNER_IP_ID }; /* Queue type options: queue type may be a compination of below. 
*/ @@ -842,6 +847,7 @@ enum bnx2x_q_type { #define BNX2X_MULTI_TX_COS_E3B0 3 #define BNX2X_MULTI_TX_COS 3 /* Maximum possible */ +#define MAC_PAD (ALIGN(ETH_ALEN, sizeof(u32)) - ETH_ALEN) struct bnx2x_queue_init_params { struct { @@ -1118,6 +1124,15 @@ struct bnx2x_func_start_params { /* Function cos mode */ u8 network_cos_mode; + + /* NVGRE classification enablement */ + u8 nvgre_clss_en; + + /* NO_GRE_TUNNEL/NVGRE_TUNNEL/L2GRE_TUNNEL/IPGRE_TUNNEL */ + u8 gre_tunnel_mode; + + /* GRE_OUTER_HEADERS_RSS/GRE_INNER_HEADERS_RSS/NVGRE_KEY_ENTROPY_RSS */ + u8 gre_tunnel_rss; }; struct bnx2x_func_switch_update_params { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 6adfa2093581..2ce7c7471367 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -20,7 +20,9 @@ #include "bnx2x.h" #include "bnx2x_init.h" #include "bnx2x_cmn.h" +#include "bnx2x_sp.h" #include <linux/crc32.h> +#include <linux/if_vlan.h> /* General service functions */ static void storm_memset_vf_to_pf(struct bnx2x *bp, u16 abs_fid, @@ -555,8 +557,7 @@ static int bnx2x_vfop_config_list(struct bnx2x *bp, rc = bnx2x_config_vlan_mac(bp, vlan_mac); if (rc >= 0) { cnt += pos->add ? 
1 : -1; - list_del(&pos->link); - list_add(&pos->link, &rollback_list); + list_move(&pos->link, &rollback_list); rc = 0; } else if (rc == -EEXIST) { rc = 0; @@ -958,6 +959,12 @@ op_err: BNX2X_ERR("QSETUP[%d:%d] error: rc %d\n", vf->abs_vfid, qid, vfop->rc); op_done: case BNX2X_VFOP_QSETUP_DONE: + vf->cfg_flags |= VF_CFG_VLAN; + smp_mb__before_clear_bit(); + set_bit(BNX2X_SP_RTNL_HYPERVISOR_VLAN, + &bp->sp_rtnl_state); + smp_mb__after_clear_bit(); + schedule_delayed_work(&bp->sp_rtnl_task, 0); bnx2x_vfop_end(bp, vf, vfop); return; default: @@ -1459,7 +1466,6 @@ static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid) return bnx2x_is_pcie_pending(dev); unknown_dev: - BNX2X_ERR("Unknown device\n"); return false; } @@ -1926,20 +1932,22 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, /* SRIOV can be enabled only with MSIX */ if (int_mode_param == BNX2X_INT_MODE_MSI || - int_mode_param == BNX2X_INT_MODE_INTX) + int_mode_param == BNX2X_INT_MODE_INTX) { BNX2X_ERR("Forced MSI/INTx mode is incompatible with SRIOV\n"); + return 0; + } err = -EIO; /* verify ari is enabled */ if (!bnx2x_ari_enabled(bp->pdev)) { - BNX2X_ERR("ARI not supported, SRIOV can not be enabled\n"); - return err; + BNX2X_ERR("ARI not supported (check pci bridge ARI forwarding), SRIOV can not be enabled\n"); + return 0; } /* verify igu is in normal mode */ if (CHIP_INT_MODE_IS_BC(bp)) { BNX2X_ERR("IGU not normal mode, SRIOV can not be enabled\n"); - return err; + return 0; } /* allocate the vfs database */ @@ -1964,8 +1972,10 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, if (iov->total == 0) goto failed; - /* calculate the actual number of VFs */ - iov->nr_virtfn = min_t(u16, iov->total, (u16)num_vfs_param); + iov->nr_virtfn = min_t(u16, iov->total, num_vfs_param); + + DP(BNX2X_MSG_IOV, "num_vfs_param was %d, nr_virtfn was %d\n", + num_vfs_param, iov->nr_virtfn); /* allocate the vf array */ bp->vfdb->vfs = kzalloc(sizeof(struct bnx2x_virtf) * @@ -2378,8 +2388,8 @@ 
int bnx2x_iov_eq_sp_event(struct bnx2x *bp, union event_ring_elem *elem) goto get_vf; case EVENT_RING_OPCODE_MALICIOUS_VF: abs_vfid = elem->message.data.malicious_vf_event.vf_id; - DP(BNX2X_MSG_IOV, "Got VF MALICIOUS notification abs_vfid=%d\n", - abs_vfid); + DP(BNX2X_MSG_IOV, "Got VF MALICIOUS notification abs_vfid=%d err_id=0x%x\n", + abs_vfid, elem->message.data.malicious_vf_event.err_id); goto get_vf; default: return 1; @@ -2436,8 +2446,8 @@ get_vf: /* Do nothing for now */ break; case EVENT_RING_OPCODE_MALICIOUS_VF: - DP(BNX2X_MSG_IOV, "got VF [%d] MALICIOUS notification\n", - vf->abs_vfid); + DP(BNX2X_MSG_IOV, "Got VF MALICIOUS notification abs_vfid=%d error id %x\n", + abs_vfid, elem->message.data.malicious_vf_event.err_id); /* Do nothing for now */ break; } @@ -3012,21 +3022,138 @@ void bnx2x_unlock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf, vf->op_current = CHANNEL_TLV_NONE; } -void bnx2x_enable_sriov(struct bnx2x *bp) +int bnx2x_sriov_configure(struct pci_dev *dev, int num_vfs_param) { - int rc = 0; - /* disbale sriov in case it is still enabled */ + struct bnx2x *bp = netdev_priv(pci_get_drvdata(dev)); + + DP(BNX2X_MSG_IOV, "bnx2x_sriov_configure called with %d, BNX2X_NR_VIRTFN(bp) was %d\n", + num_vfs_param, BNX2X_NR_VIRTFN(bp)); + + /* HW channel is only operational when PF is up */ + if (bp->state != BNX2X_STATE_OPEN) { + BNX2X_ERR("VF num configurtion via sysfs not supported while PF is down"); + return -EINVAL; + } + + /* we are always bound by the total_vfs in the configuration space */ + if (num_vfs_param > BNX2X_NR_VIRTFN(bp)) { + BNX2X_ERR("truncating requested number of VFs (%d) down to maximum allowed (%d)\n", + num_vfs_param, BNX2X_NR_VIRTFN(bp)); + num_vfs_param = BNX2X_NR_VIRTFN(bp); + } + + bp->requested_nr_virtfn = num_vfs_param; + if (num_vfs_param == 0) { + pci_disable_sriov(dev); + return 0; + } else { + return bnx2x_enable_sriov(bp); + } +} + +int bnx2x_enable_sriov(struct bnx2x *bp) +{ + int rc = 0, req_vfs = 
bp->requested_nr_virtfn; + + rc = pci_enable_sriov(bp->pdev, req_vfs); + if (rc) { + BNX2X_ERR("pci_enable_sriov failed with %d\n", rc); + return rc; + } + DP(BNX2X_MSG_IOV, "sriov enabled (%d vfs)\n", req_vfs); + return req_vfs; +} + +void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp) +{ + int vfidx; + struct pf_vf_bulletin_content *bulletin; + + DP(BNX2X_MSG_IOV, "configuring vlan for VFs from sp-task\n"); + for_each_vf(bp, vfidx) { + bulletin = BP_VF_BULLETIN(bp, vfidx); + if (BP_VF(bp, vfidx)->cfg_flags & VF_CFG_VLAN) + bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0); + } +} + +void bnx2x_disable_sriov(struct bnx2x *bp) +{ pci_disable_sriov(bp->pdev); - DP(BNX2X_MSG_IOV, "sriov disabled\n"); +} + +static int bnx2x_vf_ndo_sanity(struct bnx2x *bp, int vfidx, + struct bnx2x_virtf *vf) +{ + if (!IS_SRIOV(bp)) { + BNX2X_ERR("vf ndo called though sriov is disabled\n"); + return -EINVAL; + } + + if (vfidx >= BNX2X_NR_VIRTFN(bp)) { + BNX2X_ERR("vf ndo called for uninitialized VF. vfidx was %d BNX2X_NR_VIRTFN was %d\n", + vfidx, BNX2X_NR_VIRTFN(bp)); + return -EINVAL; + } + + if (!vf) { + BNX2X_ERR("vf ndo called but vf was null. 
vfidx was %d\n", + vfidx); + return -EINVAL; + } - /* enable sriov */ - DP(BNX2X_MSG_IOV, "vf num (%d)\n", (bp->vfdb->sriov.nr_virtfn)); - rc = pci_enable_sriov(bp->pdev, (bp->vfdb->sriov.nr_virtfn)); + return 0; +} + +int bnx2x_get_vf_config(struct net_device *dev, int vfidx, + struct ifla_vf_info *ivi) +{ + struct bnx2x *bp = netdev_priv(dev); + struct bnx2x_virtf *vf = BP_VF(bp, vfidx); + struct bnx2x_vlan_mac_obj *mac_obj = &bnx2x_vfq(vf, 0, mac_obj); + struct bnx2x_vlan_mac_obj *vlan_obj = &bnx2x_vfq(vf, 0, vlan_obj); + struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx); + int rc; + + /* sanity */ + rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf); if (rc) - BNX2X_ERR("pci_enable_sriov failed with %d\n", rc); - else - DP(BNX2X_MSG_IOV, "sriov enabled\n"); + return rc; + if (!mac_obj || !vlan_obj || !bulletin) { + BNX2X_ERR("VF partially initialized\n"); + return -EINVAL; + } + + ivi->vf = vfidx; + ivi->qos = 0; + ivi->tx_rate = 10000; /* always 10G. TBA take from link struct */ + ivi->spoofchk = 1; /*always enabled */ + if (vf->state == VF_ENABLED) { + /* mac and vlan are in vlan_mac objects */ + mac_obj->get_n_elements(bp, mac_obj, 1, (u8 *)&ivi->mac, + 0, ETH_ALEN); + vlan_obj->get_n_elements(bp, vlan_obj, 1, (u8 *)&ivi->vlan, + 0, VLAN_HLEN); + } else { + /* mac */ + if (bulletin->valid_bitmap & (1 << MAC_ADDR_VALID)) + /* mac configured by ndo so its in bulletin board */ + memcpy(&ivi->mac, bulletin->mac, ETH_ALEN); + else + /* funtion has not been loaded yet. Show mac as 0s */ + memset(&ivi->mac, 0, ETH_ALEN); + + /* vlan */ + if (bulletin->valid_bitmap & (1 << VLAN_VALID)) + /* vlan configured by ndo so its in bulletin board */ + memcpy(&ivi->vlan, &bulletin->vlan, VLAN_HLEN); + else + /* funtion has not been loaded yet. Show vlans as 0s */ + memset(&ivi->vlan, 0, VLAN_HLEN); + } + + return 0; } /* New mac for VF. 
Consider these cases: @@ -3044,23 +3171,19 @@ void bnx2x_enable_sriov(struct bnx2x *bp) * VF to configure any mac for itself except for this mac. In case of a race * where the VF fails to see the new post on its bulletin board before sending a * mac configuration request, the PF will simply fail the request and VF can try - * again after consulting its bulletin board + * again after consulting its bulletin board. */ -int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac) +int bnx2x_set_vf_mac(struct net_device *dev, int vfidx, u8 *mac) { struct bnx2x *bp = netdev_priv(dev); - int rc, q_logical_state, vfidx = queue; + int rc, q_logical_state; struct bnx2x_virtf *vf = BP_VF(bp, vfidx); struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx); - /* if SRIOV is disabled there is nothing to do (and somewhere, someone - * has erred). - */ - if (!IS_SRIOV(bp)) { - BNX2X_ERR("bnx2x_set_vf_mac called though sriov is disabled\n"); - return -EINVAL; - } - + /* sanity */ + rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf); + if (rc) + return rc; if (!is_valid_ether_addr(mac)) { BNX2X_ERR("mac address invalid\n"); return -EINVAL; @@ -3085,7 +3208,7 @@ int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac) if (vf->state == VF_ENABLED && q_logical_state == BNX2X_Q_LOGICAL_STATE_ACTIVE) { /* configure the mac in device on this vf's queue */ - unsigned long flags = 0; + unsigned long ramrod_flags = 0; struct bnx2x_vlan_mac_obj *mac_obj = &bnx2x_vfq(vf, 0, mac_obj); /* must lock vfpf channel to protect against vf flows */ @@ -3106,14 +3229,133 @@ int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac) } /* configure the new mac to device */ - __set_bit(RAMROD_COMP_WAIT, &flags); + __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); bnx2x_set_mac_one(bp, (u8 *)&bulletin->mac, mac_obj, true, - BNX2X_ETH_MAC, &flags); + BNX2X_ETH_MAC, &ramrod_flags); bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_MAC); } - return rc; + return 0; +} + +int 
bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) +{ + struct bnx2x *bp = netdev_priv(dev); + int rc, q_logical_state; + struct bnx2x_virtf *vf = BP_VF(bp, vfidx); + struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx); + + /* sanity */ + rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf); + if (rc) + return rc; + + if (vlan > 4095) { + BNX2X_ERR("illegal vlan value %d\n", vlan); + return -EINVAL; + } + + DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n", + vfidx, vlan, 0); + + /* update PF's copy of the VF's bulletin. No point in posting the vlan + * to the VF since it doesn't have anything to do with it. But it useful + * to store it here in case the VF is not up yet and we can only + * configure the vlan later when it does. + */ + bulletin->valid_bitmap |= 1 << VLAN_VALID; + bulletin->vlan = vlan; + + /* is vf initialized and queue set up? */ + q_logical_state = + bnx2x_get_q_logical_state(bp, &bnx2x_vfq(vf, 0, sp_obj)); + if (vf->state == VF_ENABLED && + q_logical_state == BNX2X_Q_LOGICAL_STATE_ACTIVE) { + /* configure the vlan in device on this vf's queue */ + unsigned long ramrod_flags = 0; + unsigned long vlan_mac_flags = 0; + struct bnx2x_vlan_mac_obj *vlan_obj = + &bnx2x_vfq(vf, 0, vlan_obj); + struct bnx2x_vlan_mac_ramrod_params ramrod_param; + struct bnx2x_queue_state_params q_params = {NULL}; + struct bnx2x_queue_update_params *update_params; + + memset(&ramrod_param, 0, sizeof(ramrod_param)); + + /* must lock vfpf channel to protect against vf flows */ + bnx2x_lock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_VLAN); + + /* remove existing vlans */ + __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); + rc = vlan_obj->delete_all(bp, vlan_obj, &vlan_mac_flags, + &ramrod_flags); + if (rc) { + BNX2X_ERR("failed to delete vlans\n"); + return -EINVAL; + } + + /* send queue update ramrod to configure default vlan and silent + * vlan removal + */ + __set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); + q_params.cmd = BNX2X_Q_CMD_UPDATE; 
+ q_params.q_obj = &bnx2x_vfq(vf, 0, sp_obj); + update_params = &q_params.params.update; + __set_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN_CHNG, + &update_params->update_flags); + __set_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM_CHNG, + &update_params->update_flags); + + if (vlan == 0) { + /* if vlan is 0 then we want to leave the VF traffic + * untagged, and leave the incoming traffic untouched + * (i.e. do not remove any vlan tags). + */ + __clear_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN, + &update_params->update_flags); + __clear_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM, + &update_params->update_flags); + } else { + /* configure the new vlan to device */ + __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); + ramrod_param.vlan_mac_obj = vlan_obj; + ramrod_param.ramrod_flags = ramrod_flags; + ramrod_param.user_req.u.vlan.vlan = vlan; + ramrod_param.user_req.cmd = BNX2X_VLAN_MAC_ADD; + rc = bnx2x_config_vlan_mac(bp, &ramrod_param); + if (rc) { + BNX2X_ERR("failed to configure vlan\n"); + return -EINVAL; + } + + /* configure default vlan to vf queue and set silent + * vlan removal (the vf remains unaware of this vlan). + */ + update_params = &q_params.params.update; + __set_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN, + &update_params->update_flags); + __set_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM, + &update_params->update_flags); + update_params->def_vlan = vlan; + } + + /* Update the Queue state */ + rc = bnx2x_queue_state_change(bp, &q_params); + if (rc) { + BNX2X_ERR("Failed to configure default VLAN\n"); + return rc; + } + + /* clear the flag indicating that this VF needs its vlan + * (will only be set if the HV configured th Vlan before vf was + * and we were called because the VF came up later + */ + vf->cfg_flags &= ~VF_CFG_VLAN; + + bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_VLAN); + } + return 0; } /* crc is the first field in the bulletin board. 
compute the crc over the @@ -3165,20 +3407,26 @@ enum sample_bulletin_result bnx2x_sample_bulletin(struct bnx2x *bp) memcpy(bp->dev->dev_addr, bulletin.mac, ETH_ALEN); } + /* the vlan in bulletin board is valid and is new */ + if (bulletin.valid_bitmap & 1 << VLAN_VALID) + memcpy(&bulletin.vlan, &bp->old_bulletin.vlan, VLAN_HLEN); + /* copy new bulletin board to bp */ bp->old_bulletin = bulletin; return PFVF_BULLETIN_UPDATED; } -void bnx2x_vf_map_doorbells(struct bnx2x *bp) +void __iomem *bnx2x_vf_doorbells(struct bnx2x *bp) { /* vf doorbells are embedded within the regview */ - bp->doorbells = bp->regview + PXP_VF_ADDR_DB_START; + return bp->regview + PXP_VF_ADDR_DB_START; } int bnx2x_vf_pci_alloc(struct bnx2x *bp) { + mutex_init(&bp->vf2pf_mutex); + /* allocate vf2pf mailbox for vf to pf channel */ BNX2X_PCI_ALLOC(bp->vf2pf_mbox, &bp->vf2pf_mbox_mapping, sizeof(struct bnx2x_vf_mbx_msg)); @@ -3196,3 +3444,26 @@ alloc_mem_err: sizeof(union pf_vf_bulletin)); return -ENOMEM; } + +int bnx2x_open_epilog(struct bnx2x *bp) +{ + /* Enable sriov via delayed work. This must be done via delayed work + * because it causes the probe of the vf devices to be run, which invoke + * register_netdevice which must have rtnl lock taken. As we are holding + * the lock right now, that could only work if the probe would not take + * the lock. However, as the probe of the vf may be called from other + * contexts as well (such as passthrough to vm failes) it can't assume + * the lock is being held for it. Using delayed work here allows the + * probe code to simply take the lock (i.e. wait for it to be released + * if it is being held). We only want to do this if the number of VFs + * was set before PF driver was loaded. 
+ */ + if (IS_SRIOV(bp) && BNX2X_NR_VIRTFN(bp)) { + smp_mb__before_clear_bit(); + set_bit(BNX2X_SP_RTNL_ENABLE_SRIOV, &bp->sp_rtnl_state); + smp_mb__after_clear_bit(); + schedule_delayed_work(&bp->sp_rtnl_task, 0); + } + + return 0; +} diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index b4050173add9..d4b17b7a774e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -193,6 +193,7 @@ struct bnx2x_virtf { #define VF_CFG_TPA 0x0004 #define VF_CFG_INT_SIMD 0x0008 #define VF_CACHE_LINE 0x0010 +#define VF_CFG_VLAN 0x0020 u8 state; #define VF_FREE 0 /* VF ready to be acquired holds no resc */ @@ -712,6 +713,7 @@ void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type, u16 length); void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv, u16 type, u16 length); +void bnx2x_vfpf_finalize(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv); void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list); bool bnx2x_tlv_supported(u16 tlvtype); @@ -750,13 +752,17 @@ static inline int bnx2x_vf_ustorm_prods_offset(struct bnx2x *bp, } enum sample_bulletin_result bnx2x_sample_bulletin(struct bnx2x *bp); -void bnx2x_vf_map_doorbells(struct bnx2x *bp); +void __iomem *bnx2x_vf_doorbells(struct bnx2x *bp); int bnx2x_vf_pci_alloc(struct bnx2x *bp); -void bnx2x_enable_sriov(struct bnx2x *bp); +int bnx2x_enable_sriov(struct bnx2x *bp); +void bnx2x_disable_sriov(struct bnx2x *bp); static inline int bnx2x_vf_headroom(struct bnx2x *bp) { return bp->vfdb->sriov.nr_virtfn * BNX2X_CLIENTS_PER_VF; } +void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp); +int bnx2x_sriov_configure(struct pci_dev *dev, int num_vfs); +int bnx2x_open_epilog(struct bnx2x *bp); #else /* CONFIG_BNX2X_SRIOV */ @@ -779,7 +785,8 @@ static inline void bnx2x_iov_init_dmae(struct bnx2x *bp) {} static inline int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, 
int num_vfs_param) {return 0; } static inline void bnx2x_iov_remove_one(struct bnx2x *bp) {} -static inline void bnx2x_enable_sriov(struct bnx2x *bp) {} +static inline int bnx2x_enable_sriov(struct bnx2x *bp) {return 0; } +static inline void bnx2x_disable_sriov(struct bnx2x *bp) {} static inline int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) {return 0; } static inline int bnx2x_vfpf_release(struct bnx2x *bp) {return 0; } @@ -802,8 +809,15 @@ static inline enum sample_bulletin_result bnx2x_sample_bulletin(struct bnx2x *bp return PFVF_BULLETIN_UNCHANGED; } -static inline int bnx2x_vf_map_doorbells(struct bnx2x *bp) {return 0; } +static inline void __iomem *bnx2x_vf_doorbells(struct bnx2x *bp) +{ + return NULL; +} + static inline int bnx2x_vf_pci_alloc(struct bnx2x *bp) {return 0; } +static inline void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp) {} +static inline int bnx2x_sriov_configure(struct pci_dev *dev, int num_vfs) {return 0; } +static inline int bnx2x_open_epilog(struct bnx2x *bp) {return 0; } #endif /* CONFIG_BNX2X_SRIOV */ #endif /* bnx2x_sriov.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index 4397f8b76f2e..2ca3d94fcec2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -1547,11 +1547,51 @@ static void bnx2x_prep_fw_stats_req(struct bnx2x *bp) } } +void bnx2x_memset_stats(struct bnx2x *bp) +{ + int i; + + /* function stats */ + for_each_queue(bp, i) { + struct bnx2x_fp_stats *fp_stats = &bp->fp_stats[i]; + + memset(&fp_stats->old_tclient, 0, + sizeof(fp_stats->old_tclient)); + memset(&fp_stats->old_uclient, 0, + sizeof(fp_stats->old_uclient)); + memset(&fp_stats->old_xclient, 0, + sizeof(fp_stats->old_xclient)); + if (bp->stats_init) { + memset(&fp_stats->eth_q_stats, 0, + sizeof(fp_stats->eth_q_stats)); + memset(&fp_stats->eth_q_stats_old, 0, + sizeof(fp_stats->eth_q_stats_old)); + } + } + + 
memset(&bp->dev->stats, 0, sizeof(bp->dev->stats)); + + if (bp->stats_init) { + memset(&bp->net_stats_old, 0, sizeof(bp->net_stats_old)); + memset(&bp->fw_stats_old, 0, sizeof(bp->fw_stats_old)); + memset(&bp->eth_stats_old, 0, sizeof(bp->eth_stats_old)); + memset(&bp->eth_stats, 0, sizeof(bp->eth_stats)); + memset(&bp->func_stats, 0, sizeof(bp->func_stats)); + } + + bp->stats_state = STATS_STATE_DISABLED; + + if (bp->port.pmf && bp->port.port_stx) + bnx2x_port_stats_base_init(bp); + + /* mark the end of statistics initializiation */ + bp->stats_init = false; +} + void bnx2x_stats_init(struct bnx2x *bp) { int /*abs*/port = BP_PORT(bp); int mb_idx = BP_FW_MB_IDX(bp); - int i; bp->stats_pending = 0; bp->executer_idx = 0; @@ -1587,36 +1627,11 @@ void bnx2x_stats_init(struct bnx2x *bp) &(bp->port.old_nig_stats.egress_mac_pkt1_lo), 2); } - /* function stats */ - for_each_queue(bp, i) { - struct bnx2x_fp_stats *fp_stats = &bp->fp_stats[i]; - - memset(&fp_stats->old_tclient, 0, - sizeof(fp_stats->old_tclient)); - memset(&fp_stats->old_uclient, 0, - sizeof(fp_stats->old_uclient)); - memset(&fp_stats->old_xclient, 0, - sizeof(fp_stats->old_xclient)); - if (bp->stats_init) { - memset(&fp_stats->eth_q_stats, 0, - sizeof(fp_stats->eth_q_stats)); - memset(&fp_stats->eth_q_stats_old, 0, - sizeof(fp_stats->eth_q_stats_old)); - } - } - /* Prepare statistics ramrod data */ bnx2x_prep_fw_stats_req(bp); - memset(&bp->dev->stats, 0, sizeof(bp->dev->stats)); + /* Clean SP from previous statistics */ if (bp->stats_init) { - memset(&bp->net_stats_old, 0, sizeof(bp->net_stats_old)); - memset(&bp->fw_stats_old, 0, sizeof(bp->fw_stats_old)); - memset(&bp->eth_stats_old, 0, sizeof(bp->eth_stats_old)); - memset(&bp->eth_stats, 0, sizeof(bp->eth_stats)); - memset(&bp->func_stats, 0, sizeof(bp->func_stats)); - - /* Clean SP from previous statistics */ if (bp->func_stx) { memset(bnx2x_sp(bp, func_stats), 0, sizeof(struct host_func_stats)); @@ -1626,13 +1641,7 @@ void bnx2x_stats_init(struct 
bnx2x *bp) } } - bp->stats_state = STATS_STATE_DISABLED; - - if (bp->port.pmf && bp->port.port_stx) - bnx2x_port_stats_base_init(bp); - - /* mark the end of statistics initializiation */ - bp->stats_init = false; + bnx2x_memset_stats(bp); } void bnx2x_save_statistics(struct bnx2x *bp) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h index 364e37ecbc5c..d117f472816c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h @@ -459,8 +459,9 @@ struct bnx2x_fw_port_stats_old { #define UPDATE_QSTAT(s, t) \ do { \ - qstats->t##_hi = qstats_old->t##_hi + le32_to_cpu(s.hi); \ qstats->t##_lo = qstats_old->t##_lo + le32_to_cpu(s.lo); \ + qstats->t##_hi = qstats_old->t##_hi + le32_to_cpu(s.hi) \ + + ((qstats->t##_lo < qstats_old->t##_lo) ? 1 : 0); \ } while (0) #define UPDATE_QSTAT_OLD(f) \ @@ -539,8 +540,8 @@ struct bnx2x_fw_port_stats_old { /* forward */ struct bnx2x; +void bnx2x_memset_stats(struct bnx2x *bp); void bnx2x_stats_init(struct bnx2x *bp); - void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event); /** diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 531eebf40d60..90fbf9cc2c2c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -36,6 +36,8 @@ void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type, void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv, u16 type, u16 length) { + mutex_lock(&bp->vf2pf_mutex); + DP(BNX2X_MSG_IOV, "preparing to send %d tlv over vf pf channel\n", type); @@ -49,6 +51,15 @@ void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv, first_tlv->resp_msg_offset = sizeof(bp->vf2pf_mbox->req); } +/* releases the mailbox */ +void bnx2x_vfpf_finalize(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv) +{ + 
DP(BNX2X_MSG_IOV, "done sending [%d] tlv over vf pf channel\n", + first_tlv->tl.type); + + mutex_unlock(&bp->vf2pf_mutex); +} + /* list the types and lengths of the tlvs on the buffer */ void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list) { @@ -181,8 +192,10 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) /* clear mailbox and prep first tlv */ bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_ACQUIRE, sizeof(*req)); - if (bnx2x_get_vf_id(bp, &vf_id)) - return -EAGAIN; + if (bnx2x_get_vf_id(bp, &vf_id)) { + rc = -EAGAIN; + goto out; + } req->vfdev_info.vf_id = vf_id; req->vfdev_info.vf_os = 0; @@ -213,7 +226,7 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) /* PF timeout */ if (rc) - return rc; + goto out; /* copy acquire response from buffer to bp */ memcpy(&bp->acquire_resp, resp, sizeof(bp->acquire_resp)); @@ -253,7 +266,8 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) /* PF reports error */ BNX2X_ERR("Failed to get the requested amount of resources: %d. 
Breaking...\n", bp->acquire_resp.hdr.status); - return -EAGAIN; + rc = -EAGAIN; + goto out; } } @@ -279,20 +293,24 @@ int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count) bp->acquire_resp.resc.current_mac_addr, ETH_ALEN); - return 0; +out: + bnx2x_vfpf_finalize(bp, &req->first_tlv); + return rc; } int bnx2x_vfpf_release(struct bnx2x *bp) { struct vfpf_release_tlv *req = &bp->vf2pf_mbox->req.release; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; - u32 rc = 0, vf_id; + u32 rc, vf_id; /* clear mailbox and prep first tlv */ bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_RELEASE, sizeof(*req)); - if (bnx2x_get_vf_id(bp, &vf_id)) - return -EAGAIN; + if (bnx2x_get_vf_id(bp, &vf_id)) { + rc = -EAGAIN; + goto out; + } req->vf_id = vf_id; @@ -308,7 +326,8 @@ int bnx2x_vfpf_release(struct bnx2x *bp) if (rc) /* PF timeout */ - return rc; + goto out; + if (resp->hdr.status == PFVF_STATUS_SUCCESS) { /* PF released us */ DP(BNX2X_MSG_SP, "vf released\n"); @@ -316,10 +335,13 @@ int bnx2x_vfpf_release(struct bnx2x *bp) /* PF reports error */ BNX2X_ERR("PF failed our release request - are we out of sync? response status: %d\n", resp->hdr.status); - return -EAGAIN; + rc = -EAGAIN; + goto out; } +out: + bnx2x_vfpf_finalize(bp, &req->first_tlv); - return 0; + return rc; } /* Tell PF about SB addresses */ @@ -350,16 +372,20 @@ int bnx2x_vfpf_init(struct bnx2x *bp) rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping); if (rc) - return rc; + goto out; if (resp->hdr.status != PFVF_STATUS_SUCCESS) { BNX2X_ERR("INIT VF failed: %d. 
Breaking...\n", resp->hdr.status); - return -EAGAIN; + rc = -EAGAIN; + goto out; } DP(BNX2X_MSG_SP, "INIT VF Succeeded\n"); - return 0; +out: + bnx2x_vfpf_finalize(bp, &req->first_tlv); + + return rc; } /* CLOSE VF - opposite to INIT_VF */ @@ -401,6 +427,8 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp) BNX2X_ERR("Sending CLOSE failed: pf response was %d\n", resp->hdr.status); + bnx2x_vfpf_finalize(bp, &req->first_tlv); + free_irq: /* Disable HW interrupts, NAPI */ bnx2x_netif_stop(bp, 0); @@ -435,7 +463,6 @@ int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx) /* calculate queue flags */ flags |= VFPF_QUEUE_FLG_STATS; flags |= VFPF_QUEUE_FLG_CACHE_ALIGN; - flags |= IS_MF_SD(bp) ? VFPF_QUEUE_FLG_OV : 0; flags |= VFPF_QUEUE_FLG_VLAN; DP(NETIF_MSG_IFUP, "vlan removal enabled\n"); @@ -486,8 +513,11 @@ int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx) if (resp->hdr.status != PFVF_STATUS_SUCCESS) { BNX2X_ERR("Status of SETUP_Q for queue[%d] is %d\n", fp_idx, resp->hdr.status); - return -EINVAL; + rc = -EINVAL; } + + bnx2x_vfpf_finalize(bp, &req->first_tlv); + return rc; } @@ -515,17 +545,19 @@ int bnx2x_vfpf_teardown_queue(struct bnx2x *bp, int qidx) if (rc) { BNX2X_ERR("Sending TEARDOWN for queue %d failed: %d\n", qidx, rc); - return rc; + goto out; } /* PF failed the transaction */ if (resp->hdr.status != PFVF_STATUS_SUCCESS) { BNX2X_ERR("TEARDOWN for queue %d failed: %d\n", qidx, resp->hdr.status); - return -EINVAL; + rc = -EINVAL; } - return 0; +out: + bnx2x_vfpf_finalize(bp, &req->first_tlv); + return rc; } /* request pf to add a mac for the vf */ @@ -533,7 +565,7 @@ int bnx2x_vfpf_set_mac(struct bnx2x *bp) { struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; - int rc; + int rc = 0; /* clear mailbox and prep first tlv */ bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_SET_Q_FILTERS, @@ -562,7 +594,7 @@ int bnx2x_vfpf_set_mac(struct bnx2x *bp) rc = bnx2x_send_msg2pf(bp, 
&resp->hdr.status, bp->vf2pf_mbox_mapping); if (rc) { BNX2X_ERR("failed to send message to pf. rc was %d\n", rc); - return rc; + goto out; } /* failure may mean PF was configured with a new mac for us */ @@ -587,8 +619,10 @@ int bnx2x_vfpf_set_mac(struct bnx2x *bp) if (resp->hdr.status != PFVF_STATUS_SUCCESS) { BNX2X_ERR("vfpf SET MAC failed: %d\n", resp->hdr.status); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vfpf_finalize(bp, &req->first_tlv); return 0; } @@ -643,14 +677,16 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping); if (rc) { BNX2X_ERR("Sending a message failed: %d\n", rc); - return rc; + goto out; } if (resp->hdr.status != PFVF_STATUS_SUCCESS) { BNX2X_ERR("Set Rx mode/multicast failed: %d\n", resp->hdr.status); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vfpf_finalize(bp, &req->first_tlv); return 0; } @@ -689,7 +725,8 @@ int bnx2x_vfpf_storm_rx_mode(struct bnx2x *bp) break; default: BNX2X_ERR("BAD rx mode (%d)\n", mode); - return -EINVAL; + rc = -EINVAL; + goto out; } req->flags |= VFPF_SET_Q_FILTERS_RX_MASK_CHANGED; @@ -708,8 +745,10 @@ int bnx2x_vfpf_storm_rx_mode(struct bnx2x *bp) if (resp->hdr.status != PFVF_STATUS_SUCCESS) { BNX2X_ERR("Set Rx mode failed: %d\n", resp->hdr.status); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vfpf_finalize(bp, &req->first_tlv); return rc; } @@ -1004,7 +1043,7 @@ static void bnx2x_vf_mbx_init_vf(struct bnx2x *bp, struct bnx2x_virtf *vf, } /* convert MBX queue-flags to standard SP queue-flags */ -static void bnx2x_vf_mbx_set_q_flags(u32 mbx_q_flags, +static void bnx2x_vf_mbx_set_q_flags(struct bnx2x *bp, u32 mbx_q_flags, unsigned long *sp_q_flags) { if (mbx_q_flags & VFPF_QUEUE_FLG_TPA) @@ -1015,8 +1054,6 @@ static void bnx2x_vf_mbx_set_q_flags(u32 mbx_q_flags, __set_bit(BNX2X_Q_FLG_TPA_GRO, sp_q_flags); if (mbx_q_flags & VFPF_QUEUE_FLG_STATS) __set_bit(BNX2X_Q_FLG_STATS, sp_q_flags); - if (mbx_q_flags & VFPF_QUEUE_FLG_OV) - 
__set_bit(BNX2X_Q_FLG_OV, sp_q_flags); if (mbx_q_flags & VFPF_QUEUE_FLG_VLAN) __set_bit(BNX2X_Q_FLG_VLAN, sp_q_flags); if (mbx_q_flags & VFPF_QUEUE_FLG_COS) @@ -1025,6 +1062,10 @@ static void bnx2x_vf_mbx_set_q_flags(u32 mbx_q_flags, __set_bit(BNX2X_Q_FLG_HC, sp_q_flags); if (mbx_q_flags & VFPF_QUEUE_FLG_DHC) __set_bit(BNX2X_Q_FLG_DHC, sp_q_flags); + + /* outer vlan removal is set according to the PF's multi fuction mode */ + if (IS_MF_SD(bp)) + __set_bit(BNX2X_Q_FLG_OV, sp_q_flags); } static void bnx2x_vf_mbx_setup_q(struct bnx2x *bp, struct bnx2x_virtf *vf, @@ -1075,11 +1116,11 @@ static void bnx2x_vf_mbx_setup_q(struct bnx2x *bp, struct bnx2x_virtf *vf, init_p->tx.hc_rate = setup_q->txq.hc_rate; init_p->tx.sb_cq_index = setup_q->txq.sb_index; - bnx2x_vf_mbx_set_q_flags(setup_q->txq.flags, + bnx2x_vf_mbx_set_q_flags(bp, setup_q->txq.flags, &init_p->tx.flags); /* tx setup - flags */ - bnx2x_vf_mbx_set_q_flags(setup_q->txq.flags, + bnx2x_vf_mbx_set_q_flags(bp, setup_q->txq.flags, &setup_p->flags); /* tx setup - general, nothing */ @@ -1107,11 +1148,11 @@ static void bnx2x_vf_mbx_setup_q(struct bnx2x *bp, struct bnx2x_virtf *vf, /* rx init */ init_p->rx.hc_rate = setup_q->rxq.hc_rate; init_p->rx.sb_cq_index = setup_q->rxq.sb_index; - bnx2x_vf_mbx_set_q_flags(setup_q->rxq.flags, + bnx2x_vf_mbx_set_q_flags(bp, setup_q->rxq.flags, &init_p->rx.flags); /* rx setup - flags */ - bnx2x_vf_mbx_set_q_flags(setup_q->rxq.flags, + bnx2x_vf_mbx_set_q_flags(bp, setup_q->rxq.flags, &setup_p->flags); /* rx setup - general */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h index bfc80baec00d..41708faab575 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h @@ -328,9 +328,15 @@ struct pf_vf_bulletin_content { #define MAC_ADDR_VALID 0 /* alert the vf that a new mac address * is available for it */ +#define VLAN_VALID 1 /* when set, the vf should not access + 
* the vfpf channel + */ u8 mac[ETH_ALEN]; - u8 padding[2]; + u8 mac_padding[2]; + + u16 vlan; + u8 vlan_padding[6]; }; union pf_vf_bulletin { @@ -353,6 +359,7 @@ enum channel_tlvs { CHANNEL_TLV_LIST_END, CHANNEL_TLV_FLR, CHANNEL_TLV_PF_SET_MAC, + CHANNEL_TLV_PF_SET_VLAN, CHANNEL_TLV_MAX }; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index e9b35da375cb..e80bfb60c3ef 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -831,11 +831,8 @@ static int sbdma_add_rcvbuffer(struct sbmac_softc *sc, struct sbmacdma *d, sb_new = netdev_alloc_skb(dev, ENET_PACKET_SIZE + SMP_CACHE_BYTES * 2 + NET_IP_ALIGN); - if (sb_new == NULL) { - pr_info("%s: sk_buff allocation failed\n", - d->sbdma_eth->sbm_dev->name); + if (sb_new == NULL) return -ENOBUFS; - } sbdma_align_skb(sb_new, SMP_CACHE_BYTES, NET_IP_ALIGN); } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index fdb9b5655414..ac83c87e0b1b 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -94,10 +94,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) #define DRV_MODULE_NAME "tg3" #define TG3_MAJ_NUM 3 -#define TG3_MIN_NUM 130 +#define TG3_MIN_NUM 131 #define DRV_MODULE_VERSION \ __stringify(TG3_MAJ_NUM) "." 
__stringify(TG3_MIN_NUM) -#define DRV_MODULE_RELDATE "February 14, 2013" +#define DRV_MODULE_RELDATE "April 09, 2013" #define RESET_KIND_SHUTDOWN 0 #define RESET_KIND_INIT 1 @@ -212,6 +212,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) #define TG3_FW_UPDATE_FREQ_SEC (TG3_FW_UPDATE_TIMEOUT_SEC / 2) #define FIRMWARE_TG3 "tigon/tg3.bin" +#define FIRMWARE_TG357766 "tigon/tg357766.bin" #define FIRMWARE_TG3TSO "tigon/tg3_tso.bin" #define FIRMWARE_TG3TSO5 "tigon/tg3_tso5.bin" @@ -1869,6 +1870,22 @@ static void tg3_link_report(struct tg3 *tp) tg3_ump_link_report(tp); } + + tp->link_up = netif_carrier_ok(tp->dev); +} + +static u32 tg3_decode_flowctrl_1000T(u32 adv) +{ + u32 flowctrl = 0; + + if (adv & ADVERTISE_PAUSE_CAP) { + flowctrl |= FLOW_CTRL_RX; + if (!(adv & ADVERTISE_PAUSE_ASYM)) + flowctrl |= FLOW_CTRL_TX; + } else if (adv & ADVERTISE_PAUSE_ASYM) + flowctrl |= FLOW_CTRL_TX; + + return flowctrl; } static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl) @@ -1887,6 +1904,20 @@ static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl) return miireg; } +static u32 tg3_decode_flowctrl_1000X(u32 adv) +{ + u32 flowctrl = 0; + + if (adv & ADVERTISE_1000XPAUSE) { + flowctrl |= FLOW_CTRL_RX; + if (!(adv & ADVERTISE_1000XPSE_ASYM)) + flowctrl |= FLOW_CTRL_TX; + } else if (adv & ADVERTISE_1000XPSE_ASYM) + flowctrl |= FLOW_CTRL_TX; + + return flowctrl; +} + static u8 tg3_resolve_flowctrl_1000X(u16 lcladv, u16 rmtadv) { u8 cap = 0; @@ -2197,7 +2228,7 @@ static void tg3_phy_toggle_apd(struct tg3 *tp, bool enable) tg3_writephy(tp, MII_TG3_MISC_SHDW, reg); } -static void tg3_phy_toggle_automdix(struct tg3 *tp, int enable) +static void tg3_phy_toggle_automdix(struct tg3 *tp, bool enable) { u32 phy; @@ -2289,7 +2320,7 @@ static void tg3_phy_apply_otp(struct tg3 *tp) tg3_phy_toggle_auxctl_smdsp(tp, false); } -static void tg3_phy_eee_adjust(struct tg3 *tp, u32 current_link_up) +static void tg3_phy_eee_adjust(struct tg3 *tp, bool current_link_up) { u32 val; @@ 
-2299,7 +2330,7 @@ static void tg3_phy_eee_adjust(struct tg3 *tp, u32 current_link_up) tp->setlpicnt = 0; if (tp->link_config.autoneg == AUTONEG_ENABLE && - current_link_up == 1 && + current_link_up && tp->link_config.active_duplex == DUPLEX_FULL && (tp->link_config.active_speed == SPEED_100 || tp->link_config.active_speed == SPEED_1000)) { @@ -2321,7 +2352,7 @@ static void tg3_phy_eee_adjust(struct tg3 *tp, u32 current_link_up) } if (!tp->setlpicnt) { - if (current_link_up == 1 && + if (current_link_up && !tg3_phy_toggle_auxctl_smdsp(tp, true)) { tg3_phydsp_write(tp, MII_TG3_DSP_TAP26, 0x0000); tg3_phy_toggle_auxctl_smdsp(tp, false); @@ -2522,18 +2553,19 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp) return err; } -static void tg3_carrier_on(struct tg3 *tp) -{ - netif_carrier_on(tp->dev); - tp->link_up = true; -} - static void tg3_carrier_off(struct tg3 *tp) { netif_carrier_off(tp->dev); tp->link_up = false; } +static void tg3_warn_mgmt_link_flap(struct tg3 *tp) +{ + if (tg3_flag(tp, ENABLE_ASF)) + netdev_warn(tp->dev, + "Management side-band traffic will be interrupted during phy settings change\n"); +} + /* This will reset the tigon3 PHY if there is no valid * link unless the FORCE argument is non-zero. 
*/ @@ -2553,7 +2585,7 @@ static int tg3_phy_reset(struct tg3 *tp) return -EBUSY; if (netif_running(tp->dev) && tp->link_up) { - tg3_carrier_off(tp); + netif_carrier_off(tp->dev); tg3_link_report(tp); } @@ -2673,7 +2705,7 @@ out: if (tg3_chip_rev_id(tp) == CHIPREV_ID_5762_A0) tg3_phydsp_write(tp, 0xffb, 0x4000); - tg3_phy_toggle_automdix(tp, 1); + tg3_phy_toggle_automdix(tp, true); tg3_phy_set_wirespeed(tp); return 0; } @@ -2929,6 +2961,9 @@ static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power) { u32 val; + if (tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN) + return; + if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES) { if (tg3_asic_rev(tp) == ASIC_REV_5704) { u32 sg_dig_ctrl = tr32(SG_DIG_CTRL); @@ -3452,11 +3487,58 @@ static int tg3_nvram_write_block(struct tg3 *tp, u32 offset, u32 len, u8 *buf) #define TX_CPU_SCRATCH_SIZE 0x04000 /* tp->lock is held. */ -static int tg3_halt_cpu(struct tg3 *tp, u32 offset) +static int tg3_pause_cpu(struct tg3 *tp, u32 cpu_base) { int i; + const int iters = 10000; - BUG_ON(offset == TX_CPU_BASE && tg3_flag(tp, 5705_PLUS)); + for (i = 0; i < iters; i++) { + tw32(cpu_base + CPU_STATE, 0xffffffff); + tw32(cpu_base + CPU_MODE, CPU_MODE_HALT); + if (tr32(cpu_base + CPU_MODE) & CPU_MODE_HALT) + break; + } + + return (i == iters) ? -EBUSY : 0; +} + +/* tp->lock is held. */ +static int tg3_rxcpu_pause(struct tg3 *tp) +{ + int rc = tg3_pause_cpu(tp, RX_CPU_BASE); + + tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff); + tw32_f(RX_CPU_BASE + CPU_MODE, CPU_MODE_HALT); + udelay(10); + + return rc; +} + +/* tp->lock is held. */ +static int tg3_txcpu_pause(struct tg3 *tp) +{ + return tg3_pause_cpu(tp, TX_CPU_BASE); +} + +/* tp->lock is held. */ +static void tg3_resume_cpu(struct tg3 *tp, u32 cpu_base) +{ + tw32(cpu_base + CPU_STATE, 0xffffffff); + tw32_f(cpu_base + CPU_MODE, 0x00000000); +} + +/* tp->lock is held. */ +static void tg3_rxcpu_resume(struct tg3 *tp) +{ + tg3_resume_cpu(tp, RX_CPU_BASE); +} + +/* tp->lock is held. 
*/ +static int tg3_halt_cpu(struct tg3 *tp, u32 cpu_base) +{ + int rc; + + BUG_ON(cpu_base == TX_CPU_BASE && tg3_flag(tp, 5705_PLUS)); if (tg3_asic_rev(tp) == ASIC_REV_5906) { u32 val = tr32(GRC_VCPU_EXT_CTRL); @@ -3464,17 +3546,8 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset) tw32(GRC_VCPU_EXT_CTRL, val | GRC_VCPU_EXT_CTRL_HALT_CPU); return 0; } - if (offset == RX_CPU_BASE) { - for (i = 0; i < 10000; i++) { - tw32(offset + CPU_STATE, 0xffffffff); - tw32(offset + CPU_MODE, CPU_MODE_HALT); - if (tr32(offset + CPU_MODE) & CPU_MODE_HALT) - break; - } - - tw32(offset + CPU_STATE, 0xffffffff); - tw32_f(offset + CPU_MODE, CPU_MODE_HALT); - udelay(10); + if (cpu_base == RX_CPU_BASE) { + rc = tg3_rxcpu_pause(tp); } else { /* * There is only an Rx CPU for the 5750 derivative in the @@ -3483,17 +3556,12 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset) if (tg3_flag(tp, IS_SSB_CORE)) return 0; - for (i = 0; i < 10000; i++) { - tw32(offset + CPU_STATE, 0xffffffff); - tw32(offset + CPU_MODE, CPU_MODE_HALT); - if (tr32(offset + CPU_MODE) & CPU_MODE_HALT) - break; - } + rc = tg3_txcpu_pause(tp); } - if (i >= 10000) { + if (rc) { netdev_err(tp->dev, "%s timed out, %s CPU\n", - __func__, offset == RX_CPU_BASE ? "RX" : "TX"); + __func__, cpu_base == RX_CPU_BASE ? "RX" : "TX"); return -ENODEV; } @@ -3503,19 +3571,41 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset) return 0; } -struct fw_info { - unsigned int fw_base; - unsigned int fw_len; - const __be32 *fw_data; -}; +static int tg3_fw_data_len(struct tg3 *tp, + const struct tg3_firmware_hdr *fw_hdr) +{ + int fw_len; + + /* Non fragmented firmware have one firmware header followed by a + * contiguous chunk of data to be written. The length field in that + * header is not the length of data to be written but the complete + * length of the bss. The data length is determined based on + * tp->fw->size minus headers. + * + * Fragmented firmware have a main header followed by multiple + * fragments. 
Each fragment is identical to non fragmented firmware + * with a firmware header followed by a contiguous chunk of data. In + * the main header, the length field is unused and set to 0xffffffff. + * In each fragment header the length is the entire size of that + * fragment i.e. fragment data + header length. Data length is + * therefore length field in the header minus TG3_FW_HDR_LEN. + */ + if (tp->fw_len == 0xffffffff) + fw_len = be32_to_cpu(fw_hdr->len); + else + fw_len = tp->fw->size; + + return (fw_len - TG3_FW_HDR_LEN) / sizeof(u32); +} /* tp->lock is held. */ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_base, int cpu_scratch_size, - struct fw_info *info) + const struct tg3_firmware_hdr *fw_hdr) { - int err, lock_err, i; + int err, i; void (*write_op)(struct tg3 *, u32, u32); + int total_len = tp->fw->size; if (cpu_base == TX_CPU_BASE && tg3_flag(tp, 5705_PLUS)) { netdev_err(tp->dev, @@ -3524,30 +3614,49 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, return -EINVAL; } - if (tg3_flag(tp, 5705_PLUS)) + if (tg3_flag(tp, 5705_PLUS) && tg3_asic_rev(tp) != ASIC_REV_57766) write_op = tg3_write_mem; else write_op = tg3_write_indirect_reg32; - /* It is possible that bootcode is still loading at this point. - * Get the nvram lock first before halting the cpu. - */ - lock_err = tg3_nvram_lock(tp); - err = tg3_halt_cpu(tp, cpu_base); - if (!lock_err) - tg3_nvram_unlock(tp); - if (err) - goto out; + if (tg3_asic_rev(tp) != ASIC_REV_57766) { + /* It is possible that bootcode is still loading at this point. + * Get the nvram lock first before halting the cpu. 
+ */ + int lock_err = tg3_nvram_lock(tp); + err = tg3_halt_cpu(tp, cpu_base); + if (!lock_err) + tg3_nvram_unlock(tp); + if (err) + goto out; - for (i = 0; i < cpu_scratch_size; i += sizeof(u32)) - write_op(tp, cpu_scratch_base + i, 0); - tw32(cpu_base + CPU_STATE, 0xffffffff); - tw32(cpu_base + CPU_MODE, tr32(cpu_base+CPU_MODE)|CPU_MODE_HALT); - for (i = 0; i < (info->fw_len / sizeof(u32)); i++) - write_op(tp, (cpu_scratch_base + - (info->fw_base & 0xffff) + - (i * sizeof(u32))), - be32_to_cpu(info->fw_data[i])); + for (i = 0; i < cpu_scratch_size; i += sizeof(u32)) + write_op(tp, cpu_scratch_base + i, 0); + tw32(cpu_base + CPU_STATE, 0xffffffff); + tw32(cpu_base + CPU_MODE, + tr32(cpu_base + CPU_MODE) | CPU_MODE_HALT); + } else { + /* Subtract additional main header for fragmented firmware and + * advance to the first fragment + */ + total_len -= TG3_FW_HDR_LEN; + fw_hdr++; + } + + do { + u32 *fw_data = (u32 *)(fw_hdr + 1); + for (i = 0; i < tg3_fw_data_len(tp, fw_hdr); i++) + write_op(tp, cpu_scratch_base + + (be32_to_cpu(fw_hdr->base_addr) & 0xffff) + + (i * sizeof(u32)), + be32_to_cpu(fw_data[i])); + + total_len -= be32_to_cpu(fw_hdr->len); + + /* Advance to next fragment */ + fw_hdr = (struct tg3_firmware_hdr *) + ((void *)fw_hdr + be32_to_cpu(fw_hdr->len)); + } while (total_len > 0); err = 0; @@ -3556,13 +3665,33 @@ out: } /* tp->lock is held. */ +static int tg3_pause_cpu_and_set_pc(struct tg3 *tp, u32 cpu_base, u32 pc) +{ + int i; + const int iters = 5; + + tw32(cpu_base + CPU_STATE, 0xffffffff); + tw32_f(cpu_base + CPU_PC, pc); + + for (i = 0; i < iters; i++) { + if (tr32(cpu_base + CPU_PC) == pc) + break; + tw32(cpu_base + CPU_STATE, 0xffffffff); + tw32(cpu_base + CPU_MODE, CPU_MODE_HALT); + tw32_f(cpu_base + CPU_PC, pc); + udelay(1000); + } + + return (i == iters) ? -EBUSY : 0; +} + +/* tp->lock is held. 
*/ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp) { - struct fw_info info; - const __be32 *fw_data; - int err, i; + const struct tg3_firmware_hdr *fw_hdr; + int err; - fw_data = (void *)tp->fw->data; + fw_hdr = (struct tg3_firmware_hdr *)tp->fw->data; /* Firmware blob starts with version numbers, followed by start address and length. We are setting complete length. @@ -3570,60 +3699,117 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp) Remainder is the blob to be loaded contiguously from start address. */ - info.fw_base = be32_to_cpu(fw_data[1]); - info.fw_len = tp->fw->size - 12; - info.fw_data = &fw_data[3]; - err = tg3_load_firmware_cpu(tp, RX_CPU_BASE, RX_CPU_SCRATCH_BASE, RX_CPU_SCRATCH_SIZE, - &info); + fw_hdr); if (err) return err; err = tg3_load_firmware_cpu(tp, TX_CPU_BASE, TX_CPU_SCRATCH_BASE, TX_CPU_SCRATCH_SIZE, - &info); + fw_hdr); if (err) return err; /* Now startup only the RX cpu. */ - tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff); - tw32_f(RX_CPU_BASE + CPU_PC, info.fw_base); - - for (i = 0; i < 5; i++) { - if (tr32(RX_CPU_BASE + CPU_PC) == info.fw_base) - break; - tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff); - tw32(RX_CPU_BASE + CPU_MODE, CPU_MODE_HALT); - tw32_f(RX_CPU_BASE + CPU_PC, info.fw_base); - udelay(1000); - } - if (i >= 5) { + err = tg3_pause_cpu_and_set_pc(tp, RX_CPU_BASE, + be32_to_cpu(fw_hdr->base_addr)); + if (err) { netdev_err(tp->dev, "%s fails to set RX CPU PC, is %08x " "should be %08x\n", __func__, - tr32(RX_CPU_BASE + CPU_PC), info.fw_base); + tr32(RX_CPU_BASE + CPU_PC), + be32_to_cpu(fw_hdr->base_addr)); return -ENODEV; } - tw32(RX_CPU_BASE + CPU_STATE, 0xffffffff); - tw32_f(RX_CPU_BASE + CPU_MODE, 0x00000000); + + tg3_rxcpu_resume(tp); + + return 0; +} + +static int tg3_validate_rxcpu_state(struct tg3 *tp) +{ + const int iters = 1000; + int i; + u32 val; + + /* Wait for boot code to complete initialization and enter service + * loop. 
It is then safe to download service patches + */ + for (i = 0; i < iters; i++) { + if (tr32(RX_CPU_HWBKPT) == TG3_SBROM_IN_SERVICE_LOOP) + break; + + udelay(10); + } + + if (i == iters) { + netdev_err(tp->dev, "Boot code not ready for service patches\n"); + return -EBUSY; + } + + val = tg3_read_indirect_reg32(tp, TG3_57766_FW_HANDSHAKE); + if (val & 0xff) { + netdev_warn(tp->dev, + "Other patches exist. Not downloading EEE patch\n"); + return -EEXIST; + } return 0; } /* tp->lock is held. */ +static void tg3_load_57766_firmware(struct tg3 *tp) +{ + struct tg3_firmware_hdr *fw_hdr; + + if (!tg3_flag(tp, NO_NVRAM)) + return; + + if (tg3_validate_rxcpu_state(tp)) + return; + + if (!tp->fw) + return; + + /* This firmware blob has a different format than older firmware + * releases as given below. The main difference is we have fragmented + * data to be written to non-contiguous locations. + * + * In the beginning we have a firmware header identical to other + * firmware which consists of version, base addr and length. The length + * here is unused and set to 0xffffffff. + * + * This is followed by a series of firmware fragments which are + * individually identical to previous firmware. i.e. they have the + * firmware header and followed by data for that fragment. The version + * field of the individual fragment header is unused. + */ + + fw_hdr = (struct tg3_firmware_hdr *)tp->fw->data; + if (be32_to_cpu(fw_hdr->base_addr) != TG3_57766_FW_BASE_ADDR) + return; + + if (tg3_rxcpu_pause(tp)) + return; + + /* tg3_load_firmware_cpu() will always succeed for the 57766 */ + tg3_load_firmware_cpu(tp, 0, TG3_57766_FW_BASE_ADDR, 0, fw_hdr); + + tg3_rxcpu_resume(tp); +} + +/* tp->lock is held. 
*/ static int tg3_load_tso_firmware(struct tg3 *tp) { - struct fw_info info; - const __be32 *fw_data; + const struct tg3_firmware_hdr *fw_hdr; unsigned long cpu_base, cpu_scratch_base, cpu_scratch_size; - int err, i; + int err; - if (tg3_flag(tp, HW_TSO_1) || - tg3_flag(tp, HW_TSO_2) || - tg3_flag(tp, HW_TSO_3)) + if (!tg3_flag(tp, FW_TSO)) return 0; - fw_data = (void *)tp->fw->data; + fw_hdr = (struct tg3_firmware_hdr *)tp->fw->data; /* Firmware blob starts with version numbers, followed by start address and length. We are setting complete length. @@ -3631,10 +3817,7 @@ static int tg3_load_tso_firmware(struct tg3 *tp) Remainder is the blob to be loaded contiguously from start address. */ - info.fw_base = be32_to_cpu(fw_data[1]); cpu_scratch_size = tp->fw_len; - info.fw_len = tp->fw->size - 12; - info.fw_data = &fw_data[3]; if (tg3_asic_rev(tp) == ASIC_REV_5705) { cpu_base = RX_CPU_BASE; @@ -3647,36 +3830,28 @@ static int tg3_load_tso_firmware(struct tg3 *tp) err = tg3_load_firmware_cpu(tp, cpu_base, cpu_scratch_base, cpu_scratch_size, - &info); + fw_hdr); if (err) return err; /* Now startup the cpu. */ - tw32(cpu_base + CPU_STATE, 0xffffffff); - tw32_f(cpu_base + CPU_PC, info.fw_base); - - for (i = 0; i < 5; i++) { - if (tr32(cpu_base + CPU_PC) == info.fw_base) - break; - tw32(cpu_base + CPU_STATE, 0xffffffff); - tw32(cpu_base + CPU_MODE, CPU_MODE_HALT); - tw32_f(cpu_base + CPU_PC, info.fw_base); - udelay(1000); - } - if (i >= 5) { + err = tg3_pause_cpu_and_set_pc(tp, cpu_base, + be32_to_cpu(fw_hdr->base_addr)); + if (err) { netdev_err(tp->dev, "%s fails to set CPU PC, is %08x should be %08x\n", - __func__, tr32(cpu_base + CPU_PC), info.fw_base); + __func__, tr32(cpu_base + CPU_PC), + be32_to_cpu(fw_hdr->base_addr)); return -ENODEV; } - tw32(cpu_base + CPU_STATE, 0xffffffff); - tw32_f(cpu_base + CPU_MODE, 0x00000000); + + tg3_resume_cpu(tp, cpu_base); return 0; } /* tp->lock is held. 
*/ -static void __tg3_set_mac_addr(struct tg3 *tp, int skip_mac_1) +static void __tg3_set_mac_addr(struct tg3 *tp, bool skip_mac_1) { u32 addr_high, addr_low; int i; @@ -3739,7 +3914,7 @@ static int tg3_power_up(struct tg3 *tp) return err; } -static int tg3_setup_phy(struct tg3 *, int); +static int tg3_setup_phy(struct tg3 *, bool); static int tg3_power_down_prepare(struct tg3 *tp) { @@ -3811,7 +3986,7 @@ static int tg3_power_down_prepare(struct tg3 *tp) tp->phy_flags |= TG3_PHYFLG_IS_LOW_POWER; if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) - tg3_setup_phy(tp, 0); + tg3_setup_phy(tp, false); } if (tg3_asic_rev(tp) == ASIC_REV_5906) { @@ -3852,7 +4027,13 @@ static int tg3_power_down_prepare(struct tg3 *tp) if (tp->phy_flags & TG3_PHYFLG_MII_SERDES) mac_mode = MAC_MODE_PORT_MODE_GMII; - else + else if (tp->phy_flags & + TG3_PHYFLG_KEEP_LINK_ON_PWRDN) { + if (tp->link_config.active_speed == SPEED_1000) + mac_mode = MAC_MODE_PORT_MODE_GMII; + else + mac_mode = MAC_MODE_PORT_MODE_MII; + } else mac_mode = MAC_MODE_PORT_MODE_MII; mac_mode |= tp->mac_mode & MAC_MODE_LINK_POLARITY; @@ -4106,12 +4287,16 @@ static void tg3_phy_copper_begin(struct tg3 *tp) (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) { u32 adv, fc; - if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) { + if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) && + !(tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN)) { adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full; if (tg3_flag(tp, WOL_SPEED_100MB)) adv |= ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; + if (tp->phy_flags & TG3_PHYFLG_1G_ON_VAUX_OK) + adv |= ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full; fc = FLOW_CTRL_TX | FLOW_CTRL_RX; } else { @@ -4125,6 +4310,15 @@ static void tg3_phy_copper_begin(struct tg3 *tp) tg3_phy_autoneg_cfg(tp, adv, fc); + if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) && + (tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN)) { + /* Normally during power down we want to autonegotiate + * the lowest possible speed for WOL. 
However, to avoid + * link flap, we leave it untouched. + */ + return; + } + tg3_writephy(tp, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART); } else { @@ -4134,6 +4328,14 @@ static void tg3_phy_copper_begin(struct tg3 *tp) tp->link_config.active_speed = tp->link_config.speed; tp->link_config.active_duplex = tp->link_config.duplex; + if (tg3_asic_rev(tp) == ASIC_REV_5714) { + /* With autoneg disabled, 5715 only links up when the + * advertisement register has the configured speed + * enabled. + */ + tg3_writephy(tp, MII_ADVERTISE, ADVERTISE_ALL); + } + bmcr = 0; switch (tp->link_config.speed) { default: @@ -4173,6 +4375,103 @@ static void tg3_phy_copper_begin(struct tg3 *tp) } } +static int tg3_phy_pull_config(struct tg3 *tp) +{ + int err; + u32 val; + + err = tg3_readphy(tp, MII_BMCR, &val); + if (err) + goto done; + + if (!(val & BMCR_ANENABLE)) { + tp->link_config.autoneg = AUTONEG_DISABLE; + tp->link_config.advertising = 0; + tg3_flag_clear(tp, PAUSE_AUTONEG); + + err = -EIO; + + switch (val & (BMCR_SPEED1000 | BMCR_SPEED100)) { + case 0: + if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) + goto done; + + tp->link_config.speed = SPEED_10; + break; + case BMCR_SPEED100: + if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) + goto done; + + tp->link_config.speed = SPEED_100; + break; + case BMCR_SPEED1000: + if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) { + tp->link_config.speed = SPEED_1000; + break; + } + /* Fall through */ + default: + goto done; + } + + if (val & BMCR_FULLDPLX) + tp->link_config.duplex = DUPLEX_FULL; + else + tp->link_config.duplex = DUPLEX_HALF; + + tp->link_config.flowctrl = FLOW_CTRL_RX | FLOW_CTRL_TX; + + err = 0; + goto done; + } + + tp->link_config.autoneg = AUTONEG_ENABLE; + tp->link_config.advertising = ADVERTISED_Autoneg; + tg3_flag_set(tp, PAUSE_AUTONEG); + + if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { + u32 adv; + + err = tg3_readphy(tp, MII_ADVERTISE, &val); + if (err) + goto done; + + adv = mii_adv_to_ethtool_adv_t(val & ADVERTISE_ALL); + 
tp->link_config.advertising |= adv | ADVERTISED_TP; + + tp->link_config.flowctrl = tg3_decode_flowctrl_1000T(val); + } else { + tp->link_config.advertising |= ADVERTISED_FIBRE; + } + + if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) { + u32 adv; + + if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { + err = tg3_readphy(tp, MII_CTRL1000, &val); + if (err) + goto done; + + adv = mii_ctrl1000_to_ethtool_adv_t(val); + } else { + err = tg3_readphy(tp, MII_ADVERTISE, &val); + if (err) + goto done; + + adv = tg3_decode_flowctrl_1000X(val); + tp->link_config.flowctrl = adv; + + val &= (ADVERTISE_1000XHALF | ADVERTISE_1000XFULL); + adv = mii_adv_to_ethtool_adv_x(val); + } + + tp->link_config.advertising |= adv; + } + +done: + return err; +} + static int tg3_init_5401phy_dsp(struct tg3 *tp) { int err; @@ -4192,6 +4491,32 @@ static int tg3_init_5401phy_dsp(struct tg3 *tp) return err; } +static bool tg3_phy_eee_config_ok(struct tg3 *tp) +{ + u32 val; + u32 tgtadv = 0; + u32 advertising = tp->link_config.advertising; + + if (!(tp->phy_flags & TG3_PHYFLG_EEE_CAP)) + return true; + + if (tg3_phy_cl45_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, &val)) + return false; + + val &= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T); + + + if (advertising & ADVERTISED_100baseT_Full) + tgtadv |= MDIO_AN_EEE_ADV_100TX; + if (advertising & ADVERTISED_1000baseT_Full) + tgtadv |= MDIO_AN_EEE_ADV_1000T; + + if (val != tgtadv) + return false; + + return true; +} + static bool tg3_phy_copper_an_config_ok(struct tg3 *tp, u32 *lcladv) { u32 advmsk, tgtadv, advertising; @@ -4258,13 +4583,13 @@ static bool tg3_phy_copper_fetch_rmtadv(struct tg3 *tp, u32 *rmtadv) return true; } -static bool tg3_test_and_report_link_chg(struct tg3 *tp, int curr_link_up) +static bool tg3_test_and_report_link_chg(struct tg3 *tp, bool curr_link_up) { if (curr_link_up != tp->link_up) { if (curr_link_up) { - tg3_carrier_on(tp); + netif_carrier_on(tp->dev); } else { - tg3_carrier_off(tp); + netif_carrier_off(tp->dev); if 
(tp->phy_flags & TG3_PHYFLG_MII_SERDES) tp->phy_flags &= ~TG3_PHYFLG_PARALLEL_DETECT; } @@ -4276,23 +4601,28 @@ static bool tg3_test_and_report_link_chg(struct tg3 *tp, int curr_link_up) return false; } -static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset) +static void tg3_clear_mac_status(struct tg3 *tp) { - int current_link_up; + tw32(MAC_EVENT, 0); + + tw32_f(MAC_STATUS, + MAC_STATUS_SYNC_CHANGED | + MAC_STATUS_CFG_CHANGED | + MAC_STATUS_MI_COMPLETION | + MAC_STATUS_LNKSTATE_CHANGED); + udelay(40); +} + +static int tg3_setup_copper_phy(struct tg3 *tp, bool force_reset) +{ + bool current_link_up; u32 bmsr, val; u32 lcl_adv, rmt_adv; u16 current_speed; u8 current_duplex; int i, err; - tw32(MAC_EVENT, 0); - - tw32_f(MAC_STATUS, - (MAC_STATUS_SYNC_CHANGED | - MAC_STATUS_CFG_CHANGED | - MAC_STATUS_MI_COMPLETION | - MAC_STATUS_LNKSTATE_CHANGED)); - udelay(40); + tg3_clear_mac_status(tp); if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) { tw32_f(MAC_MI_MODE, @@ -4312,7 +4642,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset) tg3_readphy(tp, MII_BMSR, &bmsr); if (!tg3_readphy(tp, MII_BMSR, &bmsr) && !(bmsr & BMSR_LSTATUS)) - force_reset = 1; + force_reset = true; } if (force_reset) tg3_phy_reset(tp); @@ -4376,7 +4706,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset) tg3_writephy(tp, MII_TG3_EXT_CTRL, 0); } - current_link_up = 0; + current_link_up = false; current_speed = SPEED_UNKNOWN; current_duplex = DUPLEX_UNKNOWN; tp->phy_flags &= ~TG3_PHYFLG_MDIX_STATE; @@ -4435,21 +4765,31 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset) tp->link_config.active_duplex = current_duplex; if (tp->link_config.autoneg == AUTONEG_ENABLE) { + bool eee_config_ok = tg3_phy_eee_config_ok(tp); + if ((bmcr & BMCR_ANENABLE) && + eee_config_ok && tg3_phy_copper_an_config_ok(tp, &lcl_adv) && tg3_phy_copper_fetch_rmtadv(tp, &rmt_adv)) - current_link_up = 1; + current_link_up = true; + + /* EEE settings changes take effect only 
after a phy + * reset. If we have skipped a reset due to Link Flap + * Avoidance being enabled, do it now. + */ + if (!eee_config_ok && + (tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN) && + !force_reset) + tg3_phy_reset(tp); } else { if (!(bmcr & BMCR_ANENABLE) && tp->link_config.speed == current_speed && - tp->link_config.duplex == current_duplex && - tp->link_config.flowctrl == - tp->link_config.active_flowctrl) { - current_link_up = 1; + tp->link_config.duplex == current_duplex) { + current_link_up = true; } } - if (current_link_up == 1 && + if (current_link_up && tp->link_config.active_duplex == DUPLEX_FULL) { u32 reg, bit; @@ -4469,11 +4809,11 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset) } relink: - if (current_link_up == 0 || (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) { + if (!current_link_up || (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) { tg3_phy_copper_begin(tp); if (tg3_flag(tp, ROBOSWITCH)) { - current_link_up = 1; + current_link_up = true; /* FIXME: when BCM5325 switch is used use 100 MBit/s */ current_speed = SPEED_1000; current_duplex = DUPLEX_FULL; @@ -4484,11 +4824,11 @@ relink: tg3_readphy(tp, MII_BMSR, &bmsr); if ((!tg3_readphy(tp, MII_BMSR, &bmsr) && (bmsr & BMSR_LSTATUS)) || (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK)) - current_link_up = 1; + current_link_up = true; } tp->mac_mode &= ~MAC_MODE_PORT_MODE_MASK; - if (current_link_up == 1) { + if (current_link_up) { if (tp->link_config.active_speed == SPEED_100 || tp->link_config.active_speed == SPEED_10) tp->mac_mode |= MAC_MODE_PORT_MODE_MII; @@ -4524,7 +4864,7 @@ relink: tp->mac_mode |= MAC_MODE_HALF_DUPLEX; if (tg3_asic_rev(tp) == ASIC_REV_5700) { - if (current_link_up == 1 && + if (current_link_up && tg3_5700_link_polarity(tp, tp->link_config.active_speed)) tp->mac_mode |= MAC_MODE_LINK_POLARITY; else @@ -4555,7 +4895,7 @@ relink: udelay(40); if (tg3_asic_rev(tp) == ASIC_REV_5700 && - current_link_up == 1 && + current_link_up && tp->link_config.active_speed == SPEED_1000 
&& (tg3_flag(tp, PCIX_MODE) || tg3_flag(tp, PCI_HIGH_SPEED))) { udelay(120); @@ -4995,19 +5335,19 @@ static void tg3_init_bcm8002(struct tg3 *tp) tg3_writephy(tp, 0x10, 0x8011); } -static int tg3_setup_fiber_hw_autoneg(struct tg3 *tp, u32 mac_status) +static bool tg3_setup_fiber_hw_autoneg(struct tg3 *tp, u32 mac_status) { u16 flowctrl; + bool current_link_up; u32 sg_dig_ctrl, sg_dig_status; u32 serdes_cfg, expected_sg_dig_ctrl; int workaround, port_a; - int current_link_up; serdes_cfg = 0; expected_sg_dig_ctrl = 0; workaround = 0; port_a = 1; - current_link_up = 0; + current_link_up = false; if (tg3_chip_rev_id(tp) != CHIPREV_ID_5704_A0 && tg3_chip_rev_id(tp) != CHIPREV_ID_5704_A1) { @@ -5038,7 +5378,7 @@ static int tg3_setup_fiber_hw_autoneg(struct tg3 *tp, u32 mac_status) } if (mac_status & MAC_STATUS_PCS_SYNCED) { tg3_setup_flow_control(tp, 0, 0); - current_link_up = 1; + current_link_up = true; } goto out; } @@ -5059,7 +5399,7 @@ static int tg3_setup_fiber_hw_autoneg(struct tg3 *tp, u32 mac_status) MAC_STATUS_RCVD_CFG)) == MAC_STATUS_PCS_SYNCED)) { tp->serdes_counter--; - current_link_up = 1; + current_link_up = true; goto out; } restart_autoneg: @@ -5094,7 +5434,7 @@ restart_autoneg: mii_adv_to_ethtool_adv_x(remote_adv); tg3_setup_flow_control(tp, local_adv, remote_adv); - current_link_up = 1; + current_link_up = true; tp->serdes_counter = 0; tp->phy_flags &= ~TG3_PHYFLG_PARALLEL_DETECT; } else if (!(sg_dig_status & SG_DIG_AUTONEG_COMPLETE)) { @@ -5122,7 +5462,7 @@ restart_autoneg: if ((mac_status & MAC_STATUS_PCS_SYNCED) && !(mac_status & MAC_STATUS_RCVD_CFG)) { tg3_setup_flow_control(tp, 0, 0); - current_link_up = 1; + current_link_up = true; tp->phy_flags |= TG3_PHYFLG_PARALLEL_DETECT; tp->serdes_counter = @@ -5140,9 +5480,9 @@ out: return current_link_up; } -static int tg3_setup_fiber_by_hand(struct tg3 *tp, u32 mac_status) +static bool tg3_setup_fiber_by_hand(struct tg3 *tp, u32 mac_status) { - int current_link_up = 0; + bool current_link_up = false; if 
(!(mac_status & MAC_STATUS_PCS_SYNCED)) goto out; @@ -5169,7 +5509,7 @@ static int tg3_setup_fiber_by_hand(struct tg3 *tp, u32 mac_status) tg3_setup_flow_control(tp, local_adv, remote_adv); - current_link_up = 1; + current_link_up = true; } for (i = 0; i < 30; i++) { udelay(20); @@ -5184,15 +5524,15 @@ static int tg3_setup_fiber_by_hand(struct tg3 *tp, u32 mac_status) } mac_status = tr32(MAC_STATUS); - if (current_link_up == 0 && + if (!current_link_up && (mac_status & MAC_STATUS_PCS_SYNCED) && !(mac_status & MAC_STATUS_RCVD_CFG)) - current_link_up = 1; + current_link_up = true; } else { tg3_setup_flow_control(tp, 0, 0); /* Forcing 1000FD link up. */ - current_link_up = 1; + current_link_up = true; tw32_f(MAC_MODE, (tp->mac_mode | MAC_MODE_SEND_CONFIGS)); udelay(40); @@ -5205,13 +5545,13 @@ out: return current_link_up; } -static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset) +static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset) { u32 orig_pause_cfg; u16 orig_active_speed; u8 orig_active_duplex; u32 mac_status; - int current_link_up; + bool current_link_up; int i; orig_pause_cfg = tp->link_config.active_flowctrl; @@ -5248,7 +5588,7 @@ static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset) tw32_f(MAC_EVENT, MAC_EVENT_LNKSTATE_CHANGED); udelay(40); - current_link_up = 0; + current_link_up = false; tp->link_config.rmt_adv = 0; mac_status = tr32(MAC_STATUS); @@ -5273,7 +5613,7 @@ static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset) mac_status = tr32(MAC_STATUS); if ((mac_status & MAC_STATUS_PCS_SYNCED) == 0) { - current_link_up = 0; + current_link_up = false; if (tp->link_config.autoneg == AUTONEG_ENABLE && tp->serdes_counter == 0) { tw32_f(MAC_MODE, (tp->mac_mode | @@ -5283,7 +5623,7 @@ static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset) } } - if (current_link_up == 1) { + if (current_link_up) { tp->link_config.active_speed = SPEED_1000; tp->link_config.active_duplex = DUPLEX_FULL; tw32(MAC_LED_CTRL, (tp->led_ctrl | 
@@ -5308,33 +5648,63 @@ static int tg3_setup_fiber_phy(struct tg3 *tp, int force_reset) return 0; } -static int tg3_setup_fiber_mii_phy(struct tg3 *tp, int force_reset) +static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) { - int current_link_up, err = 0; + int err = 0; u32 bmsr, bmcr; - u16 current_speed; - u8 current_duplex; - u32 local_adv, remote_adv; + u16 current_speed = SPEED_UNKNOWN; + u8 current_duplex = DUPLEX_UNKNOWN; + bool current_link_up = false; + u32 local_adv, remote_adv, sgsr; + + if ((tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) && + !tg3_readphy(tp, SERDES_TG3_1000X_STATUS, &sgsr) && + (sgsr & SERDES_TG3_SGMII_MODE)) { + + if (force_reset) + tg3_phy_reset(tp); + + tp->mac_mode &= ~MAC_MODE_PORT_MODE_MASK; + + if (!(sgsr & SERDES_TG3_LINK_UP)) { + tp->mac_mode |= MAC_MODE_PORT_MODE_GMII; + } else { + current_link_up = true; + if (sgsr & SERDES_TG3_SPEED_1000) { + current_speed = SPEED_1000; + tp->mac_mode |= MAC_MODE_PORT_MODE_GMII; + } else if (sgsr & SERDES_TG3_SPEED_100) { + current_speed = SPEED_100; + tp->mac_mode |= MAC_MODE_PORT_MODE_MII; + } else { + current_speed = SPEED_10; + tp->mac_mode |= MAC_MODE_PORT_MODE_MII; + } + + if (sgsr & SERDES_TG3_FULL_DUPLEX) + current_duplex = DUPLEX_FULL; + else + current_duplex = DUPLEX_HALF; + } + + tw32_f(MAC_MODE, tp->mac_mode); + udelay(40); + + tg3_clear_mac_status(tp); + + goto fiber_setup_done; + } tp->mac_mode |= MAC_MODE_PORT_MODE_GMII; tw32_f(MAC_MODE, tp->mac_mode); udelay(40); - tw32(MAC_EVENT, 0); - - tw32_f(MAC_STATUS, - (MAC_STATUS_SYNC_CHANGED | - MAC_STATUS_CFG_CHANGED | - MAC_STATUS_MI_COMPLETION | - MAC_STATUS_LNKSTATE_CHANGED)); - udelay(40); + tg3_clear_mac_status(tp); if (force_reset) tg3_phy_reset(tp); - current_link_up = 0; - current_speed = SPEED_UNKNOWN; - current_duplex = DUPLEX_UNKNOWN; tp->link_config.rmt_adv = 0; err |= tg3_readphy(tp, MII_BMSR, &bmsr); @@ -5420,7 +5790,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, 
int force_reset) if (bmsr & BMSR_LSTATUS) { current_speed = SPEED_1000; - current_link_up = 1; + current_link_up = true; if (bmcr & BMCR_FULLDPLX) current_duplex = DUPLEX_FULL; else @@ -5447,12 +5817,13 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, int force_reset) } else if (!tg3_flag(tp, 5780_CLASS)) { /* Link is up via parallel detect */ } else { - current_link_up = 0; + current_link_up = false; } } } - if (current_link_up == 1 && current_duplex == DUPLEX_FULL) +fiber_setup_done: + if (current_link_up && current_duplex == DUPLEX_FULL) tg3_setup_flow_control(tp, local_adv, remote_adv); tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX; @@ -5531,7 +5902,7 @@ static void tg3_serdes_parallel_detect(struct tg3 *tp) } } -static int tg3_setup_phy(struct tg3 *tp, int force_reset) +static int tg3_setup_phy(struct tg3 *tp, bool force_reset) { u32 val; int err; @@ -6344,7 +6715,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) if (desc->type_flags & RXD_FLAG_VLAN && !(tp->rx_mode & RX_MODE_KEEP_VLAN_TAG)) - __vlan_hwaccel_put_tag(skb, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), desc->err_vlan & RXD_VLAN_MASK); napi_gro_receive(&tnapi->napi, skb); @@ -6432,7 +6803,7 @@ static void tg3_poll_link(struct tg3 *tp) MAC_STATUS_LNKSTATE_CHANGED)); udelay(40); } else - tg3_setup_phy(tp, 0); + tg3_setup_phy(tp, false); spin_unlock(&tp->lock); } } @@ -7529,7 +7900,7 @@ static int tg3_phy_lpbk_set(struct tg3 *tp, u32 speed, bool extlpbk) u32 val, bmcr, mac_mode, ptest = 0; tg3_phy_toggle_apd(tp, false); - tg3_phy_toggle_automdix(tp, 0); + tg3_phy_toggle_automdix(tp, false); if (extlpbk && tg3_phy_set_extloopbk(tp)) return -EIO; @@ -7637,7 +8008,7 @@ static void tg3_set_loopback(struct net_device *dev, netdev_features_t features) spin_lock_bh(&tp->lock); tg3_mac_loopback(tp, false); /* Force link status check */ - tg3_setup_phy(tp, 1); + tg3_setup_phy(tp, true); spin_unlock_bh(&tp->lock); netdev_info(dev, "Internal MAC loopback mode disabled.\n"); } @@ -8035,11 +8406,9 @@ 
static int tg3_mem_rx_acquire(struct tg3 *tp) tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev, TG3_RX_RCB_RING_BYTES(tp), &tnapi->rx_rcb_mapping, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (!tnapi->rx_rcb) goto err_out; - - memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp)); } return 0; @@ -8089,12 +8458,10 @@ static int tg3_alloc_consistent(struct tg3 *tp) tp->hw_stats = dma_alloc_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats), &tp->stats_mapping, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (!tp->hw_stats) goto err_out; - memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); - for (i = 0; i < tp->irq_cnt; i++) { struct tg3_napi *tnapi = &tp->napi[i]; struct tg3_hw_status *sblk; @@ -8102,11 +8469,10 @@ static int tg3_alloc_consistent(struct tg3 *tp) tnapi->hw_status = dma_alloc_coherent(&tp->pdev->dev, TG3_HW_STATUS_SIZE, &tnapi->status_mapping, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (!tnapi->hw_status) goto err_out; - memset(tnapi->hw_status, 0, TG3_HW_STATUS_SIZE); sblk = tnapi->hw_status; if (tg3_flag(tp, ENABLE_RSS)) { @@ -8153,7 +8519,7 @@ err_out: /* To stop a block, clear the enable bit and poll till it * clears. tp->lock is held. */ -static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit, int silent) +static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit, bool silent) { unsigned int i; u32 val; @@ -8197,7 +8563,7 @@ static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit, int } /* tp->lock is held. */ -static int tg3_abort_hw(struct tg3 *tp, int silent) +static int tg3_abort_hw(struct tg3 *tp, bool silent) { int i, err; @@ -8557,6 +8923,9 @@ static int tg3_chip_reset(struct tg3 *tp) /* Reprobe ASF enable state. 
*/ tg3_flag_clear(tp, ENABLE_ASF); + tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK | + TG3_PHYFLG_KEEP_LINK_ON_PWRDN); + tg3_flag_clear(tp, ASF_NEW_HANDSHAKE); tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val); if (val == NIC_SRAM_DATA_SIG_MAGIC) { @@ -8568,6 +8937,12 @@ static int tg3_chip_reset(struct tg3 *tp) tp->last_event_jiffies = jiffies; if (tg3_flag(tp, 5750_PLUS)) tg3_flag_set(tp, ASF_NEW_HANDSHAKE); + + tg3_read_mem(tp, NIC_SRAM_DATA_CFG_3, &nic_cfg); + if (nic_cfg & NIC_SRAM_1G_ON_VAUX_OK) + tp->phy_flags |= TG3_PHYFLG_1G_ON_VAUX_OK; + if (nic_cfg & NIC_SRAM_LNK_FLAP_AVOID) + tp->phy_flags |= TG3_PHYFLG_KEEP_LINK_ON_PWRDN; } } @@ -8578,7 +8953,7 @@ static void tg3_get_nstats(struct tg3 *, struct rtnl_link_stats64 *); static void tg3_get_estats(struct tg3 *, struct tg3_ethtool_stats *); /* tp->lock is held. */ -static int tg3_halt(struct tg3 *tp, int kind, int silent) +static int tg3_halt(struct tg3 *tp, int kind, bool silent) { int err; @@ -8589,7 +8964,7 @@ static int tg3_halt(struct tg3 *tp, int kind, int silent) tg3_abort_hw(tp, silent); err = tg3_chip_reset(tp); - __tg3_set_mac_addr(tp, 0); + __tg3_set_mac_addr(tp, false); tg3_write_sig_legacy(tp, kind); tg3_write_sig_post_reset(tp, kind); @@ -8613,7 +8988,8 @@ static int tg3_set_mac_addr(struct net_device *dev, void *p) { struct tg3 *tp = netdev_priv(dev); struct sockaddr *addr = p; - int err = 0, skip_mac_1 = 0; + int err = 0; + bool skip_mac_1 = false; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -8634,7 +9010,7 @@ static int tg3_set_mac_addr(struct net_device *dev, void *p) /* Skip MAC addr 1 if ASF is using it. */ if ((addr0_high != addr1_high || addr0_low != addr1_low) && !(addr1_high == 0 && addr1_low == 0)) - skip_mac_1 = 1; + skip_mac_1 = true; } spin_lock_bh(&tp->lock); __tg3_set_mac_addr(tp, skip_mac_1); @@ -9053,7 +9429,7 @@ static void tg3_rss_write_indir_tbl(struct tg3 *tp) } /* tp->lock is held. 
*/ -static int tg3_reset_hw(struct tg3 *tp, int reset_phy) +static int tg3_reset_hw(struct tg3 *tp, bool reset_phy) { u32 val, rdmac_mode; int i, err, limit; @@ -9102,6 +9478,12 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) TG3_CPMU_DBTMR2_TXIDXEQ_2047US); } + if ((tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN) && + !(tp->phy_flags & TG3_PHYFLG_USER_CONFIGURED)) { + tg3_phy_pull_config(tp); + tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; + } + if (reset_phy) tg3_phy_reset(tp); @@ -9440,7 +9822,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) tg3_rings_reset(tp); /* Initialize MAC address and backoff seed. */ - __tg3_set_mac_addr(tp, 0); + __tg3_set_mac_addr(tp, false); /* MTU + ethernet header + FCS + optional VLAN tag */ tw32(MAC_RX_MTU_SIZE, @@ -9777,6 +10159,13 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) return err; } + if (tg3_asic_rev(tp) == ASIC_REV_57766) { + /* Ignore any errors for the firmware download. If download + * fails, the device will operate with EEE disabled + */ + tg3_load_57766_firmware(tp); + } + if (tg3_flag(tp, TSO_CAPABLE)) { err = tg3_load_tso_firmware(tp); if (err) @@ -9884,7 +10273,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) tp->phy_flags &= ~TG3_PHYFLG_IS_LOW_POWER; - err = tg3_setup_phy(tp, 0); + err = tg3_setup_phy(tp, false); if (err) return err; @@ -9964,7 +10353,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) /* Called at device open time to get the chip ready for * packet processing. Invoked with tp->lock held. 
*/ -static int tg3_init_hw(struct tg3 *tp, int reset_phy) +static int tg3_init_hw(struct tg3 *tp, bool reset_phy) { tg3_switch_clocks(tp); @@ -10225,7 +10614,7 @@ static void tg3_timer(unsigned long __opaque) phy_event = 1; if (phy_event) - tg3_setup_phy(tp, 0); + tg3_setup_phy(tp, false); } else if (tg3_flag(tp, POLL_SERDES)) { u32 mac_stat = tr32(MAC_STATUS); int need_setup = 0; @@ -10248,7 +10637,7 @@ static void tg3_timer(unsigned long __opaque) tw32_f(MAC_MODE, tp->mac_mode); udelay(40); } - tg3_setup_phy(tp, 0); + tg3_setup_phy(tp, false); } } else if ((tp->phy_flags & TG3_PHYFLG_MII_SERDES) && tg3_flag(tp, 5780_CLASS)) { @@ -10334,7 +10723,7 @@ static void tg3_timer_stop(struct tg3 *tp) /* Restart hardware after configuration changes, self-test, etc. * Invoked with tp->lock held. */ -static int tg3_restart_hw(struct tg3 *tp, int reset_phy) +static int tg3_restart_hw(struct tg3 *tp, bool reset_phy) __releases(tp->lock) __acquires(tp->lock) { @@ -10384,7 +10773,7 @@ static void tg3_reset_task(struct work_struct *work) } tg3_halt(tp, RESET_KIND_SHUTDOWN, 0); - err = tg3_init_hw(tp, 1); + err = tg3_init_hw(tp, true); if (err) goto out; @@ -10554,7 +10943,7 @@ static int tg3_test_msi(struct tg3 *tp) tg3_full_lock(tp, 1); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_init_hw(tp, 1); + err = tg3_init_hw(tp, true); tg3_full_unlock(tp); @@ -10566,7 +10955,7 @@ static int tg3_test_msi(struct tg3 *tp) static int tg3_request_firmware(struct tg3 *tp) { - const __be32 *fw_data; + const struct tg3_firmware_hdr *fw_hdr; if (request_firmware(&tp->fw, tp->fw_needed, &tp->pdev->dev)) { netdev_err(tp->dev, "Failed to load firmware \"%s\"\n", @@ -10574,15 +10963,15 @@ static int tg3_request_firmware(struct tg3 *tp) return -ENOENT; } - fw_data = (void *)tp->fw->data; + fw_hdr = (struct tg3_firmware_hdr *)tp->fw->data; /* Firmware blob starts with version numbers, followed by * start address and _full_ length including BSS sections * (which must be longer than the actual data, 
of course */ - tp->fw_len = be32_to_cpu(fw_data[2]); /* includes bss */ - if (tp->fw_len < (tp->fw->size - 12)) { + tp->fw_len = be32_to_cpu(fw_hdr->len); /* includes bss */ + if (tp->fw_len < (tp->fw->size - TG3_FW_HDR_LEN)) { netdev_err(tp->dev, "bogus length %d in \"%s\"\n", tp->fw_len, tp->fw_needed); release_firmware(tp->fw); @@ -10881,7 +11270,15 @@ static int tg3_open(struct net_device *dev) if (tp->fw_needed) { err = tg3_request_firmware(tp); - if (tg3_chip_rev_id(tp) == CHIPREV_ID_5701_A0) { + if (tg3_asic_rev(tp) == ASIC_REV_57766) { + if (err) { + netdev_warn(tp->dev, "EEE capability disabled\n"); + tp->phy_flags &= ~TG3_PHYFLG_EEE_CAP; + } else if (!(tp->phy_flags & TG3_PHYFLG_EEE_CAP)) { + netdev_warn(tp->dev, "EEE capability restored\n"); + tp->phy_flags |= TG3_PHYFLG_EEE_CAP; + } + } else if (tg3_chip_rev_id(tp) == CHIPREV_ID_5701_A0) { if (err) return err; } else if (err) { @@ -10906,7 +11303,9 @@ static int tg3_open(struct net_device *dev) tg3_full_unlock(tp); - err = tg3_start(tp, true, true, true); + err = tg3_start(tp, + !(tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN), + true, true); if (err) { tg3_frob_aux_power(tp, false); pci_set_power_state(tp->pdev, PCI_D3hot); @@ -11412,8 +11811,12 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) tp->link_config.duplex = cmd->duplex; } + tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; + + tg3_warn_mgmt_link_flap(tp); + if (netif_running(dev)) - tg3_setup_phy(tp, 1); + tg3_setup_phy(tp, true); tg3_full_unlock(tp); @@ -11490,6 +11893,8 @@ static int tg3_nway_reset(struct net_device *dev) if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES) return -EINVAL; + tg3_warn_mgmt_link_flap(tp); + if (tg3_flag(tp, USE_PHYLIB)) { if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; @@ -11567,7 +11972,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, 1); + err = 
tg3_restart_hw(tp, false); if (!err) tg3_netif_start(tp); } @@ -11602,6 +12007,9 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam struct tg3 *tp = netdev_priv(dev); int err = 0; + if (tp->link_config.autoneg == AUTONEG_ENABLE) + tg3_warn_mgmt_link_flap(tp); + if (tg3_flag(tp, USE_PHYLIB)) { u32 newadv; struct phy_device *phydev; @@ -11688,7 +12096,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, 1); + err = tg3_restart_hw(tp, false); if (!err) tg3_netif_start(tp); } @@ -11696,6 +12104,8 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam tg3_full_unlock(tp); } + tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; + return err; } @@ -12756,7 +13166,7 @@ static int tg3_test_loopback(struct tg3 *tp, u64 *data, bool do_extlpbk) goto done; } - err = tg3_reset_hw(tp, 1); + err = tg3_reset_hw(tp, true); if (err) { data[TG3_MAC_LOOPB_TEST] = TG3_LOOPBACK_FAILED; data[TG3_PHY_LOOPB_TEST] = TG3_LOOPBACK_FAILED; @@ -12923,7 +13333,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); if (netif_running(dev)) { tg3_flag_set(tp, INIT_COMPLETE); - err2 = tg3_restart_hw(tp, 1); + err2 = tg3_restart_hw(tp, true); if (!err2) tg3_netif_start(tp); } @@ -13240,7 +13650,8 @@ static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp, static int tg3_change_mtu(struct net_device *dev, int new_mtu) { struct tg3 *tp = netdev_priv(dev); - int err, reset_phy = 0; + int err; + bool reset_phy = false; if (new_mtu < TG3_MIN_MTU || new_mtu > TG3_MAX_MTU(tp)) return -EINVAL; @@ -13267,7 +13678,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) * breaks all requests to 256 bytes. 
*/ if (tg3_asic_rev(tp) == ASIC_REV_57766) - reset_phy = 1; + reset_phy = true; err = tg3_restart_hw(tp, reset_phy); @@ -13833,6 +14244,12 @@ static void tg3_get_5720_nvram_info(struct tg3 *tp) case FLASH_5762_EEPROM_LD: nvmpinstrp = FLASH_5720_EEPROM_LD; break; + case FLASH_5720VENDOR_M_ST_M45PE20: + /* This pinstrap supports multiple sizes, so force it + * to read the actual size from location 0xf0. + */ + nvmpinstrp = FLASH_5720VENDOR_ST_45USPT; + break; } } @@ -14285,14 +14702,18 @@ static void tg3_get_eeprom_hw_cfg(struct tg3 *tp) (cfg2 & NIC_SRAM_DATA_CFG_2_APD_EN)) tp->phy_flags |= TG3_PHYFLG_ENABLE_APD; - if (tg3_flag(tp, PCI_EXPRESS) && - tg3_asic_rev(tp) != ASIC_REV_5785 && - !tg3_flag(tp, 57765_PLUS)) { + if (tg3_flag(tp, PCI_EXPRESS)) { u32 cfg3; tg3_read_mem(tp, NIC_SRAM_DATA_CFG_3, &cfg3); - if (cfg3 & NIC_SRAM_ASPM_DEBOUNCE) + if (tg3_asic_rev(tp) != ASIC_REV_5785 && + !tg3_flag(tp, 57765_PLUS) && + (cfg3 & NIC_SRAM_ASPM_DEBOUNCE)) tg3_flag_set(tp, ASPM_WORKAROUND); + if (cfg3 & NIC_SRAM_LNK_FLAP_AVOID) + tp->phy_flags |= TG3_PHYFLG_KEEP_LINK_ON_PWRDN; + if (cfg3 & NIC_SRAM_1G_ON_VAUX_OK) + tp->phy_flags |= TG3_PHYFLG_1G_ON_VAUX_OK; } if (cfg4 & NIC_SRAM_RGMII_INBAND_DISABLE) @@ -14446,6 +14867,12 @@ static int tg3_phy_probe(struct tg3 *tp) } } + if (!tg3_flag(tp, ENABLE_ASF) && + !(tp->phy_flags & TG3_PHYFLG_ANY_SERDES) && + !(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) + tp->phy_flags &= ~(TG3_PHYFLG_1G_ON_VAUX_OK | + TG3_PHYFLG_KEEP_LINK_ON_PWRDN); + if (tg3_flag(tp, USE_PHYLIB)) return tg3_phy_init(tp); @@ -14511,6 +14938,7 @@ static int tg3_phy_probe(struct tg3 *tp) if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES) && (tg3_asic_rev(tp) == ASIC_REV_5719 || tg3_asic_rev(tp) == ASIC_REV_5720 || + tg3_asic_rev(tp) == ASIC_REV_57766 || tg3_asic_rev(tp) == ASIC_REV_5762 || (tg3_asic_rev(tp) == ASIC_REV_5717 && tg3_chip_rev_id(tp) != CHIPREV_ID_5717_A0) || @@ -14520,7 +14948,8 @@ static int tg3_phy_probe(struct tg3 *tp) tg3_phy_init_link_config(tp); - if 
(!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES) && + if (!(tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN) && + !(tp->phy_flags & TG3_PHYFLG_ANY_SERDES) && !tg3_flag(tp, ENABLE_APE) && !tg3_flag(tp, ENABLE_ASF)) { u32 bmsr, dummy; @@ -14600,8 +15029,11 @@ static void tg3_read_vpd(struct tg3 *tp) if (j + len > block_end) goto partno; - memcpy(tp->fw_ver, &vpd_data[j], len); - strncat(tp->fw_ver, " bc ", vpdlen - len - 1); + if (len >= sizeof(tp->fw_ver)) + len = sizeof(tp->fw_ver) - 1; + memset(tp->fw_ver, 0, sizeof(tp->fw_ver)); + snprintf(tp->fw_ver, sizeof(tp->fw_ver), "%.*s bc ", len, + &vpd_data[j]); } partno: @@ -15293,7 +15725,8 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) } else if (tg3_asic_rev(tp) != ASIC_REV_5700 && tg3_asic_rev(tp) != ASIC_REV_5701 && tg3_chip_rev_id(tp) != CHIPREV_ID_5705_A0) { - tg3_flag_set(tp, TSO_BUG); + tg3_flag_set(tp, FW_TSO); + tg3_flag_set(tp, TSO_BUG); if (tg3_asic_rev(tp) == ASIC_REV_5705) tp->fw_needed = FIRMWARE_TG3TSO5; else @@ -15304,7 +15737,7 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) if (tg3_flag(tp, HW_TSO_1) || tg3_flag(tp, HW_TSO_2) || tg3_flag(tp, HW_TSO_3) || - tp->fw_needed) { + tg3_flag(tp, FW_TSO)) { /* For firmware TSO, assume ASF is disabled. * We'll disable TSO later if we discover ASF * is enabled in tg3_get_eeprom_hw_cfg(). 
@@ -15319,6 +15752,9 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) if (tg3_chip_rev_id(tp) == CHIPREV_ID_5701_A0) tp->fw_needed = FIRMWARE_TG3; + if (tg3_asic_rev(tp) == ASIC_REV_57766) + tp->fw_needed = FIRMWARE_TG357766; + tp->irq_max = 1; if (tg3_flag(tp, 5750_PLUS)) { @@ -15591,7 +16027,7 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) */ tg3_get_eeprom_hw_cfg(tp); - if (tp->fw_needed && tg3_flag(tp, ENABLE_ASF)) { + if (tg3_flag(tp, FW_TSO) && tg3_flag(tp, ENABLE_ASF)) { tg3_flag_clear(tp, TSO_CAPABLE); tg3_flag_clear(tp, TSO_BUG); tp->fw_needed = NULL; @@ -15779,6 +16215,11 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) udelay(50); tg3_nvram_init(tp); + /* If the device has an NVRAM, no need to load patch firmware */ + if (tg3_asic_rev(tp) == ASIC_REV_57766 && + !tg3_flag(tp, NO_NVRAM)) + tp->fw_needed = NULL; + grc_misc_cfg = tr32(GRC_MISC_CFG); grc_misc_cfg &= GRC_MISC_CFG_BOARD_ID_MASK; @@ -16137,7 +16578,7 @@ out: } static int tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dma, - int size, int to_device) + int size, bool to_device) { struct tg3_internal_buffer_desc test_desc; u32 sram_dma_descs; @@ -16337,7 +16778,7 @@ static int tg3_test_dma(struct tg3 *tp) p[i] = i; /* Send the buffer to the chip. */ - ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, 1); + ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, true); if (ret) { dev_err(&tp->pdev->dev, "%s: Buffer write failed. err = %d\n", @@ -16360,7 +16801,7 @@ static int tg3_test_dma(struct tg3 *tp) } #endif /* Now read it back. */ - ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, 0); + ret = tg3_do_test_dma(tp, buf, buf_dma, TEST_BUFFER_SIZE, false); if (ret) { dev_err(&tp->pdev->dev, "%s: Buffer read failed. 
" "err = %d\n", __func__, ret); @@ -16756,7 +17197,7 @@ static int tg3_init_one(struct pci_dev *pdev, tg3_init_bufmgr_config(tp); - features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; /* 5700 B0 chips do not support checksumming correctly due * to hardware bugs. @@ -17041,7 +17482,7 @@ static int tg3_suspend(struct device *device) tg3_full_lock(tp, 0); tg3_flag_set(tp, INIT_COMPLETE); - err2 = tg3_restart_hw(tp, 1); + err2 = tg3_restart_hw(tp, true); if (err2) goto out; @@ -17075,7 +17516,8 @@ static int tg3_resume(struct device *device) tg3_full_lock(tp, 0); tg3_flag_set(tp, INIT_COMPLETE); - err = tg3_restart_hw(tp, 1); + err = tg3_restart_hw(tp, + !(tp->phy_flags & TG3_PHYFLG_KEEP_LINK_ON_PWRDN)); if (err) goto out; @@ -17091,15 +17533,9 @@ out: return err; } +#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume); -#define TG3_PM_OPS (&tg3_pm_ops) - -#else - -#define TG3_PM_OPS NULL - -#endif /* CONFIG_PM_SLEEP */ /** * tg3_io_error_detected - called when PCI error is detected @@ -17214,7 +17650,7 @@ static void tg3_io_resume(struct pci_dev *pdev) tg3_full_lock(tp, 0); tg3_flag_set(tp, INIT_COMPLETE); - err = tg3_restart_hw(tp, 1); + err = tg3_restart_hw(tp, true); if (err) { tg3_full_unlock(tp); netdev_err(netdev, "Cannot restart hardware after reset.\n"); @@ -17247,7 +17683,7 @@ static struct pci_driver tg3_driver = { .probe = tg3_init_one, .remove = tg3_remove_one, .err_handler = &tg3_err_handler, - .driver.pm = TG3_PM_OPS, + .driver.pm = &tg3_pm_ops, }; static int __init tg3_init(void) diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 8d7d4c2ab5d6..9b2d3ac2474a 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2198,6 +2198,8 @@ #define NIC_SRAM_DATA_CFG_3 0x00000d3c #define NIC_SRAM_ASPM_DEBOUNCE 0x00000002 +#define NIC_SRAM_LNK_FLAP_AVOID 0x00400000 +#define 
NIC_SRAM_1G_ON_VAUX_OK 0x00800000 #define NIC_SRAM_DATA_CFG_4 0x00000d60 #define NIC_SRAM_GMII_MODE 0x00000002 @@ -2222,6 +2224,12 @@ #define NIC_SRAM_MBUF_POOL_BASE5705 0x00010000 #define NIC_SRAM_MBUF_POOL_SIZE5705 0x0000e000 +#define TG3_SRAM_RXCPU_SCRATCH_BASE_57766 0x00030000 +#define TG3_SRAM_RXCPU_SCRATCH_SIZE_57766 0x00010000 +#define TG3_57766_FW_BASE_ADDR 0x00030000 +#define TG3_57766_FW_HANDSHAKE 0x0003fccc +#define TG3_SBROM_IN_SERVICE_LOOP 0x51 + #define TG3_SRAM_RX_STD_BDCACHE_SIZE_5700 128 #define TG3_SRAM_RX_STD_BDCACHE_SIZE_5755 64 #define TG3_SRAM_RX_STD_BDCACHE_SIZE_5906 32 @@ -2365,6 +2373,13 @@ #define MII_TG3_FET_SHDW_AUXSTAT2 0x1b #define MII_TG3_FET_SHDW_AUXSTAT2_APD 0x0020 +/* Serdes PHY Register Definitions */ +#define SERDES_TG3_1000X_STATUS 0x14 +#define SERDES_TG3_SGMII_MODE 0x0001 +#define SERDES_TG3_LINK_UP 0x0002 +#define SERDES_TG3_FULL_DUPLEX 0x0004 +#define SERDES_TG3_SPEED_100 0x0008 +#define SERDES_TG3_SPEED_1000 0x0010 /* APE registers. Accessible through BAR1 */ #define TG3_APE_GPIO_MSG 0x0008 @@ -3009,17 +3024,18 @@ enum TG3_FLAGS { TG3_FLAG_JUMBO_CAPABLE, TG3_FLAG_CHIP_RESETTING, TG3_FLAG_INIT_COMPLETE, - TG3_FLAG_TSO_BUG, TG3_FLAG_MAX_RXPEND_64, - TG3_FLAG_TSO_CAPABLE, TG3_FLAG_PCI_EXPRESS, /* BCM5785 + pci_is_pcie() */ TG3_FLAG_ASF_NEW_HANDSHAKE, TG3_FLAG_HW_AUTONEG, TG3_FLAG_IS_NIC, TG3_FLAG_FLASH, + TG3_FLAG_FW_TSO, TG3_FLAG_HW_TSO_1, TG3_FLAG_HW_TSO_2, TG3_FLAG_HW_TSO_3, + TG3_FLAG_TSO_CAPABLE, + TG3_FLAG_TSO_BUG, TG3_FLAG_ICH_WORKAROUND, TG3_FLAG_1SHOT_MSI, TG3_FLAG_NO_FWARE_REPORTED, @@ -3064,6 +3080,13 @@ enum TG3_FLAGS { TG3_FLAG_NUMBER_OF_FLAGS, /* Last entry in enum TG3_FLAGS */ }; +struct tg3_firmware_hdr { + __be32 version; /* unused for fragments */ + __be32 base_addr; + __be32 len; +}; +#define TG3_FW_HDR_LEN (sizeof(struct tg3_firmware_hdr)) + struct tg3 { /* begin "general, frequently-used members" cacheline section */ @@ -3267,6 +3290,7 @@ struct tg3 { #define TG3_PHYFLG_IS_LOW_POWER 0x00000001 #define 
TG3_PHYFLG_IS_CONNECTED 0x00000002 #define TG3_PHYFLG_USE_MI_INTERRUPT 0x00000004 +#define TG3_PHYFLG_USER_CONFIGURED 0x00000008 #define TG3_PHYFLG_PHY_SERDES 0x00000010 #define TG3_PHYFLG_MII_SERDES 0x00000020 #define TG3_PHYFLG_ANY_SERDES (TG3_PHYFLG_PHY_SERDES | \ @@ -3284,6 +3308,8 @@ struct tg3 { #define TG3_PHYFLG_SERDES_PREEMPHASIS 0x00010000 #define TG3_PHYFLG_PARALLEL_DETECT 0x00020000 #define TG3_PHYFLG_EEE_CAP 0x00040000 +#define TG3_PHYFLG_1G_ON_VAUX_OK 0x00080000 +#define TG3_PHYFLG_KEEP_LINK_ON_PWRDN 0x00100000 #define TG3_PHYFLG_MDIX_STATE 0x00200000 u32 led_ctrl; diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 3227fdde521b..f2b73ffa9122 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -76,7 +76,7 @@ static void bfa_ioc_pf_disabled(struct bfa_ioc *ioc); static void bfa_ioc_pf_failed(struct bfa_ioc *ioc); static void bfa_ioc_pf_hwfailed(struct bfa_ioc *ioc); static void bfa_ioc_pf_fwmismatch(struct bfa_ioc *ioc); -static void bfa_ioc_boot(struct bfa_ioc *ioc, u32 boot_type, +static void bfa_ioc_boot(struct bfa_ioc *ioc, enum bfi_fwboot_type boot_type, u32 boot_param); static u32 bfa_ioc_smem_pgnum(struct bfa_ioc *ioc, u32 fmaddr); static void bfa_ioc_get_adapter_serial_num(struct bfa_ioc *ioc, diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 7cce42dc2f20..ce4a030d3d0c 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -610,7 +610,7 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) rcb->rxq->rx_bytes += length; if (flags & BNA_CQ_EF_VLAN) - __vlan_hwaccel_put_tag(skb, ntohs(cmpl->vlan_tag)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cmpl->vlan_tag)); if (BNAD_RXBUF_IS_PAGE(unmap_q->type)) napi_gro_frags(&rx_ctrl->napi); @@ -1264,9 +1264,8 @@ bnad_mem_alloc(struct bnad *bnad, mem_info->mdl[i].len = 
mem_info->len; mem_info->mdl[i].kva = dma_alloc_coherent(&bnad->pcidev->dev, - mem_info->len, &dma_pa, - GFP_KERNEL); - + mem_info->len, &dma_pa, + GFP_KERNEL); if (mem_info->mdl[i].kva == NULL) goto err_return; @@ -3069,8 +3068,7 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu) } static int -bnad_vlan_rx_add_vid(struct net_device *netdev, - unsigned short vid) +bnad_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct bnad *bnad = netdev_priv(netdev); unsigned long flags; @@ -3091,8 +3089,7 @@ bnad_vlan_rx_add_vid(struct net_device *netdev, } static int -bnad_vlan_rx_kill_vid(struct net_device *netdev, - unsigned short vid) +bnad_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct bnad *bnad = netdev_priv(netdev); unsigned long flags; @@ -3171,14 +3168,14 @@ bnad_netdev_init(struct bnad *bnad, bool using_dac) netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_TX; + NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_TX; netdev->vlan_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; netdev->features |= netdev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; if (using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c index 3becdb2deb46..cc9a185f0abb 100644 --- a/drivers/net/ethernet/cadence/at91_ether.c +++ b/drivers/net/ethernet/cadence/at91_ether.c @@ -47,22 +47,19 @@ static int at91ether_start(struct net_device *dev) int i; lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev, - MAX_RX_DESCR * sizeof(struct macb_dma_desc), - &lp->rx_ring_dma, GFP_KERNEL); - if (!lp->rx_ring) { - netdev_err(dev, "unable to alloc rx ring DMA buffer\n"); + (MAX_RX_DESCR * + sizeof(struct macb_dma_desc)), + &lp->rx_ring_dma, 
GFP_KERNEL); + if (!lp->rx_ring) return -ENOMEM; - } lp->rx_buffers = dma_alloc_coherent(&lp->pdev->dev, - MAX_RX_DESCR * MAX_RBUFF_SZ, - &lp->rx_buffers_dma, GFP_KERNEL); + MAX_RX_DESCR * MAX_RBUFF_SZ, + &lp->rx_buffers_dma, GFP_KERNEL); if (!lp->rx_buffers) { - netdev_err(dev, "unable to alloc rx data DMA buffer\n"); - dma_free_coherent(&lp->pdev->dev, - MAX_RX_DESCR * sizeof(struct macb_dma_desc), - lp->rx_ring, lp->rx_ring_dma); + MAX_RX_DESCR * sizeof(struct macb_dma_desc), + lp->rx_ring, lp->rx_ring_dma); lp->rx_ring = NULL; return -ENOMEM; } @@ -209,7 +206,6 @@ static void at91ether_rx(struct net_device *dev) netif_rx(skb); } else { lp->stats.rx_dropped++; - netdev_notice(dev, "Memory squeeze, dropping packet.\n"); } if (lp->rx_ring[lp->rx_tail].ctrl & MACB_BIT(RX_MHASH_MATCH)) @@ -303,42 +299,7 @@ static const struct of_device_id at91ether_dt_ids[] = { { .compatible = "cdns,emac" }, { /* sentinel */ } }; - MODULE_DEVICE_TABLE(of, at91ether_dt_ids); - -static int at91ether_get_phy_mode_dt(struct platform_device *pdev) -{ - struct device_node *np = pdev->dev.of_node; - - if (np) - return of_get_phy_mode(np); - - return -ENODEV; -} - -static int at91ether_get_hwaddr_dt(struct macb *bp) -{ - struct device_node *np = bp->pdev->dev.of_node; - - if (np) { - const char *mac = of_get_mac_address(np); - if (mac) { - memcpy(bp->dev->dev_addr, mac, ETH_ALEN); - return 0; - } - } - - return -ENODEV; -} -#else -static int at91ether_get_phy_mode_dt(struct platform_device *pdev) -{ - return -ENODEV; -} -static int at91ether_get_hwaddr_dt(struct macb *bp) -{ - return -ENODEV; -} #endif /* Detect MAC & PHY and perform ethernet interface initialization */ @@ -352,6 +313,7 @@ static int __init at91ether_probe(struct platform_device *pdev) struct macb *lp; int res; u32 reg; + const char *mac; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) @@ -403,11 +365,13 @@ static int __init at91ether_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dev); 
SET_NETDEV_DEV(dev, &pdev->dev); - res = at91ether_get_hwaddr_dt(lp); - if (res < 0) + mac = of_get_mac_address(pdev->dev.of_node); + if (mac) + memcpy(lp->dev->dev_addr, mac, ETH_ALEN); + else macb_get_hwaddr(lp); - res = at91ether_get_phy_mode_dt(pdev); + res = of_get_phy_mode(pdev->dev.of_node); if (res < 0) { if (board_data && board_data->is_rmii) lp->phy_interface = PHY_INTERFACE_MODE_RMII; @@ -430,7 +394,8 @@ static int __init at91ether_probe(struct platform_device *pdev) if (res) goto err_disable_clock; - if (macb_mii_init(lp) != 0) + res = macb_mii_init(lp); + if (res) goto err_out_unregister_netdev; /* will be enabled in open() */ @@ -519,18 +484,7 @@ static struct platform_driver at91ether_driver = { }, }; -static int __init at91ether_init(void) -{ - return platform_driver_probe(&at91ether_driver, at91ether_probe); -} - -static void __exit at91ether_exit(void) -{ - platform_driver_unregister(&at91ether_driver); -} - -module_init(at91ether_init) -module_exit(at91ether_exit) +module_platform_driver_probe(at91ether_driver, at91ether_probe); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("AT91RM9200 EMAC Ethernet driver"); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 79039439bfdc..6be513deb17f 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -485,6 +485,8 @@ static void macb_tx_interrupt(struct macb *bp) status = macb_readl(bp, TSR); macb_writel(bp, TSR, status); + macb_writel(bp, ISR, MACB_BIT(TCOMP)); + netdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n", (unsigned long)status); @@ -736,6 +738,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) * now. 
*/ macb_writel(bp, IDR, MACB_RX_INT_FLAGS); + macb_writel(bp, ISR, MACB_BIT(RCOMP)); if (napi_schedule_prep(&bp->napi)) { netdev_vdbg(bp->dev, "scheduling RX softirq\n"); @@ -1054,6 +1057,7 @@ static void macb_configure_dma(struct macb *bp) dmacfg |= GEM_BF(RXBS, RX_BUFFER_SIZE / 64); dmacfg |= GEM_BF(FBLDO, 16); dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); + dmacfg &= ~GEM_BIT(ENDIA); gem_writel(bp, DMACFG, dmacfg); } } @@ -1472,41 +1476,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,gem" }, { /* sentinel */ } }; - MODULE_DEVICE_TABLE(of, macb_dt_ids); - -static int macb_get_phy_mode_dt(struct platform_device *pdev) -{ - struct device_node *np = pdev->dev.of_node; - - if (np) - return of_get_phy_mode(np); - - return -ENODEV; -} - -static int macb_get_hwaddr_dt(struct macb *bp) -{ - struct device_node *np = bp->pdev->dev.of_node; - if (np) { - const char *mac = of_get_mac_address(np); - if (mac) { - memcpy(bp->dev->dev_addr, mac, ETH_ALEN); - return 0; - } - } - - return -ENODEV; -} -#else -static int macb_get_phy_mode_dt(struct platform_device *pdev) -{ - return -ENODEV; -} -static int macb_get_hwaddr_dt(struct macb *bp) -{ - return -ENODEV; -} #endif static int __init macb_probe(struct platform_device *pdev) @@ -1519,6 +1489,7 @@ static int __init macb_probe(struct platform_device *pdev) u32 config; int err = -ENXIO; struct pinctrl *pinctrl; + const char *mac; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) { @@ -1557,14 +1528,14 @@ static int __init macb_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to get macb_clk\n"); goto err_out_free_dev; } - clk_enable(bp->pclk); + clk_prepare_enable(bp->pclk); bp->hclk = clk_get(&pdev->dev, "hclk"); if (IS_ERR(bp->hclk)) { dev_err(&pdev->dev, "failed to get hclk\n"); goto err_out_put_pclk; } - clk_enable(bp->hclk); + clk_prepare_enable(bp->hclk); bp->regs = ioremap(regs->start, resource_size(regs)); if (!bp->regs) { @@ -1592,11 +1563,13 @@ static int 
__init macb_probe(struct platform_device *pdev) config |= macb_dbw(bp); macb_writel(bp, NCFGR, config); - err = macb_get_hwaddr_dt(bp); - if (err < 0) + mac = of_get_mac_address(pdev->dev.of_node); + if (mac) + memcpy(bp->dev->dev_addr, mac, ETH_ALEN); + else macb_get_hwaddr(bp); - err = macb_get_phy_mode_dt(pdev); + err = of_get_phy_mode(pdev->dev.of_node); if (err < 0) { pdata = pdev->dev.platform_data; if (pdata && pdata->is_rmii) @@ -1629,9 +1602,9 @@ static int __init macb_probe(struct platform_device *pdev) goto err_out_free_irq; } - if (macb_mii_init(bp) != 0) { + err = macb_mii_init(bp); + if (err) goto err_out_unregister_netdev; - } platform_set_drvdata(pdev, dev); @@ -1654,9 +1627,9 @@ err_out_free_irq: err_out_iounmap: iounmap(bp->regs); err_out_disable_clocks: - clk_disable(bp->hclk); + clk_disable_unprepare(bp->hclk); clk_put(bp->hclk); - clk_disable(bp->pclk); + clk_disable_unprepare(bp->pclk); err_out_put_pclk: clk_put(bp->pclk); err_out_free_dev: @@ -1683,9 +1656,9 @@ static int __exit macb_remove(struct platform_device *pdev) unregister_netdev(dev); free_irq(dev->irq, dev); iounmap(bp->regs); - clk_disable(bp->hclk); + clk_disable_unprepare(bp->hclk); clk_put(bp->hclk); - clk_disable(bp->pclk); + clk_disable_unprepare(bp->pclk); clk_put(bp->pclk); free_netdev(dev); platform_set_drvdata(pdev, NULL); @@ -1703,8 +1676,8 @@ static int macb_suspend(struct platform_device *pdev, pm_message_t state) netif_carrier_off(netdev); netif_device_detach(netdev); - clk_disable(bp->hclk); - clk_disable(bp->pclk); + clk_disable_unprepare(bp->hclk); + clk_disable_unprepare(bp->pclk); return 0; } @@ -1714,8 +1687,8 @@ static int macb_resume(struct platform_device *pdev) struct net_device *netdev = platform_get_drvdata(pdev); struct macb *bp = netdev_priv(netdev); - clk_enable(bp->pclk); - clk_enable(bp->hclk); + clk_prepare_enable(bp->pclk); + clk_prepare_enable(bp->hclk); netif_device_attach(netdev); @@ -1737,18 +1710,7 @@ static struct platform_driver macb_driver = 
{ }, }; -static int __init macb_init(void) -{ - return platform_driver_probe(&macb_driver, macb_probe); -} - -static void __exit macb_exit(void) -{ - platform_driver_unregister(&macb_driver); -} - -module_init(macb_init); -module_exit(macb_exit); +module_platform_driver_probe(macb_driver, macb_probe); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cadence MACB/GEM Ethernet driver"); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 570908b93578..993d70380688 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -173,6 +173,8 @@ /* Bitfields in DMACFG. */ #define GEM_FBLDO_OFFSET 0 #define GEM_FBLDO_SIZE 5 +#define GEM_ENDIA_OFFSET 7 +#define GEM_ENDIA_SIZE 1 #define GEM_RXBMS_OFFSET 8 #define GEM_RXBMS_SIZE 2 #define GEM_TXPBMS_OFFSET 10 diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index a170065b5973..791e5ff305d8 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -163,6 +163,7 @@ #define XGMAC_FLOW_CTRL_FCB_BPA 0x00000001 /* Flow Control Busy ... */ /* XGMAC_INT_STAT reg */ +#define XGMAC_INT_STAT_PMTIM 0x00800000 /* PMT Interrupt Mask */ #define XGMAC_INT_STAT_PMT 0x0080 /* PMT Interrupt Status */ #define XGMAC_INT_STAT_LPI 0x0040 /* LPI Interrupt Status */ @@ -960,6 +961,9 @@ static int xgmac_hw_init(struct net_device *dev) writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_STATUS); writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_INTR_ENA); + /* Mask power mgt interrupt */ + writel(XGMAC_INT_STAT_PMTIM, ioaddr + XGMAC_INT_STAT); + /* XGMAC requires AXI bus init. 
This is a 'magic number' for now */ writel(0x0077000E, ioaddr + XGMAC_DMA_AXI_BUS); @@ -1141,6 +1145,9 @@ static int xgmac_rx(struct xgmac_priv *priv, int limit) struct sk_buff *skb; int frame_len; + if (!dma_ring_cnt(priv->rx_head, priv->rx_tail, DMA_RX_RING_SZ)) + break; + entry = priv->rx_tail; p = priv->dma_rx + entry; if (desc_get_owner(p)) @@ -1825,7 +1832,7 @@ static void xgmac_pmt(void __iomem *ioaddr, unsigned long mode) unsigned int pmt = 0; if (mode & WAKE_MAGIC) - pmt |= XGMAC_PMT_POWERDOWN | XGMAC_PMT_MAGIC_PKT; + pmt |= XGMAC_PMT_POWERDOWN | XGMAC_PMT_MAGIC_PKT_EN; if (mode & WAKE_UCAST) pmt |= XGMAC_PMT_POWERDOWN | XGMAC_PMT_GLBL_UNICAST; @@ -1879,12 +1886,9 @@ static int xgmac_resume(struct device *dev) return 0; } +#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(xgmac_pm_ops, xgmac_suspend, xgmac_resume); -#define XGMAC_PM_OPS (&xgmac_pm_ops) -#else -#define XGMAC_PM_OPS NULL -#endif /* CONFIG_PM_SLEEP */ static const struct of_device_id xgmac_of_match[] = { { .compatible = "calxeda,hb-xgmac", }, @@ -1899,7 +1903,7 @@ static struct platform_driver xgmac_driver = { }, .probe = xgmac_probe, .remove = xgmac_remove, - .driver.pm = XGMAC_PM_OPS, + .driver.pm = &xgmac_pm_ops, }; module_platform_driver(xgmac_driver); diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 20d2085f61c5..9624cfe7df57 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -856,10 +856,10 @@ static netdev_features_t t1_fix_features(struct net_device *dev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -869,7 +869,7 @@ static int t1_set_features(struct net_device *dev, netdev_features_t features) netdev_features_t changed = dev->features ^ features; struct adapter *adapter = dev->ml_priv; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) t1_vlan_mode(adapter, features); return 0; @@ -1085,8 +1085,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= NETIF_F_HIGHDMA; if (vlan_tso_capable(adapter)) { netdev->features |= - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; - netdev->hw_features |= NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; /* T204: disable TSO */ if (!(is_T2(adapter)) || bi->port_number != 4) { diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 482976925154..8061fb0ef7ed 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -734,7 +734,7 @@ void t1_vlan_mode(struct adapter *adapter, netdev_features_t features) { struct sge *sge = adapter->sge; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) sge->sge_control |= F_VLAN_XTRACT; else sge->sge_control &= ~F_VLAN_XTRACT; @@ -835,7 +835,7 @@ static void refill_free_list(struct sge *sge, struct freelQ *q) struct sk_buff *skb; dma_addr_t mapping; - skb = alloc_skb(q->rx_buffer_size, GFP_ATOMIC); + skb = dev_alloc_skb(q->rx_buffer_size); if (!skb) break; @@ -1046,11 +1046,10 @@ static inline struct sk_buff *get_packet(struct pci_dev *pdev, const struct freelQ_ce *ce = &fl->centries[fl->cidx]; if (len < copybreak) { - skb = alloc_skb(len + 2, GFP_ATOMIC); + skb = 
netdev_alloc_skb_ip_align(NULL, len); if (!skb) goto use_orig_buf; - skb_reserve(skb, 2); /* align IP header */ skb_put(skb, len); pci_dma_sync_single_for_cpu(pdev, dma_unmap_addr(ce, dma_addr), @@ -1387,7 +1386,7 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) if (p->vlan_valid) { st->vlan_xtract++; - __vlan_hwaccel_put_tag(skb, ntohs(p->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan)); } netif_receive_skb(skb); } diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 2b5e62193cea..71497e835f42 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -1181,14 +1181,15 @@ static void cxgb_vlan_mode(struct net_device *dev, netdev_features_t features) if (adapter->params.rev > 0) { t3_set_vlan_accel(adapter, 1 << pi->port_id, - features & NETIF_F_HW_VLAN_RX); + features & NETIF_F_HW_VLAN_CTAG_RX); } else { /* single control for all ports */ - unsigned int i, have_vlans = features & NETIF_F_HW_VLAN_RX; + unsigned int i, have_vlans = features & NETIF_F_HW_VLAN_CTAG_RX; for_each_port(adapter, i) have_vlans |= - adapter->port[i]->features & NETIF_F_HW_VLAN_RX; + adapter->port[i]->features & + NETIF_F_HW_VLAN_CTAG_RX; t3_set_vlan_accel(adapter, 1, have_vlans); } @@ -2563,10 +2564,10 @@ static netdev_features_t cxgb_fix_features(struct net_device *dev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -2575,7 +2576,7 @@ static int cxgb_set_features(struct net_device *dev, netdev_features_t features) { netdev_features_t changed = dev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) cxgb_vlan_mode(dev, features); return 0; @@ -3288,8 +3289,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len - 1; netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX; - netdev->features |= netdev->hw_features | NETIF_F_HW_VLAN_TX; + NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX; + netdev->features |= netdev->hw_features | + NETIF_F_HW_VLAN_CTAG_TX; netdev->vlan_features |= netdev->features & VLAN_FEAT; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 4232767862b5..0c96e5fe99cc 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -185,7 +185,7 @@ static struct net_device *get_iff_from_mac(struct adapter *adapter, if (!memcmp(dev->dev_addr, mac, ETH_ALEN)) { rcu_read_lock(); if (vlan && vlan != VLAN_VID_MASK) { - dev = __vlan_find_dev_deep(dev, vlan); + dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), vlan); } else if (netif_is_bond_slave(dev)) { struct net_device *upper_dev; diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 9d67eb794c4b..f12e6b85a653 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2030,7 +2030,7 @@ 
static void rx_eth(struct adapter *adap, struct sge_rspq *rq, if (p->vlan_valid) { qs->port_stats[SGE_PSTAT_VLANEX]++; - __vlan_hwaccel_put_tag(skb, ntohs(p->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan)); } if (rq->polling) { if (lro) @@ -2132,7 +2132,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, if (cpl->vlan_valid) { qs->port_stats[SGE_PSTAT_VLANEX]++; - __vlan_hwaccel_put_tag(skb, ntohs(cpl->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); } napi_gro_frags(&qs->napi); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 6db997c78a5f..681804b30a3f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -54,6 +54,10 @@ #define FW_VERSION_MINOR 1 #define FW_VERSION_MICRO 0 +#define FW_VERSION_MAJOR_T5 0 +#define FW_VERSION_MINOR_T5 0 +#define FW_VERSION_MICRO_T5 0 + #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) enum { @@ -66,7 +70,9 @@ enum { enum { MEM_EDC0, MEM_EDC1, - MEM_MC + MEM_MC, + MEM_MC0 = MEM_MC, + MEM_MC1 }; enum { @@ -74,8 +80,10 @@ enum { MEMWIN0_BASE = 0x1b800, MEMWIN1_APERTURE = 32768, MEMWIN1_BASE = 0x28000, + MEMWIN1_BASE_T5 = 0x52000, MEMWIN2_APERTURE = 65536, MEMWIN2_BASE = 0x30000, + MEMWIN2_BASE_T5 = 0x54000, }; enum dev_master { @@ -431,6 +439,7 @@ struct sge_txq { spinlock_t db_lock; int db_disabled; unsigned short db_pidx; + u64 udb; }; struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ @@ -504,13 +513,44 @@ struct sge { struct l2t_data; +#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision)) +#define CHELSIO_CHIP_VERSION(code) ((code) >> 4) +#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf) + +#define CHELSIO_T4 0x4 +#define CHELSIO_T5 0x5 + +enum chip_type { + T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 0), + T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1), + T4_A3 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2), 
+ T4_FIRST_REV = T4_A1, + T4_LAST_REV = T4_A3, + + T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0), + T5_FIRST_REV = T5_A1, + T5_LAST_REV = T5_A1, +}; + +#ifdef CONFIG_PCI_IOV + +/* T4 supports SRIOV on PF0-3 and T5 on PF0-7. However, the Serial + * Configuration initialization for T5 only has SR-IOV functionality enabled + * on PF0-3 in order to simplify everything. + */ +#define NUM_OF_PF_WITH_SRIOV 4 + +#endif + struct adapter { void __iomem *regs; + void __iomem *bar2; struct pci_dev *pdev; struct device *pdev_dev; unsigned int mbox; unsigned int fn; unsigned int flags; + enum chip_type chip; int msg_enable; @@ -673,6 +713,16 @@ enum { VLAN_REWRITE }; +static inline int is_t5(enum chip_type chip) +{ + return (chip >= T5_FIRST_REV && chip <= T5_LAST_REV); +} + +static inline int is_t4(enum chip_type chip) +{ + return (chip >= T4_FIRST_REV && chip <= T4_LAST_REV); +} + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -858,7 +908,8 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, int start, int n, const u16 *rspq, unsigned int nrspq); int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, unsigned int flags); -int t4_mc_read(struct adapter *adap, u32 addr, __be32 *data, u64 *parity); +int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, + u64 *parity); int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e707e31abd81..6a6a01af75fd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -68,8 +68,8 @@ #include "t4fw_api.h" #include "l2t.h" -#define DRV_VERSION "1.3.0-ko" -#define DRV_DESC "Chelsio T4 Network Driver" +#define DRV_VERSION "2.0.0-ko" +#define DRV_DESC "Chelsio T4/T5 Network Driver" /* * Max interrupt hold-off timer value in 
us. Queues fall back to this value @@ -229,11 +229,51 @@ static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = { CH_DEVICE(0x440a, 4), CH_DEVICE(0x440d, 4), CH_DEVICE(0x440e, 4), + CH_DEVICE(0x5001, 5), + CH_DEVICE(0x5002, 5), + CH_DEVICE(0x5003, 5), + CH_DEVICE(0x5004, 5), + CH_DEVICE(0x5005, 5), + CH_DEVICE(0x5006, 5), + CH_DEVICE(0x5007, 5), + CH_DEVICE(0x5008, 5), + CH_DEVICE(0x5009, 5), + CH_DEVICE(0x500A, 5), + CH_DEVICE(0x500B, 5), + CH_DEVICE(0x500C, 5), + CH_DEVICE(0x500D, 5), + CH_DEVICE(0x500E, 5), + CH_DEVICE(0x500F, 5), + CH_DEVICE(0x5010, 5), + CH_DEVICE(0x5011, 5), + CH_DEVICE(0x5012, 5), + CH_DEVICE(0x5013, 5), + CH_DEVICE(0x5401, 5), + CH_DEVICE(0x5402, 5), + CH_DEVICE(0x5403, 5), + CH_DEVICE(0x5404, 5), + CH_DEVICE(0x5405, 5), + CH_DEVICE(0x5406, 5), + CH_DEVICE(0x5407, 5), + CH_DEVICE(0x5408, 5), + CH_DEVICE(0x5409, 5), + CH_DEVICE(0x540A, 5), + CH_DEVICE(0x540B, 5), + CH_DEVICE(0x540C, 5), + CH_DEVICE(0x540D, 5), + CH_DEVICE(0x540E, 5), + CH_DEVICE(0x540F, 5), + CH_DEVICE(0x5410, 5), + CH_DEVICE(0x5411, 5), + CH_DEVICE(0x5412, 5), + CH_DEVICE(0x5413, 5), { 0, } }; #define FW_FNAME "cxgb4/t4fw.bin" +#define FW5_FNAME "cxgb4/t5fw.bin" #define FW_CFNAME "cxgb4/t4-config.txt" +#define FW5_CFNAME "cxgb4/t5-config.txt" MODULE_DESCRIPTION(DRV_DESC); MODULE_AUTHOR("Chelsio Communications"); @@ -241,6 +281,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl); MODULE_FIRMWARE(FW_FNAME); +MODULE_FIRMWARE(FW5_FNAME); /* * Normally we're willing to become the firmware's Master PF but will be happy @@ -319,7 +360,10 @@ static bool vf_acls; module_param(vf_acls, bool, 0644); MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement"); -static unsigned int num_vf[4]; +/* Configure the number of PCI-E Virtual Function which are to be instantiated + * on SR-IOV Capable Physical Functions. 
+ */ +static unsigned int num_vf[NUM_OF_PF_WITH_SRIOV]; module_param_array(num_vf, uint, NULL, 0644); MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3"); @@ -515,7 +559,7 @@ static int link_start(struct net_device *dev) * that step explicitly. */ ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1, - !!(dev->features & NETIF_F_HW_VLAN_RX), true); + !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true); if (ret == 0) { ret = t4_change_mac(pi->adapter, mb, pi->viid, pi->xact_addr_filt, dev->dev_addr, true, @@ -1002,21 +1046,36 @@ freeout: t4_free_sge_resources(adap); static int upgrade_fw(struct adapter *adap) { int ret; - u32 vers; + u32 vers, exp_major; const struct fw_hdr *hdr; const struct firmware *fw; struct device *dev = adap->pdev_dev; + char *fw_file_name; - ret = request_firmware(&fw, FW_FNAME, dev); + switch (CHELSIO_CHIP_VERSION(adap->chip)) { + case CHELSIO_T4: + fw_file_name = FW_FNAME; + exp_major = FW_VERSION_MAJOR; + break; + case CHELSIO_T5: + fw_file_name = FW5_FNAME; + exp_major = FW_VERSION_MAJOR_T5; + break; + default: + dev_err(dev, "Unsupported chip type, %x\n", adap->chip); + return -EINVAL; + } + + ret = request_firmware(&fw, fw_file_name, dev); if (ret < 0) { - dev_err(dev, "unable to load firmware image " FW_FNAME - ", error %d\n", ret); + dev_err(dev, "unable to load firmware image %s, error %d\n", + fw_file_name, ret); return ret; } hdr = (const struct fw_hdr *)fw->data; vers = ntohl(hdr->fw_ver); - if (FW_HDR_FW_VER_MAJOR_GET(vers) != FW_VERSION_MAJOR) { + if (FW_HDR_FW_VER_MAJOR_GET(vers) != exp_major) { ret = -EINVAL; /* wrong major version, won't do */ goto out; } @@ -1024,18 +1083,15 @@ static int upgrade_fw(struct adapter *adap) /* * If the flash FW is unusable or we found something newer, load it. 
*/ - if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR || + if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != exp_major || vers > adap->params.fw_vers) { dev_info(dev, "upgrading firmware ...\n"); ret = t4_fw_upgrade(adap, adap->mbox, fw->data, fw->size, /*force=*/false); if (!ret) - dev_info(dev, "firmware successfully upgraded to " - FW_FNAME " (%d.%d.%d.%d)\n", - FW_HDR_FW_VER_MAJOR_GET(vers), - FW_HDR_FW_VER_MINOR_GET(vers), - FW_HDR_FW_VER_MICRO_GET(vers), - FW_HDR_FW_VER_BUILD_GET(vers)); + dev_info(dev, + "firmware upgraded to version %pI4 from %s\n", + &hdr->fw_ver, fw_file_name); else dev_err(dev, "firmware upgrade failed! err=%d\n", -ret); } else { @@ -1308,6 +1364,8 @@ static char stats_strings[][ETH_GSTRING_LEN] = { "VLANinsertions ", "GROpackets ", "GROmerged ", + "WriteCoalSuccess ", + "WriteCoalFail ", }; static int get_sset_count(struct net_device *dev, int sset) @@ -1321,10 +1379,15 @@ static int get_sset_count(struct net_device *dev, int sset) } #define T4_REGMAP_SIZE (160 * 1024) +#define T5_REGMAP_SIZE (332 * 1024) static int get_regs_len(struct net_device *dev) { - return T4_REGMAP_SIZE; + struct adapter *adap = netdev2adap(dev); + if (is_t4(adap->chip)) + return T4_REGMAP_SIZE; + else + return T5_REGMAP_SIZE; } static int get_eeprom_len(struct net_device *dev) @@ -1398,11 +1461,25 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats, { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; + u32 val1, val2; t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data); data += sizeof(struct port_stats) / sizeof(u64); collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data); + data += sizeof(struct queue_port_stats) / sizeof(u64); + if (!is_t4(adapter->chip)) { + t4_write_reg(adapter, SGE_STAT_CFG, STATSOURCE_T5(7)); + val1 = t4_read_reg(adapter, SGE_STAT_TOTAL); + val2 = t4_read_reg(adapter, SGE_STAT_MATCH); + *data = val1 - val2; + data++; + *data = val2; 
+ data++; + } else { + memset(data, 0, 2 * sizeof(u64)); + *data += 2; + } } /* @@ -1413,7 +1490,8 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats, */ static inline unsigned int mk_adap_vers(const struct adapter *ap) { - return 4 | (ap->params.rev << 10) | (1 << 16); + return CHELSIO_CHIP_VERSION(ap->chip) | + (CHELSIO_CHIP_RELEASE(ap->chip) << 10) | (1 << 16); } static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start, @@ -1428,7 +1506,7 @@ static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start, static void get_regs(struct net_device *dev, struct ethtool_regs *regs, void *buf) { - static const unsigned int reg_ranges[] = { + static const unsigned int t4_reg_ranges[] = { 0x1008, 0x1108, 0x1180, 0x11b4, 0x11fc, 0x123c, @@ -1648,13 +1726,452 @@ static void get_regs(struct net_device *dev, struct ethtool_regs *regs, 0x27e00, 0x27e04 }; + static const unsigned int t5_reg_ranges[] = { + 0x1008, 0x1148, + 0x1180, 0x11b4, + 0x11fc, 0x123c, + 0x1280, 0x173c, + 0x1800, 0x18fc, + 0x3000, 0x3028, + 0x3060, 0x30d8, + 0x30e0, 0x30fc, + 0x3140, 0x357c, + 0x35a8, 0x35cc, + 0x35ec, 0x35ec, + 0x3600, 0x5624, + 0x56cc, 0x575c, + 0x580c, 0x5814, + 0x5890, 0x58bc, + 0x5940, 0x59dc, + 0x59fc, 0x5a18, + 0x5a60, 0x5a9c, + 0x5b9c, 0x5bfc, + 0x6000, 0x6040, + 0x6058, 0x614c, + 0x7700, 0x7798, + 0x77c0, 0x78fc, + 0x7b00, 0x7c54, + 0x7d00, 0x7efc, + 0x8dc0, 0x8de0, + 0x8df8, 0x8e84, + 0x8ea0, 0x8f84, + 0x8fc0, 0x90f8, + 0x9400, 0x9470, + 0x9600, 0x96f4, + 0x9800, 0x9808, + 0x9820, 0x983c, + 0x9850, 0x9864, + 0x9c00, 0x9c6c, + 0x9c80, 0x9cec, + 0x9d00, 0x9d6c, + 0x9d80, 0x9dec, + 0x9e00, 0x9e6c, + 0x9e80, 0x9eec, + 0x9f00, 0x9f6c, + 0x9f80, 0xa020, + 0xd004, 0xd03c, + 0xdfc0, 0xdfe0, + 0xe000, 0x11088, + 0x1109c, 0x1117c, + 0x11190, 0x11204, + 0x19040, 0x1906c, + 0x19078, 0x19080, + 0x1908c, 0x19124, + 0x19150, 0x191b0, + 0x191d0, 0x191e8, + 0x19238, 0x19290, + 0x193f8, 0x19474, + 0x19490, 0x194cc, + 0x194f0, 0x194f8, + 
0x19c00, 0x19c60, + 0x19c94, 0x19e10, + 0x19e50, 0x19f34, + 0x19f40, 0x19f50, + 0x19f90, 0x19fe4, + 0x1a000, 0x1a06c, + 0x1a0b0, 0x1a120, + 0x1a128, 0x1a138, + 0x1a190, 0x1a1c4, + 0x1a1fc, 0x1a1fc, + 0x1e008, 0x1e00c, + 0x1e040, 0x1e04c, + 0x1e284, 0x1e290, + 0x1e2c0, 0x1e2c0, + 0x1e2e0, 0x1e2e0, + 0x1e300, 0x1e384, + 0x1e3c0, 0x1e3c8, + 0x1e408, 0x1e40c, + 0x1e440, 0x1e44c, + 0x1e684, 0x1e690, + 0x1e6c0, 0x1e6c0, + 0x1e6e0, 0x1e6e0, + 0x1e700, 0x1e784, + 0x1e7c0, 0x1e7c8, + 0x1e808, 0x1e80c, + 0x1e840, 0x1e84c, + 0x1ea84, 0x1ea90, + 0x1eac0, 0x1eac0, + 0x1eae0, 0x1eae0, + 0x1eb00, 0x1eb84, + 0x1ebc0, 0x1ebc8, + 0x1ec08, 0x1ec0c, + 0x1ec40, 0x1ec4c, + 0x1ee84, 0x1ee90, + 0x1eec0, 0x1eec0, + 0x1eee0, 0x1eee0, + 0x1ef00, 0x1ef84, + 0x1efc0, 0x1efc8, + 0x1f008, 0x1f00c, + 0x1f040, 0x1f04c, + 0x1f284, 0x1f290, + 0x1f2c0, 0x1f2c0, + 0x1f2e0, 0x1f2e0, + 0x1f300, 0x1f384, + 0x1f3c0, 0x1f3c8, + 0x1f408, 0x1f40c, + 0x1f440, 0x1f44c, + 0x1f684, 0x1f690, + 0x1f6c0, 0x1f6c0, + 0x1f6e0, 0x1f6e0, + 0x1f700, 0x1f784, + 0x1f7c0, 0x1f7c8, + 0x1f808, 0x1f80c, + 0x1f840, 0x1f84c, + 0x1fa84, 0x1fa90, + 0x1fac0, 0x1fac0, + 0x1fae0, 0x1fae0, + 0x1fb00, 0x1fb84, + 0x1fbc0, 0x1fbc8, + 0x1fc08, 0x1fc0c, + 0x1fc40, 0x1fc4c, + 0x1fe84, 0x1fe90, + 0x1fec0, 0x1fec0, + 0x1fee0, 0x1fee0, + 0x1ff00, 0x1ff84, + 0x1ffc0, 0x1ffc8, + 0x30000, 0x30030, + 0x30100, 0x30144, + 0x30190, 0x301d0, + 0x30200, 0x30318, + 0x30400, 0x3052c, + 0x30540, 0x3061c, + 0x30800, 0x30834, + 0x308c0, 0x30908, + 0x30910, 0x309ac, + 0x30a00, 0x30a04, + 0x30a0c, 0x30a2c, + 0x30a44, 0x30a50, + 0x30a74, 0x30c24, + 0x30d08, 0x30d14, + 0x30d1c, 0x30d20, + 0x30d3c, 0x30d50, + 0x31200, 0x3120c, + 0x31220, 0x31220, + 0x31240, 0x31240, + 0x31600, 0x31600, + 0x31608, 0x3160c, + 0x31a00, 0x31a1c, + 0x31e04, 0x31e20, + 0x31e38, 0x31e3c, + 0x31e80, 0x31e80, + 0x31e88, 0x31ea8, + 0x31eb0, 0x31eb4, + 0x31ec8, 0x31ed4, + 0x31fb8, 0x32004, + 0x32208, 0x3223c, + 0x32600, 0x32630, + 0x32a00, 0x32abc, + 0x32b00, 0x32b70, + 0x33000, 0x33048, + 
0x33060, 0x3309c, + 0x330f0, 0x33148, + 0x33160, 0x3319c, + 0x331f0, 0x332e4, + 0x332f8, 0x333e4, + 0x333f8, 0x33448, + 0x33460, 0x3349c, + 0x334f0, 0x33548, + 0x33560, 0x3359c, + 0x335f0, 0x336e4, + 0x336f8, 0x337e4, + 0x337f8, 0x337fc, + 0x33814, 0x33814, + 0x3382c, 0x3382c, + 0x33880, 0x3388c, + 0x338e8, 0x338ec, + 0x33900, 0x33948, + 0x33960, 0x3399c, + 0x339f0, 0x33ae4, + 0x33af8, 0x33b10, + 0x33b28, 0x33b28, + 0x33b3c, 0x33b50, + 0x33bf0, 0x33c10, + 0x33c28, 0x33c28, + 0x33c3c, 0x33c50, + 0x33cf0, 0x33cfc, + 0x34000, 0x34030, + 0x34100, 0x34144, + 0x34190, 0x341d0, + 0x34200, 0x34318, + 0x34400, 0x3452c, + 0x34540, 0x3461c, + 0x34800, 0x34834, + 0x348c0, 0x34908, + 0x34910, 0x349ac, + 0x34a00, 0x34a04, + 0x34a0c, 0x34a2c, + 0x34a44, 0x34a50, + 0x34a74, 0x34c24, + 0x34d08, 0x34d14, + 0x34d1c, 0x34d20, + 0x34d3c, 0x34d50, + 0x35200, 0x3520c, + 0x35220, 0x35220, + 0x35240, 0x35240, + 0x35600, 0x35600, + 0x35608, 0x3560c, + 0x35a00, 0x35a1c, + 0x35e04, 0x35e20, + 0x35e38, 0x35e3c, + 0x35e80, 0x35e80, + 0x35e88, 0x35ea8, + 0x35eb0, 0x35eb4, + 0x35ec8, 0x35ed4, + 0x35fb8, 0x36004, + 0x36208, 0x3623c, + 0x36600, 0x36630, + 0x36a00, 0x36abc, + 0x36b00, 0x36b70, + 0x37000, 0x37048, + 0x37060, 0x3709c, + 0x370f0, 0x37148, + 0x37160, 0x3719c, + 0x371f0, 0x372e4, + 0x372f8, 0x373e4, + 0x373f8, 0x37448, + 0x37460, 0x3749c, + 0x374f0, 0x37548, + 0x37560, 0x3759c, + 0x375f0, 0x376e4, + 0x376f8, 0x377e4, + 0x377f8, 0x377fc, + 0x37814, 0x37814, + 0x3782c, 0x3782c, + 0x37880, 0x3788c, + 0x378e8, 0x378ec, + 0x37900, 0x37948, + 0x37960, 0x3799c, + 0x379f0, 0x37ae4, + 0x37af8, 0x37b10, + 0x37b28, 0x37b28, + 0x37b3c, 0x37b50, + 0x37bf0, 0x37c10, + 0x37c28, 0x37c28, + 0x37c3c, 0x37c50, + 0x37cf0, 0x37cfc, + 0x38000, 0x38030, + 0x38100, 0x38144, + 0x38190, 0x381d0, + 0x38200, 0x38318, + 0x38400, 0x3852c, + 0x38540, 0x3861c, + 0x38800, 0x38834, + 0x388c0, 0x38908, + 0x38910, 0x389ac, + 0x38a00, 0x38a04, + 0x38a0c, 0x38a2c, + 0x38a44, 0x38a50, + 0x38a74, 0x38c24, + 0x38d08, 0x38d14, + 
0x38d1c, 0x38d20, + 0x38d3c, 0x38d50, + 0x39200, 0x3920c, + 0x39220, 0x39220, + 0x39240, 0x39240, + 0x39600, 0x39600, + 0x39608, 0x3960c, + 0x39a00, 0x39a1c, + 0x39e04, 0x39e20, + 0x39e38, 0x39e3c, + 0x39e80, 0x39e80, + 0x39e88, 0x39ea8, + 0x39eb0, 0x39eb4, + 0x39ec8, 0x39ed4, + 0x39fb8, 0x3a004, + 0x3a208, 0x3a23c, + 0x3a600, 0x3a630, + 0x3aa00, 0x3aabc, + 0x3ab00, 0x3ab70, + 0x3b000, 0x3b048, + 0x3b060, 0x3b09c, + 0x3b0f0, 0x3b148, + 0x3b160, 0x3b19c, + 0x3b1f0, 0x3b2e4, + 0x3b2f8, 0x3b3e4, + 0x3b3f8, 0x3b448, + 0x3b460, 0x3b49c, + 0x3b4f0, 0x3b548, + 0x3b560, 0x3b59c, + 0x3b5f0, 0x3b6e4, + 0x3b6f8, 0x3b7e4, + 0x3b7f8, 0x3b7fc, + 0x3b814, 0x3b814, + 0x3b82c, 0x3b82c, + 0x3b880, 0x3b88c, + 0x3b8e8, 0x3b8ec, + 0x3b900, 0x3b948, + 0x3b960, 0x3b99c, + 0x3b9f0, 0x3bae4, + 0x3baf8, 0x3bb10, + 0x3bb28, 0x3bb28, + 0x3bb3c, 0x3bb50, + 0x3bbf0, 0x3bc10, + 0x3bc28, 0x3bc28, + 0x3bc3c, 0x3bc50, + 0x3bcf0, 0x3bcfc, + 0x3c000, 0x3c030, + 0x3c100, 0x3c144, + 0x3c190, 0x3c1d0, + 0x3c200, 0x3c318, + 0x3c400, 0x3c52c, + 0x3c540, 0x3c61c, + 0x3c800, 0x3c834, + 0x3c8c0, 0x3c908, + 0x3c910, 0x3c9ac, + 0x3ca00, 0x3ca04, + 0x3ca0c, 0x3ca2c, + 0x3ca44, 0x3ca50, + 0x3ca74, 0x3cc24, + 0x3cd08, 0x3cd14, + 0x3cd1c, 0x3cd20, + 0x3cd3c, 0x3cd50, + 0x3d200, 0x3d20c, + 0x3d220, 0x3d220, + 0x3d240, 0x3d240, + 0x3d600, 0x3d600, + 0x3d608, 0x3d60c, + 0x3da00, 0x3da1c, + 0x3de04, 0x3de20, + 0x3de38, 0x3de3c, + 0x3de80, 0x3de80, + 0x3de88, 0x3dea8, + 0x3deb0, 0x3deb4, + 0x3dec8, 0x3ded4, + 0x3dfb8, 0x3e004, + 0x3e208, 0x3e23c, + 0x3e600, 0x3e630, + 0x3ea00, 0x3eabc, + 0x3eb00, 0x3eb70, + 0x3f000, 0x3f048, + 0x3f060, 0x3f09c, + 0x3f0f0, 0x3f148, + 0x3f160, 0x3f19c, + 0x3f1f0, 0x3f2e4, + 0x3f2f8, 0x3f3e4, + 0x3f3f8, 0x3f448, + 0x3f460, 0x3f49c, + 0x3f4f0, 0x3f548, + 0x3f560, 0x3f59c, + 0x3f5f0, 0x3f6e4, + 0x3f6f8, 0x3f7e4, + 0x3f7f8, 0x3f7fc, + 0x3f814, 0x3f814, + 0x3f82c, 0x3f82c, + 0x3f880, 0x3f88c, + 0x3f8e8, 0x3f8ec, + 0x3f900, 0x3f948, + 0x3f960, 0x3f99c, + 0x3f9f0, 0x3fae4, + 0x3faf8, 0x3fb10, + 
0x3fb28, 0x3fb28, + 0x3fb3c, 0x3fb50, + 0x3fbf0, 0x3fc10, + 0x3fc28, 0x3fc28, + 0x3fc3c, 0x3fc50, + 0x3fcf0, 0x3fcfc, + 0x40000, 0x4000c, + 0x40040, 0x40068, + 0x40080, 0x40144, + 0x40180, 0x4018c, + 0x40200, 0x40298, + 0x402ac, 0x4033c, + 0x403f8, 0x403fc, + 0x41300, 0x413c4, + 0x41400, 0x4141c, + 0x41480, 0x414d0, + 0x44000, 0x44078, + 0x440c0, 0x44278, + 0x442c0, 0x44478, + 0x444c0, 0x44678, + 0x446c0, 0x44878, + 0x448c0, 0x449fc, + 0x45000, 0x45068, + 0x45080, 0x45084, + 0x450a0, 0x450b0, + 0x45200, 0x45268, + 0x45280, 0x45284, + 0x452a0, 0x452b0, + 0x460c0, 0x460e4, + 0x47000, 0x4708c, + 0x47200, 0x47250, + 0x47400, 0x47420, + 0x47600, 0x47618, + 0x47800, 0x47814, + 0x48000, 0x4800c, + 0x48040, 0x48068, + 0x48080, 0x48144, + 0x48180, 0x4818c, + 0x48200, 0x48298, + 0x482ac, 0x4833c, + 0x483f8, 0x483fc, + 0x49300, 0x493c4, + 0x49400, 0x4941c, + 0x49480, 0x494d0, + 0x4c000, 0x4c078, + 0x4c0c0, 0x4c278, + 0x4c2c0, 0x4c478, + 0x4c4c0, 0x4c678, + 0x4c6c0, 0x4c878, + 0x4c8c0, 0x4c9fc, + 0x4d000, 0x4d068, + 0x4d080, 0x4d084, + 0x4d0a0, 0x4d0b0, + 0x4d200, 0x4d268, + 0x4d280, 0x4d284, + 0x4d2a0, 0x4d2b0, + 0x4e0c0, 0x4e0e4, + 0x4f000, 0x4f08c, + 0x4f200, 0x4f250, + 0x4f400, 0x4f420, + 0x4f600, 0x4f618, + 0x4f800, 0x4f814, + 0x50000, 0x500cc, + 0x50400, 0x50400, + 0x50800, 0x508cc, + 0x50c00, 0x50c00, + 0x51000, 0x5101c, + 0x51300, 0x51308, + }; + int i; struct adapter *ap = netdev2adap(dev); + static const unsigned int *reg_ranges; + int arr_size = 0, buf_size = 0; + + if (is_t4(ap->chip)) { + reg_ranges = &t4_reg_ranges[0]; + arr_size = ARRAY_SIZE(t4_reg_ranges); + buf_size = T4_REGMAP_SIZE; + } else { + reg_ranges = &t5_reg_ranges[0]; + arr_size = ARRAY_SIZE(t5_reg_ranges); + buf_size = T5_REGMAP_SIZE; + } regs->version = mk_adap_vers(ap); - memset(buf, 0, T4_REGMAP_SIZE); - for (i = 0; i < ARRAY_SIZE(reg_ranges); i += 2) + memset(buf, 0, buf_size); + for (i = 0; i < arr_size; i += 2) reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]); } @@ -2205,14 +2722,14 
@@ static int cxgb_set_features(struct net_device *dev, netdev_features_t features) netdev_features_t changed = dev->features ^ features; int err; - if (!(changed & NETIF_F_HW_VLAN_RX)) + if (!(changed & NETIF_F_HW_VLAN_CTAG_RX)) return 0; err = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1, -1, -1, -1, - !!(features & NETIF_F_HW_VLAN_RX), true); + !!(features & NETIF_F_HW_VLAN_CTAG_RX), true); if (unlikely(err)) - dev->features = features ^ NETIF_F_HW_VLAN_RX; + dev->features = features ^ NETIF_F_HW_VLAN_CTAG_RX; return err; } @@ -2363,8 +2880,8 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count, int ret, ofst; __be32 data[16]; - if (mem == MEM_MC) - ret = t4_mc_read(adap, pos, data, NULL); + if ((mem == MEM_MC) || (mem == MEM_MC1)) + ret = t4_mc_read(adap, mem % MEM_MC, pos, data, NULL); else ret = t4_edc_read(adap, mem, pos, data, NULL); if (ret) @@ -2405,18 +2922,37 @@ static void add_debugfs_mem(struct adapter *adap, const char *name, static int setup_debugfs(struct adapter *adap) { int i; + u32 size; if (IS_ERR_OR_NULL(adap->debugfs_root)) return -1; i = t4_read_reg(adap, MA_TARGET_MEM_ENABLE); - if (i & EDRAM0_ENABLE) - add_debugfs_mem(adap, "edc0", MEM_EDC0, 5); - if (i & EDRAM1_ENABLE) - add_debugfs_mem(adap, "edc1", MEM_EDC1, 5); - if (i & EXT_MEM_ENABLE) - add_debugfs_mem(adap, "mc", MEM_MC, - EXT_MEM_SIZE_GET(t4_read_reg(adap, MA_EXT_MEMORY_BAR))); + if (i & EDRAM0_ENABLE) { + size = t4_read_reg(adap, MA_EDRAM0_BAR); + add_debugfs_mem(adap, "edc0", MEM_EDC0, EDRAM_SIZE_GET(size)); + } + if (i & EDRAM1_ENABLE) { + size = t4_read_reg(adap, MA_EDRAM1_BAR); + add_debugfs_mem(adap, "edc1", MEM_EDC1, EDRAM_SIZE_GET(size)); + } + if (is_t4(adap->chip)) { + size = t4_read_reg(adap, MA_EXT_MEMORY_BAR); + if (i & EXT_MEM_ENABLE) + add_debugfs_mem(adap, "mc", MEM_MC, + EXT_MEM_SIZE_GET(size)); + } else { + if (i & EXT_MEM_ENABLE) { + size = t4_read_reg(adap, MA_EXT_MEMORY_BAR); + add_debugfs_mem(adap, "mc0", MEM_MC0, + 
EXT_MEM_SIZE_GET(size)); + } + if (i & EXT_MEM1_ENABLE) { + size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR); + add_debugfs_mem(adap, "mc1", MEM_MC1, + EXT_MEM_SIZE_GET(size)); + } + } if (adap->l2t) debugfs_create_file("l2t", S_IRUSR, adap->debugfs_root, adap, &t4_l2t_fops); @@ -2747,10 +3283,18 @@ EXPORT_SYMBOL(cxgb4_port_chan); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo) { struct adapter *adap = netdev2adap(dev); - u32 v; + u32 v1, v2, lp_count, hp_count; - v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS); - return lpfifo ? G_LP_COUNT(v) : G_HP_COUNT(v); + v1 = t4_read_reg(adap, A_SGE_DBFIFO_STATUS); + v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2); + if (is_t4(adap->chip)) { + lp_count = G_LP_COUNT(v1); + hp_count = G_HP_COUNT(v1); + } else { + lp_count = G_LP_COUNT_T5(v1); + hp_count = G_HP_COUNT_T5(v2); + } + return lpfifo ? lp_count : hp_count; } EXPORT_SYMBOL(cxgb4_dbfifo_count); @@ -2853,6 +3397,25 @@ out: } EXPORT_SYMBOL(cxgb4_sync_txq_pidx); +void cxgb4_disable_db_coalescing(struct net_device *dev) +{ + struct adapter *adap; + + adap = netdev2adap(dev); + t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_NOCOALESCE, + F_NOCOALESCE); +} +EXPORT_SYMBOL(cxgb4_disable_db_coalescing); + +void cxgb4_enable_db_coalescing(struct net_device *dev) +{ + struct adapter *adap; + + adap = netdev2adap(dev); + t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_NOCOALESCE, 0); +} +EXPORT_SYMBOL(cxgb4_enable_db_coalescing); + static struct pci_driver cxgb4_driver; static void check_neigh_update(struct neighbour *neigh) @@ -2888,14 +3451,23 @@ static struct notifier_block cxgb4_netevent_nb = { static void drain_db_fifo(struct adapter *adap, int usecs) { - u32 v; + u32 v1, v2, lp_count, hp_count; do { + v1 = t4_read_reg(adap, A_SGE_DBFIFO_STATUS); + v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2); + if (is_t4(adap->chip)) { + lp_count = G_LP_COUNT(v1); + hp_count = G_HP_COUNT(v1); + } else { + lp_count = G_LP_COUNT_T5(v1); + hp_count = G_HP_COUNT_T5(v2); + } + + if 
(lp_count == 0 && hp_count == 0) + break; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(usecs_to_jiffies(usecs)); - v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS); - if (G_LP_COUNT(v) == 0 && G_HP_COUNT(v) == 0) - break; } while (1); } @@ -3004,24 +3576,62 @@ static void process_db_drop(struct work_struct *work) adap = container_of(work, struct adapter, db_drop_task); + if (is_t4(adap->chip)) { + disable_dbs(adap); + notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP); + drain_db_fifo(adap, 1); + recover_all_queues(adap); + enable_dbs(adap); + } else { + u32 dropped_db = t4_read_reg(adap, 0x010ac); + u16 qid = (dropped_db >> 15) & 0x1ffff; + u16 pidx_inc = dropped_db & 0x1fff; + unsigned int s_qpp; + unsigned short udb_density; + unsigned long qpshift; + int page; + u32 udb; + + dev_warn(adap->pdev_dev, + "Dropped DB 0x%x qid %d bar2 %d coalesce %d pidx %d\n", + dropped_db, qid, + (dropped_db >> 14) & 1, + (dropped_db >> 13) & 1, + pidx_inc); + + drain_db_fifo(adap, 1); + + s_qpp = QUEUESPERPAGEPF1 * adap->fn; + udb_density = 1 << QUEUESPERPAGEPF0_GET(t4_read_reg(adap, + SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp); + qpshift = PAGE_SHIFT - ilog2(udb_density); + udb = qid << qpshift; + udb &= PAGE_MASK; + page = udb / PAGE_SIZE; + udb += (qid - (page * udb_density)) * 128; + + writel(PIDX(pidx_inc), adap->bar2 + udb + 8); + + /* Re-enable BAR2 WC */ + t4_set_reg_field(adap, 0x10b0, 1<<15, 1<<15); + } + t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_DROPPED_DB, 0); - disable_dbs(adap); - notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP); - drain_db_fifo(adap, 1); - recover_all_queues(adap); - enable_dbs(adap); } void t4_db_full(struct adapter *adap) { - t4_set_reg_field(adap, SGE_INT_ENABLE3, - DBFIFO_HP_INT | DBFIFO_LP_INT, 0); - queue_work(workq, &adap->db_full_task); + if (is_t4(adap->chip)) { + t4_set_reg_field(adap, SGE_INT_ENABLE3, + DBFIFO_HP_INT | DBFIFO_LP_INT, 0); + queue_work(workq, &adap->db_full_task); + } } void t4_db_dropped(struct adapter *adap) { - 
queue_work(workq, &adap->db_drop_task); + if (is_t4(adap->chip)) + queue_work(workq, &adap->db_drop_task); } static void uld_attach(struct adapter *adap, unsigned int uld) @@ -3566,17 +4176,27 @@ void t4_fatal_err(struct adapter *adap) static void setup_memwin(struct adapter *adap) { - u32 bar0; + u32 bar0, mem_win0_base, mem_win1_base, mem_win2_base; bar0 = pci_resource_start(adap->pdev, 0); /* truncation intentional */ + if (is_t4(adap->chip)) { + mem_win0_base = bar0 + MEMWIN0_BASE; + mem_win1_base = bar0 + MEMWIN1_BASE; + mem_win2_base = bar0 + MEMWIN2_BASE; + } else { + /* For T5, only relative offset inside the PCIe BAR is passed */ + mem_win0_base = MEMWIN0_BASE; + mem_win1_base = MEMWIN1_BASE_T5; + mem_win2_base = MEMWIN2_BASE_T5; + } t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0), - (bar0 + MEMWIN0_BASE) | BIR(0) | + mem_win0_base | BIR(0) | WINDOW(ilog2(MEMWIN0_APERTURE) - 10)); t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1), - (bar0 + MEMWIN1_BASE) | BIR(0) | + mem_win1_base | BIR(0) | WINDOW(ilog2(MEMWIN1_APERTURE) - 10)); t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2), - (bar0 + MEMWIN2_BASE) | BIR(0) | + mem_win2_base | BIR(0) | WINDOW(ilog2(MEMWIN2_APERTURE) - 10)); } @@ -3745,6 +4365,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) unsigned long mtype = 0, maddr = 0; u32 finiver, finicsum, cfcsum; int ret, using_flash; + char *fw_config_file, fw_config_file_path[256]; /* * Reset device if necessary. @@ -3761,7 +4382,21 @@ static int adap_init0_config(struct adapter *adapter, int reset) * then use that. Otherwise, use the configuration file stored * in the adapter flash ... 
*/ - ret = request_firmware(&cf, FW_CFNAME, adapter->pdev_dev); + switch (CHELSIO_CHIP_VERSION(adapter->chip)) { + case CHELSIO_T4: + fw_config_file = FW_CFNAME; + break; + case CHELSIO_T5: + fw_config_file = FW5_CFNAME; + break; + default: + dev_err(adapter->pdev_dev, "Device %d is not supported\n", + adapter->pdev->device); + ret = -EINVAL; + goto bye; + } + + ret = request_firmware(&cf, fw_config_file, adapter->pdev_dev); if (ret < 0) { using_flash = 1; mtype = FW_MEMTYPE_CF_FLASH; @@ -3877,6 +4512,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) if (ret < 0) goto bye; + sprintf(fw_config_file_path, "/lib/firmware/%s", fw_config_file); /* * Return successfully and note that we're operating with parameters * not supplied by the driver, rather than from hard-wired @@ -3887,7 +4523,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) "Configuration File %s, version %#x, computed checksum %#x\n", (using_flash ? "in device FLASH" - : "/lib/firmware/" FW_CFNAME), + : fw_config_file_path), finiver, cfcsum); return 0; @@ -4814,7 +5450,8 @@ static void print_port_info(const struct net_device *dev) sprintf(bufp, "BASE-%s", base[pi->port_type]); netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n", - adap->params.vpd.id, adap->params.rev, buf, + adap->params.vpd.id, + CHELSIO_CHIP_RELEASE(adap->params.rev), buf, is_offload(adap) ? "R" : "", adap->params.pci.width, spd, (adap->flags & USING_MSIX) ? " MSI-X" : (adap->flags & USING_MSI) ? 
" MSI" : ""); @@ -4854,10 +5491,11 @@ static void free_some_resources(struct adapter *adapter) #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \ NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA) +#define SEGMENT_SIZE 128 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - int func, i, err; + int func, i, err, s_qpp, qpp, num_seg; struct port_info *pi; bool highdma = false; struct adapter *adapter = NULL; @@ -4934,7 +5572,34 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err = t4_prep_adapter(adapter); if (err) - goto out_unmap_bar; + goto out_unmap_bar0; + + if (!is_t4(adapter->chip)) { + s_qpp = QUEUESPERPAGEPF1 * adapter->fn; + qpp = 1 << QUEUESPERPAGEPF0_GET(t4_read_reg(adapter, + SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp); + num_seg = PAGE_SIZE / SEGMENT_SIZE; + + /* Each segment size is 128B. Write coalescing is enabled only + * when SGE_EGRESS_QUEUES_PER_PAGE_PF reg value for the + * queue is less no of segments that can be accommodated in + * a page size. 
+ */ + if (qpp > num_seg) { + dev_err(&pdev->dev, + "Incorrect number of egress queues per page\n"); + err = -EINVAL; + goto out_unmap_bar0; + } + adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + if (!adapter->bar2) { + dev_err(&pdev->dev, "cannot map device bar2 region\n"); + err = -ENOMEM; + goto out_unmap_bar0; + } + } + setup_memwin(adapter); err = adap_init0(adapter); setup_memwin_rdma(adapter); @@ -4963,7 +5628,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@ -5063,6 +5728,9 @@ sriov: out_free_dev: free_some_resources(adapter); out_unmap_bar: + if (!is_t4(adapter->chip)) + iounmap(adapter->bar2); + out_unmap_bar0: iounmap(adapter->regs); out_free_adapter: kfree(adapter); @@ -5113,6 +5781,8 @@ static void remove_one(struct pci_dev *pdev) free_some_resources(adapter); iounmap(adapter->regs); + if (!is_t4(adapter->chip)) + iounmap(adapter->bar2); kfree(adapter); pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index e2bbc7f3e2de..4faf4d067ee7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -269,4 +269,7 @@ struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl, unsigned int skb_len, unsigned int pull_len); int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx, u16 size); int cxgb4_flush_eq_cache(struct net_device *dev); +void cxgb4_disable_db_coalescing(struct net_device *dev); +void cxgb4_enable_db_coalescing(struct net_device *dev); + #endif /* !__CXGB4_OFLD_H */ diff --git 
a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index fe9a2ea3588b..2bfbb206b35a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -506,10 +506,14 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q) static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) { + u32 val; if (q->pend_cred >= 8) { + val = PIDX(q->pend_cred / 8); + if (!is_t4(adap->chip)) + val |= DBTYPE(1); wmb(); t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) | - QID(q->cntxt_id) | PIDX(q->pend_cred / 8)); + QID(q->cntxt_id) | val); q->pend_cred &= 7; } } @@ -812,6 +816,22 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, *end = 0; } +/* This function copies 64 byte coalesced work request to + * memory mapped BAR2 space(user space writes). + * For coalesced WR SGE, fetches data from the FIFO instead of from Host. + */ +static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) +{ + int count = 8; + + while (count) { + writeq(*src, dst); + src++; + dst++; + count--; + } +} + /** * ring_tx_db - check and potentially ring a Tx queue's doorbell * @adap: the adapter @@ -822,11 +842,25 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, */ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { + unsigned int *wr, index; + wmb(); /* write descriptors before telling HW */ spin_lock(&q->db_lock); if (!q->db_disabled) { - t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), - QID(q->cntxt_id) | PIDX(n)); + if (is_t4(adap->chip)) { + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), + QID(q->cntxt_id) | PIDX(n)); + } else { + if (n == 1) { + index = q->pidx ? 
(q->pidx - 1) : (q->size - 1); + wr = (unsigned int *)&q->desc[index]; + cxgb_pio_copy((u64 __iomem *) + (adap->bar2 + q->udb + 64), + (u64 *)wr); + } else + writel(n, adap->bar2 + q->udb + 8); + wmb(); + } } q->db_pidx = q->pidx; spin_unlock(&q->db_lock); @@ -1555,7 +1589,6 @@ static noinline int handle_trace_pkt(struct adapter *adap, const struct pkt_gl *gl) { struct sk_buff *skb; - struct cpl_trace_pkt *p; skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN); if (unlikely(!skb)) { @@ -1563,8 +1596,11 @@ static noinline int handle_trace_pkt(struct adapter *adap, return 0; } - p = (struct cpl_trace_pkt *)skb->data; - __skb_pull(skb, sizeof(*p)); + if (is_t4(adap->chip)) + __skb_pull(skb, sizeof(struct cpl_trace_pkt)); + else + __skb_pull(skb, sizeof(struct cpl_t5_trace_pkt)); + skb_reset_mac_header(skb); skb->protocol = htons(0xffff); skb->dev = adap->port[0]; @@ -1597,7 +1633,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb->rxhash = (__force u32)pkt->rsshdr.hash_val; if (unlikely(pkt->vlan_ex)) { - __vlan_hwaccel_put_tag(skb, ntohs(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); rxq->stats.vlan_ex++; } ret = napi_gro_frags(&rxq->rspq.napi); @@ -1625,8 +1661,10 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_rx_pkt *pkt; struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq); struct sge *s = &q->adap->sge; + int cpl_trace_pkt = is_t4(q->adap->chip) ? 
+ CPL_TRACE_PKT : CPL_TRACE_PKT_T5; - if (unlikely(*(u8 *)rsp == CPL_TRACE_PKT)) + if (unlikely(*(u8 *)rsp == cpl_trace_pkt)) return handle_trace_pkt(q->adap, si); pkt = (const struct cpl_rx_pkt *)rsp; @@ -1667,7 +1705,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { - __vlan_hwaccel_put_tag(skb, ntohs(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); rxq->stats.vlan_ex++; } netif_receive_skb(skb); @@ -2143,11 +2181,27 @@ err: static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) { + q->cntxt_id = id; + if (!is_t4(adap->chip)) { + unsigned int s_qpp; + unsigned short udb_density; + unsigned long qpshift; + int page; + + s_qpp = QUEUESPERPAGEPF1 * adap->fn; + udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap, + SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp)); + qpshift = PAGE_SHIFT - ilog2(udb_density); + q->udb = q->cntxt_id << qpshift; + q->udb &= PAGE_MASK; + page = q->udb / PAGE_SIZE; + q->udb += (q->cntxt_id - (page * udb_density)) * 128; + } + q->in_use = 0; q->cidx = q->pidx = 0; q->stops = q->restarts = 0; q->stat = (void *)&q->desc[q->size]; - q->cntxt_id = id; spin_lock_init(&q->db_lock); adap->sge.egr_map[id - adap->sge.egr_start] = q; } @@ -2587,11 +2641,20 @@ static int t4_sge_init_hard(struct adapter *adap) * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows * and generate an interrupt when this occurs so we can recover. 
*/ - t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS, - V_HP_INT_THRESH(M_HP_INT_THRESH) | - V_LP_INT_THRESH(M_LP_INT_THRESH), - V_HP_INT_THRESH(dbfifo_int_thresh) | - V_LP_INT_THRESH(dbfifo_int_thresh)); + if (is_t4(adap->chip)) { + t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS, + V_HP_INT_THRESH(M_HP_INT_THRESH) | + V_LP_INT_THRESH(M_LP_INT_THRESH), + V_HP_INT_THRESH(dbfifo_int_thresh) | + V_LP_INT_THRESH(dbfifo_int_thresh)); + } else { + t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS, + V_LP_INT_THRESH_T5(M_LP_INT_THRESH_T5), + V_LP_INT_THRESH_T5(dbfifo_int_thresh)); + t4_set_reg_field(adap, SGE_DBFIFO_STATUS2, + V_HP_INT_THRESH_T5(M_HP_INT_THRESH_T5), + V_HP_INT_THRESH_T5(dbfifo_int_thresh)); + } t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP, F_ENABLE_DROP); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 4ce62031f62f..d02d4e8c4417 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -282,6 +282,7 @@ int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, * t4_mc_read - read from MC through backdoor accesses * @adap: the adapter * @addr: address of first byte requested + * @idx: which MC to access * @data: 64 bytes of data containing the requested address * @ecc: where to store the corresponding 64-bit ECC word * @@ -289,22 +290,38 @@ int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, * that covers the requested address @addr. If @parity is not %NULL it * is assigned the 64-bit ECC word for the read data. 
*/ -int t4_mc_read(struct adapter *adap, u32 addr, __be32 *data, u64 *ecc) +int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) { int i; + u32 mc_bist_cmd, mc_bist_cmd_addr, mc_bist_cmd_len; + u32 mc_bist_status_rdata, mc_bist_data_pattern; + + if (is_t4(adap->chip)) { + mc_bist_cmd = MC_BIST_CMD; + mc_bist_cmd_addr = MC_BIST_CMD_ADDR; + mc_bist_cmd_len = MC_BIST_CMD_LEN; + mc_bist_status_rdata = MC_BIST_STATUS_RDATA; + mc_bist_data_pattern = MC_BIST_DATA_PATTERN; + } else { + mc_bist_cmd = MC_REG(MC_P_BIST_CMD, idx); + mc_bist_cmd_addr = MC_REG(MC_P_BIST_CMD_ADDR, idx); + mc_bist_cmd_len = MC_REG(MC_P_BIST_CMD_LEN, idx); + mc_bist_status_rdata = MC_REG(MC_P_BIST_STATUS_RDATA, idx); + mc_bist_data_pattern = MC_REG(MC_P_BIST_DATA_PATTERN, idx); + } - if (t4_read_reg(adap, MC_BIST_CMD) & START_BIST) + if (t4_read_reg(adap, mc_bist_cmd) & START_BIST) return -EBUSY; - t4_write_reg(adap, MC_BIST_CMD_ADDR, addr & ~0x3fU); - t4_write_reg(adap, MC_BIST_CMD_LEN, 64); - t4_write_reg(adap, MC_BIST_DATA_PATTERN, 0xc); - t4_write_reg(adap, MC_BIST_CMD, BIST_OPCODE(1) | START_BIST | + t4_write_reg(adap, mc_bist_cmd_addr, addr & ~0x3fU); + t4_write_reg(adap, mc_bist_cmd_len, 64); + t4_write_reg(adap, mc_bist_data_pattern, 0xc); + t4_write_reg(adap, mc_bist_cmd, BIST_OPCODE(1) | START_BIST | BIST_CMD_GAP(1)); - i = t4_wait_op_done(adap, MC_BIST_CMD, START_BIST, 0, 10, 1); + i = t4_wait_op_done(adap, mc_bist_cmd, START_BIST, 0, 10, 1); if (i) return i; -#define MC_DATA(i) MC_BIST_STATUS_REG(MC_BIST_STATUS_RDATA, i) +#define MC_DATA(i) MC_BIST_STATUS_REG(mc_bist_status_rdata, i) for (i = 15; i >= 0; i--) *data++ = htonl(t4_read_reg(adap, MC_DATA(i))); @@ -329,20 +346,39 @@ int t4_mc_read(struct adapter *adap, u32 addr, __be32 *data, u64 *ecc) int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) { int i; + u32 edc_bist_cmd, edc_bist_cmd_addr, edc_bist_cmd_len; + u32 edc_bist_cmd_data_pattern, edc_bist_status_rdata; + + if 
(is_t4(adap->chip)) { + edc_bist_cmd = EDC_REG(EDC_BIST_CMD, idx); + edc_bist_cmd_addr = EDC_REG(EDC_BIST_CMD_ADDR, idx); + edc_bist_cmd_len = EDC_REG(EDC_BIST_CMD_LEN, idx); + edc_bist_cmd_data_pattern = EDC_REG(EDC_BIST_DATA_PATTERN, + idx); + edc_bist_status_rdata = EDC_REG(EDC_BIST_STATUS_RDATA, + idx); + } else { + edc_bist_cmd = EDC_REG_T5(EDC_H_BIST_CMD, idx); + edc_bist_cmd_addr = EDC_REG_T5(EDC_H_BIST_CMD_ADDR, idx); + edc_bist_cmd_len = EDC_REG_T5(EDC_H_BIST_CMD_LEN, idx); + edc_bist_cmd_data_pattern = + EDC_REG_T5(EDC_H_BIST_DATA_PATTERN, idx); + edc_bist_status_rdata = + EDC_REG_T5(EDC_H_BIST_STATUS_RDATA, idx); + } - idx *= EDC_STRIDE; - if (t4_read_reg(adap, EDC_BIST_CMD + idx) & START_BIST) + if (t4_read_reg(adap, edc_bist_cmd) & START_BIST) return -EBUSY; - t4_write_reg(adap, EDC_BIST_CMD_ADDR + idx, addr & ~0x3fU); - t4_write_reg(adap, EDC_BIST_CMD_LEN + idx, 64); - t4_write_reg(adap, EDC_BIST_DATA_PATTERN + idx, 0xc); - t4_write_reg(adap, EDC_BIST_CMD + idx, + t4_write_reg(adap, edc_bist_cmd_addr, addr & ~0x3fU); + t4_write_reg(adap, edc_bist_cmd_len, 64); + t4_write_reg(adap, edc_bist_cmd_data_pattern, 0xc); + t4_write_reg(adap, edc_bist_cmd, BIST_OPCODE(1) | BIST_CMD_GAP(1) | START_BIST); - i = t4_wait_op_done(adap, EDC_BIST_CMD + idx, START_BIST, 0, 10, 1); + i = t4_wait_op_done(adap, edc_bist_cmd, START_BIST, 0, 10, 1); if (i) return i; -#define EDC_DATA(i) (EDC_BIST_STATUS_REG(EDC_BIST_STATUS_RDATA, i) + idx) +#define EDC_DATA(i) (EDC_BIST_STATUS_REG(edc_bist_status_rdata, i)) for (i = 15; i >= 0; i--) *data++ = htonl(t4_read_reg(adap, EDC_DATA(i))); @@ -366,6 +402,7 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) static int t4_mem_win_rw(struct adapter *adap, u32 addr, __be32 *data, int dir) { int i; + u32 win_pf = is_t4(adap->chip) ? 0 : V_PFNUM(adap->fn); /* * Setup offset into PCIE memory window. 
Address must be a @@ -374,7 +411,7 @@ static int t4_mem_win_rw(struct adapter *adap, u32 addr, __be32 *data, int dir) * values.) */ t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, - addr & ~(MEMWIN0_APERTURE - 1)); + (addr & ~(MEMWIN0_APERTURE - 1)) | win_pf); t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET); /* Collecting data 4 bytes at a time upto MEMWIN0_APERTURE */ @@ -410,6 +447,7 @@ static int t4_memory_rw(struct adapter *adap, int mtype, u32 addr, u32 len, __be32 *buf, int dir) { u32 pos, start, end, offset, memoffset; + u32 edc_size, mc_size; int ret = 0; __be32 *data; @@ -423,13 +461,21 @@ static int t4_memory_rw(struct adapter *adap, int mtype, u32 addr, u32 len, if (!data) return -ENOMEM; - /* - * Offset into the region of memory which is being accessed + /* Offset into the region of memory which is being accessed * MEM_EDC0 = 0 * MEM_EDC1 = 1 - * MEM_MC = 2 + * MEM_MC = 2 -- T4 + * MEM_MC0 = 2 -- For T5 + * MEM_MC1 = 3 -- For T5 */ - memoffset = (mtype * (5 * 1024 * 1024)); + edc_size = EDRAM_SIZE_GET(t4_read_reg(adap, MA_EDRAM0_BAR)); + if (mtype != MEM_MC1) + memoffset = (mtype * (edc_size * 1024 * 1024)); + else { + mc_size = EXT_MEM_SIZE_GET(t4_read_reg(adap, + MA_EXT_MEMORY_BAR)); + memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; + } /* Determine the PCIE_MEM_ACCESS_OFFSET */ addr = addr + memoffset; @@ -497,8 +543,9 @@ int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, } #define EEPROM_STAT_ADDR 0x7bfc -#define VPD_BASE 0 -#define VPD_LEN 512 +#define VPD_BASE 0x400 +#define VPD_BASE_OLD 0 +#define VPD_LEN 1024 /** * t4_seeprom_wp - enable/disable EEPROM write protection @@ -524,7 +571,7 @@ int t4_seeprom_wp(struct adapter *adapter, bool enable) int get_vpd_params(struct adapter *adapter, struct vpd_params *p) { u32 cclk_param, cclk_val; - int i, ret; + int i, ret, addr; int ec, sn; u8 *vpd, csum; unsigned int vpdr_len, kw_offset, id_len; @@ -533,7 +580,12 @@ int get_vpd_params(struct adapter *adapter, struct vpd_params *p) if 
(!vpd) return -ENOMEM; - ret = pci_read_vpd(adapter->pdev, VPD_BASE, VPD_LEN, vpd); + ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd); + if (ret < 0) + goto out; + addr = *vpd == 0x82 ? VPD_BASE : VPD_BASE_OLD; + + ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd); if (ret < 0) goto out; @@ -850,6 +902,7 @@ int t4_check_fw_version(struct adapter *adapter) { u32 api_vers[2]; int ret, major, minor, micro; + int exp_major, exp_minor, exp_micro; ret = get_fw_version(adapter, &adapter->params.fw_vers); if (!ret) @@ -864,17 +917,35 @@ int t4_check_fw_version(struct adapter *adapter) major = FW_HDR_FW_VER_MAJOR_GET(adapter->params.fw_vers); minor = FW_HDR_FW_VER_MINOR_GET(adapter->params.fw_vers); micro = FW_HDR_FW_VER_MICRO_GET(adapter->params.fw_vers); + + switch (CHELSIO_CHIP_VERSION(adapter->chip)) { + case CHELSIO_T4: + exp_major = FW_VERSION_MAJOR; + exp_minor = FW_VERSION_MINOR; + exp_micro = FW_VERSION_MICRO; + break; + case CHELSIO_T5: + exp_major = FW_VERSION_MAJOR_T5; + exp_minor = FW_VERSION_MINOR_T5; + exp_micro = FW_VERSION_MICRO_T5; + break; + default: + dev_err(adapter->pdev_dev, "Unsupported chip type, %x\n", + adapter->chip); + return -EINVAL; + } + memcpy(adapter->params.api_vers, api_vers, sizeof(adapter->params.api_vers)); - if (major != FW_VERSION_MAJOR) { /* major mismatch - fail */ + if (major != exp_major) { /* major mismatch - fail */ dev_err(adapter->pdev_dev, "card FW has major version %u, driver wants %u\n", - major, FW_VERSION_MAJOR); + major, exp_major); return -EINVAL; } - if (minor == FW_VERSION_MINOR && micro == FW_VERSION_MICRO) + if (minor == exp_minor && micro == exp_micro) return 0; /* perfect match */ /* Minor/micro version mismatch. Report it but often it's OK. 
*/ @@ -1240,6 +1311,45 @@ static void pcie_intr_handler(struct adapter *adapter) { 0 } }; + static struct intr_info t5_pcie_intr_info[] = { + { MSTGRPPERR, "Master Response Read Queue parity error", + -1, 1 }, + { MSTTIMEOUTPERR, "Master Timeout FIFO parity error", -1, 1 }, + { MSIXSTIPERR, "MSI-X STI SRAM parity error", -1, 1 }, + { MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, + { MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, + { MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, + { MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, + { PIOCPLGRPPERR, "PCI PIO completion Group FIFO parity error", + -1, 1 }, + { PIOREQGRPPERR, "PCI PIO request Group FIFO parity error", + -1, 1 }, + { TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, + { MSTTAGQPERR, "PCI master tag queue parity error", -1, 1 }, + { CREQPERR, "PCI CMD channel request parity error", -1, 1 }, + { CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, + { DREQWRPERR, "PCI DMA channel write request parity error", + -1, 1 }, + { DREQPERR, "PCI DMA channel request parity error", -1, 1 }, + { DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, + { HREQWRPERR, "PCI HMA channel count parity error", -1, 1 }, + { HREQPERR, "PCI HMA channel request parity error", -1, 1 }, + { HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, + { CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, + { FIDPERR, "PCI FID parity error", -1, 1 }, + { VFIDPERR, "PCI INTx clear parity error", -1, 1 }, + { MAGRPPERR, "PCI MA group FIFO parity error", -1, 1 }, + { PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, + { IPRXHDRGRPPERR, "PCI IP Rx header group parity error", + -1, 1 }, + { IPRXDATAGRPPERR, "PCI IP Rx data group parity error", -1, 1 }, + { RPLPERR, "PCI IP replay buffer parity error", -1, 1 }, + { IPSOTPERR, "PCI IP SOT buffer parity error", -1, 1 }, + { TRGT1GRPPERR, "PCI TRGT1 group FIFOs parity error", -1, 1 }, + { READRSPERR, "Outbound read error", -1, 0 }, + { 0 } + }; + 
int fat; fat = t4_handle_intr_status(adapter, @@ -1248,7 +1358,10 @@ static void pcie_intr_handler(struct adapter *adapter) t4_handle_intr_status(adapter, PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, pcie_port_intr_info) + - t4_handle_intr_status(adapter, PCIE_INT_CAUSE, pcie_intr_info); + t4_handle_intr_status(adapter, PCIE_INT_CAUSE, + is_t4(adapter->chip) ? + pcie_intr_info : t5_pcie_intr_info); + if (fat) t4_fatal_err(adapter); } @@ -1658,7 +1771,14 @@ static void ncsi_intr_handler(struct adapter *adap) */ static void xgmac_intr_handler(struct adapter *adap, int port) { - u32 v = t4_read_reg(adap, PORT_REG(port, XGMAC_PORT_INT_CAUSE)); + u32 v, int_cause_reg; + + if (is_t4(adap->chip)) + int_cause_reg = PORT_REG(port, XGMAC_PORT_INT_CAUSE); + else + int_cause_reg = T5_PORT_REG(port, MAC_PORT_INT_CAUSE); + + v = t4_read_reg(adap, int_cause_reg); v &= TXFIFO_PRTY_ERR | RXFIFO_PRTY_ERR; if (!v) @@ -2120,7 +2240,9 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) u32 bgmap = get_mps_bg_map(adap, idx); #define GET_STAT(name) \ - t4_read_reg64(adap, PORT_REG(idx, MPS_PORT_STAT_##name##_L)) + t4_read_reg64(adap, \ + (is_t4(adap->chip) ? 
PORT_REG(idx, MPS_PORT_STAT_##name##_L) : \ + T5_PORT_REG(idx, MPS_PORT_STAT_##name##_L))) #define GET_STAT_COM(name) t4_read_reg64(adap, MPS_STAT_##name##_L) p->tx_octets = GET_STAT(TX_PORT_BYTES); @@ -2199,14 +2321,26 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) void t4_wol_magic_enable(struct adapter *adap, unsigned int port, const u8 *addr) { + u32 mag_id_reg_l, mag_id_reg_h, port_cfg_reg; + + if (is_t4(adap->chip)) { + mag_id_reg_l = PORT_REG(port, XGMAC_PORT_MAGIC_MACID_LO); + mag_id_reg_h = PORT_REG(port, XGMAC_PORT_MAGIC_MACID_HI); + port_cfg_reg = PORT_REG(port, XGMAC_PORT_CFG2); + } else { + mag_id_reg_l = T5_PORT_REG(port, MAC_PORT_MAGIC_MACID_LO); + mag_id_reg_h = T5_PORT_REG(port, MAC_PORT_MAGIC_MACID_HI); + port_cfg_reg = T5_PORT_REG(port, MAC_PORT_CFG2); + } + if (addr) { - t4_write_reg(adap, PORT_REG(port, XGMAC_PORT_MAGIC_MACID_LO), + t4_write_reg(adap, mag_id_reg_l, (addr[2] << 24) | (addr[3] << 16) | (addr[4] << 8) | addr[5]); - t4_write_reg(adap, PORT_REG(port, XGMAC_PORT_MAGIC_MACID_HI), + t4_write_reg(adap, mag_id_reg_h, (addr[0] << 8) | addr[1]); } - t4_set_reg_field(adap, PORT_REG(port, XGMAC_PORT_CFG2), MAGICEN, + t4_set_reg_field(adap, port_cfg_reg, MAGICEN, addr ? MAGICEN : 0); } @@ -2229,16 +2363,23 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, u64 mask0, u64 mask1, unsigned int crc, bool enable) { int i; + u32 port_cfg_reg; + + if (is_t4(adap->chip)) + port_cfg_reg = PORT_REG(port, XGMAC_PORT_CFG2); + else + port_cfg_reg = T5_PORT_REG(port, MAC_PORT_CFG2); if (!enable) { - t4_set_reg_field(adap, PORT_REG(port, XGMAC_PORT_CFG2), - PATEN, 0); + t4_set_reg_field(adap, port_cfg_reg, PATEN, 0); return 0; } if (map > 0xff) return -EINVAL; -#define EPIO_REG(name) PORT_REG(port, XGMAC_PORT_EPIO_##name) +#define EPIO_REG(name) \ + (is_t4(adap->chip) ? 
PORT_REG(port, XGMAC_PORT_EPIO_##name) : \ + T5_PORT_REG(port, MAC_PORT_EPIO_##name)) t4_write_reg(adap, EPIO_REG(DATA1), mask0 >> 32); t4_write_reg(adap, EPIO_REG(DATA2), mask1); @@ -2316,24 +2457,24 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, * @addr: address of first byte requested aligned on 32b. * @data: len bytes to hold the data read * @len: amount of data to read from window. Must be <= - * MEMWIN0_APERATURE after adjusting for 16B alignment - * requirements of the the memory window. + * MEMWIN0_APERATURE after adjusting for 16B for T4 and + * 128B for T5 alignment requirements of the the memory window. * * Read len bytes of data from MC starting at @addr. */ int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len) { - int i; - int off; + int i, off; + u32 win_pf = is_t4(adap->chip) ? 0 : V_PFNUM(adap->fn); - /* - * Align on a 16B boundary. + /* Align on a 2KB boundary. */ - off = addr & 15; + off = addr & MEMWIN0_APERTURE; if ((addr & 3) || (len + off) > MEMWIN0_APERTURE) return -EINVAL; - t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, addr & ~15); + t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, + (addr & ~MEMWIN0_APERTURE) | win_pf); t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET); for (i = 0; i < len; i += 4) @@ -3156,6 +3297,9 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, int i, ret; struct fw_vi_mac_cmd c; struct fw_vi_mac_exact *p; + unsigned int max_naddr = is_t4(adap->chip) ? + NUM_MPS_CLS_SRAM_L_INSTANCES : + NUM_MPS_T5_CLS_SRAM_L_INSTANCES; if (naddr > 7) return -EINVAL; @@ -3181,8 +3325,8 @@ int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, u16 index = FW_VI_MAC_CMD_IDX_GET(ntohs(p->valid_to_idx)); if (idx) - idx[i] = index >= NEXACT_MAC ? 0xffff : index; - if (index < NEXACT_MAC) + idx[i] = index >= max_naddr ? 
0xffff : index; + if (index < max_naddr) ret++; else if (hash) *hash |= (1ULL << hash_mac_addr(addr[i])); @@ -3215,6 +3359,9 @@ int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, int ret, mode; struct fw_vi_mac_cmd c; struct fw_vi_mac_exact *p = c.u.exact; + unsigned int max_mac_addr = is_t4(adap->chip) ? + NUM_MPS_CLS_SRAM_L_INSTANCES : + NUM_MPS_T5_CLS_SRAM_L_INSTANCES; if (idx < 0) /* new allocation */ idx = persist ? FW_VI_MAC_ADD_PERSIST_MAC : FW_VI_MAC_ADD_MAC; @@ -3232,7 +3379,7 @@ int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); if (ret == 0) { ret = FW_VI_MAC_CMD_IDX_GET(ntohs(p->valid_to_idx)); - if (ret >= NEXACT_MAC) + if (ret >= max_mac_addr) ret = -ENOMEM; } return ret; @@ -3541,7 +3688,8 @@ static int get_flash_params(struct adapter *adap) */ int t4_prep_adapter(struct adapter *adapter) { - int ret; + int ret, ver; + uint16_t device_id; ret = t4_wait_dev_ready(adapter); if (ret < 0) @@ -3556,6 +3704,28 @@ int t4_prep_adapter(struct adapter *adapter) return ret; } + /* Retrieve adapter's device ID + */ + pci_read_config_word(adapter->pdev, PCI_DEVICE_ID, &device_id); + ver = device_id >> 12; + switch (ver) { + case CHELSIO_T4: + adapter->chip = CHELSIO_CHIP_CODE(CHELSIO_T4, + adapter->params.rev); + break; + case CHELSIO_T5: + adapter->chip = CHELSIO_CHIP_CODE(CHELSIO_T5, + adapter->params.rev); + break; + default: + dev_err(adapter->pdev_dev, "Device %d is not supported\n", + device_id); + return -EINVAL; + } + + /* Reassign the updated revision field */ + adapter->params.rev = adapter->chip; + init_cong_ctrl(adapter->params.a_wnd, adapter->params.b_wnd); /* diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index f534ed7e10e9..1d1623be9f1e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -47,7 +47,6 @@ enum { TCB_SIZE = 128, /* TCB size */ 
NMTUS = 16, /* size of MTU table */ NCCTRL_WIN = 32, /* # of congestion control windows */ - NEXACT_MAC = 336, /* # of exact MAC address filters */ L2T_SIZE = 4096, /* # of L2T entries */ MBOX_LEN = 64, /* mailbox size in bytes */ TRACE_LEN = 112, /* length of trace data and mask */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 261d17703adc..47656ac1ac25 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -74,6 +74,7 @@ enum { CPL_PASS_ESTABLISH = 0x41, CPL_RX_DATA_DDP = 0x42, CPL_PASS_ACCEPT_REQ = 0x44, + CPL_TRACE_PKT_T5 = 0x48, CPL_RDMA_READ_REQ = 0x60, @@ -287,6 +288,23 @@ struct cpl_act_open_req { __be32 opt2; }; +#define S_FILTER_TUPLE 24 +#define M_FILTER_TUPLE 0xFFFFFFFFFF +#define V_FILTER_TUPLE(x) ((x) << S_FILTER_TUPLE) +#define G_FILTER_TUPLE(x) (((x) >> S_FILTER_TUPLE) & M_FILTER_TUPLE) +struct cpl_t5_act_open_req { + WR_HDR; + union opcode_tid ot; + __be16 local_port; + __be16 peer_port; + __be32 local_ip; + __be32 peer_ip; + __be64 opt0; + __be32 rsvd; + __be32 opt2; + __be64 params; +}; + struct cpl_act_open_req6 { WR_HDR; union opcode_tid ot; @@ -566,6 +584,11 @@ struct cpl_rx_pkt { #define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN) #define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN) +#define S_RX_T5_ETHHDR_LEN 0 +#define M_RX_T5_ETHHDR_LEN 0x3F +#define V_RX_T5_ETHHDR_LEN(x) ((x) << S_RX_T5_ETHHDR_LEN) +#define G_RX_T5_ETHHDR_LEN(x) (((x) >> S_RX_T5_ETHHDR_LEN) & M_RX_T5_ETHHDR_LEN) + #define S_RX_MACIDX 8 #define M_RX_MACIDX 0x1FF #define V_RX_MACIDX(x) ((x) << S_RX_MACIDX) @@ -612,6 +635,28 @@ struct cpl_trace_pkt { __be64 tstamp; }; +struct cpl_t5_trace_pkt { + __u8 opcode; + __u8 intf; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 runt:4; + __u8 filter_hit:4; + __u8:6; + __u8 err:1; + __u8 trunc:1; +#else + __u8 filter_hit:4; + __u8 runt:4; + __u8 trunc:1; + __u8 err:1; + __u8:6; +#endif + __be16 rsvd; + 
__be16 len; + __be64 tstamp; + __be64 rsvd1; +}; + struct cpl_l2t_write_req { WR_HDR; union opcode_tid ot; @@ -742,4 +787,12 @@ struct ulp_mem_io { #define ULP_MEMIO_LOCK(x) ((x) << 31) }; +#define S_T5_ULP_MEMIO_IMM 23 +#define V_T5_ULP_MEMIO_IMM(x) ((x) << S_T5_ULP_MEMIO_IMM) +#define F_T5_ULP_MEMIO_IMM V_T5_ULP_MEMIO_IMM(1U) + +#define S_T5_ULP_MEMIO_ORDER 22 +#define V_T5_ULP_MEMIO_ORDER(x) ((x) << S_T5_ULP_MEMIO_ORDER) +#define F_T5_ULP_MEMIO_ORDER V_T5_ULP_MEMIO_ORDER(1U) + #endif /* __T4_MSG_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 83ec5f7844ac..ef146c0ba481 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -68,9 +68,14 @@ #define QID_SHIFT 15 #define QID(x) ((x) << QID_SHIFT) #define DBPRIO(x) ((x) << 14) +#define DBTYPE(x) ((x) << 13) #define PIDX_MASK 0x00003fffU #define PIDX_SHIFT 0 #define PIDX(x) ((x) << PIDX_SHIFT) +#define S_PIDX_T5 0 +#define M_PIDX_T5 0x1fffU +#define PIDX_T5(x) (((x) >> S_PIDX_T5) & M_PIDX_T5) + #define SGE_PF_GTS 0x4 #define INGRESSQID_MASK 0xffff0000U @@ -152,6 +157,8 @@ #define QUEUESPERPAGEPF0_MASK 0x0000000fU #define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK) +#define QUEUESPERPAGEPF1 4 + #define SGE_INT_CAUSE1 0x1024 #define SGE_INT_CAUSE2 0x1030 #define SGE_INT_CAUSE3 0x103c @@ -234,6 +241,10 @@ #define SGE_DOORBELL_CONTROL 0x10a8 #define ENABLE_DROP (1 << 13) +#define S_NOCOALESCE 26 +#define V_NOCOALESCE(x) ((x) << S_NOCOALESCE) +#define F_NOCOALESCE V_NOCOALESCE(1U) + #define SGE_TIMER_VALUE_0_AND_1 0x10b8 #define TIMERVALUE0_MASK 0xffff0000U #define TIMERVALUE0_SHIFT 16 @@ -272,17 +283,36 @@ #define S_HP_INT_THRESH 28 #define M_HP_INT_THRESH 0xfU #define V_HP_INT_THRESH(x) ((x) << S_HP_INT_THRESH) +#define S_LP_INT_THRESH_T5 18 +#define V_LP_INT_THRESH_T5(x) ((x) << S_LP_INT_THRESH_T5) +#define M_LP_COUNT_T5 0x3ffffU +#define G_LP_COUNT_T5(x) (((x) >> S_LP_COUNT) & 
M_LP_COUNT_T5) #define M_HP_COUNT 0x7ffU #define S_HP_COUNT 16 #define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT) #define S_LP_INT_THRESH 12 #define M_LP_INT_THRESH 0xfU +#define M_LP_INT_THRESH_T5 0xfffU #define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH) #define M_LP_COUNT 0x7ffU #define S_LP_COUNT 0 #define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT) #define A_SGE_DBFIFO_STATUS 0x10a4 +#define SGE_STAT_TOTAL 0x10e4 +#define SGE_STAT_MATCH 0x10e8 + +#define SGE_STAT_CFG 0x10ec +#define S_STATSOURCE_T5 9 +#define STATSOURCE_T5(x) ((x) << S_STATSOURCE_T5) + +#define SGE_DBFIFO_STATUS2 0x1118 +#define M_HP_COUNT_T5 0x3ffU +#define G_HP_COUNT_T5(x) ((x) & M_HP_COUNT_T5) +#define S_HP_INT_THRESH_T5 10 +#define M_HP_INT_THRESH_T5 0xfU +#define V_HP_INT_THRESH_T5(x) ((x) << S_HP_INT_THRESH_T5) + #define S_ENABLE_DROP 13 #define V_ENABLE_DROP(x) ((x) << S_ENABLE_DROP) #define F_ENABLE_DROP V_ENABLE_DROP(1U) @@ -331,8 +361,27 @@ #define MSIADDRHPERR 0x00000002U #define MSIADDRLPERR 0x00000001U +#define READRSPERR 0x20000000U +#define TRGT1GRPPERR 0x10000000U +#define IPSOTPERR 0x08000000U +#define IPRXDATAGRPPERR 0x02000000U +#define IPRXHDRGRPPERR 0x01000000U +#define MAGRPPERR 0x00400000U +#define VFIDPERR 0x00200000U +#define HREQWRPERR 0x00010000U +#define DREQWRPERR 0x00002000U +#define MSTTAGQPERR 0x00000400U +#define PIOREQGRPPERR 0x00000100U +#define PIOCPLGRPPERR 0x00000080U +#define MSIXSTIPERR 0x00000004U +#define MSTTIMEOUTPERR 0x00000002U +#define MSTGRPPERR 0x00000001U + #define PCIE_NONFAT_ERR 0x3010 #define PCIE_MEM_ACCESS_BASE_WIN 0x3068 +#define S_PCIEOFST 10 +#define M_PCIEOFST 0x3fffffU +#define GET_PCIEOFST(x) (((x) >> S_PCIEOFST) & M_PCIEOFST) #define PCIEOFST_MASK 0xfffffc00U #define BIR_MASK 0x00000300U #define BIR_SHIFT 8 @@ -342,6 +391,9 @@ #define WINDOW(x) ((x) << WINDOW_SHIFT) #define PCIE_MEM_ACCESS_OFFSET 0x306c +#define S_PFNUM 0 +#define V_PFNUM(x) ((x) << S_PFNUM) + #define PCIE_FW 0x30b8 #define PCIE_FW_ERR 0x80000000U #define 
PCIE_FW_INIT 0x40000000U @@ -407,12 +459,18 @@ #define MC_BIST_STATUS_RDATA 0x7688 +#define MA_EDRAM0_BAR 0x77c0 +#define MA_EDRAM1_BAR 0x77c4 +#define EDRAM_SIZE_MASK 0xfffU +#define EDRAM_SIZE_GET(x) ((x) & EDRAM_SIZE_MASK) + #define MA_EXT_MEMORY_BAR 0x77c8 #define EXT_MEM_SIZE_MASK 0x00000fffU #define EXT_MEM_SIZE_SHIFT 0 #define EXT_MEM_SIZE_GET(x) (((x) & EXT_MEM_SIZE_MASK) >> EXT_MEM_SIZE_SHIFT) #define MA_TARGET_MEM_ENABLE 0x77d8 +#define EXT_MEM1_ENABLE 0x00000010U #define EXT_MEM_ENABLE 0x00000004U #define EDRAM1_ENABLE 0x00000002U #define EDRAM0_ENABLE 0x00000001U @@ -431,6 +489,7 @@ #define MA_PCIE_FW 0x30b8 #define MA_PARITY_ERROR_STATUS 0x77f4 +#define MA_EXT_MEMORY1_BAR 0x7808 #define EDC_0_BASE_ADDR 0x7900 #define EDC_BIST_CMD 0x7904 @@ -801,6 +860,15 @@ #define MPS_PORT_STAT_RX_PORT_PPP7_H 0x60c #define MPS_PORT_STAT_RX_PORT_LESS_64B_L 0x610 #define MPS_PORT_STAT_RX_PORT_LESS_64B_H 0x614 +#define MAC_PORT_CFG2 0x818 +#define MAC_PORT_MAGIC_MACID_LO 0x824 +#define MAC_PORT_MAGIC_MACID_HI 0x828 +#define MAC_PORT_EPIO_DATA0 0x8c0 +#define MAC_PORT_EPIO_DATA1 0x8c4 +#define MAC_PORT_EPIO_DATA2 0x8c8 +#define MAC_PORT_EPIO_DATA3 0x8cc +#define MAC_PORT_EPIO_OP 0x8d0 + #define MPS_CMN_CTL 0x9000 #define NUMPORTS_MASK 0x00000003U #define NUMPORTS_SHIFT 0 @@ -1063,6 +1131,7 @@ #define ADDRESS_SHIFT 0 #define ADDRESS(x) ((x) << ADDRESS_SHIFT) +#define MAC_PORT_INT_CAUSE 0x8dc #define XGMAC_PORT_INT_CAUSE 0x10dc #define A_TP_TX_MOD_QUEUE_REQ_MAP 0x7e28 @@ -1101,4 +1170,33 @@ #define V_PORT(x) ((x) << S_PORT) #define F_PORT V_PORT(1U) +#define NUM_MPS_CLS_SRAM_L_INSTANCES 336 +#define NUM_MPS_T5_CLS_SRAM_L_INSTANCES 512 + +#define T5_PORT0_BASE 0x30000 +#define T5_PORT_STRIDE 0x4000 +#define T5_PORT_BASE(idx) (T5_PORT0_BASE + (idx) * T5_PORT_STRIDE) +#define T5_PORT_REG(idx, reg) (T5_PORT_BASE(idx) + (reg)) + +#define MC_0_BASE_ADDR 0x40000 +#define MC_1_BASE_ADDR 0x48000 +#define MC_STRIDE (MC_1_BASE_ADDR - MC_0_BASE_ADDR) +#define MC_REG(reg, idx) (reg + 
MC_STRIDE * idx) + +#define MC_P_BIST_CMD 0x41400 +#define MC_P_BIST_CMD_ADDR 0x41404 +#define MC_P_BIST_CMD_LEN 0x41408 +#define MC_P_BIST_DATA_PATTERN 0x4140c +#define MC_P_BIST_STATUS_RDATA 0x41488 +#define EDC_T50_BASE_ADDR 0x50000 +#define EDC_H_BIST_CMD 0x50004 +#define EDC_H_BIST_CMD_ADDR 0x50008 +#define EDC_H_BIST_CMD_LEN 0x5000c +#define EDC_H_BIST_DATA_PATTERN 0x50010 +#define EDC_H_BIST_STATUS_RDATA 0x50028 + +#define EDC_T51_BASE_ADDR 0x50800 +#define EDC_STRIDE_T5 (EDC_T51_BASE_ADDR - EDC_T50_BASE_ADDR) +#define EDC_REG_T5(reg, idx) (reg + EDC_STRIDE_T5 * idx) + #endif /* __T4_REGS_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a0dcccd846c9..93444325b1e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -574,7 +574,7 @@ struct fw_eth_tx_pkt_vm_wr { __be16 vlantci; }; -#define FW_CMD_MAX_TIMEOUT 3000 +#define FW_CMD_MAX_TIMEOUT 10000 /* * If a host driver does a HELLO and discovers that there's already a MASTER diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 68eaa9c88c7d..be5c7ef6ca93 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -344,6 +344,7 @@ struct adapter { unsigned long registered_device_map; unsigned long open_device_map; unsigned long flags; + enum chip_type chip; struct adapter_params params; /* queue and interrupt resources */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 56b46ab2d4c5..73aef76a526c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -54,8 +54,8 @@ /* * Generic information about the driver. 
*/ -#define DRV_VERSION "1.0.0" -#define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver" +#define DRV_VERSION "2.0.0-ko" +#define DRV_DESC "Chelsio T4/T5 Virtual Function (VF) Network Driver" /* * Module Parameters. @@ -1050,7 +1050,7 @@ static inline unsigned int mk_adap_vers(const struct adapter *adapter) /* * Chip version 4, revision 0x3f (cxgb4vf). */ - return 4 | (0x3f << 10); + return CHELSIO_CHIP_VERSION(adapter->chip) | (0x3f << 10); } /* @@ -1100,10 +1100,10 @@ static netdev_features_t cxgb4vf_fix_features(struct net_device *dev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -1114,9 +1114,9 @@ static int cxgb4vf_set_features(struct net_device *dev, struct port_info *pi = netdev_priv(dev); netdev_features_t changed = dev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1, - features & NETIF_F_HW_VLAN_TX, 0); + features & NETIF_F_HW_VLAN_CTAG_TX, 0); return 0; } @@ -2099,6 +2099,15 @@ static int adap_init0(struct adapter *adapter) return err; } + switch (adapter->pdev->device >> 12) { + case CHELSIO_T4: + adapter->chip = CHELSIO_CHIP_CODE(CHELSIO_T4, 0); + break; + case CHELSIO_T5: + adapter->chip = CHELSIO_CHIP_CODE(CHELSIO_T5, 0); + break; + } + /* * Grab basic operational parameters. 
These will predominantly have * been set up by the Physical Function Driver or will be hard coded @@ -2614,11 +2623,12 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM; netdev->vlan_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA; - netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX; + netdev->features = netdev->hw_features | + NETIF_F_HW_VLAN_CTAG_TX; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; @@ -2888,6 +2898,26 @@ static struct pci_device_id cxgb4vf_pci_tbl[] = { CH_DEVICE(0x480a, 0), /* T404-bt */ CH_DEVICE(0x480d, 0), /* T480-cr */ CH_DEVICE(0x480e, 0), /* T440-lp-cr */ + CH_DEVICE(0x5800, 0), /* T580-dbg */ + CH_DEVICE(0x5801, 0), /* T520-cr */ + CH_DEVICE(0x5802, 0), /* T522-cr */ + CH_DEVICE(0x5803, 0), /* T540-cr */ + CH_DEVICE(0x5804, 0), /* T520-bch */ + CH_DEVICE(0x5805, 0), /* T540-bch */ + CH_DEVICE(0x5806, 0), /* T540-ch */ + CH_DEVICE(0x5807, 0), /* T520-so */ + CH_DEVICE(0x5808, 0), /* T520-cx */ + CH_DEVICE(0x5809, 0), /* T520-bt */ + CH_DEVICE(0x580a, 0), /* T504-bt */ + CH_DEVICE(0x580b, 0), /* T520-sr */ + CH_DEVICE(0x580c, 0), /* T504-bt */ + CH_DEVICE(0x580d, 0), /* T580-cr */ + CH_DEVICE(0x580e, 0), /* T540-lp-cr */ + CH_DEVICE(0x580f, 0), /* Amsterdam */ + CH_DEVICE(0x5810, 0), /* T580-lp-cr */ + CH_DEVICE(0x5811, 0), /* T520-lp-cr */ + CH_DEVICE(0x5812, 0), /* T560-cr */ + CH_DEVICE(0x5813, 0), /* T580-cr */ { 0, } }; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 9488032d6d2d..df296af20bd5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -528,17 +528,21 @@ static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl) */ static inline void ring_fl_db(struct adapter *adapter, struct 
sge_fl *fl) { + u32 val; + /* * The SGE keeps track of its Producer and Consumer Indices in terms * of Egress Queue Units so we can only tell it about integral numbers * of multiples of Free List Entries per Egress Queue Units ... */ if (fl->pend_cred >= FL_PER_EQ_UNIT) { + val = PIDX(fl->pend_cred / FL_PER_EQ_UNIT); + if (!is_t4(adapter->chip)) + val |= DBTYPE(1); wmb(); t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL, DBPRIO(1) | - QID(fl->cntxt_id) | - PIDX(fl->pend_cred / FL_PER_EQ_UNIT)); + QID(fl->cntxt_id) | val); fl->pend_cred %= FL_PER_EQ_UNIT; } } @@ -1478,7 +1482,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb_record_rx_queue(skb, rxq->rspq.idx); if (pkt->vlan_ex) { - __vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q), + be16_to_cpu(pkt->vlan)); rxq->stats.vlan_ex++; } ret = napi_gro_frags(&rxq->rspq.napi); @@ -1547,7 +1552,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, if (pkt->vlan_ex) { rxq->stats.vlan_ex++; - __vlan_hwaccel_put_tag(skb, be16_to_cpu(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(pkt->vlan)); } netif_receive_skb(skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 283f9d0d37fd..53cbfed21d0b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -38,6 +38,25 @@ #include "../cxgb4/t4fw_api.h" +#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision)) +#define CHELSIO_CHIP_VERSION(code) ((code) >> 4) +#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf) + +#define CHELSIO_T4 0x4 +#define CHELSIO_T5 0x5 + +enum chip_type { + T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 0), + T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1), + T4_A3 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2), + T4_FIRST_REV = T4_A1, + T4_LAST_REV = T4_A3, + + T5_A1 = 
CHELSIO_CHIP_CODE(CHELSIO_T5, 0), + T5_FIRST_REV = T5_A1, + T5_LAST_REV = T5_A1, +}; + /* * The "len16" field of a Firmware Command Structure ... */ @@ -232,6 +251,11 @@ static inline int t4vf_wr_mbox_ns(struct adapter *adapter, const void *cmd, return t4vf_wr_mbox_core(adapter, cmd, size, rpl, false); } +static inline int is_t4(enum chip_type chip) +{ + return (chip >= T4_FIRST_REV && chip <= T4_LAST_REV); +} + int t4vf_wait_dev_ready(struct adapter *); int t4vf_port_init(struct adapter *, int); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 7127c7b9efde..9f96dc3bb112 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -1027,8 +1027,11 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free, unsigned nfilters = 0; unsigned int rem = naddr; struct fw_vi_mac_cmd cmd, rpl; + unsigned int max_naddr = is_t4(adapter->chip) ? + NUM_MPS_CLS_SRAM_L_INSTANCES : + NUM_MPS_T5_CLS_SRAM_L_INSTANCES; - if (naddr > FW_CLS_TCAM_NUM_ENTRIES) + if (naddr > max_naddr) return -EINVAL; for (offset = 0; offset < naddr; /**/) { @@ -1069,10 +1072,10 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free, if (idx) idx[offset+i] = - (index >= FW_CLS_TCAM_NUM_ENTRIES + (index >= max_naddr ? 0xffff : index); - if (index < FW_CLS_TCAM_NUM_ENTRIES) + if (index < max_naddr) nfilters++; else if (hash) *hash |= (1ULL << hash_mac_addr(addr[offset+i])); @@ -1118,6 +1121,9 @@ int t4vf_change_mac(struct adapter *adapter, unsigned int viid, struct fw_vi_mac_exact *p = &cmd.u.exact[0]; size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd, u.exact[1]), 16); + unsigned int max_naddr = is_t4(adapter->chip) ? 
+ NUM_MPS_CLS_SRAM_L_INSTANCES : + NUM_MPS_T5_CLS_SRAM_L_INSTANCES; /* * If this is a new allocation, determine whether it should be @@ -1140,7 +1146,7 @@ int t4vf_change_mac(struct adapter *adapter, unsigned int viid, if (ret == 0) { p = &rpl.u.exact[0]; ret = FW_VI_MAC_CMD_IDX_GET(be16_to_cpu(p->valid_to_idx)); - if (ret >= FW_CLS_TCAM_NUM_ENTRIES) + if (ret >= max_naddr) ret = -ENOMEM; } return ret; diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index 138446957786..19f642a45f40 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -101,23 +101,6 @@ static char version[] __initdata = * them to system IRQ numbers. This mapping is card specific and is set to * the configuration of the Cirrus Eval board for this chip. */ -#if defined(CONFIG_MACH_IXDP2351) -#define CS89x0_NONISA_IRQ -static unsigned int netcard_portlist[] __used __initdata = { - IXDP2351_VIRT_CS8900_BASE, 0 -}; -static unsigned int cs8900_irq_map[] = { - IRQ_IXDP2351_CS8900, 0, 0, 0 -}; -#elif defined(CONFIG_ARCH_IXDP2X01) -#define CS89x0_NONISA_IRQ -static unsigned int netcard_portlist[] __used __initdata = { - IXDP2X01_CS8900_VIRT_BASE, 0 -}; -static unsigned int cs8900_irq_map[] = { - IRQ_IXDP2X01_CS8900, 0, 0, 0 -}; -#else #ifndef CONFIG_CS89x0_PLATFORM static unsigned int netcard_portlist[] __used __initdata = { 0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240, @@ -127,7 +110,6 @@ static unsigned int cs8900_irq_map[] = { 10, 11, 12, 5 }; #endif -#endif #if DEBUGGING static unsigned int net_debug = DEBUGGING; @@ -210,32 +192,6 @@ static int __init media_fn(char *str) __setup("cs89x0_media=", media_fn); #endif -#if defined(CONFIG_MACH_IXDP2351) -static u16 -readword(unsigned long base_addr, int portno) -{ - return __raw_readw(base_addr + (portno << 1)); -} - -static void -writeword(unsigned long base_addr, int portno, u16 value) -{ - __raw_writew(value, base_addr + (portno << 1)); -} -#elif 
defined(CONFIG_ARCH_IXDP2X01) -static u16 -readword(unsigned long base_addr, int portno) -{ - return __raw_readl(base_addr + (portno << 1)); -} - -static void -writeword(unsigned long base_addr, int portno, u16 value) -{ - __raw_writel(value, base_addr + (portno << 1)); -} -#endif - static void readwords(struct net_local *lp, int portno, void *buf, int length) { u8 *buf8 = (u8 *)buf; @@ -478,9 +434,6 @@ dma_rx(struct net_device *dev) /* Malloc up new buffer. */ skb = netdev_alloc_skb(dev, length + 2); if (skb == NULL) { - /* I don't think we want to do this to a stressed system */ - cs89_dbg(0, err, "%s: Memory squeeze, dropping packet\n", - dev->name); dev->stats.rx_dropped++; /* AKPM: advance bp to the next frame */ @@ -731,9 +684,6 @@ net_rx(struct net_device *dev) /* Malloc up new buffer. */ skb = netdev_alloc_skb(dev, length + 2); if (skb == NULL) { -#if 0 /* Again, this seems a cruel thing to do */ - pr_warn("%s: Memory squeeze, dropping packet\n", dev->name); -#endif dev->stats.rx_dropped++; return; } @@ -908,7 +858,7 @@ net_open(struct net_device *dev) goto bad_out; } } else { -#if !defined(CS89x0_NONISA_IRQ) && !defined(CONFIG_CS89x0_PLATFORM) +#if !defined(CONFIG_CS89x0_PLATFORM) if (((1 << dev->irq) & lp->irq_map) == 0) { pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n", dev->name, dev->irq, lp->irq_map); @@ -1321,9 +1271,7 @@ static const struct net_device_ops net_ops = { static void __init reset_chip(struct net_device *dev) { #if !defined(CONFIG_MACH_MX31ADS) -#if !defined(CS89x0_NONISA_IRQ) struct net_local *lp = netdev_priv(dev); -#endif /* CS89x0_NONISA_IRQ */ int reset_start_time; writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET); @@ -1331,7 +1279,6 @@ static void __init reset_chip(struct net_device *dev) /* wait 30 ms */ msleep(30); -#if !defined(CS89x0_NONISA_IRQ) if (lp->chip_type != CS8900) { /* Hardware problem requires PNP registers to be reconfigured after a reset */ iowrite16(PP_CS8920_ISAINT, 
lp->virt_addr + ADD_PORT); @@ -1344,7 +1291,6 @@ static void __init reset_chip(struct net_device *dev) iowrite8((dev->mem_start >> 8) & 0xff, lp->virt_addr + DATA_PORT + 1); } -#endif /* CS89x0_NONISA_IRQ */ /* Wait until the chip is reset */ reset_start_time = jiffies; @@ -1579,9 +1525,6 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) i = lp->isa_config & INT_NO_MASK; #ifndef CONFIG_CS89x0_PLATFORM if (lp->chip_type == CS8900) { -#ifdef CS89x0_NONISA_IRQ - i = cs8900_irq_map[0]; -#else /* Translate the IRQ using the IRQ mapping table. */ if (i >= ARRAY_SIZE(cs8900_irq_map)) pr_err("invalid ISA interrupt number %d\n", i); @@ -1599,7 +1542,6 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) lp->irq_map = ((irq_map_buff[0] >> 8) | (irq_map_buff[1] << 8)); } -#endif } #endif if (!dev->irq) @@ -1978,18 +1920,6 @@ static struct platform_driver cs89x0_driver = { .remove = cs89x0_platform_remove, }; -static int __init cs89x0_init(void) -{ - return platform_driver_probe(&cs89x0_driver, cs89x0_platform_probe); -} - -module_init(cs89x0_init); - -static void __exit cs89x0_cleanup(void) -{ - platform_driver_unregister(&cs89x0_driver); -} - -module_exit(cs89x0_cleanup); +module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe); #endif /* CONFIG_CS89x0_PLATFORM */ diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index 354cbb78ed50..67b0388b6e68 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -887,18 +887,7 @@ static struct platform_driver ep93xx_eth_driver = { }, }; -static int __init ep93xx_eth_init_module(void) -{ - printk(KERN_INFO DRV_MODULE_NAME " version " DRV_MODULE_VERSION " loading\n"); - return platform_driver_register(&ep93xx_eth_driver); -} - -static void __exit ep93xx_eth_cleanup_module(void) -{ - platform_driver_unregister(&ep93xx_eth_driver); -} +module_platform_driver(ep93xx_eth_driver); 
-module_init(ep93xx_eth_init_module); -module_exit(ep93xx_eth_cleanup_module); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:ep93xx-eth"); diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.c b/drivers/net/ethernet/cisco/enic/enic_dev.c index bf0fc56dba19..4b6e5695b263 100644 --- a/drivers/net/ethernet/cisco/enic/enic_dev.c +++ b/drivers/net/ethernet/cisco/enic/enic_dev.c @@ -212,7 +212,7 @@ int enic_dev_deinit_done(struct enic *enic, int *status) } /* rtnl lock is held */ -int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +int enic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct enic *enic = netdev_priv(netdev); int err; @@ -225,7 +225,7 @@ int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid) } /* rtnl lock is held */ -int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +int enic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct enic *enic = netdev_priv(netdev); int err; diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.h b/drivers/net/ethernet/cisco/enic/enic_dev.h index da1cba3c410e..08bded051b93 100644 --- a/drivers/net/ethernet/cisco/enic/enic_dev.h +++ b/drivers/net/ethernet/cisco/enic/enic_dev.h @@ -46,8 +46,8 @@ int enic_dev_packet_filter(struct enic *enic, int directed, int multicast, int broadcast, int promisc, int allmulti); int enic_dev_add_addr(struct enic *enic, u8 *addr); int enic_dev_del_addr(struct enic *enic, u8 *addr); -int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid); -int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); +int enic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); +int enic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); int enic_dev_notify_unset(struct enic *enic); int enic_dev_hang_notify(struct enic *enic); int enic_dev_set_ig_vlan_rewrite_mode(struct enic *enic); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 
ec1a233622c6..635f55992d7e 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1300,7 +1300,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, } if (vlan_stripped) - __vlan_hwaccel_put_tag(skb, vlan_tci); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); if (netdev->features & NETIF_F_GRO) napi_gro_receive(&enic->napi[q_number], skb); @@ -2496,9 +2496,9 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->watchdog_timeo = 2 * HZ; netdev->ethtool_ops = &enic_ethtool_ops; - netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (ENIC_SETTING(enic, LOOP)) { - netdev->features &= ~NETIF_F_HW_VLAN_TX; + netdev->features &= ~NETIF_F_HW_VLAN_CTAG_TX; enic->loop_enable = 1; enic->loop_tag = enic->config.loop_tag; dev_info(dev, "loopback tag=0x%04x\n", enic->loop_tag); diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index 605b22283be1..97455c573db5 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -308,6 +308,9 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, if (status & STAT_ERROR) { err = (int)readq(&devcmd->args[0]); + if (err == ERR_EINVAL && + cmd == CMD_CAPABILITY) + return err; if (err != ERR_ECMDUNKNOWN || cmd != CMD_CAPABILITY) pr_err("Error %d devcmd %d\n", diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 8cdf02503d13..9105465b2a1a 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -257,6 +257,107 @@ static void dm9000_dumpblk_32bit(void __iomem *reg, int count) tmp = readl(reg); } +/* + * Sleep, either by using msleep() or if we are suspending, then + * use mdelay() to sleep. 
+ */ +static void dm9000_msleep(board_info_t *db, unsigned int ms) +{ + if (db->in_suspend) + mdelay(ms); + else + msleep(ms); +} + +/* Read a word from phyxcer */ +static int +dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg) +{ + board_info_t *db = netdev_priv(dev); + unsigned long flags; + unsigned int reg_save; + int ret; + + mutex_lock(&db->addr_lock); + + spin_lock_irqsave(&db->lock, flags); + + /* Save previous register address */ + reg_save = readb(db->io_addr); + + /* Fill the phyxcer register into REG_0C */ + iow(db, DM9000_EPAR, DM9000_PHY | reg); + + /* Issue phyxcer read command */ + iow(db, DM9000_EPCR, EPCR_ERPRR | EPCR_EPOS); + + writeb(reg_save, db->io_addr); + spin_unlock_irqrestore(&db->lock, flags); + + dm9000_msleep(db, 1); /* Wait read complete */ + + spin_lock_irqsave(&db->lock, flags); + reg_save = readb(db->io_addr); + + iow(db, DM9000_EPCR, 0x0); /* Clear phyxcer read command */ + + /* The read data keeps on REG_0D & REG_0E */ + ret = (ior(db, DM9000_EPDRH) << 8) | ior(db, DM9000_EPDRL); + + /* restore the previous address */ + writeb(reg_save, db->io_addr); + spin_unlock_irqrestore(&db->lock, flags); + + mutex_unlock(&db->addr_lock); + + dm9000_dbg(db, 5, "phy_read[%02x] -> %04x\n", reg, ret); + return ret; +} + +/* Write a word to phyxcer */ +static void +dm9000_phy_write(struct net_device *dev, + int phyaddr_unused, int reg, int value) +{ + board_info_t *db = netdev_priv(dev); + unsigned long flags; + unsigned long reg_save; + + dm9000_dbg(db, 5, "phy_write[%02x] = %04x\n", reg, value); + mutex_lock(&db->addr_lock); + + spin_lock_irqsave(&db->lock, flags); + + /* Save previous register address */ + reg_save = readb(db->io_addr); + + /* Fill the phyxcer register into REG_0C */ + iow(db, DM9000_EPAR, DM9000_PHY | reg); + + /* Fill the written data into REG_0D & REG_0E */ + iow(db, DM9000_EPDRL, value); + iow(db, DM9000_EPDRH, value >> 8); + + /* Issue phyxcer write command */ + iow(db, DM9000_EPCR, EPCR_EPOS | 
EPCR_ERPRW); + + writeb(reg_save, db->io_addr); + spin_unlock_irqrestore(&db->lock, flags); + + dm9000_msleep(db, 1); /* Wait write complete */ + + spin_lock_irqsave(&db->lock, flags); + reg_save = readb(db->io_addr); + + iow(db, DM9000_EPCR, 0x0); /* Clear phyxcer write command */ + + /* restore the previous address */ + writeb(reg_save, db->io_addr); + + spin_unlock_irqrestore(&db->lock, flags); + mutex_unlock(&db->addr_lock); +} + /* dm9000_set_io * * select the specified set of io routines to use with the @@ -795,6 +896,9 @@ dm9000_init_dm9000(struct net_device *dev) iow(db, DM9000_GPCR, GPCR_GEP_CNTL); /* Let GPIO0 output */ + dm9000_phy_write(dev, 0, MII_BMCR, BMCR_RESET); /* PHY RESET */ + dm9000_phy_write(dev, 0, MII_DM_DSPCR, DSPCR_INIT_PARAM); /* Init */ + ncr = (db->flags & DM9000_PLATF_EXT_PHY) ? NCR_EXT_PHY : 0; /* if wol is needed, then always set NCR_WAKEEN otherwise we end @@ -1201,109 +1305,6 @@ dm9000_open(struct net_device *dev) return 0; } -/* - * Sleep, either by using msleep() or if we are suspending, then - * use mdelay() to sleep. 
- */ -static void dm9000_msleep(board_info_t *db, unsigned int ms) -{ - if (db->in_suspend) - mdelay(ms); - else - msleep(ms); -} - -/* - * Read a word from phyxcer - */ -static int -dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg) -{ - board_info_t *db = netdev_priv(dev); - unsigned long flags; - unsigned int reg_save; - int ret; - - mutex_lock(&db->addr_lock); - - spin_lock_irqsave(&db->lock,flags); - - /* Save previous register address */ - reg_save = readb(db->io_addr); - - /* Fill the phyxcer register into REG_0C */ - iow(db, DM9000_EPAR, DM9000_PHY | reg); - - iow(db, DM9000_EPCR, EPCR_ERPRR | EPCR_EPOS); /* Issue phyxcer read command */ - - writeb(reg_save, db->io_addr); - spin_unlock_irqrestore(&db->lock,flags); - - dm9000_msleep(db, 1); /* Wait read complete */ - - spin_lock_irqsave(&db->lock,flags); - reg_save = readb(db->io_addr); - - iow(db, DM9000_EPCR, 0x0); /* Clear phyxcer read command */ - - /* The read data keeps on REG_0D & REG_0E */ - ret = (ior(db, DM9000_EPDRH) << 8) | ior(db, DM9000_EPDRL); - - /* restore the previous address */ - writeb(reg_save, db->io_addr); - spin_unlock_irqrestore(&db->lock,flags); - - mutex_unlock(&db->addr_lock); - - dm9000_dbg(db, 5, "phy_read[%02x] -> %04x\n", reg, ret); - return ret; -} - -/* - * Write a word to phyxcer - */ -static void -dm9000_phy_write(struct net_device *dev, - int phyaddr_unused, int reg, int value) -{ - board_info_t *db = netdev_priv(dev); - unsigned long flags; - unsigned long reg_save; - - dm9000_dbg(db, 5, "phy_write[%02x] = %04x\n", reg, value); - mutex_lock(&db->addr_lock); - - spin_lock_irqsave(&db->lock,flags); - - /* Save previous register address */ - reg_save = readb(db->io_addr); - - /* Fill the phyxcer register into REG_0C */ - iow(db, DM9000_EPAR, DM9000_PHY | reg); - - /* Fill the written data into REG_0D & REG_0E */ - iow(db, DM9000_EPDRL, value); - iow(db, DM9000_EPDRH, value >> 8); - - iow(db, DM9000_EPCR, EPCR_EPOS | EPCR_ERPRW); /* Issue phyxcer write 
command */ - - writeb(reg_save, db->io_addr); - spin_unlock_irqrestore(&db->lock, flags); - - dm9000_msleep(db, 1); /* Wait write complete */ - - spin_lock_irqsave(&db->lock,flags); - reg_save = readb(db->io_addr); - - iow(db, DM9000_EPCR, 0x0); /* Clear phyxcer write command */ - - /* restore the previous address */ - writeb(reg_save, db->io_addr); - - spin_unlock_irqrestore(&db->lock, flags); - mutex_unlock(&db->addr_lock); -} - static void dm9000_shutdown(struct net_device *dev) { @@ -1502,7 +1503,12 @@ dm9000_probe(struct platform_device *pdev) db->flags |= DM9000_PLATF_SIMPLE_PHY; #endif - dm9000_reset(db); + /* Fixing bug on dm9000_probe, takeover dm9000_reset(db), + * Need 'NCR_MAC_LBK' bit to indeed stable our DM9000 fifo + * while probe stage. + */ + + iow(db, DM9000_NCR, NCR_MAC_LBK | NCR_RST); /* try multiple times, DM9000 sometimes gets the read wrong */ for (i = 0; i < 8; i++) { @@ -1687,22 +1693,7 @@ static struct platform_driver dm9000_driver = { .remove = dm9000_drv_remove, }; -static int __init -dm9000_init(void) -{ - printk(KERN_INFO "%s Ethernet Driver, V%s\n", CARDNAME, DRV_VERSION); - - return platform_driver_register(&dm9000_driver); -} - -static void __exit -dm9000_cleanup(void) -{ - platform_driver_unregister(&dm9000_driver); -} - -module_init(dm9000_init); -module_exit(dm9000_cleanup); +module_platform_driver(dm9000_driver); MODULE_AUTHOR("Sascha Hauer, Ben Dooks"); MODULE_DESCRIPTION("Davicom DM9000 network driver"); diff --git a/drivers/net/ethernet/davicom/dm9000.h b/drivers/net/ethernet/davicom/dm9000.h index 55688bd1a3ef..9ce058adabab 100644 --- a/drivers/net/ethernet/davicom/dm9000.h +++ b/drivers/net/ethernet/davicom/dm9000.h @@ -69,7 +69,9 @@ #define NCR_WAKEEN (1<<6) #define NCR_FCOL (1<<4) #define NCR_FDX (1<<3) -#define NCR_LBK (3<<1) + +#define NCR_RESERVED (3<<1) +#define NCR_MAC_LBK (1<<1) #define NCR_RST (1<<0) #define NSR_SPEED (1<<7) @@ -167,5 +169,12 @@ #define ISR_LNKCHNG (1<<5) #define ISR_UNDERRUN (1<<4) +/* Davicom MII 
registers. + */ + +#define MII_DM_DSPCR 0x1b /* DSP Control Register */ + +#define DSPCR_INIT_PARAM 0xE100 /* DSP init parameter */ + #endif /* _DM9000X_H_ */ diff --git a/drivers/net/ethernet/dec/tulip/Kconfig b/drivers/net/ethernet/dec/tulip/Kconfig index 0c37fb2cc867..1df33c799c00 100644 --- a/drivers/net/ethernet/dec/tulip/Kconfig +++ b/drivers/net/ethernet/dec/tulip/Kconfig @@ -108,6 +108,7 @@ config TULIP_DM910X config DE4X5 tristate "Generic DECchip & DIGITAL EtherWORKS PCI/EISA" depends on (PCI || EISA) + depends on VIRT_TO_BUS || ALPHA || PPC || SPARC select CRC32 ---help--- This is support for the DIGITAL series of PCI/EISA Ethernet cards. diff --git a/drivers/net/ethernet/dec/tulip/xircom_cb.c b/drivers/net/ethernet/dec/tulip/xircom_cb.c index 88feced9a629..cdbcd1643141 100644 --- a/drivers/net/ethernet/dec/tulip/xircom_cb.c +++ b/drivers/net/ethernet/dec/tulip/xircom_cb.c @@ -236,17 +236,14 @@ static int xircom_probe(struct pci_dev *pdev, const struct pci_device_id *id) private->rx_buffer = dma_alloc_coherent(d, 8192, &private->rx_dma_handle, GFP_KERNEL); - if (private->rx_buffer == NULL) { - pr_err("%s: no memory for rx buffer\n", __func__); + if (private->rx_buffer == NULL) goto rx_buf_fail; - } + private->tx_buffer = dma_alloc_coherent(d, 8192, &private->tx_dma_handle, GFP_KERNEL); - if (private->tx_buffer == NULL) { - pr_err("%s: no memory for tx buffer\n", __func__); + if (private->tx_buffer == NULL) goto tx_buf_fail; - } SET_NETDEV_DEV(dev, &pdev->dev); diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 110d26f4c602..afa8e3af2c4d 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -580,12 +580,9 @@ alloc_list (struct net_device *dev) skb = netdev_alloc_skb_ip_align(dev, np->rx_buf_sz); np->rx_skbuff[i] = skb; - if (skb == NULL) { - printk (KERN_ERR - "%s: alloc_list: allocate Rx buffer error! 
", - dev->name); + if (skb == NULL) break; - } + /* Rubicon now supports 40 bits of addressing space. */ np->rx_ring[i].fraginfo = cpu_to_le64 ( pci_map_single ( diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 29aff55f2eea..e2d5ced7e733 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -214,6 +214,7 @@ struct be_tx_stats { }; struct be_tx_obj { + u32 db_offset; struct be_queue_info q; struct be_queue_info cq; /* Remember the skbs that were transmitted */ @@ -434,6 +435,7 @@ struct be_adapter { u8 wol_cap; bool wol; u32 uc_macs; /* Count of secondary UC MAC programmed */ + u16 asic_rev; u32 msg_enable; int be_get_temp_freq; u16 max_mcast_mac; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 3c9b4f12e3e5..d6291aba2524 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. 
* * This program is free software; you can redistribute it and/or @@ -687,10 +687,8 @@ static struct be_mcc_wrb *wrb_from_mccq(struct be_adapter *adapter) if (!mccq->created) return NULL; - if (atomic_read(&mccq->used) >= mccq->len) { - dev_err(&adapter->pdev->dev, "Out of MCCQ wrbs\n"); + if (atomic_read(&mccq->used) >= mccq->len) return NULL; - } wrb = queue_head_node(mccq); queue_head_inc(mccq); @@ -1095,15 +1093,14 @@ int be_cmd_mccq_create(struct be_adapter *adapter, return status; } -int be_cmd_txq_create(struct be_adapter *adapter, - struct be_queue_info *txq, - struct be_queue_info *cq) +int be_cmd_txq_create(struct be_adapter *adapter, struct be_tx_obj *txo) { struct be_mcc_wrb *wrb; struct be_cmd_req_eth_tx_create *req; + struct be_queue_info *txq = &txo->q; + struct be_queue_info *cq = &txo->cq; struct be_dma_mem *q_mem = &txq->dma_mem; - void *ctxt; - int status; + int status, ver = 0; spin_lock_bh(&adapter->mcc_lock); @@ -1114,34 +1111,37 @@ int be_cmd_txq_create(struct be_adapter *adapter, } req = embedded_payload(wrb); - ctxt = &req->context; be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ETH, OPCODE_ETH_TX_CREATE, sizeof(*req), wrb, NULL); if (lancer_chip(adapter)) { req->hdr.version = 1; - AMAP_SET_BITS(struct amap_tx_context, if_id, ctxt, - adapter->if_handle); + req->if_id = cpu_to_le16(adapter->if_handle); + } else if (BEx_chip(adapter)) { + if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) + req->hdr.version = 2; + } else { /* For SH */ + req->hdr.version = 2; } req->num_pages = PAGES_4K_SPANNED(q_mem->va, q_mem->size); req->ulp_num = BE_ULP1_NUM; req->type = BE_ETH_TX_RING_TYPE_STANDARD; - - AMAP_SET_BITS(struct amap_tx_context, tx_ring_size, ctxt, - be_encoded_q_len(txq->len)); - AMAP_SET_BITS(struct amap_tx_context, ctx_valid, ctxt, 1); - AMAP_SET_BITS(struct amap_tx_context, cq_id_send, ctxt, cq->id); - - be_dws_cpu_to_le(ctxt, sizeof(req->context)); - + req->cq_id = cpu_to_le16(cq->id); + req->queue_size = 
be_encoded_q_len(txq->len); be_cmd_page_addrs_prepare(req->pages, ARRAY_SIZE(req->pages), q_mem); + ver = req->hdr.version; + status = be_mcc_notify_wait(adapter); if (!status) { struct be_cmd_resp_eth_tx_create *resp = embedded_payload(wrb); txq->id = le16_to_cpu(resp->cid); + if (ver == 2) + txo->db_offset = le32_to_cpu(resp->db_offset); + else + txo->db_offset = DB_TXULP1_OFFSET; txq->created = true; } @@ -1834,7 +1834,7 @@ err: /* Uses mbox */ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num, - u32 *mode, u32 *caps) + u32 *mode, u32 *caps, u16 *asic_rev) { struct be_mcc_wrb *wrb; struct be_cmd_req_query_fw_cfg *req; @@ -1855,6 +1855,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num, *port_num = le32_to_cpu(resp->phys_port); *mode = le32_to_cpu(resp->function_mode); *caps = le32_to_cpu(resp->function_caps); + *asic_rev = le32_to_cpu(resp->asic_revision) & 0xFF; } mutex_unlock(&adapter->mbox_lock); @@ -2343,7 +2344,6 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, { struct be_mcc_wrb *wrb; struct be_cmd_req_seeprom_read *req; - struct be_sge *sge; int status; spin_lock_bh(&adapter->mcc_lock); @@ -2354,7 +2354,6 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, goto err; } req = nonemb_cmd->va; - sge = nonembedded_sgl(wrb); be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_SEEPROM_READ, sizeof(*req), wrb, @@ -2667,10 +2666,8 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, cmd.size = sizeof(struct be_cmd_req_set_mac_list); cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, GFP_KERNEL); - if (!cmd.va) { - dev_err(&adapter->pdev->dev, "Memory alloc failure\n"); + if (!cmd.va) return -ENOMEM; - } spin_lock_bh(&adapter->mcc_lock); @@ -2949,7 +2946,8 @@ static struct be_nic_resource_desc *be_get_nic_desc(u8 *buf, u32 desc_count, break; } - if (desc->desc_type == NIC_RESOURCE_DESC_TYPE_ID) + if (desc->desc_type == NIC_RESOURCE_DESC_TYPE_V0 || + 
desc->desc_type == NIC_RESOURCE_DESC_TYPE_V1) break; desc = (void *)desc + desc->desc_len; @@ -3023,23 +3021,41 @@ err: return status; } - /* Uses sync mcc */ -int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags, - u8 domain) +/* Uses mbox */ +int be_cmd_get_profile_config_mbox(struct be_adapter *adapter, + u8 domain, struct be_dma_mem *cmd) { struct be_mcc_wrb *wrb; struct be_cmd_req_get_profile_config *req; int status; - struct be_dma_mem cmd; - memset(&cmd, 0, sizeof(struct be_dma_mem)); - cmd.size = sizeof(struct be_cmd_resp_get_profile_config); - cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, - &cmd.dma); - if (!cmd.va) { - dev_err(&adapter->pdev->dev, "Memory alloc failure\n"); - return -ENOMEM; - } + if (mutex_lock_interruptible(&adapter->mbox_lock)) + return -1; + wrb = wrb_from_mbox(adapter); + + req = cmd->va; + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_GET_PROFILE_CONFIG, + cmd->size, wrb, cmd); + + req->type = ACTIVE_PROFILE_TYPE; + req->hdr.domain = domain; + if (!lancer_chip(adapter)) + req->hdr.version = 1; + + status = be_mbox_notify_wait(adapter); + + mutex_unlock(&adapter->mbox_lock); + return status; +} + +/* Uses sync mcc */ +int be_cmd_get_profile_config_mccq(struct be_adapter *adapter, + u8 domain, struct be_dma_mem *cmd) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_req_get_profile_config *req; + int status; spin_lock_bh(&adapter->mcc_lock); @@ -3049,16 +3065,47 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags, goto err; } - req = cmd.va; - + req = cmd->va; be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_GET_PROFILE_CONFIG, - cmd.size, wrb, &cmd); + cmd->size, wrb, cmd); req->type = ACTIVE_PROFILE_TYPE; req->hdr.domain = domain; + if (!lancer_chip(adapter)) + req->hdr.version = 1; status = be_mcc_notify_wait(adapter); + +err: + spin_unlock_bh(&adapter->mcc_lock); + return status; +} + +/* Uses sync mcc, if MCCQ is already created 
otherwise mbox */ +int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags, + u16 *txq_count, u8 domain) +{ + struct be_queue_info *mccq = &adapter->mcc_obj.q; + struct be_dma_mem cmd; + int status; + + memset(&cmd, 0, sizeof(struct be_dma_mem)); + if (!lancer_chip(adapter)) + cmd.size = sizeof(struct be_cmd_resp_get_profile_config_v1); + else + cmd.size = sizeof(struct be_cmd_resp_get_profile_config); + cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, + &cmd.dma); + if (!cmd.va) { + dev_err(&adapter->pdev->dev, "Memory alloc failure\n"); + return -ENOMEM; + } + + if (!mccq->created) + status = be_cmd_get_profile_config_mbox(adapter, domain, &cmd); + else + status = be_cmd_get_profile_config_mccq(adapter, domain, &cmd); if (!status) { struct be_cmd_resp_get_profile_config *resp = cmd.va; u32 desc_count = le32_to_cpu(resp->desc_count); @@ -3071,12 +3118,15 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags, status = -EINVAL; goto err; } - *cap_flags = le32_to_cpu(desc->cap_flags); + if (cap_flags) + *cap_flags = le32_to_cpu(desc->cap_flags); + if (txq_count) + *txq_count = le32_to_cpu(desc->txq_count); } err: - spin_unlock_bh(&adapter->mcc_lock); - pci_free_consistent(adapter->pdev, cmd.size, - cmd.va, cmd.dma); + if (cmd.va) + pci_free_consistent(adapter->pdev, cmd.size, + cmd.va, cmd.dma); return status; } @@ -3105,7 +3155,7 @@ int be_cmd_set_profile_config(struct be_adapter *adapter, u32 bps, req->hdr.domain = domain; req->desc_count = cpu_to_le32(1); - req->nic_desc.desc_type = NIC_RESOURCE_DESC_TYPE_ID; + req->nic_desc.desc_type = NIC_RESOURCE_DESC_TYPE_V0; req->nic_desc.desc_len = RESOURCE_DESC_SIZE; req->nic_desc.flags = (1 << QUN) | (1 << IMM) | (1 << NOSV); req->nic_desc.pf_num = adapter->pf_number; @@ -3202,6 +3252,31 @@ err: return status; } +int be_cmd_intr_set(struct be_adapter *adapter, bool intr_enable) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_req_intr_set *req; + int status; + + if 
(mutex_lock_interruptible(&adapter->mbox_lock)) + return -1; + + wrb = wrb_from_mbox(adapter); + + req = embedded_payload(wrb); + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_SET_INTERRUPT_ENABLE, sizeof(*req), + wrb, NULL); + + req->intr_enabled = intr_enable; + + status = be_mbox_notify_wait(adapter); + + mutex_unlock(&adapter->mbox_lock); + return status; +} + int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, int wrb_payload_size, u16 *cmd_status, u16 *ext_status) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 96970860c915..460332021590 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -188,6 +188,7 @@ struct be_mcc_mailbox { #define OPCODE_COMMON_GET_BEACON_STATE 70 #define OPCODE_COMMON_READ_TRANSRECV_DATA 73 #define OPCODE_COMMON_GET_PORT_NAME 77 +#define OPCODE_COMMON_SET_INTERRUPT_ENABLE 89 #define OPCODE_COMMON_GET_PHY_DETAILS 102 #define OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP 103 #define OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES 121 @@ -473,46 +474,27 @@ struct be_cmd_resp_mcc_create { #define BE_ETH_TX_RING_TYPE_STANDARD 2 #define BE_ULP1_NUM 1 -/* Pseudo amap definition in which each bit of the actual structure is defined - * as a byte: used to calculate offset/shift/mask of each field */ -struct amap_tx_context { - u8 if_id[16]; /* dword 0 */ - u8 tx_ring_size[4]; /* dword 0 */ - u8 rsvd1[26]; /* dword 0 */ - u8 pci_func_id[8]; /* dword 1 */ - u8 rsvd2[9]; /* dword 1 */ - u8 ctx_valid; /* dword 1 */ - u8 cq_id_send[16]; /* dword 2 */ - u8 rsvd3[16]; /* dword 2 */ - u8 rsvd4[32]; /* dword 3 */ - u8 rsvd5[32]; /* dword 4 */ - u8 rsvd6[32]; /* dword 5 */ - u8 rsvd7[32]; /* dword 6 */ - u8 rsvd8[32]; /* dword 7 */ - 
u8 rsvd9[32]; /* dword 8 */ - u8 rsvd10[32]; /* dword 9 */ - u8 rsvd11[32]; /* dword 10 */ - u8 rsvd12[32]; /* dword 11 */ - u8 rsvd13[32]; /* dword 12 */ - u8 rsvd14[32]; /* dword 13 */ - u8 rsvd15[32]; /* dword 14 */ - u8 rsvd16[32]; /* dword 15 */ -} __packed; - struct be_cmd_req_eth_tx_create { struct be_cmd_req_hdr hdr; u8 num_pages; u8 ulp_num; - u8 type; - u8 bound_port; - u8 context[sizeof(struct amap_tx_context) / 8]; + u16 type; + u16 if_id; + u8 queue_size; + u8 rsvd0; + u32 rsvd1; + u16 cq_id; + u16 rsvd2; + u32 rsvd3[13]; struct phys_addr pages[8]; } __packed; struct be_cmd_resp_eth_tx_create { struct be_cmd_resp_hdr hdr; u16 cid; - u16 rsvd0; + u16 rid; + u32 db_offset; + u32 rsvd0[4]; } __packed; /******************** Create RxQ ***************************/ @@ -1066,7 +1048,6 @@ struct be_cmd_resp_modify_eq_delay { } __packed; /******************** Get FW Config *******************/ -#define BE_FUNCTION_CAPS_RSS 0x2 /* The HW can come up in either of the following multi-channel modes * based on the skew/IPL. 
*/ @@ -1706,9 +1687,11 @@ struct be_cmd_req_set_ext_fat_caps { struct be_fat_conf_params set_params; }; -#define RESOURCE_DESC_SIZE 72 -#define NIC_RESOURCE_DESC_TYPE_ID 0x41 +#define RESOURCE_DESC_SIZE 88 +#define NIC_RESOURCE_DESC_TYPE_V0 0x41 +#define NIC_RESOURCE_DESC_TYPE_V1 0x51 #define MAX_RESOURCE_DESC 4 +#define MAX_RESOURCE_DESC_V1 32 /* QOS unit number */ #define QUN 4 @@ -1774,6 +1757,12 @@ struct be_cmd_resp_get_profile_config { u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE]; }; +struct be_cmd_resp_get_profile_config_v1 { + struct be_cmd_req_hdr hdr; + u32 desc_count; + u8 func_param[MAX_RESOURCE_DESC_V1 * RESOURCE_DESC_SIZE]; +}; + struct be_cmd_req_set_profile_config { struct be_cmd_req_hdr hdr; u32 rsvd; @@ -1791,6 +1780,12 @@ struct be_cmd_enable_disable_vf { u8 rsvd[3]; }; +struct be_cmd_req_intr_set { + struct be_cmd_req_hdr hdr; + u8 intr_enabled; + u8 rsvd[3]; +}; + static inline bool check_privilege(struct be_adapter *adapter, u32 flags) { return flags & adapter->cmd_privileges ? 
true : false; @@ -1834,8 +1829,7 @@ extern int be_cmd_mccq_create(struct be_adapter *adapter, struct be_queue_info *mccq, struct be_queue_info *cq); extern int be_cmd_txq_create(struct be_adapter *adapter, - struct be_queue_info *txq, - struct be_queue_info *cq); + struct be_tx_obj *txo); extern int be_cmd_rxq_create(struct be_adapter *adapter, struct be_queue_info *rxq, u16 cq_id, u16 frag_size, u32 if_id, u32 rss, u8 *rss_id); @@ -1862,8 +1856,8 @@ extern int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc); extern int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc); -extern int be_cmd_query_fw_cfg(struct be_adapter *adapter, - u32 *port_num, u32 *function_mode, u32 *function_caps); +extern int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num, + u32 *function_mode, u32 *function_caps, u16 *asic_rev); extern int be_cmd_reset_function(struct be_adapter *adapter); extern int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size); @@ -1931,10 +1925,11 @@ extern int lancer_test_and_set_rdy_state(struct be_adapter *adapter); extern int be_cmd_query_port_name(struct be_adapter *adapter, u8 *port_name); extern int be_cmd_get_func_config(struct be_adapter *adapter); extern int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags, - u8 domain); + u16 *txq_count, u8 domain); extern int be_cmd_set_profile_config(struct be_adapter *adapter, u32 bps, u8 domain); extern int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, int vf_num); extern int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain); +extern int be_cmd_intr_set(struct be_adapter *adapter, bool intr_enable); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 76b302f30c87..07b7f27cb0b9 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1,5 +1,5 @@ /* - * 
Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -719,10 +719,8 @@ be_test_ddr_dma(struct be_adapter *adapter) ddrdma_cmd.size = sizeof(struct be_cmd_req_ddrdma_test); ddrdma_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, ddrdma_cmd.size, &ddrdma_cmd.dma, GFP_KERNEL); - if (!ddrdma_cmd.va) { - dev_err(&adapter->pdev->dev, "Memory allocation failure\n"); + if (!ddrdma_cmd.va) return -ENOMEM; - } for (i = 0; i < 2; i++) { ret = be_cmd_ddr_dma_test(adapter, pattern[i], @@ -845,11 +843,8 @@ be_read_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, eeprom_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, eeprom_cmd.size, &eeprom_cmd.dma, GFP_KERNEL); - if (!eeprom_cmd.va) { - dev_err(&adapter->pdev->dev, - "Memory allocation failure. Could not read eeprom\n"); + if (!eeprom_cmd.va) return -ENOMEM; - } status = be_cmd_get_seeprom_data(adapter, &eeprom_cmd); diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index 62dc220695f7..3c1099b47f2a 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. 
* * This program is free software; you can redistribute it and/or @@ -72,6 +72,10 @@ */ #define MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK (1 << 29) /* bit 29 */ +/********* PCI Function Capability *********/ +#define BE_FUNCTION_CAPS_RSS 0x2 +#define BE_FUNCTION_CAPS_SUPER_NIC 0x40 + /********* Power management (WOL) **********/ #define PCICFG_PM_CONTROL_OFFSET 0x44 #define PCICFG_PM_CONTROL_MASK 0x108 /* bits 3 & 8 */ @@ -495,7 +499,8 @@ struct flash_file_hdr_g3 { u32 antidote; u32 num_imgs; u8 build[24]; - u8 rsvd[32]; + u8 asic_type_rev; + u8 rsvd[31]; }; struct flash_section_hdr { diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 08e54f3d288b..654e7820daa0 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -146,20 +146,16 @@ static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q, q->entry_size = entry_size; mem->size = len * entry_size; mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (!mem->va) return -ENOMEM; - memset(mem->va, 0, mem->size); return 0; } -static void be_intr_set(struct be_adapter *adapter, bool enable) +static void be_reg_intr_set(struct be_adapter *adapter, bool enable) { u32 reg, enabled; - if (adapter->eeh_error) - return; - pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, ®); enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK; @@ -175,6 +171,22 @@ static void be_intr_set(struct be_adapter *adapter, bool enable) PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg); } +static void be_intr_set(struct be_adapter *adapter, bool enable) +{ + int status = 0; + + /* On lancer interrupts can't be controlled via this register */ + if (lancer_chip(adapter)) + 
return; + + if (adapter->eeh_error) + return; + + status = be_cmd_intr_set(adapter, enable); + if (status) + be_reg_intr_set(adapter, enable); +} + static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted) { u32 val = 0; @@ -185,14 +197,15 @@ static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted) iowrite32(val, adapter->db + DB_RQ_OFFSET); } -static void be_txq_notify(struct be_adapter *adapter, u16 qid, u16 posted) +static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo, + u16 posted) { u32 val = 0; - val |= qid & DB_TXULP_RING_ID_MASK; + val |= txo->q.id & DB_TXULP_RING_ID_MASK; val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT; wmb(); - iowrite32(val, adapter->db + DB_TXULP1_OFFSET); + iowrite32(val, adapter->db + txo->db_offset); } static void be_eq_notify(struct be_adapter *adapter, u16 qid, @@ -759,8 +772,9 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, if (vlan_tx_tag_present(skb)) { vlan_tag = be_get_tx_vlan_tag(adapter, skb); - __vlan_put_tag(skb, vlan_tag); - skb->vlan_tci = 0; + skb = __vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + if (skb) + skb->vlan_tci = 0; } return skb; @@ -821,7 +835,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, stopped = true; } - be_txq_notify(adapter, txq->id, wrb_cnt); + be_txq_notify(adapter, txo, wrb_cnt); be_tx_stats_update(txo, wrb_cnt, copied, gso_segs, stopped); } else { @@ -890,7 +904,7 @@ set_vlan_promisc: return status; } -static int be_vlan_add_vid(struct net_device *netdev, u16 vid) +static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct be_adapter *adapter = netdev_priv(netdev); int status = 0; @@ -916,7 +930,7 @@ ret: return status; } -static int be_vlan_rem_vid(struct net_device *netdev, u16 vid) +static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct be_adapter *adapter = netdev_priv(netdev); int status = 0; @@ -1371,7 +1385,7 @@ 
static void be_rx_compl_process(struct be_rx_obj *rxo, if (rxcp->vlanf) - __vlan_hwaccel_put_tag(skb, rxcp->vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag); netif_receive_skb(skb); } @@ -1427,7 +1441,7 @@ void be_rx_compl_process_gro(struct be_rx_obj *rxo, struct napi_struct *napi, skb->rxhash = rxcp->rss_hash; if (rxcp->vlanf) - __vlan_hwaccel_put_tag(skb, rxcp->vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag); napi_gro_frags(napi); } @@ -1957,7 +1971,7 @@ static int be_tx_qs_create(struct be_adapter *adapter) if (status) return status; - status = be_cmd_txq_create(adapter, &txo->q, &txo->cq); + status = be_cmd_txq_create(adapter, txo); if (status) return status; } @@ -2435,9 +2449,6 @@ static int be_close(struct net_device *netdev) be_roce_dev_close(adapter); - if (!lancer_chip(adapter)) - be_intr_set(adapter, false); - for_all_evt_queues(adapter, eqo, i) napi_disable(&eqo->napi); @@ -2525,9 +2536,6 @@ static int be_open(struct net_device *netdev) be_irq_register(adapter); - if (!lancer_chip(adapter)) - be_intr_set(adapter, true); - for_all_rx_queues(adapter, rxo, i) be_cq_notify(adapter, rxo->cq.id, true, 0); @@ -2562,10 +2570,9 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable) cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config); cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (cmd.va == NULL) return -1; - memset(cmd.va, 0, cmd.size); if (enable) { status = pci_write_config_dword(adapter->pdev, @@ -2713,7 +2720,8 @@ static int be_vfs_if_create(struct be_adapter *adapter) for_all_vfs(adapter, vf_cfg, vf) { if (!BE3_chip(adapter)) - be_cmd_get_profile_config(adapter, &cap_flags, vf + 1); + be_cmd_get_profile_config(adapter, &cap_flags, + NULL, vf + 1); /* If a FW profile exists, then cap_flags are updated */ en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED | @@ -2877,11 +2885,14 @@ static void be_get_resources(struct 
be_adapter *adapter) u16 dev_num_vfs; int pos, status; bool profile_present = false; + u16 txq_count = 0; if (!BEx_chip(adapter)) { status = be_cmd_get_func_config(adapter); if (!status) profile_present = true; + } else if (BE3_chip(adapter) && be_physfn(adapter)) { + be_cmd_get_profile_config(adapter, NULL, &txq_count, 0); } if (profile_present) { @@ -2919,7 +2930,9 @@ static void be_get_resources(struct be_adapter *adapter) adapter->max_vlans = BE_NUM_VLANS_SUPPORTED; adapter->max_mcast_mac = BE_MAX_MC; - adapter->max_tx_queues = MAX_TX_QS; + adapter->max_tx_queues = txq_count ? txq_count : MAX_TX_QS; + adapter->max_tx_queues = min_t(u16, adapter->max_tx_queues, + MAX_TX_QS); adapter->max_rss_queues = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; adapter->max_event_queues = BE3_MAX_RSS_QS; @@ -2953,7 +2966,8 @@ static int be_get_config(struct be_adapter *adapter) status = be_cmd_query_fw_cfg(adapter, &adapter->port_num, &adapter->function_mode, - &adapter->function_caps); + &adapter->function_caps, + &adapter->asic_rev); if (status) goto err; @@ -3214,7 +3228,7 @@ static int be_flash(struct be_adapter *adapter, const u8 *img, return 0; } -/* For BE2 and BE3 */ +/* For BE2, BE3 and BE3-R */ static int be_flash_BEx(struct be_adapter *adapter, const struct firmware *fw, struct be_dma_mem *flash_cmd, @@ -3457,11 +3471,9 @@ static int lancer_fw_download(struct be_adapter *adapter, flash_cmd.size = sizeof(struct lancer_cmd_req_write_object) + LANCER_FW_DOWNLOAD_CHUNK; flash_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, flash_cmd.size, - &flash_cmd.dma, GFP_KERNEL); + &flash_cmd.dma, GFP_KERNEL); if (!flash_cmd.va) { status = -ENOMEM; - dev_err(&adapter->pdev->dev, - "Memory allocation failure while flashing\n"); goto lancer_fw_exit; } @@ -3529,18 +3541,22 @@ lancer_fw_exit: #define UFI_TYPE2 2 #define UFI_TYPE3 3 +#define UFI_TYPE3R 10 #define UFI_TYPE4 4 static int be_get_ufi_type(struct be_adapter *adapter, - struct flash_file_hdr_g2 *fhdr) + struct 
flash_file_hdr_g3 *fhdr) { if (fhdr == NULL) goto be_get_ufi_exit; if (skyhawk_chip(adapter) && fhdr->build[0] == '4') return UFI_TYPE4; - else if (BE3_chip(adapter) && fhdr->build[0] == '3') - return UFI_TYPE3; - else if (BE2_chip(adapter) && fhdr->build[0] == '2') + else if (BE3_chip(adapter) && fhdr->build[0] == '3') { + if (fhdr->asic_type_rev == 0x10) + return UFI_TYPE3R; + else + return UFI_TYPE3; + } else if (BE2_chip(adapter) && fhdr->build[0] == '2') return UFI_TYPE2; be_get_ufi_exit: @@ -3551,7 +3567,6 @@ be_get_ufi_exit: static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) { - struct flash_file_hdr_g2 *fhdr; struct flash_file_hdr_g3 *fhdr3; struct image_hdr *img_hdr_ptr = NULL; struct be_dma_mem flash_cmd; @@ -3563,29 +3578,41 @@ static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) &flash_cmd.dma, GFP_KERNEL); if (!flash_cmd.va) { status = -ENOMEM; - dev_err(&adapter->pdev->dev, - "Memory allocation failure while flashing\n"); goto be_fw_exit; } p = fw->data; - fhdr = (struct flash_file_hdr_g2 *)p; + fhdr3 = (struct flash_file_hdr_g3 *)p; - ufi_type = be_get_ufi_type(adapter, fhdr); + ufi_type = be_get_ufi_type(adapter, fhdr3); - fhdr3 = (struct flash_file_hdr_g3 *)fw->data; num_imgs = le32_to_cpu(fhdr3->num_imgs); for (i = 0; i < num_imgs; i++) { img_hdr_ptr = (struct image_hdr *)(fw->data + (sizeof(struct flash_file_hdr_g3) + i * sizeof(struct image_hdr))); if (le32_to_cpu(img_hdr_ptr->imageid) == 1) { - if (ufi_type == UFI_TYPE4) + switch (ufi_type) { + case UFI_TYPE4: status = be_flash_skyhawk(adapter, fw, &flash_cmd, num_imgs); - else if (ufi_type == UFI_TYPE3) + break; + case UFI_TYPE3R: status = be_flash_BEx(adapter, fw, &flash_cmd, num_imgs); + break; + case UFI_TYPE3: + /* Do not flash this ufi on BE3-R cards */ + if (adapter->asic_rev < 0x10) + status = be_flash_BEx(adapter, fw, + &flash_cmd, + num_imgs); + else { + status = -1; + dev_err(&adapter->pdev->dev, + "Can't load BE3 UFI on 
BE3R\n"); + } + } } } @@ -3662,12 +3689,12 @@ static void be_netdev_init(struct net_device *netdev) netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; if (be_multi_rxq(adapter)) netdev->hw_features |= NETIF_F_RXHASH; netdev->features |= netdev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; @@ -3791,12 +3818,13 @@ static int be_ctrl_init(struct be_adapter *adapter) rx_filter->size = sizeof(struct be_cmd_req_rx_filter); rx_filter->va = dma_alloc_coherent(&adapter->pdev->dev, rx_filter->size, - &rx_filter->dma, GFP_KERNEL); + &rx_filter->dma, + GFP_KERNEL | __GFP_ZERO); if (rx_filter->va == NULL) { status = -ENOMEM; goto free_mbox; } - memset(rx_filter->va, 0, rx_filter->size); + mutex_init(&adapter->mbox_lock); spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); @@ -3838,10 +3866,9 @@ static int be_stats_init(struct be_adapter *adapter) cmd->size = sizeof(struct be_cmd_req_get_stats_v1); cmd->va = dma_alloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (cmd->va == NULL) return -1; - memset(cmd->va, 0, cmd->size); return 0; } @@ -3853,6 +3880,7 @@ static void be_remove(struct pci_dev *pdev) return; be_roce_dev_remove(adapter); + be_intr_set(adapter, false); cancel_delayed_work_sync(&adapter->func_recovery_work); @@ -4107,6 +4135,11 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) status = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); if (!status) { + status = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); + if (status < 0) { + dev_err(&pdev->dev, "dma_set_coherent_mask failed\n"); + goto free_netdev; + } netdev->features |= NETIF_F_HIGHDMA; } else { status = 
dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); @@ -4142,11 +4175,11 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) goto ctrl_clean; } - /* The INTR bit may be set in the card when probed by a kdump kernel - * after a crash. - */ - if (!lancer_chip(adapter)) - be_intr_set(adapter, false); + /* Wait for interrupts to quiesce after an FLR */ + msleep(100); + + /* Allow interrupts for other ULPs running on NIC function */ + be_intr_set(adapter, true); status = be_stats_init(adapter); if (status) diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 55d32aa0a093..f3d126dcc104 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h index db4ea8081c07..276572998463 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.h +++ b/drivers/net/ethernet/emulex/benet/be_roce.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2011 Emulex + * Copyright (C) 2005 - 2013 Emulex * All rights reserved. 
* * This program is free software; you can redistribute it and/or diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 7c361d1db94c..21b85fb7d05f 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -780,12 +780,11 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) priv->descs = dma_alloc_coherent(priv->dev, sizeof(struct ftgmac100_descs), - &priv->descs_dma_addr, GFP_KERNEL); + &priv->descs_dma_addr, + GFP_KERNEL | __GFP_ZERO); if (!priv->descs) return -ENOMEM; - memset(priv->descs, 0, sizeof(struct ftgmac100_descs)); - /* initialize RX ring */ ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); @@ -1350,22 +1349,7 @@ static struct platform_driver ftgmac100_driver = { }, }; -/****************************************************************************** - * initialization / finalization - *****************************************************************************/ -static int __init ftgmac100_init(void) -{ - pr_info("Loading version " DRV_VERSION " ...\n"); - return platform_driver_register(&ftgmac100_driver); -} - -static void __exit ftgmac100_exit(void) -{ - platform_driver_unregister(&ftgmac100_driver); -} - -module_init(ftgmac100_init); -module_exit(ftgmac100_exit); +module_platform_driver(ftgmac100_driver); MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>"); MODULE_DESCRIPTION("FTGMAC100 driver"); diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index b5ea8fbd8a76..a6eda8d83138 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -732,13 +732,13 @@ static int ftmac100_alloc_buffers(struct ftmac100 *priv) { int i; - priv->descs = dma_alloc_coherent(priv->dev, sizeof(struct ftmac100_descs), - &priv->descs_dma_addr, GFP_KERNEL); + priv->descs = dma_alloc_coherent(priv->dev, + sizeof(struct ftmac100_descs), + 
&priv->descs_dma_addr, + GFP_KERNEL | __GFP_ZERO); if (!priv->descs) return -ENOMEM; - memset(priv->descs, 0, sizeof(struct ftmac100_descs)); - /* initialize RX ring */ ftmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index b7d58fe6f531..549ce13b92ac 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -2,7 +2,8 @@ # Makefile for the Freescale network device drivers. # -obj-$(CONFIG_FEC) += fec.o fec_ptp.o +obj-$(CONFIG_FEC) += fec.o +fec-objs :=fec_main.o fec_ptp.o obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx.o ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y) obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx_phy.o diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index f5390071efd0..eb4372962839 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -240,6 +240,7 @@ struct fec_enet_private { phy_interface_t phy_interface; int link; int full_duplex; + int speed; struct completion mdio_done; int irq[FEC_IRQ_NUM]; int bufdesc_ex; diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec_main.c index 069a155d16ed..2451ab1b5a83 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -29,7 +29,6 @@ #include <linux/ioport.h> #include <linux/slab.h> #include <linux/interrupt.h> -#include <linux/pci.h> #include <linux/init.h> #include <linux/delay.h> #include <linux/netdevice.h> @@ -53,11 +52,6 @@ #include <asm/cacheflush.h> -#ifndef CONFIG_ARM -#include <asm/coldfire.h> -#include <asm/mcfsim.h> -#endif - #include "fec.h" #if defined(CONFIG_ARM) @@ -107,6 +101,9 @@ static struct platform_device_id fec_devtype[] = { .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | FEC_QUIRK_HAS_BUFDESC_EX, }, { + .name = "mvf-fec", + .driver_data = FEC_QUIRK_ENET_MAC, + }, { /* sentinel */ 
} }; @@ -117,6 +114,7 @@ enum imx_fec_type { IMX27_FEC, /* runs on i.mx27/35/51 */ IMX28_FEC, IMX6Q_FEC, + MVF_FEC, }; static const struct of_device_id fec_dt_ids[] = { @@ -124,6 +122,7 @@ static const struct of_device_id fec_dt_ids[] = { { .compatible = "fsl,imx27-fec", .data = &fec_devtype[IMX27_FEC], }, { .compatible = "fsl,imx28-fec", .data = &fec_devtype[IMX28_FEC], }, { .compatible = "fsl,imx6q-fec", .data = &fec_devtype[IMX6Q_FEC], }, + { .compatible = "fsl,mvf-fec", .data = &fec_devtype[MVF_FEC], }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fec_dt_ids); @@ -262,7 +261,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* Ooops. All transmit buffers are full. Bail out. * This should not happen, since ndev->tbusy should be set. */ - printk("%s: tx queue full!.\n", ndev->name); + netdev_err(ndev, "tx queue full!\n"); return NETDEV_TX_BUSY; } @@ -345,6 +344,53 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +/* Init RX & TX buffer descriptors + */ +static void fec_enet_bd_init(struct net_device *dev) +{ + struct fec_enet_private *fep = netdev_priv(dev); + struct bufdesc *bdp; + unsigned int i; + + /* Initialize the receive buffer descriptors. */ + bdp = fep->rx_bd_base; + for (i = 0; i < RX_RING_SIZE; i++) { + + /* Initialize the BD for every fragment in the page. */ + if (bdp->cbd_bufaddr) + bdp->cbd_sc = BD_ENET_RX_EMPTY; + else + bdp->cbd_sc = 0; + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + } + + /* Set the last buffer to wrap */ + bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp->cbd_sc |= BD_SC_WRAP; + + fep->cur_rx = fep->rx_bd_base; + + /* ...and the same for transmit */ + bdp = fep->tx_bd_base; + fep->cur_tx = bdp; + for (i = 0; i < TX_RING_SIZE; i++) { + + /* Initialize the BD for every fragment in the page. 
*/ + bdp->cbd_sc = 0; + if (bdp->cbd_bufaddr && fep->tx_skbuff[i]) { + dev_kfree_skb_any(fep->tx_skbuff[i]); + fep->tx_skbuff[i] = NULL; + } + bdp->cbd_bufaddr = 0; + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + } + + /* Set the last buffer to wrap */ + bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp->cbd_sc |= BD_SC_WRAP; + fep->dirty_tx = bdp; +} + /* This function is called to start or restart the FEC during a link * change. This only happens when switching between half and full * duplex. @@ -388,6 +434,8 @@ fec_restart(struct net_device *ndev, int duplex) /* Set maximum receive buffer size. */ writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); + fec_enet_bd_init(ndev); + /* Set receive and transmit descriptor base. */ writel(fep->bd_dma, fep->hwp + FEC_R_DES_START); if (fep->bufdesc_ex) @@ -397,7 +445,6 @@ fec_restart(struct net_device *ndev, int duplex) writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) * RX_RING_SIZE, fep->hwp + FEC_X_DES_START); - fep->cur_rx = fep->rx_bd_base; for (i = 0; i <= TX_RING_MOD_MASK; i++) { if (fep->tx_skbuff[i]) { @@ -526,7 +573,7 @@ fec_stop(struct net_device *ndev) writel(1, fep->hwp + FEC_X_CNTRL); /* Graceful transmit stop */ udelay(10); if (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_GRA)) - printk("fec_stop : Graceful transmit stop did not complete !\n"); + netdev_err(ndev, "Graceful transmit stop did not complete!\n"); } /* Whack a reset. We should wait for this. */ @@ -624,7 +671,7 @@ fec_enet_tx(struct net_device *ndev) } if (status & BD_ENET_TX_READY) - printk("HEY! Enet xmit interrupt and TX_READY.\n"); + netdev_err(ndev, "HEY! Enet xmit interrupt and TX_READY\n"); /* Deferred means some collisions occurred during transmit, * but we eventually sent the packet OK. @@ -692,7 +739,7 @@ fec_enet_rx(struct net_device *ndev, int budget) * the last indicator should be set. 
*/ if ((status & BD_ENET_RX_LAST) == 0) - printk("FEC ENET: rcv is not +last\n"); + netdev_err(ndev, "rcv is not +last\n"); if (!fep->opened) goto rx_processing_done; @@ -743,8 +790,6 @@ fec_enet_rx(struct net_device *ndev, int budget) skb = netdev_alloc_skb(ndev, pkt_len - 4 + NET_IP_ALIGN); if (unlikely(!skb)) { - printk("%s: Memory squeeze, dropping packet.\n", - ndev->name); ndev->stats.rx_dropped++; } else { skb_reserve(skb, NET_IP_ALIGN); @@ -868,7 +913,6 @@ static void fec_get_mac(struct net_device *ndev) */ iap = macaddr; -#ifdef CONFIG_OF /* * 2) from device tree data */ @@ -880,7 +924,6 @@ static void fec_get_mac(struct net_device *ndev) iap = (unsigned char *) mac; } } -#endif /* * 3) from flash or fuse (via platform data) @@ -934,24 +977,29 @@ static void fec_enet_adjust_link(struct net_device *ndev) goto spin_unlock; } - /* Duplex link change */ if (phy_dev->link) { - if (fep->full_duplex != phy_dev->duplex) { - fec_restart(ndev, phy_dev->duplex); - /* prevent unnecessary second fec_restart() below */ + if (!fep->link) { fep->link = phy_dev->link; status_change = 1; } - } - /* Link on or off change */ - if (phy_dev->link != fep->link) { - fep->link = phy_dev->link; - if (phy_dev->link) + if (fep->full_duplex != phy_dev->duplex) + status_change = 1; + + if (phy_dev->speed != fep->speed) { + fep->speed = phy_dev->speed; + status_change = 1; + } + + /* if any of the above changed restart the FEC */ + if (status_change) fec_restart(ndev, phy_dev->duplex); - else + } else { + if (fep->link) { fec_stop(ndev); - status_change = 1; + fep->link = phy_dev->link; + status_change = 1; + } } spin_unlock: @@ -979,7 +1027,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) usecs_to_jiffies(FEC_MII_TIMEOUT)); if (time_left == 0) { fep->mii_timeout = 1; - printk(KERN_ERR "FEC: MDIO read timeout\n"); + netdev_err(fep->netdev, "MDIO read timeout\n"); return -ETIMEDOUT; } @@ -1007,7 +1055,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, 
int mii_id, int regnum, usecs_to_jiffies(FEC_MII_TIMEOUT)); if (time_left == 0) { fep->mii_timeout = 1; - printk(KERN_ERR "FEC: MDIO write timeout\n"); + netdev_err(fep->netdev, "MDIO write timeout\n"); return -ETIMEDOUT; } @@ -1047,9 +1095,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) } if (phy_id >= PHY_MAX_ADDR) { - printk(KERN_INFO - "%s: no PHY, assuming direct connection to switch\n", - ndev->name); + netdev_info(ndev, "no PHY, assuming direct connection to switch\n"); strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); phy_id = 0; } @@ -1058,7 +1104,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, fep->phy_interface); if (IS_ERR(phy_dev)) { - printk(KERN_ERR "%s: could not attach to PHY\n", ndev->name); + netdev_err(ndev, "could not attach to PHY\n"); return PTR_ERR(phy_dev); } @@ -1076,11 +1122,9 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; - printk(KERN_INFO - "%s: Freescale FEC PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", - ndev->name, - fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev), - fep->phy_dev->irq); + netdev_info(ndev, "Freescale FEC PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", + fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev), + fep->phy_dev->irq); return 0; } @@ -1328,7 +1372,7 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) static void fec_enet_free_buffers(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - int i; + unsigned int i; struct sk_buff *skb; struct bufdesc *bdp; @@ -1352,7 +1396,7 @@ static void fec_enet_free_buffers(struct net_device *ndev) static int fec_enet_alloc_buffers(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - int i; + unsigned int i; struct sk_buff *skb; struct bufdesc *bdp; @@ -1390,7 +1434,7 @@ static int fec_enet_alloc_buffers(struct net_device *ndev) if (fep->bufdesc_ex) { 
struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - ebdp->cbd_esc = BD_ENET_RX_INT; + ebdp->cbd_esc = BD_ENET_TX_INT; } bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); @@ -1437,6 +1481,7 @@ fec_enet_close(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); /* Don't know what to do yet. */ + napi_disable(&fep->napi); fep->opened = 0; netif_stop_queue(ndev); fec_stop(ndev); @@ -1554,7 +1599,7 @@ fec_set_mac_address(struct net_device *ndev, void *p) * Polled functionality used by netconsole and others in non interrupt mode * */ -void fec_poll_controller(struct net_device *dev) +static void fec_poll_controller(struct net_device *dev) { int i; struct fec_enet_private *fep = netdev_priv(dev); @@ -1592,17 +1637,14 @@ static int fec_enet_init(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); struct bufdesc *cbd_base; - struct bufdesc *bdp; - int i; /* Allocate memory for buffer descriptors. */ cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma, - GFP_KERNEL); - if (!cbd_base) { - printk("FEC: allocate descriptor memory failed?\n"); + GFP_KERNEL); + if (!cbd_base) return -ENOMEM; - } + memset(cbd_base, 0, PAGE_SIZE); spin_lock_init(&fep->hw_lock); fep->netdev = ndev; @@ -1626,51 +1668,12 @@ static int fec_enet_init(struct net_device *ndev) writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK); netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, FEC_NAPI_WEIGHT); - /* Initialize the receive buffer descriptors. */ - bdp = fep->rx_bd_base; - for (i = 0; i < RX_RING_SIZE; i++) { - - /* Initialize the BD for every fragment in the page. */ - bdp->cbd_sc = 0; - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); - } - - /* Set the last buffer to wrap */ - bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); - bdp->cbd_sc |= BD_SC_WRAP; - - /* ...and the same for transmit */ - bdp = fep->tx_bd_base; - fep->cur_tx = bdp; - for (i = 0; i < TX_RING_SIZE; i++) { - - /* Initialize the BD for every fragment in the page. 
*/ - bdp->cbd_sc = 0; - bdp->cbd_bufaddr = 0; - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); - } - - /* Set the last buffer to wrap */ - bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); - bdp->cbd_sc |= BD_SC_WRAP; - fep->dirty_tx = bdp; - fec_restart(ndev, 0); return 0; } #ifdef CONFIG_OF -static int fec_get_phy_mode_dt(struct platform_device *pdev) -{ - struct device_node *np = pdev->dev.of_node; - - if (np) - return of_get_phy_mode(np); - - return -ENODEV; -} - static void fec_reset_phy(struct platform_device *pdev) { int err, phy_reset; @@ -1699,11 +1702,6 @@ static void fec_reset_phy(struct platform_device *pdev) gpio_set_value(phy_reset, 1); } #else /* CONFIG_OF */ -static int fec_get_phy_mode_dt(struct platform_device *pdev) -{ - return -ENODEV; -} - static void fec_reset_phy(struct platform_device *pdev) { /* @@ -1734,16 +1732,10 @@ fec_probe(struct platform_device *pdev) if (!r) return -ENXIO; - r = request_mem_region(r->start, resource_size(r), pdev->name); - if (!r) - return -EBUSY; - /* Init network device */ ndev = alloc_etherdev(sizeof(struct fec_enet_private)); - if (!ndev) { - ret = -ENOMEM; - goto failed_alloc_etherdev; - } + if (!ndev) + return -ENOMEM; SET_NETDEV_DEV(ndev, &pdev->dev); @@ -1755,7 +1747,7 @@ fec_probe(struct platform_device *pdev) (pdev->id_entry->driver_data & FEC_QUIRK_HAS_GBIT)) fep->pause_flag |= FEC_PAUSE_FLAG_AUTONEG; - fep->hwp = ioremap(r->start, resource_size(r)); + fep->hwp = devm_request_and_ioremap(&pdev->dev, r); fep->pdev = pdev; fep->dev_id = dev_id++; @@ -1768,7 +1760,7 @@ fec_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); - ret = fec_get_phy_mode_dt(pdev); + ret = of_get_phy_mode(pdev->dev.of_node); if (ret < 0) { pdata = pdev->dev.platform_data; if (pdata) @@ -1858,6 +1850,9 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_register; + if (fep->bufdesc_ex && fep->ptp_clock) + netdev_info(ndev, "registered PHC device %d\n", fep->dev_id); + return 0; failed_register: @@ 
-1877,11 +1872,8 @@ failed_regulator: clk_disable_unprepare(fep->clk_ptp); failed_pin: failed_clk: - iounmap(fep->hwp); failed_ioremap: free_netdev(ndev); -failed_alloc_etherdev: - release_mem_region(r->start, resource_size(r)); return ret; } @@ -1891,7 +1883,6 @@ fec_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); - struct resource *r; int i; unregister_netdev(ndev); @@ -1907,19 +1898,14 @@ fec_drv_remove(struct platform_device *pdev) if (irq > 0) free_irq(irq, ndev); } - iounmap(fep->hwp); free_netdev(ndev); - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - BUG_ON(!r); - release_mem_region(r->start, resource_size(r)); - platform_set_drvdata(pdev, NULL); return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int fec_suspend(struct device *dev) { @@ -1951,24 +1937,15 @@ fec_resume(struct device *dev) return 0; } +#endif /* CONFIG_PM_SLEEP */ -static const struct dev_pm_ops fec_pm_ops = { - .suspend = fec_suspend, - .resume = fec_resume, - .freeze = fec_suspend, - .thaw = fec_resume, - .poweroff = fec_suspend, - .restore = fec_resume, -}; -#endif +static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); static struct platform_driver fec_driver = { .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, -#ifdef CONFIG_PM .pm = &fec_pm_ops, -#endif .of_match_table = fec_dt_ids, }, .id_table = fec_devtype, diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 77943a6a1b8c..9bc15e2365bb 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -14,6 +14,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/dma-mapping.h> #include <linux/module.h> @@ -858,13 +860,11 @@ static int mpc52xx_fec_probe(struct platform_device *op) /* Reserve FEC control zone */ rv = of_address_to_resource(np, 0, &mem); if (rv) { - printk(KERN_ERR 
DRIVER_NAME ": " - "Error while parsing device node resource\n" ); + pr_err("Error while parsing device node resource\n"); goto err_netdev; } if (resource_size(&mem) < sizeof(struct mpc52xx_fec)) { - printk(KERN_ERR DRIVER_NAME - " - invalid resource size (%lx < %x), check mpc52xx_devices.c\n", + pr_err("invalid resource size (%lx < %x), check mpc52xx_devices.c\n", (unsigned long)resource_size(&mem), sizeof(struct mpc52xx_fec)); rv = -EINVAL; @@ -902,7 +902,7 @@ static int mpc52xx_fec_probe(struct platform_device *op) priv->tx_dmatsk = bcom_fec_tx_init(FEC_TX_NUM_BD, tx_fifo); if (!priv->rx_dmatsk || !priv->tx_dmatsk) { - printk(KERN_ERR DRIVER_NAME ": Can not init SDMA tasks\n" ); + pr_err("Can not init SDMA tasks\n"); rv = -ENOMEM; goto err_rx_tx_dmatsk; } @@ -982,8 +982,8 @@ static int mpc52xx_fec_probe(struct platform_device *op) /* We're done ! */ dev_set_drvdata(&op->dev, ndev); - printk(KERN_INFO "%s: %s MAC %pM\n", - ndev->name, op->dev.of_node->full_name, ndev->dev_addr); + netdev_info(ndev, "%s MAC %pM\n", + op->dev.of_node->full_name, ndev->dev_addr); return 0; @@ -1094,7 +1094,7 @@ mpc52xx_fec_init(void) int ret; ret = platform_driver_register(&mpc52xx_fec_mdio_driver); if (ret) { - printk(KERN_ERR DRIVER_NAME ": failed to register mdio driver\n"); + pr_err("failed to register mdio driver\n"); return ret; } #endif diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 1f17ca0f2201..25fc960cbf0e 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -17,6 +17,8 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> @@ -379,7 +381,5 @@ void fec_ptp_init(struct net_device *ndev, struct platform_device *pdev) if (IS_ERR(fep->ptp_clock)) { fep->ptp_clock = NULL; pr_err("ptp_clock_register failed\n"); - } else { - pr_info("registered PHC device on %s\n", ndev->name); } } diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 46df28893c10..edc120094c34 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -177,8 +177,6 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) received++; netif_receive_skb(skb); } else { - dev_warn(fep->dev, - "Memory squeeze, dropping packet.\n"); fep->stats.rx_dropped++; skbn = skb; } @@ -309,8 +307,6 @@ static int fs_enet_rx_non_napi(struct net_device *dev) received++; netif_rx(skb); } else { - dev_warn(fep->dev, - "Memory squeeze, dropping packet.\n"); fep->stats.rx_dropped++; skbn = skb; } @@ -505,11 +501,9 @@ void fs_init_bds(struct net_device *dev) */ for (i = 0, bdp = fep->rx_bd_base; i < fep->rx_ring; i++, bdp++) { skb = netdev_alloc_skb(dev, ENET_RX_FRSIZE); - if (skb == NULL) { - dev_warn(fep->dev, - "Memory squeeze, unable to allocate skb\n"); + if (skb == NULL) break; - } + skb_align(skb, ENET_RX_ALIGN); fep->rx_skbuff[i] = skb; CBDW_BUFADDR(bdp, @@ -593,13 +587,8 @@ static struct sk_buff *tx_skb_align_workaround(struct net_device *dev, /* Alloc new skb */ new_skb = netdev_alloc_skb(dev, skb->len + 4); - if (!new_skb) { - if (net_ratelimit()) { - dev_warn(fep->dev, - "Memory squeeze, dropping tx packet.\n"); - } + if (!new_skb) return NULL; - } /* Make sure new skb is properly aligned */ skb_align(new_skb, 4); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d2c5441d1bf0..2375a01715a0 100644 --- 
a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -132,7 +132,7 @@ static int gfar_poll(struct napi_struct *napi, int budget); static void gfar_netpoll(struct net_device *dev); #endif int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit); -static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue); +static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue); static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb, int amount_pull, struct napi_struct *napi); void gfar_halt(struct net_device *dev); @@ -245,14 +245,13 @@ static int gfar_alloc_skb_resources(struct net_device *ndev) /* Allocate memory for the buffer descriptors */ vaddr = dma_alloc_coherent(dev, - sizeof(struct txbd8) * priv->total_tx_ring_size + - sizeof(struct rxbd8) * priv->total_rx_ring_size, - &addr, GFP_KERNEL); - if (!vaddr) { - netif_err(priv, ifup, ndev, - "Could not allocate buffer descriptors!\n"); + (priv->total_tx_ring_size * + sizeof(struct txbd8)) + + (priv->total_rx_ring_size * + sizeof(struct rxbd8)), + &addr, GFP_KERNEL); + if (!vaddr) return -ENOMEM; - } for (i = 0; i < priv->num_tx_queues; i++) { tx_queue = priv->tx_queue[i]; @@ -342,7 +341,7 @@ static void gfar_init_mac(struct net_device *ndev) gfar_init_tx_rx_base(priv); /* Configure the coalescing support */ - gfar_configure_coalescing(priv, 0xFF, 0xFF); + gfar_configure_coalescing_all(priv); /* set this when rx hw offload (TOE) functions are being used */ priv->uses_rxfcb = 0; @@ -387,7 +386,7 @@ static void gfar_init_mac(struct net_device *ndev) priv->uses_rxfcb = 1; } - if (ndev->features & NETIF_F_HW_VLAN_RX) { + if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX) { rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT; priv->uses_rxfcb = 1; } @@ -691,7 +690,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) } for (i = 0; i < priv->num_tx_queues; i++) - priv->tx_queue[i] = NULL; + priv->tx_queue[i] = NULL; for (i = 0; i < 
priv->num_rx_queues; i++) priv->rx_queue[i] = NULL; @@ -1051,8 +1050,9 @@ static int gfar_probe(struct platform_device *ofdev) } if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { - dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; - dev->features |= NETIF_F_HW_VLAN_RX; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; } if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) { @@ -1817,25 +1817,15 @@ void gfar_start(struct net_device *dev) dev->trans_start = jiffies; /* prevent tx timeout */ } -void gfar_configure_coalescing(struct gfar_private *priv, +static void gfar_configure_coalescing(struct gfar_private *priv, unsigned long tx_mask, unsigned long rx_mask) { struct gfar __iomem *regs = priv->gfargrp[0].regs; u32 __iomem *baddr; - int i = 0; - - /* Backward compatible case ---- even if we enable - * multiple queues, there's only single reg to program - */ - gfar_write(&regs->txic, 0); - if (likely(priv->tx_queue[0]->txcoalescing)) - gfar_write(&regs->txic, priv->tx_queue[0]->txic); - - gfar_write(&regs->rxic, 0); - if (unlikely(priv->rx_queue[0]->rxcoalescing)) - gfar_write(&regs->rxic, priv->rx_queue[0]->rxic); if (priv->mode == MQ_MG_MODE) { + int i = 0; + baddr = &regs->txic0; for_each_set_bit(i, &tx_mask, priv->num_tx_queues) { gfar_write(baddr + i, 0); @@ -1849,9 +1839,25 @@ void gfar_configure_coalescing(struct gfar_private *priv, if (likely(priv->rx_queue[i]->rxcoalescing)) gfar_write(baddr + i, priv->rx_queue[i]->rxic); } + } else { + /* Backward compatible case -- even if we enable + * multiple queues, there's only single reg to program + */ + gfar_write(&regs->txic, 0); + if (likely(priv->tx_queue[0]->txcoalescing)) + gfar_write(&regs->txic, priv->tx_queue[0]->txic); + + gfar_write(&regs->rxic, 0); + if (unlikely(priv->rx_queue[0]->rxcoalescing)) + gfar_write(&regs->rxic, priv->rx_queue[0]->rxic); } } +void gfar_configure_coalescing_all(struct gfar_private *priv) +{ +
gfar_configure_coalescing(priv, 0xFF, 0xFF); +} + static int register_grp_irqs(struct gfar_priv_grp *grp) { struct gfar_private *priv = grp->priv; @@ -1941,7 +1947,7 @@ int startup_gfar(struct net_device *ndev) phy_start(priv->phydev); - gfar_configure_coalescing(priv, 0xFF, 0xFF); + gfar_configure_coalescing_all(priv); return 0; @@ -2343,7 +2349,7 @@ void gfar_vlan_mode(struct net_device *dev, netdev_features_t features) local_irq_save(flags); lock_rx_qs(priv); - if (features & NETIF_F_HW_VLAN_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) { /* Enable VLAN tag insertion */ tempval = gfar_read(&regs->tctrl); tempval |= TCTRL_VLINS; @@ -2355,7 +2361,7 @@ void gfar_vlan_mode(struct net_device *dev, netdev_features_t features) gfar_write(&regs->tctrl, tempval); } - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* Enable VLAN tag extraction */ tempval = gfar_read(&regs->rctrl); tempval |= (RCTRL_VLEX | RCTRL_PRSDEP_INIT); @@ -2469,12 +2475,11 @@ static void gfar_align_skb(struct sk_buff *skb) } /* Interrupt Handler for Transmit complete */ -static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) +static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) { struct net_device *dev = tx_queue->dev; struct netdev_queue *txq; struct gfar_private *priv = netdev_priv(dev); - struct gfar_priv_rx_q *rx_queue = NULL; struct txbd8 *bdp, *next = NULL; struct txbd8 *lbdp = NULL; struct txbd8 *base = tx_queue->tx_bd_base; @@ -2489,7 +2494,6 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) u32 lstatus; size_t buflen; - rx_queue = priv->rx_queue[tqi]; txq = netdev_get_tx_queue(dev, tqi); bdp = tx_queue->dirty_tx; skb_dirtytx = tx_queue->skb_dirtytx; @@ -2571,8 +2575,6 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) tx_queue->dirty_tx = bdp; netdev_tx_completed_queue(txq, howmany, bytes_sent); - - return howmany; } static void gfar_schedule_cleanup(struct gfar_priv_grp *gfargrp) @@ -2694,8 +2696,6 @@ static void
gfar_process_frame(struct net_device *dev, struct sk_buff *skb, struct gfar_private *priv = netdev_priv(dev); struct rxfcb *fcb = NULL; - gro_result_t ret; - /* fcb is at the beginning if exists */ fcb = (struct rxfcb *)skb->data; @@ -2725,19 +2725,17 @@ static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb, /* Tell the skb what kind of packet this is */ skb->protocol = eth_type_trans(skb, dev); - /* There's need to check for NETIF_F_HW_VLAN_RX here. + /* There's need to check for NETIF_F_HW_VLAN_CTAG_RX here. * Even if vlan rx accel is disabled, on some chips * RXFCB_VLN is pseudo randomly set. */ - if (dev->features & NETIF_F_HW_VLAN_RX && + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX && fcb->flags & RXFCB_VLN) - __vlan_hwaccel_put_tag(skb, fcb->vlctl); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), fcb->vlctl); /* Send the packet up the stack */ - ret = napi_gro_receive(napi, skb); + napi_gro_receive(napi, skb); - if (unlikely(GRO_DROP == ret)) - atomic64_inc(&priv->extra_stats.kernel_dropped); } /* gfar_clean_rx_ring() -- Processes each frame in the rx ring @@ -2835,62 +2833,82 @@ static int gfar_poll(struct napi_struct *napi, int budget) struct gfar __iomem *regs = gfargrp->regs; struct gfar_priv_tx_q *tx_queue = NULL; struct gfar_priv_rx_q *rx_queue = NULL; - int rx_cleaned = 0, budget_per_queue = 0, rx_cleaned_per_queue = 0; - int tx_cleaned = 0, i, left_over_budget = budget; - unsigned long serviced_queues = 0; - int num_queues = 0; - - num_queues = gfargrp->num_rx_queues; - budget_per_queue = budget/num_queues; + int work_done = 0, work_done_per_q = 0; + int i, budget_per_q = 0; + int has_tx_work; + unsigned long rstat_rxf; + int num_act_queues; /* Clear IEVENT, so interrupts aren't called again * because of the packets that have already arrived */ gfar_write(&regs->ievent, IEVENT_RTX_MASK); - while (num_queues && left_over_budget) { - budget_per_queue = left_over_budget/num_queues; - left_over_budget = 0; + rstat_rxf =
gfar_read(&regs->rstat) & RSTAT_RXF_MASK; + + num_act_queues = bitmap_weight(&rstat_rxf, MAX_RX_QS); + if (num_act_queues) + budget_per_q = budget/num_act_queues; + + while (1) { + has_tx_work = 0; + for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) { + tx_queue = priv->tx_queue[i]; + /* run Tx cleanup to completion */ + if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) { + gfar_clean_tx_ring(tx_queue); + has_tx_work = 1; + } + } for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) { - if (test_bit(i, &serviced_queues)) + /* skip queue if not active */ + if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i))) continue; + rx_queue = priv->rx_queue[i]; - tx_queue = priv->tx_queue[rx_queue->qindex]; - - tx_cleaned += gfar_clean_tx_ring(tx_queue); - rx_cleaned_per_queue = - gfar_clean_rx_ring(rx_queue, budget_per_queue); - rx_cleaned += rx_cleaned_per_queue; - if (rx_cleaned_per_queue < budget_per_queue) { - left_over_budget = left_over_budget + - (budget_per_queue - - rx_cleaned_per_queue); - set_bit(i, &serviced_queues); - num_queues--; + work_done_per_q = + gfar_clean_rx_ring(rx_queue, budget_per_q); + work_done += work_done_per_q; + + /* finished processing this queue */ + if (work_done_per_q < budget_per_q) { + /* clear active queue hw indication */ + gfar_write(&regs->rstat, + RSTAT_CLEAR_RXF0 >> i); + rstat_rxf &= ~(RSTAT_CLEAR_RXF0 >> i); + num_act_queues--; + + if (!num_act_queues) + break; + /* recompute budget per Rx queue */ + budget_per_q = + (budget - work_done) / num_act_queues; } } - } - if (tx_cleaned) - return budget; + if (work_done >= budget) + break; - if (rx_cleaned < budget) { - napi_complete(napi); + if (!num_act_queues && !has_tx_work) { - /* Clear the halt bit in RSTAT */ - gfar_write(&regs->rstat, gfargrp->rstat); + napi_complete(napi); - gfar_write(&regs->imask, IMASK_DEFAULT); + /* Clear the halt bit in RSTAT */ + gfar_write(&regs->rstat, gfargrp->rstat); - /* If we are coalescing interrupts, update the timer - * Otherwise, clear it - */ -
gfar_configure_coalescing(priv, gfargrp->rx_bit_map, - gfargrp->tx_bit_map); + gfar_write(&regs->imask, IMASK_DEFAULT); + + /* If we are coalescing interrupts, update the timer + * Otherwise, clear it + */ + gfar_configure_coalescing(priv, gfargrp->rx_bit_map, + gfargrp->tx_bit_map); + break; + } } - return rx_cleaned; + return work_done; } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 63a28d294e20..04b552cd419d 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -291,7 +291,9 @@ extern const char gfar_driver_version[]; #define RCTRL_PADDING(x) ((x << 16) & RCTRL_PAL_MASK) -#define RSTAT_CLEAR_RHALT 0x00800000 +#define RSTAT_CLEAR_RHALT 0x00800000 +#define RSTAT_CLEAR_RXF0 0x00000080 +#define RSTAT_RXF_MASK 0x000000ff #define TCTRL_IPCSEN 0x00004000 #define TCTRL_TUCSEN 0x00002000 @@ -627,7 +629,6 @@ struct rmon_mib }; struct gfar_extra_stats { - atomic64_t kernel_dropped; atomic64_t rx_large; atomic64_t rx_short; atomic64_t rx_nonoctet; @@ -1180,8 +1181,7 @@ extern void stop_gfar(struct net_device *dev); extern void gfar_halt(struct net_device *dev); extern void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev, int enable, u32 regnum, u32 read); -extern void gfar_configure_coalescing(struct gfar_private *priv, - unsigned long tx_mask, unsigned long rx_mask); +extern void gfar_configure_coalescing_all(struct gfar_private *priv); void gfar_init_sysfs(struct net_device *dev); int gfar_set_features(struct net_device *dev, netdev_features_t features); extern void gfar_check_rx_parser_mode(struct gfar_private *priv); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 75e89acf4912..21cd88124ca9 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -66,7 +66,6 @@ static void
gfar_gdrvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo); static const char stat_gstrings[][ETH_GSTRING_LEN] = { - "rx-dropped-by-kernel", "rx-large-frame-errors", "rx-short-frame-errors", "rx-non-octet-errors", @@ -390,14 +389,14 @@ static int gfar_scoalesce(struct net_device *dev, /* Check the bounds of the values */ if (cvals->rx_coalesce_usecs > GFAR_MAX_COAL_USECS) { - pr_info("Coalescing is limited to %d microseconds\n", - GFAR_MAX_COAL_USECS); + netdev_info(dev, "Coalescing is limited to %d microseconds\n", + GFAR_MAX_COAL_USECS); return -EINVAL; } if (cvals->rx_max_coalesced_frames > GFAR_MAX_COAL_FRAMES) { - pr_info("Coalescing is limited to %d frames\n", - GFAR_MAX_COAL_FRAMES); + netdev_info(dev, "Coalescing is limited to %d frames\n", + GFAR_MAX_COAL_FRAMES); return -EINVAL; } @@ -419,14 +418,14 @@ static int gfar_scoalesce(struct net_device *dev, /* Check the bounds of the values */ if (cvals->tx_coalesce_usecs > GFAR_MAX_COAL_USECS) { - pr_info("Coalescing is limited to %d microseconds\n", - GFAR_MAX_COAL_USECS); + netdev_info(dev, "Coalescing is limited to %d microseconds\n", + GFAR_MAX_COAL_USECS); return -EINVAL; } if (cvals->tx_max_coalesced_frames > GFAR_MAX_COAL_FRAMES) { - pr_info("Coalescing is limited to %d frames\n", - GFAR_MAX_COAL_FRAMES); + netdev_info(dev, "Coalescing is limited to %d frames\n", + GFAR_MAX_COAL_FRAMES); return -EINVAL; } @@ -436,7 +435,7 @@ static int gfar_scoalesce(struct net_device *dev, gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs)); } - gfar_configure_coalescing(priv, 0xFF, 0xFF); + gfar_configure_coalescing_all(priv); return 0; } @@ -543,7 +542,7 @@ int gfar_set_features(struct net_device *dev, netdev_features_t features) int err = 0, i = 0; netdev_features_t changed = dev->features ^ features; - if (changed & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX)) + if (changed & (NETIF_F_HW_VLAN_CTAG_TX|NETIF_F_HW_VLAN_CTAG_RX)) gfar_vlan_mode(dev, features); if (!(changed & NETIF_F_RXCSUM)) @@ -736,7 +735,8 @@ 
static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, cmp_rqfpr = RQFPR_IPV6 |RQFPR_UDP; break; default: - pr_err("Right now this class is not supported\n"); + netdev_err(priv->ndev, + "Right now this class is not supported\n"); ret = 0; goto err; } @@ -752,7 +752,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, } if (i == MAX_FILER_IDX + 1) { - pr_err("No parse rule found, can't create hash rules\n"); + netdev_err(priv->ndev, + "No parse rule found, can't create hash rules\n"); ret = 0; goto err; } @@ -1569,7 +1570,7 @@ static int gfar_process_filer_changes(struct gfar_private *priv) gfar_cluster_filer(tab); gfar_optimize_filer_masks(tab); - pr_debug("\n\tSummary:\n" + pr_debug("\tSummary:\n" "\tData on hardware: %d\n" "\tCompression rate: %d%%\n", tab->index, 100 - (100 * tab->index) / i); diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 2e5daee0438a..fe8e9e5cfb2b 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -17,6 +17,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/device.h> #include <linux/hrtimer.h> #include <linux/init.h> diff --git a/drivers/net/ethernet/freescale/gianfar_sysfs.c b/drivers/net/ethernet/freescale/gianfar_sysfs.c index cd14a4d449c2..acb55af7e3f3 100644 --- a/drivers/net/ethernet/freescale/gianfar_sysfs.c +++ b/drivers/net/ethernet/freescale/gianfar_sysfs.c @@ -337,5 +337,5 @@ void gfar_init_sysfs(struct net_device *dev) rc |= device_create_file(&dev->dev, &dev_attr_fifo_starve); rc |= device_create_file(&dev->dev, &dev_attr_fifo_starve_off); if (rc) - dev_err(&dev->dev, "Error creating gianfar sysfs files.\n"); + dev_err(&dev->dev, "Error creating gianfar sysfs files\n"); } diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 0a70bb55d1b0..e04c59818f60 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -12,6 +12,9 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> @@ -50,12 +53,6 @@ #define ugeth_dbg(format, arg...) \ ugeth_printk(KERN_DEBUG , format , ## arg) -#define ugeth_err(format, arg...) \ - ugeth_printk(KERN_ERR , format , ## arg) -#define ugeth_info(format, arg...) \ - ugeth_printk(KERN_INFO , format , ## arg) -#define ugeth_warn(format, arg...) 
\ - ugeth_printk(KERN_WARNING , format , ## arg) #ifdef UGETH_VERBOSE_DEBUG #define ugeth_vdbg ugeth_dbg @@ -281,7 +278,7 @@ static int fill_init_enet_entries(struct ucc_geth_private *ugeth, for (i = 0; i < num_entries; i++) { if ((snum = qe_get_snum()) < 0) { if (netif_msg_ifup(ugeth)) - ugeth_err("fill_init_enet_entries: Can not get SNUM."); + pr_err("Can not get SNUM\n"); return snum; } if ((i == 0) && skip_page_for_first_entry) @@ -292,7 +289,7 @@ static int fill_init_enet_entries(struct ucc_geth_private *ugeth, qe_muram_alloc(thread_size, thread_alignment); if (IS_ERR_VALUE(init_enet_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err("fill_init_enet_entries: Can not allocate DPRAM memory."); + pr_err("Can not allocate DPRAM memory\n"); qe_put_snum((u8) snum); return -ENOMEM; } @@ -365,10 +362,9 @@ static int dump_init_enet_entries(struct ucc_geth_private *ugeth, init_enet_offset = (in_be32(p_start) & ENET_INIT_PARAM_PTR_MASK); - ugeth_info("Init enet entry %d:", i); - ugeth_info("Base address: 0x%08x", - (u32) - qe_muram_addr(init_enet_offset)); + pr_info("Init enet entry %d:\n", i); + pr_info("Base address: 0x%08x\n", + (u32)qe_muram_addr(init_enet_offset)); mem_disp(qe_muram_addr(init_enet_offset), thread_size); } @@ -396,8 +392,8 @@ static int hw_clear_addr_in_paddr(struct ucc_geth_private *ugeth, u8 paddr_num) { struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt; - if (!(paddr_num < NUM_OF_PADDRS)) { - ugeth_warn("%s: Illagel paddr_num.", __func__); + if (paddr_num >= NUM_OF_PADDRS) { + pr_warn("%s: Invalid paddr_num: %u\n", __func__, paddr_num); return -EINVAL; } @@ -573,7 +569,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) length = (ugeth->ug_info->bdRingLenTx[i] * sizeof(struct qe_bd)); - ugeth_info("TX BDs[%d]", i); + pr_info("TX BDs[%d]\n", i); mem_disp(ugeth->p_tx_bd_ring[i], length); } } @@ -582,7 +578,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) length = (ugeth->ug_info->bdRingLenRx[i] * sizeof(struct 
qe_bd)); - ugeth_info("RX BDs[%d]", i); + pr_info("RX BDs[%d]\n", i); mem_disp(ugeth->p_rx_bd_ring[i], length); } } @@ -592,93 +588,93 @@ static void dump_regs(struct ucc_geth_private *ugeth) { int i; - ugeth_info("UCC%d Geth registers:", ugeth->ug_info->uf_info.ucc_num + 1); - ugeth_info("Base address: 0x%08x", (u32) ugeth->ug_regs); - - ugeth_info("maccfg1 : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->maccfg1, - in_be32(&ugeth->ug_regs->maccfg1)); - ugeth_info("maccfg2 : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->maccfg2, - in_be32(&ugeth->ug_regs->maccfg2)); - ugeth_info("ipgifg : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->ipgifg, - in_be32(&ugeth->ug_regs->ipgifg)); - ugeth_info("hafdup : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->hafdup, - in_be32(&ugeth->ug_regs->hafdup)); - ugeth_info("ifctl : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->ifctl, - in_be32(&ugeth->ug_regs->ifctl)); - ugeth_info("ifstat : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->ifstat, - in_be32(&ugeth->ug_regs->ifstat)); - ugeth_info("macstnaddr1: addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->macstnaddr1, - in_be32(&ugeth->ug_regs->macstnaddr1)); - ugeth_info("macstnaddr2: addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->macstnaddr2, - in_be32(&ugeth->ug_regs->macstnaddr2)); - ugeth_info("uempr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->uempr, - in_be32(&ugeth->ug_regs->uempr)); - ugeth_info("utbipar : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->utbipar, - in_be32(&ugeth->ug_regs->utbipar)); - ugeth_info("uescr : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->ug_regs->uescr, - in_be16(&ugeth->ug_regs->uescr)); - ugeth_info("tx64 : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->tx64, - in_be32(&ugeth->ug_regs->tx64)); - ugeth_info("tx127 : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->tx127, - in_be32(&ugeth->ug_regs->tx127)); - ugeth_info("tx255 : addr - 
0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->tx255, - in_be32(&ugeth->ug_regs->tx255)); - ugeth_info("rx64 : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rx64, - in_be32(&ugeth->ug_regs->rx64)); - ugeth_info("rx127 : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rx127, - in_be32(&ugeth->ug_regs->rx127)); - ugeth_info("rx255 : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rx255, - in_be32(&ugeth->ug_regs->rx255)); - ugeth_info("txok : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->txok, - in_be32(&ugeth->ug_regs->txok)); - ugeth_info("txcf : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->ug_regs->txcf, - in_be16(&ugeth->ug_regs->txcf)); - ugeth_info("tmca : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->tmca, - in_be32(&ugeth->ug_regs->tmca)); - ugeth_info("tbca : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->tbca, - in_be32(&ugeth->ug_regs->tbca)); - ugeth_info("rxfok : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rxfok, - in_be32(&ugeth->ug_regs->rxfok)); - ugeth_info("rxbok : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rxbok, - in_be32(&ugeth->ug_regs->rxbok)); - ugeth_info("rbyt : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rbyt, - in_be32(&ugeth->ug_regs->rbyt)); - ugeth_info("rmca : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rmca, - in_be32(&ugeth->ug_regs->rmca)); - ugeth_info("rbca : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->rbca, - in_be32(&ugeth->ug_regs->rbca)); - ugeth_info("scar : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->scar, - in_be32(&ugeth->ug_regs->scar)); - ugeth_info("scam : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->ug_regs->scam, - in_be32(&ugeth->ug_regs->scam)); + pr_info("UCC%d Geth registers:\n", ugeth->ug_info->uf_info.ucc_num + 1); + pr_info("Base address: 0x%08x\n", (u32)ugeth->ug_regs); + + pr_info("maccfg1 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->maccfg1, + 
in_be32(&ugeth->ug_regs->maccfg1)); + pr_info("maccfg2 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->maccfg2, + in_be32(&ugeth->ug_regs->maccfg2)); + pr_info("ipgifg : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->ipgifg, + in_be32(&ugeth->ug_regs->ipgifg)); + pr_info("hafdup : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->hafdup, + in_be32(&ugeth->ug_regs->hafdup)); + pr_info("ifctl : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->ifctl, + in_be32(&ugeth->ug_regs->ifctl)); + pr_info("ifstat : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->ifstat, + in_be32(&ugeth->ug_regs->ifstat)); + pr_info("macstnaddr1: addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->macstnaddr1, + in_be32(&ugeth->ug_regs->macstnaddr1)); + pr_info("macstnaddr2: addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->macstnaddr2, + in_be32(&ugeth->ug_regs->macstnaddr2)); + pr_info("uempr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->uempr, + in_be32(&ugeth->ug_regs->uempr)); + pr_info("utbipar : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->utbipar, + in_be32(&ugeth->ug_regs->utbipar)); + pr_info("uescr : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->ug_regs->uescr, + in_be16(&ugeth->ug_regs->uescr)); + pr_info("tx64 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->tx64, + in_be32(&ugeth->ug_regs->tx64)); + pr_info("tx127 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->tx127, + in_be32(&ugeth->ug_regs->tx127)); + pr_info("tx255 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->tx255, + in_be32(&ugeth->ug_regs->tx255)); + pr_info("rx64 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rx64, + in_be32(&ugeth->ug_regs->rx64)); + pr_info("rx127 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rx127, + in_be32(&ugeth->ug_regs->rx127)); + pr_info("rx255 : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rx255, + in_be32(&ugeth->ug_regs->rx255)); + pr_info("txok : addr - 
0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->txok, + in_be32(&ugeth->ug_regs->txok)); + pr_info("txcf : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->ug_regs->txcf, + in_be16(&ugeth->ug_regs->txcf)); + pr_info("tmca : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->tmca, + in_be32(&ugeth->ug_regs->tmca)); + pr_info("tbca : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->tbca, + in_be32(&ugeth->ug_regs->tbca)); + pr_info("rxfok : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rxfok, + in_be32(&ugeth->ug_regs->rxfok)); + pr_info("rxbok : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rxbok, + in_be32(&ugeth->ug_regs->rxbok)); + pr_info("rbyt : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rbyt, + in_be32(&ugeth->ug_regs->rbyt)); + pr_info("rmca : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rmca, + in_be32(&ugeth->ug_regs->rmca)); + pr_info("rbca : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->rbca, + in_be32(&ugeth->ug_regs->rbca)); + pr_info("scar : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->scar, + in_be32(&ugeth->ug_regs->scar)); + pr_info("scam : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->ug_regs->scam, + in_be32(&ugeth->ug_regs->scam)); if (ugeth->p_thread_data_tx) { int numThreadsTxNumerical; @@ -703,13 +699,13 @@ static void dump_regs(struct ucc_geth_private *ugeth) break; } - ugeth_info("Thread data TXs:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_thread_data_tx); + pr_info("Thread data TXs:\n"); + pr_info("Base address: 0x%08x\n", + (u32)ugeth->p_thread_data_tx); for (i = 0; i < numThreadsTxNumerical; i++) { - ugeth_info("Thread data TX[%d]:", i); - ugeth_info("Base address: 0x%08x", - (u32) & ugeth->p_thread_data_tx[i]); + pr_info("Thread data TX[%d]:\n", i); + pr_info("Base address: 0x%08x\n", + (u32)&ugeth->p_thread_data_tx[i]); mem_disp((u8 *) & ugeth->p_thread_data_tx[i], sizeof(struct ucc_geth_thread_data_tx)); } @@ -737,270 +733,260 @@ static void 
dump_regs(struct ucc_geth_private *ugeth) break; } - ugeth_info("Thread data RX:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_thread_data_rx); + pr_info("Thread data RX:\n"); + pr_info("Base address: 0x%08x\n", + (u32)ugeth->p_thread_data_rx); for (i = 0; i < numThreadsRxNumerical; i++) { - ugeth_info("Thread data RX[%d]:", i); - ugeth_info("Base address: 0x%08x", - (u32) & ugeth->p_thread_data_rx[i]); + pr_info("Thread data RX[%d]:\n", i); + pr_info("Base address: 0x%08x\n", + (u32)&ugeth->p_thread_data_rx[i]); mem_disp((u8 *) & ugeth->p_thread_data_rx[i], sizeof(struct ucc_geth_thread_data_rx)); } } if (ugeth->p_exf_glbl_param) { - ugeth_info("EXF global param:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_exf_glbl_param); + pr_info("EXF global param:\n"); + pr_info("Base address: 0x%08x\n", + (u32)ugeth->p_exf_glbl_param); mem_disp((u8 *) ugeth->p_exf_glbl_param, sizeof(*ugeth->p_exf_glbl_param)); } if (ugeth->p_tx_glbl_pram) { - ugeth_info("TX global param:"); - ugeth_info("Base address: 0x%08x", (u32) ugeth->p_tx_glbl_pram); - ugeth_info("temoder : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_tx_glbl_pram->temoder, - in_be16(&ugeth->p_tx_glbl_pram->temoder)); - ugeth_info("sqptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->sqptr, - in_be32(&ugeth->p_tx_glbl_pram->sqptr)); - ugeth_info("schedulerbasepointer: addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->schedulerbasepointer, - in_be32(&ugeth->p_tx_glbl_pram-> - schedulerbasepointer)); - ugeth_info("txrmonbaseptr: addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->txrmonbaseptr, - in_be32(&ugeth->p_tx_glbl_pram->txrmonbaseptr)); - ugeth_info("tstate : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->tstate, - in_be32(&ugeth->p_tx_glbl_pram->tstate)); - ugeth_info("iphoffset[0] : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[0], - ugeth->p_tx_glbl_pram->iphoffset[0]); - ugeth_info("iphoffset[1] : 
addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[1], - ugeth->p_tx_glbl_pram->iphoffset[1]); - ugeth_info("iphoffset[2] : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[2], - ugeth->p_tx_glbl_pram->iphoffset[2]); - ugeth_info("iphoffset[3] : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[3], - ugeth->p_tx_glbl_pram->iphoffset[3]); - ugeth_info("iphoffset[4] : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[4], - ugeth->p_tx_glbl_pram->iphoffset[4]); - ugeth_info("iphoffset[5] : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[5], - ugeth->p_tx_glbl_pram->iphoffset[5]); - ugeth_info("iphoffset[6] : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[6], - ugeth->p_tx_glbl_pram->iphoffset[6]); - ugeth_info("iphoffset[7] : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_tx_glbl_pram->iphoffset[7], - ugeth->p_tx_glbl_pram->iphoffset[7]); - ugeth_info("vtagtable[0] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->vtagtable[0], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[0])); - ugeth_info("vtagtable[1] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->vtagtable[1], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[1])); - ugeth_info("vtagtable[2] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->vtagtable[2], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[2])); - ugeth_info("vtagtable[3] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->vtagtable[3], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[3])); - ugeth_info("vtagtable[4] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->vtagtable[4], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[4])); - ugeth_info("vtagtable[5] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->vtagtable[5], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[5])); - ugeth_info("vtagtable[6] : addr - 0x%08x, val - 0x%08x", - (u32) & 
ugeth->p_tx_glbl_pram->vtagtable[6], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[6])); - ugeth_info("vtagtable[7] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->vtagtable[7], - in_be32(&ugeth->p_tx_glbl_pram->vtagtable[7])); - ugeth_info("tqptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_tx_glbl_pram->tqptr, - in_be32(&ugeth->p_tx_glbl_pram->tqptr)); + pr_info("TX global param:\n"); + pr_info("Base address: 0x%08x\n", (u32)ugeth->p_tx_glbl_pram); + pr_info("temoder : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_tx_glbl_pram->temoder, + in_be16(&ugeth->p_tx_glbl_pram->temoder)); + pr_info("sqptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->sqptr, + in_be32(&ugeth->p_tx_glbl_pram->sqptr)); + pr_info("schedulerbasepointer: addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->schedulerbasepointer, + in_be32(&ugeth->p_tx_glbl_pram->schedulerbasepointer)); + pr_info("txrmonbaseptr: addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->txrmonbaseptr, + in_be32(&ugeth->p_tx_glbl_pram->txrmonbaseptr)); + pr_info("tstate : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->tstate, + in_be32(&ugeth->p_tx_glbl_pram->tstate)); + pr_info("iphoffset[0] : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_tx_glbl_pram->iphoffset[0], + ugeth->p_tx_glbl_pram->iphoffset[0]); + pr_info("iphoffset[1] : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_tx_glbl_pram->iphoffset[1], + ugeth->p_tx_glbl_pram->iphoffset[1]); + pr_info("iphoffset[2] : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_tx_glbl_pram->iphoffset[2], + ugeth->p_tx_glbl_pram->iphoffset[2]); + pr_info("iphoffset[3] : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_tx_glbl_pram->iphoffset[3], + ugeth->p_tx_glbl_pram->iphoffset[3]); + pr_info("iphoffset[4] : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_tx_glbl_pram->iphoffset[4], + ugeth->p_tx_glbl_pram->iphoffset[4]); + pr_info("iphoffset[5] : addr - 0x%08x, val - 0x%02x\n", + 
(u32)&ugeth->p_tx_glbl_pram->iphoffset[5], + ugeth->p_tx_glbl_pram->iphoffset[5]); + pr_info("iphoffset[6] : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_tx_glbl_pram->iphoffset[6], + ugeth->p_tx_glbl_pram->iphoffset[6]); + pr_info("iphoffset[7] : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_tx_glbl_pram->iphoffset[7], + ugeth->p_tx_glbl_pram->iphoffset[7]); + pr_info("vtagtable[0] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[0], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[0])); + pr_info("vtagtable[1] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[1], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[1])); + pr_info("vtagtable[2] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[2], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[2])); + pr_info("vtagtable[3] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[3], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[3])); + pr_info("vtagtable[4] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[4], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[4])); + pr_info("vtagtable[5] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[5], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[5])); + pr_info("vtagtable[6] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[6], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[6])); + pr_info("vtagtable[7] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->vtagtable[7], + in_be32(&ugeth->p_tx_glbl_pram->vtagtable[7])); + pr_info("tqptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_tx_glbl_pram->tqptr, + in_be32(&ugeth->p_tx_glbl_pram->tqptr)); } if (ugeth->p_rx_glbl_pram) { - ugeth_info("RX global param:"); - ugeth_info("Base address: 0x%08x", (u32) ugeth->p_rx_glbl_pram); - ugeth_info("remoder : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->remoder, - in_be32(&ugeth->p_rx_glbl_pram->remoder)); - 
ugeth_info("rqptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->rqptr, - in_be32(&ugeth->p_rx_glbl_pram->rqptr)); - ugeth_info("typeorlen : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->typeorlen, - in_be16(&ugeth->p_rx_glbl_pram->typeorlen)); - ugeth_info("rxgstpack : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_rx_glbl_pram->rxgstpack, - ugeth->p_rx_glbl_pram->rxgstpack); - ugeth_info("rxrmonbaseptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->rxrmonbaseptr, - in_be32(&ugeth->p_rx_glbl_pram->rxrmonbaseptr)); - ugeth_info("intcoalescingptr: addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->intcoalescingptr, - in_be32(&ugeth->p_rx_glbl_pram->intcoalescingptr)); - ugeth_info("rstate : addr - 0x%08x, val - 0x%02x", - (u32) & ugeth->p_rx_glbl_pram->rstate, - ugeth->p_rx_glbl_pram->rstate); - ugeth_info("mrblr : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->mrblr, - in_be16(&ugeth->p_rx_glbl_pram->mrblr)); - ugeth_info("rbdqptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->rbdqptr, - in_be32(&ugeth->p_rx_glbl_pram->rbdqptr)); - ugeth_info("mflr : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->mflr, - in_be16(&ugeth->p_rx_glbl_pram->mflr)); - ugeth_info("minflr : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->minflr, - in_be16(&ugeth->p_rx_glbl_pram->minflr)); - ugeth_info("maxd1 : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->maxd1, - in_be16(&ugeth->p_rx_glbl_pram->maxd1)); - ugeth_info("maxd2 : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->maxd2, - in_be16(&ugeth->p_rx_glbl_pram->maxd2)); - ugeth_info("ecamptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->ecamptr, - in_be32(&ugeth->p_rx_glbl_pram->ecamptr)); - ugeth_info("l2qt : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l2qt, - in_be32(&ugeth->p_rx_glbl_pram->l2qt)); - ugeth_info("l3qt[0] : addr - 0x%08x, 
val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[0], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[0])); - ugeth_info("l3qt[1] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[1], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[1])); - ugeth_info("l3qt[2] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[2], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[2])); - ugeth_info("l3qt[3] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[3], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[3])); - ugeth_info("l3qt[4] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[4], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[4])); - ugeth_info("l3qt[5] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[5], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[5])); - ugeth_info("l3qt[6] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[6], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[6])); - ugeth_info("l3qt[7] : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->l3qt[7], - in_be32(&ugeth->p_rx_glbl_pram->l3qt[7])); - ugeth_info("vlantype : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->vlantype, - in_be16(&ugeth->p_rx_glbl_pram->vlantype)); - ugeth_info("vlantci : addr - 0x%08x, val - 0x%04x", - (u32) & ugeth->p_rx_glbl_pram->vlantci, - in_be16(&ugeth->p_rx_glbl_pram->vlantci)); + pr_info("RX global param:\n"); + pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_glbl_pram); + pr_info("remoder : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->remoder, + in_be32(&ugeth->p_rx_glbl_pram->remoder)); + pr_info("rqptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->rqptr, + in_be32(&ugeth->p_rx_glbl_pram->rqptr)); + pr_info("typeorlen : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->typeorlen, + in_be16(&ugeth->p_rx_glbl_pram->typeorlen)); + pr_info("rxgstpack : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_rx_glbl_pram->rxgstpack, + 
ugeth->p_rx_glbl_pram->rxgstpack); + pr_info("rxrmonbaseptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->rxrmonbaseptr, + in_be32(&ugeth->p_rx_glbl_pram->rxrmonbaseptr)); + pr_info("intcoalescingptr: addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->intcoalescingptr, + in_be32(&ugeth->p_rx_glbl_pram->intcoalescingptr)); + pr_info("rstate : addr - 0x%08x, val - 0x%02x\n", + (u32)&ugeth->p_rx_glbl_pram->rstate, + ugeth->p_rx_glbl_pram->rstate); + pr_info("mrblr : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->mrblr, + in_be16(&ugeth->p_rx_glbl_pram->mrblr)); + pr_info("rbdqptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->rbdqptr, + in_be32(&ugeth->p_rx_glbl_pram->rbdqptr)); + pr_info("mflr : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->mflr, + in_be16(&ugeth->p_rx_glbl_pram->mflr)); + pr_info("minflr : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->minflr, + in_be16(&ugeth->p_rx_glbl_pram->minflr)); + pr_info("maxd1 : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->maxd1, + in_be16(&ugeth->p_rx_glbl_pram->maxd1)); + pr_info("maxd2 : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->maxd2, + in_be16(&ugeth->p_rx_glbl_pram->maxd2)); + pr_info("ecamptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->ecamptr, + in_be32(&ugeth->p_rx_glbl_pram->ecamptr)); + pr_info("l2qt : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l2qt, + in_be32(&ugeth->p_rx_glbl_pram->l2qt)); + pr_info("l3qt[0] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[0], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[0])); + pr_info("l3qt[1] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[1], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[1])); + pr_info("l3qt[2] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[2], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[2])); + pr_info("l3qt[3] : addr - 0x%08x, val 
- 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[3], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[3])); + pr_info("l3qt[4] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[4], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[4])); + pr_info("l3qt[5] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[5], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[5])); + pr_info("l3qt[6] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[6], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[6])); + pr_info("l3qt[7] : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->l3qt[7], + in_be32(&ugeth->p_rx_glbl_pram->l3qt[7])); + pr_info("vlantype : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->vlantype, + in_be16(&ugeth->p_rx_glbl_pram->vlantype)); + pr_info("vlantci : addr - 0x%08x, val - 0x%04x\n", + (u32)&ugeth->p_rx_glbl_pram->vlantci, + in_be16(&ugeth->p_rx_glbl_pram->vlantci)); for (i = 0; i < 64; i++) - ugeth_info - ("addressfiltering[%d]: addr - 0x%08x, val - 0x%02x", - i, - (u32) & ugeth->p_rx_glbl_pram->addressfiltering[i], - ugeth->p_rx_glbl_pram->addressfiltering[i]); - ugeth_info("exfGlobalParam : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_glbl_pram->exfGlobalParam, - in_be32(&ugeth->p_rx_glbl_pram->exfGlobalParam)); + pr_info("addressfiltering[%d]: addr - 0x%08x, val - 0x%02x\n", + i, + (u32)&ugeth->p_rx_glbl_pram->addressfiltering[i], + ugeth->p_rx_glbl_pram->addressfiltering[i]); + pr_info("exfGlobalParam : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_glbl_pram->exfGlobalParam, + in_be32(&ugeth->p_rx_glbl_pram->exfGlobalParam)); } if (ugeth->p_send_q_mem_reg) { - ugeth_info("Send Q memory registers:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_send_q_mem_reg); + pr_info("Send Q memory registers:\n"); + pr_info("Base address: 0x%08x\n", (u32)ugeth->p_send_q_mem_reg); for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { - ugeth_info("SQQD[%d]:", i); - ugeth_info("Base address: 0x%08x", - (u32) 
& ugeth->p_send_q_mem_reg->sqqd[i]); + pr_info("SQQD[%d]:\n", i); + pr_info("Base address: 0x%08x\n", + (u32)&ugeth->p_send_q_mem_reg->sqqd[i]); mem_disp((u8 *) & ugeth->p_send_q_mem_reg->sqqd[i], sizeof(struct ucc_geth_send_queue_qd)); } } if (ugeth->p_scheduler) { - ugeth_info("Scheduler:"); - ugeth_info("Base address: 0x%08x", (u32) ugeth->p_scheduler); + pr_info("Scheduler:\n"); + pr_info("Base address: 0x%08x\n", (u32)ugeth->p_scheduler); mem_disp((u8 *) ugeth->p_scheduler, sizeof(*ugeth->p_scheduler)); } if (ugeth->p_tx_fw_statistics_pram) { - ugeth_info("TX FW statistics pram:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_tx_fw_statistics_pram); + pr_info("TX FW statistics pram:\n"); + pr_info("Base address: 0x%08x\n", + (u32)ugeth->p_tx_fw_statistics_pram); mem_disp((u8 *) ugeth->p_tx_fw_statistics_pram, sizeof(*ugeth->p_tx_fw_statistics_pram)); } if (ugeth->p_rx_fw_statistics_pram) { - ugeth_info("RX FW statistics pram:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_rx_fw_statistics_pram); + pr_info("RX FW statistics pram:\n"); + pr_info("Base address: 0x%08x\n", + (u32)ugeth->p_rx_fw_statistics_pram); mem_disp((u8 *) ugeth->p_rx_fw_statistics_pram, sizeof(*ugeth->p_rx_fw_statistics_pram)); } if (ugeth->p_rx_irq_coalescing_tbl) { - ugeth_info("RX IRQ coalescing tables:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_rx_irq_coalescing_tbl); + pr_info("RX IRQ coalescing tables:\n"); + pr_info("Base address: 0x%08x\n", + (u32)ugeth->p_rx_irq_coalescing_tbl); for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { - ugeth_info("RX IRQ coalescing table entry[%d]:", i); - ugeth_info("Base address: 0x%08x", - (u32) & ugeth->p_rx_irq_coalescing_tbl-> - coalescingentry[i]); - ugeth_info - ("interruptcoalescingmaxvalue: addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_irq_coalescing_tbl-> - coalescingentry[i].interruptcoalescingmaxvalue, - in_be32(&ugeth->p_rx_irq_coalescing_tbl-> - coalescingentry[i]. 
- interruptcoalescingmaxvalue)); - ugeth_info - ("interruptcoalescingcounter : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_irq_coalescing_tbl-> - coalescingentry[i].interruptcoalescingcounter, - in_be32(&ugeth->p_rx_irq_coalescing_tbl-> - coalescingentry[i]. - interruptcoalescingcounter)); + pr_info("RX IRQ coalescing table entry[%d]:\n", i); + pr_info("Base address: 0x%08x\n", + (u32)&ugeth->p_rx_irq_coalescing_tbl-> + coalescingentry[i]); + pr_info("interruptcoalescingmaxvalue: addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_irq_coalescing_tbl-> + coalescingentry[i].interruptcoalescingmaxvalue, + in_be32(&ugeth->p_rx_irq_coalescing_tbl-> + coalescingentry[i]. + interruptcoalescingmaxvalue)); + pr_info("interruptcoalescingcounter : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_irq_coalescing_tbl-> + coalescingentry[i].interruptcoalescingcounter, + in_be32(&ugeth->p_rx_irq_coalescing_tbl-> + coalescingentry[i]. + interruptcoalescingcounter)); } } if (ugeth->p_rx_bd_qs_tbl) { - ugeth_info("RX BD QS tables:"); - ugeth_info("Base address: 0x%08x", (u32) ugeth->p_rx_bd_qs_tbl); + pr_info("RX BD QS tables:\n"); + pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_bd_qs_tbl); for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { - ugeth_info("RX BD QS table[%d]:", i); - ugeth_info("Base address: 0x%08x", - (u32) & ugeth->p_rx_bd_qs_tbl[i]); - ugeth_info - ("bdbaseptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_bd_qs_tbl[i].bdbaseptr, - in_be32(&ugeth->p_rx_bd_qs_tbl[i].bdbaseptr)); - ugeth_info - ("bdptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_bd_qs_tbl[i].bdptr, - in_be32(&ugeth->p_rx_bd_qs_tbl[i].bdptr)); - ugeth_info - ("externalbdbaseptr: addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, - in_be32(&ugeth->p_rx_bd_qs_tbl[i]. 
- externalbdbaseptr)); - ugeth_info - ("externalbdptr : addr - 0x%08x, val - 0x%08x", - (u32) & ugeth->p_rx_bd_qs_tbl[i].externalbdptr, - in_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdptr)); - ugeth_info("ucode RX Prefetched BDs:"); - ugeth_info("Base address: 0x%08x", - (u32) - qe_muram_addr(in_be32 - (&ugeth->p_rx_bd_qs_tbl[i]. - bdbaseptr))); + pr_info("RX BD QS table[%d]:\n", i); + pr_info("Base address: 0x%08x\n", + (u32)&ugeth->p_rx_bd_qs_tbl[i]); + pr_info("bdbaseptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_bd_qs_tbl[i].bdbaseptr, + in_be32(&ugeth->p_rx_bd_qs_tbl[i].bdbaseptr)); + pr_info("bdptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_bd_qs_tbl[i].bdptr, + in_be32(&ugeth->p_rx_bd_qs_tbl[i].bdptr)); + pr_info("externalbdbaseptr: addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, + in_be32(&ugeth->p_rx_bd_qs_tbl[i]. + externalbdbaseptr)); + pr_info("externalbdptr : addr - 0x%08x, val - 0x%08x\n", + (u32)&ugeth->p_rx_bd_qs_tbl[i].externalbdptr, + in_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdptr)); + pr_info("ucode RX Prefetched BDs:\n"); + pr_info("Base address: 0x%08x\n", + (u32)qe_muram_addr(in_be32 + (&ugeth->p_rx_bd_qs_tbl[i]. + bdbaseptr))); mem_disp((u8 *) qe_muram_addr(in_be32 (&ugeth->p_rx_bd_qs_tbl[i]. 
@@ -1010,9 +996,9 @@ static void dump_regs(struct ucc_geth_private *ugeth) } if (ugeth->p_init_enet_param_shadow) { int size; - ugeth_info("Init enet param shadow:"); - ugeth_info("Base address: 0x%08x", - (u32) ugeth->p_init_enet_param_shadow); + pr_info("Init enet param shadow:\n"); + pr_info("Base address: 0x%08x\n", + (u32) ugeth->p_init_enet_param_shadow); mem_disp((u8 *) ugeth->p_init_enet_param_shadow, sizeof(*ugeth->p_init_enet_param_shadow)); @@ -1392,12 +1378,11 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) struct phy_device *tbiphy; if (!ug_info->tbi_node) - ugeth_warn("TBI mode requires that the device " - "tree specify a tbi-handle\n"); + pr_warn("TBI mode requires that the device tree specify a tbi-handle\n"); tbiphy = of_phy_find_device(ug_info->tbi_node); if (!tbiphy) - ugeth_warn("Could not get TBI device\n"); + pr_warn("Could not get TBI device\n"); value = phy_read(tbiphy, ENET_TBI_MII_CR); value &= ~0x1000; /* Turn off autonegotiation */ @@ -1409,8 +1394,7 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) ret_val = init_preamble_length(ug_info->prel, &ug_regs->maccfg2); if (ret_val != 0) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: Preamble length must be between 3 and 7 inclusive.", - __func__); + pr_err("Preamble length must be between 3 and 7 inclusive\n"); return ret_val; } @@ -1520,7 +1504,7 @@ static int ugeth_enable(struct ucc_geth_private *ugeth, enum comm_dir mode) /* check if the UCC number is in range. */ if (ugeth->ug_info->uf_info.ucc_num >= UCC_MAX_NUM) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: ucc_num out of range.", __func__); + pr_err("ucc_num out of range\n"); return -EINVAL; } @@ -1549,7 +1533,7 @@ static int ugeth_disable(struct ucc_geth_private *ugeth, enum comm_dir mode) /* check if the UCC number is in range. 
*/ if (ugeth->ug_info->uf_info.ucc_num >= UCC_MAX_NUM) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: ucc_num out of range.", __func__); + pr_err("ucc_num out of range\n"); return -EINVAL; } @@ -1648,7 +1632,7 @@ static void adjust_link(struct net_device *dev) break; default: if (netif_msg_link(ugeth)) - ugeth_warn( + pr_warn( "%s: Ack! Speed (%d) is not 10/100/1000!", dev->name, phydev->speed); break; @@ -2103,8 +2087,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) if (!((uf_info->bd_mem_part == MEM_PART_SYSTEM) || (uf_info->bd_mem_part == MEM_PART_MURAM))) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: Bad memory partition value.", - __func__); + pr_err("Bad memory partition value\n"); return -EINVAL; } @@ -2114,9 +2097,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) (ug_info->bdRingLenRx[i] % UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT)) { if (netif_msg_probe(ugeth)) - ugeth_err - ("%s: Rx BD ring length must be multiple of 4, no smaller than 8.", - __func__); + pr_err("Rx BD ring length must be multiple of 4, no smaller than 8\n"); return -EINVAL; } } @@ -2125,9 +2106,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) for (i = 0; i < ug_info->numQueuesTx; i++) { if (ug_info->bdRingLenTx[i] < UCC_GETH_TX_BD_RING_SIZE_MIN) { if (netif_msg_probe(ugeth)) - ugeth_err - ("%s: Tx BD ring length must be no smaller than 2.", - __func__); + pr_err("Tx BD ring length must be no smaller than 2\n"); return -EINVAL; } } @@ -2136,23 +2115,21 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) if ((uf_info->max_rx_buf_length == 0) || (uf_info->max_rx_buf_length % UCC_GETH_MRBLR_ALIGNMENT)) { if (netif_msg_probe(ugeth)) - ugeth_err - ("%s: max_rx_buf_length must be non-zero multiple of 128.", - __func__); + pr_err("max_rx_buf_length must be non-zero multiple of 128\n"); return -EINVAL; } /* num Tx queues */ if (ug_info->numQueuesTx > NUM_TX_QUEUES) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: number of tx queues too large.", 
__func__); + pr_err("number of tx queues too large\n"); return -EINVAL; } /* num Rx queues */ if (ug_info->numQueuesRx > NUM_RX_QUEUES) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: number of rx queues too large.", __func__); + pr_err("number of rx queues too large\n"); return -EINVAL; } @@ -2160,10 +2137,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) for (i = 0; i < UCC_GETH_VLAN_PRIORITY_MAX; i++) { if (ug_info->l2qt[i] >= ug_info->numQueuesRx) { if (netif_msg_probe(ugeth)) - ugeth_err - ("%s: VLAN priority table entry must not be" - " larger than number of Rx queues.", - __func__); + pr_err("VLAN priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; } } @@ -2172,18 +2146,14 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) for (i = 0; i < UCC_GETH_IP_PRIORITY_MAX; i++) { if (ug_info->l3qt[i] >= ug_info->numQueuesRx) { if (netif_msg_probe(ugeth)) - ugeth_err - ("%s: IP priority table entry must not be" - " larger than number of Rx queues.", - __func__); + pr_err("IP priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; } } if (ug_info->cam && !ug_info->ecamptr) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: If cam mode is chosen, must supply cam ptr.", - __func__); + pr_err("If cam mode is chosen, must supply cam ptr\n"); return -EINVAL; } @@ -2191,9 +2161,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) UCC_GETH_NUM_OF_STATION_ADDRESSES_1) && ug_info->rxExtendedFiltering) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: Number of station addresses greater than 1 " - "not allowed in extended parsing mode.", - __func__); + pr_err("Number of station addresses greater than 1 not allowed in extended parsing mode\n"); return -EINVAL; } @@ -2207,7 +2175,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* Initialize the general fast UCC block. 
*/ if (ucc_fast_init(uf_info, &ugeth->uccf)) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: Failed to init uccf.", __func__); + pr_err("Failed to init uccf\n"); return -ENOMEM; } @@ -2222,7 +2190,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) ugeth->ug_regs = ioremap(uf_info->regs, sizeof(*ugeth->ug_regs)); if (!ugeth->ug_regs) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: Failed to ioremap regs.", __func__); + pr_err("Failed to ioremap regs\n"); return -ENOMEM; } @@ -2273,9 +2241,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) } if (!ugeth->p_tx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate memory for Tx bd rings.", - __func__); + pr_err("Can not allocate memory for Tx bd rings\n"); return -ENOMEM; } /* Zero unused end of bd ring, according to spec */ @@ -2293,8 +2259,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) if (ugeth->tx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Could not allocate tx_skbuff", - __func__); + pr_err("Could not allocate tx_skbuff\n"); return -ENOMEM; } @@ -2353,9 +2318,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) } if (!ugeth->p_rx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate memory for Rx bd rings.", - __func__); + pr_err("Can not allocate memory for Rx bd rings\n"); return -ENOMEM; } } @@ -2369,8 +2332,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) if (ugeth->rx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Could not allocate rx_skbuff", - __func__); + pr_err("Could not allocate rx_skbuff\n"); return -ENOMEM; } @@ -2438,8 +2400,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) break; default: if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Bad number of Rx threads value.", - __func__); + pr_err("Bad number of Rx threads value\n"); return -EINVAL; break; } @@ -2462,8 +2423,7 @@ static int ucc_geth_startup(struct ucc_geth_private 
*ugeth) break; default: if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Bad number of Tx threads value.", - __func__); + pr_err("Bad number of Tx threads value\n"); return -EINVAL; break; } @@ -2512,8 +2472,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) &ug_regs->ipgifg); if (ret_val != 0) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: IPGIFG initialization parameter too large.", - __func__); + pr_err("IPGIFG initialization parameter too large\n"); return ret_val; } @@ -2529,8 +2488,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) &ug_regs->hafdup); if (ret_val != 0) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Half Duplex initialization parameter too large.", - __func__); + pr_err("Half Duplex initialization parameter too large\n"); return ret_val; } @@ -2567,9 +2525,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT); if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_tx_glbl_pram.", - __func__); + pr_err("Can not allocate DPRAM memory for p_tx_glbl_pram\n"); return -ENOMEM; } ugeth->p_tx_glbl_pram = @@ -2589,9 +2545,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_THREAD_DATA_ALIGNMENT); if (IS_ERR_VALUE(ugeth->thread_dat_tx_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_thread_data_tx.", - __func__); + pr_err("Can not allocate DPRAM memory for p_thread_data_tx\n"); return -ENOMEM; } @@ -2618,9 +2572,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT); if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_send_q_mem_reg.", - __func__); + pr_err("Can not allocate DPRAM memory for p_send_q_mem_reg\n"); return -ENOMEM; } @@ -2661,9 +2613,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) 
UCC_GETH_SCHEDULER_ALIGNMENT); if (IS_ERR_VALUE(ugeth->scheduler_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_scheduler.", - __func__); + pr_err("Can not allocate DPRAM memory for p_scheduler\n"); return -ENOMEM; } @@ -2710,10 +2660,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_TX_STATISTICS_ALIGNMENT); if (IS_ERR_VALUE(ugeth->tx_fw_statistics_pram_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_tx_fw_statistics_pram.", - __func__); + pr_err("Can not allocate DPRAM memory for p_tx_fw_statistics_pram\n"); return -ENOMEM; } ugeth->p_tx_fw_statistics_pram = @@ -2750,9 +2697,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_RX_GLOBAL_PRAM_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_glbl_pram_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_rx_glbl_pram.", - __func__); + pr_err("Can not allocate DPRAM memory for p_rx_glbl_pram\n"); return -ENOMEM; } ugeth->p_rx_glbl_pram = @@ -2771,9 +2716,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_THREAD_DATA_ALIGNMENT); if (IS_ERR_VALUE(ugeth->thread_dat_rx_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_thread_data_rx.", - __func__); + pr_err("Can not allocate DPRAM memory for p_thread_data_rx\n"); return -ENOMEM; } @@ -2794,9 +2737,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_RX_STATISTICS_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_fw_statistics_pram_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_rx_fw_statistics_pram.", __func__); + pr_err("Can not allocate DPRAM memory for p_rx_fw_statistics_pram\n"); return -ENOMEM; } ugeth->p_rx_fw_statistics_pram = @@ -2816,9 +2757,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) + 4, UCC_GETH_RX_INTERRUPT_COALESCING_ALIGNMENT); if 
(IS_ERR_VALUE(ugeth->rx_irq_coalescing_tbl_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_rx_irq_coalescing_tbl.", __func__); + pr_err("Can not allocate DPRAM memory for p_rx_irq_coalescing_tbl\n"); return -ENOMEM; } @@ -2884,9 +2823,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_RX_BD_QUEUES_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_bd_qs_tbl_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_rx_bd_qs_tbl.", - __func__); + pr_err("Can not allocate DPRAM memory for p_rx_bd_qs_tbl\n"); return -ENOMEM; } @@ -2961,8 +2898,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) if (ug_info->rxExtendedFiltering) { if (!ug_info->extendedFilteringChainPointer) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Null Extended Filtering Chain Pointer.", - __func__); + pr_err("Null Extended Filtering Chain Pointer\n"); return -EINVAL; } @@ -2973,9 +2909,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_RX_EXTENDED_FILTERING_GLOBAL_PARAMETERS_ALIGNMENT); if (IS_ERR_VALUE(ugeth->exf_glbl_param_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for" - " p_exf_glbl_param.", __func__); + pr_err("Can not allocate DPRAM memory for p_exf_glbl_param\n"); return -ENOMEM; } @@ -3020,9 +2954,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) if (!(ugeth->p_init_enet_param_shadow = kmalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate memory for" - " p_UccInitEnetParamShadows.", __func__); + pr_err("Can not allocate memory for p_UccInitEnetParamShadows\n"); return -ENOMEM; } /* Zero out *p_init_enet_param_shadow */ @@ -3055,8 +2987,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) (ug_info->largestexternallookupkeysize != QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES)) { if (netif_msg_ifup(ugeth)) - 
ugeth_err("%s: Invalid largest External Lookup Key Size.", - __func__); + pr_err("Invalid largest External Lookup Key Size\n"); return -EINVAL; } ugeth->p_init_enet_param_shadow->largestexternallookupkeysize = @@ -3081,8 +3012,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) , size, UCC_GETH_THREAD_RX_PRAM_ALIGNMENT, ug_info->riscRx, 1)) != 0) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Can not fill p_init_enet_param_shadow.", - __func__); + pr_err("Can not fill p_init_enet_param_shadow\n"); return ret_val; } @@ -3096,8 +3026,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) UCC_GETH_THREAD_TX_PRAM_ALIGNMENT, ug_info->riscTx, 0)) != 0) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Can not fill p_init_enet_param_shadow.", - __func__); + pr_err("Can not fill p_init_enet_param_shadow\n"); return ret_val; } @@ -3105,8 +3034,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) for (i = 0; i < ug_info->numQueuesRx; i++) { if ((ret_val = rx_bd_buffer_set(ugeth, (u8) i)) != 0) { if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Can not fill Rx bds with buffers.", - __func__); + pr_err("Can not fill Rx bds with buffers\n"); return ret_val; } } @@ -3115,9 +3043,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) init_enet_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_init_pram), 4); if (IS_ERR_VALUE(init_enet_pram_offset)) { if (netif_msg_ifup(ugeth)) - ugeth_err - ("%s: Can not allocate DPRAM memory for p_init_enet_pram.", - __func__); + pr_err("Can not allocate DPRAM memory for p_init_enet_pram\n"); return -ENOMEM; } p_init_enet_pram = @@ -3266,8 +3192,8 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit (!(bd_status & (R_F | R_L))) || (bd_status & R_ERRORS_FATAL)) { if (netif_msg_rx_err(ugeth)) - ugeth_err("%s, %d: ERROR!!! skb - 0x%08x", - __func__, __LINE__, (u32) skb); + pr_err("%d: ERROR!!! 
skb - 0x%08x\n", + __LINE__, (u32)skb); dev_kfree_skb(skb); ugeth->rx_skbuff[rxQ][ugeth->skb_currx[rxQ]] = NULL; @@ -3290,7 +3216,7 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit skb = get_new_skb(ugeth, bd); if (!skb) { if (netif_msg_rx_err(ugeth)) - ugeth_warn("%s: No Rx Data Buffer", __func__); + pr_warn("No Rx Data Buffer\n"); dev->stats.rx_dropped++; break; } @@ -3481,25 +3407,19 @@ static int ucc_geth_init_mac(struct ucc_geth_private *ugeth) err = ucc_struct_init(ugeth); if (err) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Cannot configure internal struct, " - "aborting.", dev->name); + netif_err(ugeth, ifup, dev, "Cannot configure internal struct, aborting\n"); goto err; } err = ucc_geth_startup(ugeth); if (err) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Cannot configure net device, aborting.", - dev->name); + netif_err(ugeth, ifup, dev, "Cannot configure net device, aborting\n"); goto err; } err = adjust_enet_interface(ugeth); if (err) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Cannot configure net device, aborting.", - dev->name); + netif_err(ugeth, ifup, dev, "Cannot configure net device, aborting\n"); goto err; } @@ -3516,8 +3436,7 @@ static int ucc_geth_init_mac(struct ucc_geth_private *ugeth) err = ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); if (err) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Cannot enable net device, aborting.", dev->name); + netif_err(ugeth, ifup, dev, "Cannot enable net device, aborting\n"); goto err; } @@ -3538,35 +3457,27 @@ static int ucc_geth_open(struct net_device *dev) /* Test station address */ if (dev->dev_addr[0] & ENET_GROUP_ADDR) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Multicast address used for station " - "address - is this what you wanted?", - __func__); + netif_err(ugeth, ifup, dev, + "Multicast address used for station address - is this what you wanted?\n"); return -EINVAL; } err = init_phy(dev); if (err) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: 
Cannot initialize PHY, aborting.", - dev->name); + netif_err(ugeth, ifup, dev, "Cannot initialize PHY, aborting\n"); return err; } err = ucc_geth_init_mac(ugeth); if (err) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Cannot initialize MAC, aborting.", - dev->name); + netif_err(ugeth, ifup, dev, "Cannot initialize MAC, aborting\n"); goto err; } err = request_irq(ugeth->ug_info->uf_info.irq, ucc_geth_irq_handler, 0, "UCC Geth", dev); if (err) { - if (netif_msg_ifup(ugeth)) - ugeth_err("%s: Cannot get IRQ for net device, aborting.", - dev->name); + netif_err(ugeth, ifup, dev, "Cannot get IRQ for net device, aborting\n"); goto err; } @@ -3704,8 +3615,7 @@ static int ucc_geth_resume(struct platform_device *ofdev) err = ucc_geth_init_mac(ugeth); if (err) { - ugeth_err("%s: Cannot initialize MAC, aborting.", - ndev->name); + netdev_err(ndev, "Cannot initialize MAC, aborting\n"); return err; } } @@ -3825,8 +3735,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) ug_info = &ugeth_info[ucc_num]; if (ug_info == NULL) { if (netif_msg_probe(&debug)) - ugeth_err("%s: [%d] Missing additional data!", - __func__, ucc_num); + pr_err("[%d] Missing additional data!\n", ucc_num); return -ENODEV; } @@ -3837,8 +3746,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) ug_info->uf_info.rx_clock = qe_clock_source(sprop); if ((ug_info->uf_info.rx_clock < QE_CLK_NONE) || (ug_info->uf_info.rx_clock > QE_CLK24)) { - printk(KERN_ERR - "ucc_geth: invalid rx-clock-name property\n"); + pr_err("invalid rx-clock-name property\n"); return -EINVAL; } } else { @@ -3846,13 +3754,11 @@ static int ucc_geth_probe(struct platform_device* ofdev) if (!prop) { /* If both rx-clock-name and rx-clock are missing, we want to tell people to use rx-clock-name. 
*/ - printk(KERN_ERR - "ucc_geth: missing rx-clock-name property\n"); + pr_err("missing rx-clock-name property\n"); return -EINVAL; } if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - printk(KERN_ERR - "ucc_geth: invalid rx-clock propperty\n"); + pr_err("invalid rx-clock propperty\n"); return -EINVAL; } ug_info->uf_info.rx_clock = *prop; @@ -3863,20 +3769,17 @@ static int ucc_geth_probe(struct platform_device* ofdev) ug_info->uf_info.tx_clock = qe_clock_source(sprop); if ((ug_info->uf_info.tx_clock < QE_CLK_NONE) || (ug_info->uf_info.tx_clock > QE_CLK24)) { - printk(KERN_ERR - "ucc_geth: invalid tx-clock-name property\n"); + pr_err("invalid tx-clock-name property\n"); return -EINVAL; } } else { prop = of_get_property(np, "tx-clock", NULL); if (!prop) { - printk(KERN_ERR - "ucc_geth: missing tx-clock-name property\n"); + pr_err("missing tx-clock-name property\n"); return -EINVAL; } if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - printk(KERN_ERR - "ucc_geth: invalid tx-clock property\n"); + pr_err("invalid tx-clock property\n"); return -EINVAL; } ug_info->uf_info.tx_clock = *prop; @@ -3949,7 +3852,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) } if (netif_msg_probe(&debug)) - printk(KERN_INFO "ucc_geth: UCC%1d at 0x%8x (irq = %d)\n", + pr_info("UCC%1d at 0x%8x (irq = %d)\n", ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs, ug_info->uf_info.irq); @@ -3988,8 +3891,8 @@ static int ucc_geth_probe(struct platform_device* ofdev) err = register_netdev(dev); if (err) { if (netif_msg_probe(ugeth)) - ugeth_err("%s: Cannot register net device, aborting.", - dev->name); + pr_err("%s: Cannot register net device, aborting\n", + dev->name); free_netdev(dev); return err; } @@ -4047,7 +3950,7 @@ static int __init ucc_geth_init(void) int i, ret; if (netif_msg_drv(&debug)) - printk(KERN_INFO "ucc_geth: " DRV_DESC "\n"); + pr_info(DRV_DESC "\n"); for (i = 0; i < 8; i++) memcpy(&(ugeth_info[i]), &ugeth_primary_info, sizeof(ugeth_primary_info)); diff --git 
a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 1ebf7128ec04..e79aaf9ae52a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -38,7 +38,7 @@ #include "ucc_geth.h" -static char hw_stat_gstrings[][ETH_GSTRING_LEN] = { +static const char hw_stat_gstrings[][ETH_GSTRING_LEN] = { "tx-64-frames", "tx-65-127-frames", "tx-128-255-frames", @@ -59,7 +59,7 @@ static char hw_stat_gstrings[][ETH_GSTRING_LEN] = { "rx-dropped-frames", }; -static char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { +static const char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { "tx-single-collision", "tx-multiple-collision", "tx-late-collsion", @@ -74,7 +74,7 @@ static char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { "tx-jumbo-frames", }; -static char rx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { +static const char rx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { "rx-crc-errors", "rx-alignment-errors", "rx-in-range-length-errors", @@ -160,8 +160,7 @@ uec_set_pauseparam(struct net_device *netdev, if (ugeth->phydev->autoneg) { if (netif_running(netdev)) { /* FIXME: automatically restart */ - printk(KERN_INFO - "Please re-open the interface.\n"); + netdev_info(netdev, "Please re-open the interface\n"); } } else { struct ucc_geth_info *ug_info = ugeth->ug_info; @@ -240,18 +239,18 @@ uec_set_ringparam(struct net_device *netdev, int queue = 0, ret = 0; if (ring->rx_pending < UCC_GETH_RX_BD_RING_SIZE_MIN) { - printk("%s: RxBD ring size must be no smaller than %d.\n", - netdev->name, UCC_GETH_RX_BD_RING_SIZE_MIN); + netdev_info(netdev, "RxBD ring size must be no smaller than %d\n", + UCC_GETH_RX_BD_RING_SIZE_MIN); return -EINVAL; } if (ring->rx_pending % UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT) { - printk("%s: RxBD ring size must be multiple of %d.\n", - netdev->name, UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT); + netdev_info(netdev, "RxBD ring size must be multiple of %d\n", + 
UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT); return -EINVAL; } if (ring->tx_pending < UCC_GETH_TX_BD_RING_SIZE_MIN) { - printk("%s: TxBD ring size must be no smaller than %d.\n", - netdev->name, UCC_GETH_TX_BD_RING_SIZE_MIN); + netdev_info(netdev, "TxBD ring size must be no smaller than %d\n", + UCC_GETH_TX_BD_RING_SIZE_MIN); return -EINVAL; } @@ -260,8 +259,7 @@ uec_set_ringparam(struct net_device *netdev, if (netif_running(netdev)) { /* FIXME: restart automatically */ - printk(KERN_INFO - "Please re-open the interface.\n"); + netdev_info(netdev, "Please re-open the interface\n"); } return ret; diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 2418faf2251a..84125707f321 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -1003,8 +1003,6 @@ static void fjn_rx(struct net_device *dev) } skb = netdev_alloc_skb(dev, pkt_len + 2); if (skb == NULL) { - netdev_notice(dev, "Memory squeeze, dropping packet (len %d)\n", - pkt_len); outb(F_SKP_PKT, ioaddr + RX_SKIP); dev->stats.rx_dropped++; break; diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 1c54e229e3cc..e38816145395 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -798,16 +798,14 @@ static inline int i596_rx(struct net_device *dev) #ifdef __mc68000__ cache_clear(virt_to_phys(newskb->data), PKT_BUF_SZ); #endif - } - else + } else { skb = netdev_alloc_skb(dev, pkt_len + 2); + } memory_squeeze: if (skb == NULL) { /* XXX tulip.c can defer packets here!! 
*/ - printk(KERN_WARNING "%s: i596_rx Memory squeeze, dropping packet.\n", dev->name); dev->stats.rx_dropped++; - } - else { + } else { if (!rx_in_place) { /* 16 byte align the data fields */ skb_reserve(skb, 2); diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f045ea4dc514..d653bac4cfc4 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -715,14 +715,12 @@ static inline int i596_rx(struct net_device *dev) rbd->v_data = newskb->data; rbd->b_data = SWAP32(dma_addr); DMA_WBACK_INV(dev, rbd, sizeof(struct i596_rbd)); - } else + } else { skb = netdev_alloc_skb_ip_align(dev, pkt_len); + } memory_squeeze: if (skb == NULL) { /* XXX tulip.c can defer packets here!! */ - printk(KERN_ERR - "%s: i596_rx Memory squeeze, dropping packet.\n", - dev->name); dev->stats.rx_dropped++; } else { if (!rx_in_place) { diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 328f47c92e26..90ea0b1673ca 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -402,7 +402,6 @@ static void ehea_refill_rq1(struct ehea_port_res *pr, int index, int nr_of_wqes) skb_arr_rq1[index] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); if (!skb_arr_rq1[index]) { - netdev_info(dev, "Unable to allocate enough skb in the array\n"); pr->rq1_skba.os_skbs = fill_wqes - i; break; } @@ -432,10 +431,8 @@ static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a) for (i = 0; i < nr_rq1a; i++) { skb_arr_rq1[i] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); - if (!skb_arr_rq1[i]) { - netdev_info(dev, "Not enough memory to allocate skb array\n"); + if (!skb_arr_rq1[i]) break; - } } /* Ring doorbell */ ehea_update_rq1a(pr->qp, i - 1); @@ -695,10 +692,8 @@ static int ehea_proc_rwqes(struct net_device *dev, skb = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); - if (!skb) { - netdev_err(dev, "Not enough memory to allocate skb\n"); + if 
(!skb) break; - } } skb_copy_to_linear_data(skb, ((char *)cqe) + 64, cqe->num_bytes_transfered - 4); @@ -730,7 +725,8 @@ static int ehea_proc_rwqes(struct net_device *dev, processed_bytes += skb->len; if (cqe->status & EHEA_CQE_VLAN_TAG_XTRACT) - __vlan_hwaccel_put_tag(skb, cqe->vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + cqe->vlan_tag); napi_gro_receive(&pr->napi, skb); } else { @@ -2115,7 +2111,7 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int ehea_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ehea_port *port = netdev_priv(dev); struct ehea_adapter *adapter = port->adapter; @@ -2153,7 +2149,7 @@ out: return err; } -static int ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int ehea_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ehea_port *port = netdev_priv(dev); struct ehea_adapter *adapter = port->adapter; @@ -3025,12 +3021,12 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, dev->netdev_ops = &ehea_netdev_ops; ehea_set_ethtool_ops(dev); - dev->hw_features = NETIF_F_SG | NETIF_F_TSO - | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO - | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX - | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER - | NETIF_F_RXCSUM; + dev->hw_features = NETIF_F_SG | NETIF_F_TSO | + NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_CTAG_TX; + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO | + NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM; dev->vlan_features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM; dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT; diff --git a/drivers/net/ethernet/ibm/emac/mal.c 
b/drivers/net/ethernet/ibm/emac/mal.c index 1f7ecf57181e..610ed223d1db 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -637,17 +637,12 @@ static int mal_probe(struct platform_device *ofdev) bd_size = sizeof(struct mal_descriptor) * (NUM_TX_BUFF * mal->num_tx_chans + NUM_RX_BUFF * mal->num_rx_chans); - mal->bd_virt = - dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma, - GFP_KERNEL); + mal->bd_virt = dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma, + GFP_KERNEL | __GFP_ZERO); if (mal->bd_virt == NULL) { - printk(KERN_ERR - "mal%d: out of memory allocating RX/TX descriptors!\n", - index); err = -ENOMEM; goto fail_unmap; } - memset(mal->bd_virt, 0, bd_size); for (i = 0; i < mal->num_tx_chans; ++i) set_mal_dcrn(mal, MAL_TXCTPR(i), mal->bd_dma + diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index c859771a9902..302d59401065 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -556,11 +556,9 @@ static int ibmveth_open(struct net_device *netdev) adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) * rxq_entries; adapter->rx_queue.queue_addr = - dma_alloc_coherent(dev, adapter->rx_queue.queue_len, - &adapter->rx_queue.queue_dma, GFP_KERNEL); - + dma_alloc_coherent(dev, adapter->rx_queue.queue_len, + &adapter->rx_queue.queue_dma, GFP_KERNEL); if (!adapter->rx_queue.queue_addr) { - netdev_err(netdev, "unable to allocate rx queue pages\n"); rc = -ENOMEM; goto err_out; } diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index ec800b093e7e..d2bea3f07c73 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -870,7 +870,7 @@ err_unlock: } static int e100_exec_cb(struct nic *nic, struct sk_buff *skb, - void (*cb_prepare)(struct nic *, struct cb *, struct sk_buff *)) + int (*cb_prepare)(struct nic *, struct cb *, struct sk_buff *)) { struct cb *cb; unsigned long flags; @@ 
-888,10 +888,13 @@ static int e100_exec_cb(struct nic *nic, struct sk_buff *skb, nic->cbs_avail--; cb->skb = skb; + err = cb_prepare(nic, cb, skb); + if (err) + goto err_unlock; + if (unlikely(!nic->cbs_avail)) err = -ENOSPC; - cb_prepare(nic, cb, skb); /* Order is important otherwise we'll be in a race with h/w: * set S-bit in current first, then clear S-bit in previous. */ @@ -1091,7 +1094,7 @@ static void e100_get_defaults(struct nic *nic) nic->mii.mdio_write = mdio_write; } -static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) +static int e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) { struct config *config = &cb->u.config; u8 *c = (u8 *)config; @@ -1181,6 +1184,7 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) netif_printk(nic, hw, KERN_DEBUG, nic->netdev, "[16-23]=%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n", c[16], c[17], c[18], c[19], c[20], c[21], c[22], c[23]); + return 0; } /************************************************************************* @@ -1331,7 +1335,7 @@ static const struct firmware *e100_request_firmware(struct nic *nic) return fw; } -static void e100_setup_ucode(struct nic *nic, struct cb *cb, +static int e100_setup_ucode(struct nic *nic, struct cb *cb, struct sk_buff *skb) { const struct firmware *fw = (void *)skb; @@ -1358,6 +1362,7 @@ static void e100_setup_ucode(struct nic *nic, struct cb *cb, cb->u.ucode[min_size] |= cpu_to_le32((BUNDLESMALL) ? 
0xFFFF : 0xFF80); cb->command = cpu_to_le16(cb_ucode | cb_el); + return 0; } static inline int e100_load_ucode_wait(struct nic *nic) @@ -1400,18 +1405,20 @@ static inline int e100_load_ucode_wait(struct nic *nic) return err; } -static void e100_setup_iaaddr(struct nic *nic, struct cb *cb, +static int e100_setup_iaaddr(struct nic *nic, struct cb *cb, struct sk_buff *skb) { cb->command = cpu_to_le16(cb_iaaddr); memcpy(cb->u.iaaddr, nic->netdev->dev_addr, ETH_ALEN); + return 0; } -static void e100_dump(struct nic *nic, struct cb *cb, struct sk_buff *skb) +static int e100_dump(struct nic *nic, struct cb *cb, struct sk_buff *skb) { cb->command = cpu_to_le16(cb_dump); cb->u.dump_buffer_addr = cpu_to_le32(nic->dma_addr + offsetof(struct mem, dump_buf)); + return 0; } static int e100_phy_check_without_mii(struct nic *nic) @@ -1581,7 +1588,7 @@ static int e100_hw_init(struct nic *nic) return 0; } -static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb) +static int e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb) { struct net_device *netdev = nic->netdev; struct netdev_hw_addr *ha; @@ -1596,6 +1603,7 @@ static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb) memcpy(&cb->u.multi.addr[i++ * ETH_ALEN], &ha->addr, ETH_ALEN); } + return 0; } static void e100_set_multicast_list(struct net_device *netdev) @@ -1756,11 +1764,18 @@ static void e100_watchdog(unsigned long data) round_jiffies(jiffies + E100_WATCHDOG_PERIOD)); } -static void e100_xmit_prepare(struct nic *nic, struct cb *cb, +static int e100_xmit_prepare(struct nic *nic, struct cb *cb, struct sk_buff *skb) { + dma_addr_t dma_addr; cb->command = nic->tx_command; + dma_addr = pci_map_single(nic->pdev, + skb->data, skb->len, PCI_DMA_TODEVICE); + /* If we can't map the skb, have the upper layer try later */ + if (pci_dma_mapping_error(nic->pdev, dma_addr)) + return -ENOMEM; + /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for * testing, ie sending 
frames with bad CRC. @@ -1777,11 +1792,10 @@ static void e100_xmit_prepare(struct nic *nic, struct cb *cb, cb->u.tcb.tcb_byte_count = 0; cb->u.tcb.threshold = nic->tx_threshold; cb->u.tcb.tbd_count = 1; - cb->u.tcb.tbd.buf_addr = cpu_to_le32(pci_map_single(nic->pdev, - skb->data, skb->len, PCI_DMA_TODEVICE)); - /* check for mapping failure? */ + cb->u.tcb.tbd.buf_addr = cpu_to_le32(dma_addr); cb->u.tcb.tbd.size = cpu_to_le16(skb->len); skb_tx_timestamp(skb); + return 0; } static netdev_tx_t e100_xmit_frame(struct sk_buff *skb, diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 43462d596a4e..82a967c95598 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1020,12 +1020,11 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) txdr->size = txdr->count * sizeof(struct e1000_tx_desc); txdr->size = ALIGN(txdr->size, 4096); txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (!txdr->desc) { ret_val = 2; goto err_nomem; } - memset(txdr->desc, 0, txdr->size); txdr->next_to_use = txdr->next_to_clean = 0; ew32(TDBAL, ((u64)txdr->dma & 0x00000000FFFFFFFF)); @@ -1053,6 +1052,10 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) txdr->buffer_info[i].dma = dma_map_single(&pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&pdev->dev, txdr->buffer_info[i].dma)) { + ret_val = 4; + goto err_nomem; + } tx_desc->buffer_addr = cpu_to_le64(txdr->buffer_info[i].dma); tx_desc->lower.data = cpu_to_le32(skb->len); tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP | @@ -1069,18 +1072,17 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) rxdr->buffer_info = kcalloc(rxdr->count, sizeof(struct e1000_buffer), GFP_KERNEL); if (!rxdr->buffer_info) { - ret_val = 4; + ret_val = 5; goto err_nomem; } rxdr->size = rxdr->count * 
sizeof(struct e1000_rx_desc); rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (!rxdr->desc) { - ret_val = 5; + ret_val = 6; goto err_nomem; } - memset(rxdr->desc, 0, rxdr->size); rxdr->next_to_use = rxdr->next_to_clean = 0; rctl = er32(RCTL); @@ -1101,7 +1103,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) skb = alloc_skb(E1000_RXBUFFER_2048 + NET_IP_ALIGN, GFP_KERNEL); if (!skb) { - ret_val = 6; + ret_val = 7; goto err_nomem; } skb_reserve(skb, NET_IP_ALIGN); @@ -1110,6 +1112,10 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) rxdr->buffer_info[i].dma = dma_map_single(&pdev->dev, skb->data, E1000_RXBUFFER_2048, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, rxdr->buffer_info[i].dma)) { + ret_val = 8; + goto err_nomem; + } rx_desc->buffer_addr = cpu_to_le64(rxdr->buffer_info[i].dma); memset(skb->data, 0x00, skb->len); } diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 8502c625dbef..59ad007dd5aa 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -166,8 +166,10 @@ static void e1000_vlan_mode(struct net_device *netdev, netdev_features_t features); static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter, bool filter_on); -static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid); -static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); +static int e1000_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid); +static int e1000_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid); static void e1000_restore_vlan(struct e1000_adapter *adapter); #ifdef CONFIG_PM @@ -333,7 +335,7 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) if (!test_bit(vid, adapter->active_vlans)) { if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) { - 
e1000_vlan_rx_add_vid(netdev, vid); + e1000_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid); adapter->mng_vlan_id = vid; } else { adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; @@ -341,7 +343,8 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid) && !test_bit(old_vid, adapter->active_vlans)) - e1000_vlan_rx_kill_vid(netdev, old_vid); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + old_vid); } else { adapter->mng_vlan_id = vid; } @@ -809,10 +812,10 @@ static netdev_features_t e1000_fix_features(struct net_device *netdev, /* Since there is no support for separate Rx/Tx vlan accel * enable/disable make sure Tx flag is always in same state as Rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -823,7 +826,7 @@ static int e1000_set_features(struct net_device *netdev, struct e1000_adapter *adapter = netdev_priv(netdev); netdev_features_t changed = features ^ netdev->features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) e1000_vlan_mode(netdev, features); if (!(changed & (NETIF_F_RXCSUM | NETIF_F_RXALL))) @@ -1058,9 +1061,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac_type >= e1000_82543) { netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_RX; - netdev->features = NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX; + netdev->features = NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_FILTER; } if ((hw->mac_type >= e1000_82544) && @@ -1457,7 +1460,8 @@ static int e1000_close(struct net_device *netdev) if ((hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) && !test_bit(adapter->mng_vlan_id, adapter->active_vlans)) { - e1000_vlan_rx_kill_vid(netdev, 
adapter->mng_vlan_id); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + adapter->mng_vlan_id); } return 0; @@ -1516,8 +1520,6 @@ static int e1000_setup_tx_resources(struct e1000_adapter *adapter, if (!txdr->desc) { setup_tx_desc_die: vfree(txdr->buffer_info); - e_err(probe, "Unable to allocate memory for the Tx descriptor " - "ring\n"); return -ENOMEM; } @@ -1707,10 +1709,7 @@ static int e1000_setup_rx_resources(struct e1000_adapter *adapter, rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma, GFP_KERNEL); - if (!rxdr->desc) { - e_err(probe, "Unable to allocate memory for the Rx descriptor " - "ring\n"); setup_rx_desc_die: vfree(rxdr->buffer_info); return -ENOMEM; @@ -1729,8 +1728,6 @@ setup_rx_desc_die: if (!rxdr->desc) { dma_free_coherent(&pdev->dev, rxdr->size, olddesc, olddma); - e_err(probe, "Unable to allocate memory for the Rx " - "descriptor ring\n"); goto setup_rx_desc_die; } @@ -4006,7 +4003,7 @@ static void e1000_receive_skb(struct e1000_adapter *adapter, u8 status, if (status & E1000_RXD_STAT_VP) { u16 vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } napi_gro_receive(&adapter->napi, skb); } @@ -4792,7 +4789,7 @@ static void __e1000_vlan_mode(struct e1000_adapter *adapter, u32 ctrl; ctrl = er32(CTRL); - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { /* enable VLAN tag insert/strip */ ctrl |= E1000_CTRL_VME; } else { @@ -4844,7 +4841,8 @@ static void e1000_vlan_mode(struct net_device *netdev, e1000_irq_enable(adapter); } -static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int e1000_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -4869,7 +4867,8 @@ static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int e1000_vlan_rx_kill_vid(struct 
net_device *netdev, u16 vid) +static int e1000_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -4903,7 +4902,7 @@ static void e1000_restore_vlan(struct e1000_adapter *adapter) e1000_vlan_filter_on_off(adapter, true); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - e1000_vlan_rx_add_vid(adapter->netdev, vid); + e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index e0991388664c..b71c8502a2b3 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -37,7 +37,9 @@ * "index + 5". */ static const u16 e1000_gg82563_cable_length_table[] = { - 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF }; + 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF +}; + #define GG82563_CABLE_LENGTH_TABLE_SIZE \ ARRAY_SIZE(e1000_gg82563_cable_length_table) @@ -116,7 +118,7 @@ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) nvm->type = e1000_nvm_eeprom_spi; size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> - E1000_EECD_SIZE_EX_SHIFT); + E1000_EECD_SIZE_EX_SHIFT); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. @@ -393,7 +395,7 @@ static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, * before the device has completed the "Page Select" MDI * transaction. So we wait 200us after each MDI command... */ - udelay(200); + usleep_range(200, 400); /* ...and verify the command was successful. 
*/ ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp); @@ -403,17 +405,17 @@ static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, return -E1000_ERR_PHY; } - udelay(200); + usleep_range(200, 400); ret_val = e1000e_read_phy_reg_mdic(hw, - MAX_PHY_REG_ADDRESS & offset, - data); + MAX_PHY_REG_ADDRESS & offset, + data); - udelay(200); + usleep_range(200, 400); } else { ret_val = e1000e_read_phy_reg_mdic(hw, - MAX_PHY_REG_ADDRESS & offset, - data); + MAX_PHY_REG_ADDRESS & offset, + data); } e1000_release_phy_80003es2lan(hw); @@ -462,7 +464,7 @@ static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, * before the device has completed the "Page Select" MDI * transaction. So we wait 200us after each MDI command... */ - udelay(200); + usleep_range(200, 400); /* ...and verify the command was successful. */ ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp); @@ -472,17 +474,17 @@ static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, return -E1000_ERR_PHY; } - udelay(200); + usleep_range(200, 400); ret_val = e1000e_write_phy_reg_mdic(hw, - MAX_PHY_REG_ADDRESS & offset, - data); + MAX_PHY_REG_ADDRESS & + offset, data); - udelay(200); + usleep_range(200, 400); } else { ret_val = e1000e_write_phy_reg_mdic(hw, - MAX_PHY_REG_ADDRESS & offset, - data); + MAX_PHY_REG_ADDRESS & + offset, data); } e1000_release_phy_80003es2lan(hw); @@ -580,7 +582,7 @@ static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) e_dbg("Waiting for forced speed/duplex link on GG82563 phy.\n"); ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); + 100000, &link); if (ret_val) return ret_val; @@ -595,7 +597,7 @@ static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) /* Try once more */ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); + 100000, &link); if (ret_val) return ret_val; } @@ -666,14 +668,12 @@ static s32 e1000_get_link_up_info_80003es2lan(struct 
e1000_hw *hw, u16 *speed, s32 ret_val; if (hw->phy.media_type == e1000_media_type_copper) { - ret_val = e1000e_get_speed_and_duplex_copper(hw, - speed, - duplex); + ret_val = e1000e_get_speed_and_duplex_copper(hw, speed, duplex); hw->phy.ops.cfg_on_link_up(hw); } else { ret_val = e1000e_get_speed_and_duplex_fiber_serdes(hw, - speed, - duplex); + speed, + duplex); } return ret_val; @@ -754,9 +754,9 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); + /* An error is not fatal and we should not stop init due to this */ if (ret_val) e_dbg("Error initializing identification LED\n"); - /* This is not fatal and we should not stop init due to this */ /* Disabling VLAN filtering */ e_dbg("Initializing the IEEE VLAN\n"); @@ -784,14 +784,14 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) /* Set the transmit descriptor write-back policy */ reg_data = er32(TXDCTL(0)); - reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | - E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; + reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | + E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); ew32(TXDCTL(0), reg_data); /* ...for both queues. 
*/ reg_data = er32(TXDCTL(1)); - reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | - E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; + reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | + E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); ew32(TXDCTL(1), reg_data); /* Enable retransmit on late collisions */ @@ -818,13 +818,12 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) /* default to true to enable the MDIC W/A */ hw->dev_spec.e80003es2lan.mdic_wa_enable = true; - ret_val = e1000_read_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET >> - E1000_KMRNCTRLSTA_OFFSET_SHIFT, - &i); + ret_val = + e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET >> + E1000_KMRNCTRLSTA_OFFSET_SHIFT, &i); if (!ret_val) { if ((i & E1000_KMRNCTRLSTA_OPMODE_MASK) == - E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO) + E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO) hw->dev_spec.e80003es2lan.mdic_wa_enable = false; } @@ -891,7 +890,7 @@ static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; - u32 ctrl_ext; + u32 reg; u16 data; ret_val = e1e_rphy(hw, GG82563_PHY_MAC_SPEC_CTRL, &data); @@ -954,22 +953,19 @@ static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) } /* Bypass Rx and Tx FIFO's */ - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL, - E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | - E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); + reg = E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL; + data = (E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | + E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); + ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data); if (ret_val) return ret_val; - ret_val = e1000_read_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, - &data); + reg = E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE; + ret_val = e1000_read_kmrn_reg_80003es2lan(hw, reg, &data); if (ret_val) return ret_val; data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE; - ret_val = 
e1000_write_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, - data); + ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data); if (ret_val) return ret_val; @@ -982,9 +978,9 @@ static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) if (ret_val) return ret_val; - ctrl_ext = er32(CTRL_EXT); - ctrl_ext &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); - ew32(CTRL_EXT, ctrl_ext); + reg = er32(CTRL_EXT); + reg &= ~E1000_CTRL_EXT_LINK_MODE_MASK; + ew32(CTRL_EXT, reg); ret_val = e1e_rphy(hw, GG82563_PHY_PWR_MGMT_CTRL, &data); if (ret_val) @@ -1049,27 +1045,29 @@ static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) * polling the phy; this fixes erroneous timeouts at 10Mbps. */ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 4), - 0xFFFF); + 0xFFFF); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), - &reg_data); + &reg_data); if (ret_val) return ret_val; reg_data |= 0x3F; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), - reg_data); + reg_data); if (ret_val) return ret_val; - ret_val = e1000_read_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, - &reg_data); + ret_val = + e1000_read_kmrn_reg_80003es2lan(hw, + E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, + &reg_data); if (ret_val) return ret_val; reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING; - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, - reg_data); + ret_val = + e1000_write_kmrn_reg_80003es2lan(hw, + E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, + reg_data); if (ret_val) return ret_val; @@ -1096,7 +1094,7 @@ static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw) if (hw->phy.media_type == e1000_media_type_copper) { ret_val = e1000e_get_speed_and_duplex_copper(hw, &speed, - &duplex); + &duplex); if (ret_val) return ret_val; @@ -1125,9 +1123,10 @@ static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex) u16 reg_data, reg_data2; reg_data =
E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT; - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, - reg_data); + ret_val = + e1000_write_kmrn_reg_80003es2lan(hw, + E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, + reg_data); if (ret_val) return ret_val; @@ -1171,9 +1170,10 @@ static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw) u32 i = 0; reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT; - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, - reg_data); + ret_val = + e1000_write_kmrn_reg_80003es2lan(hw, + E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, + reg_data); if (ret_val) return ret_val; @@ -1220,7 +1220,7 @@ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, return ret_val; kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; + E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ew32(KMRNCTRLSTA, kmrnctrlsta); e1e_flush(); @@ -1255,7 +1255,7 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, return ret_val; kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | data; + E1000_KMRNCTRLSTA_OFFSET) | data; ew32(KMRNCTRLSTA, kmrnctrlsta); e1e_flush(); @@ -1419,4 +1419,3 @@ const struct e1000_info e1000_es2_info = { .phy_ops = &es2_phy_ops, .nvm_ops = &es2_nvm_ops, }; - diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 2faffbde179e..7380442a3829 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -184,7 +184,7 @@ static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) default: nvm->type = e1000_nvm_eeprom_spi; size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> - E1000_EECD_SIZE_EX_SHIFT); + E1000_EECD_SIZE_EX_SHIFT); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. 
*/ @@ -437,7 +437,7 @@ static s32 e1000_get_phy_id_82571(struct e1000_hw *hw) return ret_val; phy->id = (u32)(phy_id << 16); - udelay(20); + usleep_range(20, 40); ret_val = e1e_rphy(hw, MII_PHYSID2, &phy_id); if (ret_val) return ret_val; @@ -482,7 +482,7 @@ static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) if (!(swsm & E1000_SWSM_SMBI)) break; - udelay(50); + usleep_range(50, 100); i++; } @@ -499,7 +499,7 @@ static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) if (er32(SWSM) & E1000_SWSM_SWESMBI) break; - udelay(50); + usleep_range(50, 100); } if (i == fw_timeout) { @@ -526,6 +526,7 @@ static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ew32(SWSM, swsm); } + /** * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore * @hw: pointer to the HW structure @@ -846,9 +847,9 @@ static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, } for (i = 0; i < words; i++) { - eewr = (data[i] << E1000_NVM_RW_REG_DATA) | - ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | - E1000_NVM_RW_REG_START; + eewr = ((data[i] << E1000_NVM_RW_REG_DATA) | + ((offset + i) << E1000_NVM_RW_ADDR_SHIFT) | + E1000_NVM_RW_REG_START); ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); if (ret_val) @@ -875,8 +876,7 @@ static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw) s32 timeout = PHY_CFG_TIMEOUT; while (timeout) { - if (er32(EEMNGCTL) & - E1000_NVM_CFG_DONE_PORT_0) + if (er32(EEMNGCTL) & E1000_NVM_CFG_DONE_PORT_0) break; usleep_range(1000, 2000); timeout--; @@ -1022,7 +1022,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) } if (hw->nvm.type == e1000_nvm_flash_hw) { - udelay(10); + usleep_range(10, 20); ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_EE_RST; ew32(CTRL_EXT, ctrl_ext); @@ -1095,9 +1095,9 @@ static s32 e1000_init_hw_82571(struct e1000_hw *hw) /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); + /* An error is not fatal and we should not stop init 
due to this */ if (ret_val) e_dbg("Error initializing identification LED\n"); - /* This is not fatal and we should not stop init due to this */ /* Disabling VLAN filtering */ e_dbg("Initializing the IEEE VLAN\n"); @@ -1122,9 +1122,8 @@ static s32 e1000_init_hw_82571(struct e1000_hw *hw) /* Set the transmit descriptor write-back policy */ reg_data = er32(TXDCTL(0)); - reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | - E1000_TXDCTL_FULL_TX_DESC_WB | - E1000_TXDCTL_COUNT_DESC; + reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | + E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); ew32(TXDCTL(0), reg_data); /* ...for both queues. */ @@ -1140,9 +1139,9 @@ static s32 e1000_init_hw_82571(struct e1000_hw *hw) break; default: reg_data = er32(TXDCTL(1)); - reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | - E1000_TXDCTL_FULL_TX_DESC_WB | - E1000_TXDCTL_COUNT_DESC; + reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | + E1000_TXDCTL_FULL_TX_DESC_WB | + E1000_TXDCTL_COUNT_DESC); ew32(TXDCTL(1), reg_data); break; } @@ -1530,7 +1529,7 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) status = er32(STATUS); er32(RXCW); /* SYNCH bit and IV bit are sticky */ - udelay(10); + usleep_range(10, 20); rxcw = er32(RXCW); if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { @@ -1633,7 +1632,7 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) * the IV bit and restart Autoneg */ for (i = 0; i < AN_RETRY_COUNT; i++) { - udelay(10); + usleep_range(10, 20); rxcw = er32(RXCW); if ((rxcw & E1000_RXCW_SYNCH) && (rxcw & E1000_RXCW_C)) @@ -2066,4 +2065,3 @@ const struct e1000_info e1000_82583_info = { .phy_ops = &e82_phy_ops_bm, .nvm_ops = &e82571_nvm_ops, }; - diff --git a/drivers/net/ethernet/intel/e1000e/82571.h b/drivers/net/ethernet/intel/e1000e/82571.h index 85cb1a3b7cd4..08e24dc3dc0e 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.h +++ b/drivers/net/ethernet/intel/e1000e/82571.h @@ -44,6 +44,8 @@ #define E1000_EIAC_82574 0x000DC /* Ext. 
Interrupt Auto Clear - RW */ #define E1000_EIAC_MASK_82574 0x01F00000 +#define E1000_IVAR_INT_ALLOC_VALID 0x8 + /* Manageability Operation Mode mask */ #define E1000_NVM_INIT_CTRL2_MNGM 0x6000 diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index fc3a4fe1ac71..351c94a0cf74 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -66,7 +66,7 @@ #define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 #define E1000_CTRL_EXT_EIAME 0x01000000 #define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ -#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ +#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ #define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ #define E1000_CTRL_EXT_LSECCK 0x00001000 #define E1000_CTRL_EXT_PHYPDEN 0x00100000 @@ -216,6 +216,8 @@ #define E1000_CTRL_MEHE 0x00080000 /* Memory Error Handling Enable */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ +#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */ #define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ #define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ @@ -234,17 +236,17 @@ #define E1000_STATUS_FUNC_SHIFT 2 #define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ #define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define E1000_STATUS_SPEED_MASK 0x000000C0 #define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ #define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ #define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ #define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by NVM */ #define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ -#define 
E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */ +#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master Req status */ #define HALF_DUPLEX 1 #define FULL_DUPLEX 2 - #define ADVERTISE_10_HALF 0x0001 #define ADVERTISE_10_FULL 0x0002 #define ADVERTISE_100_HALF 0x0004 @@ -311,6 +313,7 @@ /* SerDes Control */ #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 +#define E1000_SCTL_ENABLE_SERDES_LOOPBACK 0x0410 /* Receive Checksum Control */ #define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ @@ -400,7 +403,8 @@ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ -#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ +/* If this bit asserted, the driver should claim the interrupt */ +#define E1000_ICR_INT_ASSERTED 0x80000000 #define E1000_ICR_RXQ0 0x00100000 /* Rx Queue 0 Interrupt */ #define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ #define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ @@ -583,13 +587,13 @@ #define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ #define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) -#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write registers */ -#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ -#define E1000_NVM_RW_REG_START 1 /* Start operation */ -#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ -#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ -#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ -#define E1000_FLASH_UPDATES 2000 +#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM r/w regs */ +#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ +#define E1000_NVM_RW_REG_START 1 /* Start operation */ +#define 
E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ +#define E1000_NVM_POLL_WRITE 1 /* Flag for polling write complete */ +#define E1000_NVM_POLL_READ 0 /* Flag for polling read complete */ +#define E1000_FLASH_UPDATES 2000 /* NVM Word Offsets */ #define NVM_COMPAT 0x0003 @@ -785,6 +789,7 @@ GG82563_REG(194, 18) /* Inband Control */ /* MDI Control */ +#define E1000_MDIC_REG_MASK 0x001F0000 #define E1000_MDIC_REG_SHIFT 16 #define E1000_MDIC_PHY_SHIFT 21 #define E1000_MDIC_OP_WRITE 0x04000000 diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index fcc758138b8a..82f1c84282db 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -46,6 +46,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/ptp_classify.h> #include <linux/mii.h> +#include <linux/mdio.h> #include "hw.h" struct e1000_info; @@ -61,7 +62,6 @@ struct e1000_info; #define e_notice(format, arg...) \ netdev_notice(adapter->netdev, format, ## arg) - /* Interrupt modes, as used by the IntMode parameter */ #define E1000E_INT_MODE_LEGACY 0 #define E1000E_INT_MODE_MSI 1 @@ -239,9 +239,8 @@ struct e1000_adapter { u16 tx_itr; u16 rx_itr; - /* Tx */ - struct e1000_ring *tx_ring /* One per active queue */ - ____cacheline_aligned_in_smp; + /* Tx - one ring per active queue */ + struct e1000_ring *tx_ring ____cacheline_aligned_in_smp; u32 tx_fifo_limit; struct napi_struct napi; @@ -352,6 +351,8 @@ struct e1000_adapter { struct timecounter tc; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; + + u16 eee_advert; }; struct e1000_info { @@ -487,8 +488,8 @@ extern int e1000e_setup_tx_resources(struct e1000_ring *ring); extern void e1000e_free_rx_resources(struct e1000_ring *ring); extern void e1000e_free_tx_resources(struct e1000_ring *ring); extern struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 - *stats); + struct rtnl_link_stats64 + *stats); 
extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter); extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter); extern void e1000e_get_hw_control(struct e1000_adapter *adapter); @@ -558,12 +559,14 @@ static inline s32 e1000e_update_nvm_checksum(struct e1000_hw *hw) return hw->nvm.ops.update(hw); } -static inline s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +static inline s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) { return hw->nvm.ops.read(hw, offset, words, data); } -static inline s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +static inline s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) { return hw->nvm.ops.write(hw, offset, words, data); } @@ -597,7 +600,7 @@ static inline s32 __ew32_prepare(struct e1000_hw *hw) s32 i = E1000_ICH_FWSM_PCIM2PCI_COUNT; while ((er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) && --i) - udelay(50); + usleep_range(50, 100); return i; } diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 2c1813737f6d..7c8ca658d553 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -35,11 +35,11 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/vmalloc.h> -#include <linux/mdio.h> +#include <linux/pm_runtime.h> #include "e1000.h" -enum {NETDEV_STATS, E1000_STATS}; +enum { NETDEV_STATS, E1000_STATS }; struct e1000_stats { char stat_string[ETH_GSTRING_LEN]; @@ -120,6 +120,7 @@ static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = { "Interrupt test (offline)", "Loopback test (offline)", "Link test (on/offline)" }; + #define E1000_TEST_LEN ARRAY_SIZE(e1000_gstrings_test) static int e1000_get_settings(struct net_device *netdev, @@ -196,8 +197,7 @@ static int e1000_get_settings(struct net_device *netdev, /* MDI-X => 2; MDI =>1; Invalid =>0 */ if ((hw->phy.media_type == 
e1000_media_type_copper) && netif_carrier_ok(netdev)) - ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : - ETH_TP_MDI; + ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : ETH_TP_MDI; else ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; @@ -223,8 +223,7 @@ static int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) /* Fiber NICs only allow 1000 gbps Full duplex */ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) && - spd != SPEED_1000 && - dplx != DUPLEX_FULL) { + (spd != SPEED_1000) && (dplx != DUPLEX_FULL)) { goto err_inval; } @@ -297,12 +296,10 @@ static int e1000_set_settings(struct net_device *netdev, hw->mac.autoneg = 1; if (hw->phy.media_type == e1000_media_type_fiber) hw->phy.autoneg_advertised = ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE | - ADVERTISED_Autoneg; + ADVERTISED_FIBRE | ADVERTISED_Autoneg; else hw->phy.autoneg_advertised = ecmd->advertising | - ADVERTISED_TP | - ADVERTISED_Autoneg; + ADVERTISED_TP | ADVERTISED_Autoneg; ecmd->advertising = hw->phy.autoneg_advertised; if (adapter->fc_autoneg) hw->fc.requested_mode = e1000_fc_default; @@ -345,7 +342,7 @@ static void e1000_get_pauseparam(struct net_device *netdev, struct e1000_hw *hw = &adapter->hw; pause->autoneg = - (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE); + (adapter->fc_autoneg ? 
AUTONEG_ENABLE : AUTONEG_DISABLE); if (hw->fc.current_mode == e1000_fc_rx_pause) { pause->rx_pause = 1; @@ -434,7 +431,7 @@ static void e1000_get_regs(struct net_device *netdev, memset(p, 0, E1000_REGS_LEN * sizeof(u32)); regs->version = (1 << 24) | (adapter->pdev->revision << 16) | - adapter->pdev->device; + adapter->pdev->device; regs_buff[0] = er32(CTRL); regs_buff[1] = er32(STATUS); @@ -502,8 +499,8 @@ static int e1000_get_eeprom(struct net_device *netdev, first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; - eeprom_buff = kmalloc(sizeof(u16) * - (last_word - first_word + 1), GFP_KERNEL); + eeprom_buff = kmalloc(sizeof(u16) * (last_word - first_word + 1), + GFP_KERNEL); if (!eeprom_buff) return -ENOMEM; @@ -514,7 +511,7 @@ static int e1000_get_eeprom(struct net_device *netdev, } else { for (i = 0; i < last_word - first_word + 1; i++) { ret_val = e1000_read_nvm(hw, first_word + i, 1, - &eeprom_buff[i]); + &eeprom_buff[i]); if (ret_val) break; } @@ -552,7 +549,8 @@ static int e1000_set_eeprom(struct net_device *netdev, if (eeprom->len == 0) return -EOPNOTSUPP; - if (eeprom->magic != (adapter->pdev->vendor | (adapter->pdev->device << 16))) + if (eeprom->magic != + (adapter->pdev->vendor | (adapter->pdev->device << 16))) return -EFAULT; if (adapter->flags & FLAG_READ_ONLY_NVM) @@ -578,7 +576,7 @@ static int e1000_set_eeprom(struct net_device *netdev, /* need read/modify/write of last changed EEPROM word */ /* only the first byte of the word is being modified */ ret_val = e1000_read_nvm(hw, last_word, 1, - &eeprom_buff[last_word - first_word]); + &eeprom_buff[last_word - first_word]); if (ret_val) goto out; @@ -617,8 +615,7 @@ static void e1000_get_drvinfo(struct net_device *netdev, { struct e1000_adapter *adapter = netdev_priv(netdev); - strlcpy(drvinfo->driver, e1000e_driver_name, - sizeof(drvinfo->driver)); + strlcpy(drvinfo->driver, e1000e_driver_name, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, e1000e_driver_version, 
sizeof(drvinfo->version)); @@ -626,10 +623,10 @@ static void e1000_get_drvinfo(struct net_device *netdev, * PCI-E controllers */ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d-%d", - (adapter->eeprom_vers & 0xF000) >> 12, - (adapter->eeprom_vers & 0x0FF0) >> 4, - (adapter->eeprom_vers & 0x000F)); + "%d.%d-%d", + (adapter->eeprom_vers & 0xF000) >> 12, + (adapter->eeprom_vers & 0x0FF0) >> 4, + (adapter->eeprom_vers & 0x000F)); strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); @@ -755,7 +752,8 @@ static bool reg_pattern_test(struct e1000_adapter *adapter, u64 *data, { u32 pat, val; static const u32 test[] = { - 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; + 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF + }; for (pat = 0; pat < ARRAY_SIZE(test); pat++) { E1000_WRITE_REG_ARRAY(&adapter->hw, reg, offset, (test[pat] & write)); @@ -785,6 +783,7 @@ static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data, } return 0; } + #define REG_PATTERN_TEST_ARRAY(reg, offset, mask, write) \ do { \ if (reg_pattern_test(adapter, data, reg, offset, mask, write)) \ @@ -812,16 +811,16 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) u32 wlock_mac = 0; /* The status register is Read Only, so a write should fail. - * Some bits that get toggled are ignored. + * Some bits that get toggled are ignored. There are several bits + * on newer hardware that are r/w. 
*/ switch (mac->type) { - /* there are several bits on newer hardware that are r/w */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: toggle = 0x7FFFF3FF; break; - default: + default: toggle = 0x7FFFF033; break; } @@ -927,7 +926,7 @@ static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data) } /* If Checksum is not Correct return error else test passed */ - if ((checksum != (u16) NVM_SUM) && !(*data)) + if ((checksum != (u16)NVM_SUM) && !(*data)) *data = 2; return *data; @@ -935,7 +934,7 @@ static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data) static irqreturn_t e1000_test_intr(int __always_unused irq, void *data) { - struct net_device *netdev = (struct net_device *) data; + struct net_device *netdev = (struct net_device *)data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -968,8 +967,8 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) if (!request_irq(irq, e1000_test_intr, IRQF_PROBE_SHARED, netdev->name, netdev)) { shared_int = 0; - } else if (request_irq(irq, e1000_test_intr, IRQF_SHARED, - netdev->name, netdev)) { + } else if (request_irq(irq, e1000_test_intr, IRQF_SHARED, netdev->name, + netdev)) { *data = 1; ret_val = -1; goto out; @@ -1079,28 +1078,33 @@ static void e1000_free_desc_rings(struct e1000_adapter *adapter) struct e1000_ring *tx_ring = &adapter->test_tx_ring; struct e1000_ring *rx_ring = &adapter->test_rx_ring; struct pci_dev *pdev = adapter->pdev; + struct e1000_buffer *buffer_info; int i; if (tx_ring->desc && tx_ring->buffer_info) { for (i = 0; i < tx_ring->count; i++) { - if (tx_ring->buffer_info[i].dma) + buffer_info = &tx_ring->buffer_info[i]; + + if (buffer_info->dma) dma_unmap_single(&pdev->dev, - tx_ring->buffer_info[i].dma, - tx_ring->buffer_info[i].length, - DMA_TO_DEVICE); - if (tx_ring->buffer_info[i].skb) - dev_kfree_skb(tx_ring->buffer_info[i].skb); + buffer_info->dma, + buffer_info->length, + DMA_TO_DEVICE); + if 
(buffer_info->skb) + dev_kfree_skb(buffer_info->skb); } } if (rx_ring->desc && rx_ring->buffer_info) { for (i = 0; i < rx_ring->count; i++) { - if (rx_ring->buffer_info[i].dma) + buffer_info = &rx_ring->buffer_info[i]; + + if (buffer_info->dma) dma_unmap_single(&pdev->dev, - rx_ring->buffer_info[i].dma, - 2048, DMA_FROM_DEVICE); - if (rx_ring->buffer_info[i].skb) - dev_kfree_skb(rx_ring->buffer_info[i].skb); + buffer_info->dma, + 2048, DMA_FROM_DEVICE); + if (buffer_info->skb) + dev_kfree_skb(buffer_info->skb); } } @@ -1137,8 +1141,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) tx_ring->count = E1000_DEFAULT_TXD; tx_ring->buffer_info = kcalloc(tx_ring->count, - sizeof(struct e1000_buffer), - GFP_KERNEL); + sizeof(struct e1000_buffer), GFP_KERNEL); if (!tx_ring->buffer_info) { ret_val = 1; goto err_nomem; @@ -1155,8 +1158,8 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - ew32(TDBAL(0), ((u64) tx_ring->dma & 0x00000000FFFFFFFF)); - ew32(TDBAH(0), ((u64) tx_ring->dma >> 32)); + ew32(TDBAL(0), ((u64)tx_ring->dma & 0x00000000FFFFFFFF)); + ew32(TDBAH(0), ((u64)tx_ring->dma >> 32)); ew32(TDLEN(0), tx_ring->count * sizeof(struct e1000_tx_desc)); ew32(TDH(0), 0); ew32(TDT(0), 0); @@ -1178,8 +1181,8 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[i].length = skb->len; tx_ring->buffer_info[i].dma = - dma_map_single(&pdev->dev, skb->data, skb->len, - DMA_TO_DEVICE); + dma_map_single(&pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); if (dma_mapping_error(&pdev->dev, tx_ring->buffer_info[i].dma)) { ret_val = 4; @@ -1199,8 +1202,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) rx_ring->count = E1000_DEFAULT_RXD; rx_ring->buffer_info = kcalloc(rx_ring->count, - sizeof(struct e1000_buffer), - GFP_KERNEL); + sizeof(struct e1000_buffer), GFP_KERNEL); if (!rx_ring->buffer_info) { ret_val = 
5; goto err_nomem; @@ -1219,16 +1221,16 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) rctl = er32(RCTL); if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ew32(RCTL, rctl & ~E1000_RCTL_EN); - ew32(RDBAL(0), ((u64) rx_ring->dma & 0xFFFFFFFF)); - ew32(RDBAH(0), ((u64) rx_ring->dma >> 32)); + ew32(RDBAL(0), ((u64)rx_ring->dma & 0xFFFFFFFF)); + ew32(RDBAH(0), ((u64)rx_ring->dma >> 32)); ew32(RDLEN(0), rx_ring->size); ew32(RDH(0), 0); ew32(RDT(0), 0); rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048 | - E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_LPE | - E1000_RCTL_SBP | E1000_RCTL_SECRC | - E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | - (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_LPE | + E1000_RCTL_SBP | E1000_RCTL_SECRC | + E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | + (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ew32(RCTL, rctl); for (i = 0; i < rx_ring->count; i++) { @@ -1243,8 +1245,8 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) skb_reserve(skb, NET_IP_ALIGN); rx_ring->buffer_info[i].skb = skb; rx_ring->buffer_info[i].dma = - dma_map_single(&pdev->dev, skb->data, 2048, - DMA_FROM_DEVICE); + dma_map_single(&pdev->dev, skb->data, 2048, + DMA_FROM_DEVICE); if (dma_mapping_error(&pdev->dev, rx_ring->buffer_info[i].dma)) { ret_val = 8; @@ -1295,7 +1297,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) ew32(CTRL, ctrl_reg); e1e_flush(); - udelay(500); + usleep_range(500, 1000); return 0; } @@ -1321,7 +1323,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) e1e_wphy(hw, PHY_REG(2, 21), phy_reg); /* Assert SW reset for above settings to take effect */ hw->phy.ops.commit(hw); - mdelay(1); + usleep_range(1000, 2000); /* Force Full Duplex */ e1e_rphy(hw, PHY_REG(769, 16), &phy_reg); e1e_wphy(hw, PHY_REG(769, 16), phy_reg | 0x000C); @@ -1362,7 +1364,7 @@ static int e1000_integrated_phy_loopback(struct 
e1000_adapter *adapter) /* force 1000, set loopback */ e1e_wphy(hw, MII_BMCR, 0x4140); - mdelay(250); + msleep(250); /* Now set up the MAC to the same speed/duplex as the PHY. */ ctrl_reg = er32(CTRL); @@ -1394,7 +1396,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) if (hw->phy.type == e1000_phy_m88) e1000_phy_disable_receiver(adapter); - udelay(500); + usleep_range(500, 1000); return 0; } @@ -1430,8 +1432,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) /* special write to serdes control register to enable SerDes analog * loopback */ -#define E1000_SERDES_LB_ON 0x410 - ew32(SCTL, E1000_SERDES_LB_ON); + ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK); e1e_flush(); usleep_range(10000, 20000); @@ -1525,8 +1526,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) case e1000_82572: if (hw->phy.media_type == e1000_media_type_fiber || hw->phy.media_type == e1000_media_type_internal_serdes) { -#define E1000_SERDES_LB_OFF 0x400 - ew32(SCTL, E1000_SERDES_LB_OFF); + ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); e1e_flush(); usleep_range(10000, 20000); break; @@ -1563,7 +1563,7 @@ static int e1000_check_lbtest_frame(struct sk_buff *skb, frame_size &= ~1; if (*(skb->data + 3) == 0xFF) if ((*(skb->data + frame_size / 2 + 10) == 0xBE) && - (*(skb->data + frame_size / 2 + 12) == 0xAF)) + (*(skb->data + frame_size / 2 + 12) == 0xAF)) return 0; return 13; } @@ -1574,6 +1574,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) struct e1000_ring *rx_ring = &adapter->test_rx_ring; struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; + struct e1000_buffer *buffer_info; int i, j, k, l; int lc; int good_cnt; @@ -1594,14 +1595,17 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) k = 0; l = 0; - for (j = 0; j <= lc; j++) { /* loop count loop */ - for (i = 0; i < 64; i++) { /* send the packets */ - e1000_create_lbtest_frame(tx_ring->buffer_info[k].skb, - 
1024); + /* loop count loop */ + for (j = 0; j <= lc; j++) { + /* send the packets */ + for (i = 0; i < 64; i++) { + buffer_info = &tx_ring->buffer_info[k]; + + e1000_create_lbtest_frame(buffer_info->skb, 1024); dma_sync_single_for_device(&pdev->dev, - tx_ring->buffer_info[k].dma, - tx_ring->buffer_info[k].length, - DMA_TO_DEVICE); + buffer_info->dma, + buffer_info->length, + DMA_TO_DEVICE); k++; if (k == tx_ring->count) k = 0; @@ -1611,13 +1615,16 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) msleep(200); time = jiffies; /* set the start time for the receive */ good_cnt = 0; - do { /* receive the sent packets */ + /* receive the sent packets */ + do { + buffer_info = &rx_ring->buffer_info[l]; + dma_sync_single_for_cpu(&pdev->dev, - rx_ring->buffer_info[l].dma, 2048, - DMA_FROM_DEVICE); + buffer_info->dma, 2048, + DMA_FROM_DEVICE); - ret_val = e1000_check_lbtest_frame( - rx_ring->buffer_info[l].skb, 1024); + ret_val = e1000_check_lbtest_frame(buffer_info->skb, + 1024); if (!ret_val) good_cnt++; l++; @@ -1636,7 +1643,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) ret_val = 14; /* error code for time out error */ break; } - } /* end loop count loop */ + } return ret_val; } @@ -1695,7 +1702,7 @@ static int e1000_link_test(struct e1000_adapter *adapter, u64 *data) /* On some Phy/switch combinations, link establishment * can take a few seconds more than expected. 
*/ - msleep(5000); + msleep_interruptible(5000); if (!(er32(STATUS) & E1000_STATUS_LU)) *data = 1; @@ -1979,12 +1986,12 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { switch (e1000_gstrings_stats[i].type) { case NETDEV_STATS: - p = (char *) &net_stats + - e1000_gstrings_stats[i].stat_offset; + p = (char *)&net_stats + + e1000_gstrings_stats[i].stat_offset; break; case E1000_STATS: - p = (char *) adapter + - e1000_gstrings_stats[i].stat_offset; + p = (char *)adapter + + e1000_gstrings_stats[i].stat_offset; break; default: data[i] = 0; @@ -1992,7 +1999,7 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, } data[i] = (e1000_gstrings_stats[i].sizeof_stat == - sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } } @@ -2068,23 +2075,20 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u16 cap_addr, adv_addr, lpa_addr, pcs_stat_addr, phy_data, lpi_ctrl; - u32 status, ret_val; + u16 cap_addr, lpa_addr, pcs_stat_addr, phy_data; + u32 ret_val; - if (!(adapter->flags & FLAG_IS_ICH) || - !(adapter->flags2 & FLAG2_HAS_EEE)) + if (!(adapter->flags2 & FLAG2_HAS_EEE)) return -EOPNOTSUPP; switch (hw->phy.type) { case e1000_phy_82579: cap_addr = I82579_EEE_CAPABILITY; - adv_addr = I82579_EEE_ADVERTISEMENT; lpa_addr = I82579_EEE_LP_ABILITY; pcs_stat_addr = I82579_EEE_PCS_STATUS; break; case e1000_phy_i217: cap_addr = I217_EEE_CAPABILITY; - adv_addr = I217_EEE_ADVERTISEMENT; lpa_addr = I217_EEE_LP_ABILITY; pcs_stat_addr = I217_EEE_PCS_STATUS; break; @@ -2103,10 +2107,7 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) edata->supported = mmd_eee_cap_to_ethtool_sup_t(phy_data); /* EEE Advertised */ - ret_val = e1000_read_emi_reg_locked(hw, adv_addr, &phy_data); - if (ret_val) - goto release; - edata->advertised = 
mmd_eee_adv_to_ethtool_adv_t(phy_data); + edata->advertised = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); /* EEE Link Partner Advertised */ ret_val = e1000_read_emi_reg_locked(hw, lpa_addr, &phy_data); @@ -2124,25 +2125,11 @@ release: if (ret_val) return -ENODATA; - e1e_rphy(hw, I82579_LPI_CTRL, &lpi_ctrl); - status = er32(STATUS); - /* Result of the EEE auto negotiation - there is no register that * has the status of the EEE negotiation so do a best-guess based - * on whether both Tx and Rx LPI indications have been received or - * base it on the link speed, the EEE advertised speeds on both ends - * and the speeds on which EEE is enabled locally. + * on whether Tx or Rx LPI indications have been received. */ - if (((phy_data & E1000_EEE_TX_LPI_RCVD) && - (phy_data & E1000_EEE_RX_LPI_RCVD)) || - ((status & E1000_STATUS_SPEED_100) && - (edata->advertised & ADVERTISED_100baseT_Full) && - (edata->lp_advertised & ADVERTISED_100baseT_Full) && - (lpi_ctrl & I82579_LPI_CTRL_100_ENABLE)) || - ((status & E1000_STATUS_SPEED_1000) && - (edata->advertised & ADVERTISED_1000baseT_Full) && - (edata->lp_advertised & ADVERTISED_1000baseT_Full) && - (lpi_ctrl & I82579_LPI_CTRL_1000_ENABLE))) + if (phy_data & (E1000_EEE_TX_LPI_RCVD | E1000_EEE_RX_LPI_RCVD)) edata->eee_active = true; edata->eee_enabled = !hw->dev_spec.ich8lan.eee_disable; @@ -2159,19 +2146,10 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_eee *edata) struct ethtool_eee eee_curr; s32 ret_val; - if (!(adapter->flags & FLAG_IS_ICH) || - !(adapter->flags2 & FLAG2_HAS_EEE)) - return -EOPNOTSUPP; - ret_val = e1000e_get_eee(netdev, &eee_curr); if (ret_val) return ret_val; - if (eee_curr.advertised != edata->advertised) { - e_err("Setting EEE advertisement is not supported\n"); - return -EINVAL; - } - if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) { e_err("Setting EEE tx-lpi is not supported\n"); return -EINVAL; @@ -2182,16 +2160,21 @@ static int e1000e_set_eee(struct net_device *netdev, 
struct ethtool_eee *edata) return -EINVAL; } - if (hw->dev_spec.ich8lan.eee_disable != !edata->eee_enabled) { - hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled; - - /* reset the link */ - if (netif_running(netdev)) - e1000e_reinit_locked(adapter); - else - e1000e_reset(adapter); + if (edata->advertised & ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) { + e_err("EEE advertisement supports only 100TX and/or 1000T full-duplex\n"); + return -EINVAL; } + adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); + + hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled; + + /* reset the link */ + if (netif_running(netdev)) + e1000e_reinit_locked(adapter); + else + e1000e_reset(adapter); + return 0; } @@ -2229,7 +2212,19 @@ static int e1000e_get_ts_info(struct net_device *netdev, return 0; } +static int e1000e_ethtool_begin(struct net_device *netdev) +{ + return pm_runtime_get_sync(netdev->dev.parent); +} + +static void e1000e_ethtool_complete(struct net_device *netdev) +{ + pm_runtime_put_sync(netdev->dev.parent); +} + static const struct ethtool_ops e1000_ethtool_ops = { + .begin = e1000e_ethtool_begin, + .complete = e1000e_ethtool_complete, .get_settings = e1000_get_settings, .set_settings = e1000_set_settings, .get_drvinfo = e1000_get_drvinfo, diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 1e6b889aee87..84850f7a23e4 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -167,7 +167,7 @@ enum e1000_1000t_rx_status { e1000_1000t_rx_status_undefined = 0xFF }; -enum e1000_rev_polarity{ +enum e1000_rev_polarity { e1000_rev_polarity_normal = 0, e1000_rev_polarity_reversed, e1000_rev_polarity_undefined = 0xFF @@ -545,7 +545,7 @@ struct e1000_mac_info { u16 mta_reg_count; /* Maximum size of the MTA register table in all supported adapters */ - #define MAX_MTA_REG 128 +#define MAX_MTA_REG 128 u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; diff --git 
a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 121a865c7fbd..ad9d8f2dd868 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -61,15 +61,15 @@ /* Offset 04h HSFSTS */ union ich8_hws_flash_status { struct ich8_hsfsts { - u16 flcdone :1; /* bit 0 Flash Cycle Done */ - u16 flcerr :1; /* bit 1 Flash Cycle Error */ - u16 dael :1; /* bit 2 Direct Access error Log */ - u16 berasesz :2; /* bit 4:3 Sector Erase Size */ - u16 flcinprog :1; /* bit 5 flash cycle in Progress */ - u16 reserved1 :2; /* bit 13:6 Reserved */ - u16 reserved2 :6; /* bit 13:6 Reserved */ - u16 fldesvalid :1; /* bit 14 Flash Descriptor Valid */ - u16 flockdn :1; /* bit 15 Flash Config Lock-Down */ + u16 flcdone:1; /* bit 0 Flash Cycle Done */ + u16 flcerr:1; /* bit 1 Flash Cycle Error */ + u16 dael:1; /* bit 2 Direct Access error Log */ + u16 berasesz:2; /* bit 4:3 Sector Erase Size */ + u16 flcinprog:1; /* bit 5 flash cycle in Progress */ + u16 reserved1:2; /* bit 13:6 Reserved */ + u16 reserved2:6; /* bit 13:6 Reserved */ + u16 fldesvalid:1; /* bit 14 Flash Descriptor Valid */ + u16 flockdn:1; /* bit 15 Flash Config Lock-Down */ } hsf_status; u16 regval; }; @@ -78,11 +78,11 @@ union ich8_hws_flash_status { /* Offset 06h FLCTL */ union ich8_hws_flash_ctrl { struct ich8_hsflctl { - u16 flcgo :1; /* 0 Flash Cycle Go */ - u16 flcycle :2; /* 2:1 Flash Cycle */ - u16 reserved :5; /* 7:3 Reserved */ - u16 fldbcount :2; /* 9:8 Flash Data Byte Count */ - u16 flockdn :6; /* 15:10 Reserved */ + u16 flcgo:1; /* 0 Flash Cycle Go */ + u16 flcycle:2; /* 2:1 Flash Cycle */ + u16 reserved:5; /* 7:3 Reserved */ + u16 fldbcount:2; /* 9:8 Flash Data Byte Count */ + u16 flockdn:6; /* 15:10 Reserved */ } hsf_ctrl; u16 regval; }; @@ -90,10 +90,10 @@ union ich8_hws_flash_ctrl { /* ICH Flash Region Access Permissions */ union ich8_hws_flash_regacc { struct ich8_flracc { - u32 grra :8; /* 0:7 GbE region Read Access */ 
- u32 grwa :8; /* 8:15 GbE region Write Access */ - u32 gmrag :8; /* 23:16 GbE Master Read Access Grant */ - u32 gmwag :8; /* 31:24 GbE Master Write Access Grant */ + u32 grra:8; /* 0:7 GbE region Read Access */ + u32 grwa:8; /* 8:15 GbE region Write Access */ + u32 gmrag:8; /* 23:16 GbE Master Read Access Grant */ + u32 gmwag:8; /* 31:24 GbE Master Write Access Grant */ } hsf_flregacc; u16 regval; }; @@ -142,6 +142,7 @@ static void e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index); static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index); static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate); +static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw); static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg) { @@ -312,7 +313,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE; ew32(CTRL, mac_reg); e1e_flush(); - udelay(10); + usleep_range(10, 20); mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE; ew32(CTRL, mac_reg); e1e_flush(); @@ -548,8 +549,8 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) /* find total size of the NVM, then cut in half since the total * size represents two separate NVM banks. 
*/ - nvm->flash_bank_size = (sector_end_addr - sector_base_addr) - << FLASH_SECTOR_ADDR_SHIFT; + nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) + << FLASH_SECTOR_ADDR_SHIFT); nvm->flash_bank_size /= 2; /* Adjust to word count */ nvm->flash_bank_size /= sizeof(u16); @@ -636,6 +637,8 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) if (mac->type == e1000_pch_lpt) { mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch_lpt; + mac->ops.setup_physical_interface = + e1000_setup_copper_link_pch_lpt; } /* Enable PCS Lock-loss workaround for ICH8 */ @@ -692,7 +695,7 @@ s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data) * * Assumes the SW/FW/HW Semaphore is already acquired. **/ -static s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data) +s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data) { return __e1000_access_emi_reg_locked(hw, addr, &data, false); } @@ -709,11 +712,22 @@ static s32 e1000_set_eee_pchlan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; s32 ret_val; - u16 lpi_ctrl; + u16 lpa, pcs_status, adv, adv_addr, lpi_ctrl, data; - if ((hw->phy.type != e1000_phy_82579) && - (hw->phy.type != e1000_phy_i217)) + switch (hw->phy.type) { + case e1000_phy_82579: + lpa = I82579_EEE_LP_ABILITY; + pcs_status = I82579_EEE_PCS_STATUS; + adv_addr = I82579_EEE_ADVERTISEMENT; + break; + case e1000_phy_i217: + lpa = I217_EEE_LP_ABILITY; + pcs_status = I217_EEE_PCS_STATUS; + adv_addr = I217_EEE_ADVERTISEMENT; + break; + default: return 0; + } ret_val = hw->phy.ops.acquire(hw); if (ret_val) @@ -728,34 +742,24 @@ static s32 e1000_set_eee_pchlan(struct e1000_hw *hw) /* Enable EEE if not disabled by user */ if (!dev_spec->eee_disable) { - u16 lpa, pcs_status, data; - /* Save off link partner's EEE ability */ - switch (hw->phy.type) { - case e1000_phy_82579: - lpa = I82579_EEE_LP_ABILITY; - pcs_status = I82579_EEE_PCS_STATUS; - 
break; - case e1000_phy_i217: - lpa = I217_EEE_LP_ABILITY; - pcs_status = I217_EEE_PCS_STATUS; - break; - default: - ret_val = -E1000_ERR_PHY; - goto release; - } ret_val = e1000_read_emi_reg_locked(hw, lpa, &dev_spec->eee_lp_ability); if (ret_val) goto release; + /* Read EEE advertisement */ + ret_val = e1000_read_emi_reg_locked(hw, adv_addr, &adv); + if (ret_val) + goto release; + /* Enable EEE only for speeds in which the link partner is - * EEE capable. + * EEE capable and for which we advertise EEE. */ - if (dev_spec->eee_lp_ability & I82579_EEE_1000_SUPPORTED) + if (adv & dev_spec->eee_lp_ability & I82579_EEE_1000_SUPPORTED) lpi_ctrl |= I82579_LPI_CTRL_1000_ENABLE; - if (dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) { + if (adv & dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) { e1e_rphy_locked(hw, MII_LPA, &data); if (data & LPA_100FULL) lpi_ctrl |= I82579_LPI_CTRL_100_ENABLE; @@ -767,13 +771,13 @@ static s32 e1000_set_eee_pchlan(struct e1000_hw *hw) dev_spec->eee_lp_ability &= ~I82579_EEE_100_SUPPORTED; } - - /* R/Clr IEEE MMD 3.1 bits 11:10 - Tx/Rx LPI Received */ - ret_val = e1000_read_emi_reg_locked(hw, pcs_status, &data); - if (ret_val) - goto release; } + /* R/Clr IEEE MMD 3.1 bits 11:10 - Tx/Rx LPI Received */ + ret_val = e1000_read_emi_reg_locked(hw, pcs_status, &data); + if (ret_val) + goto release; + ret_val = e1e_wphy_locked(hw, I82579_LPI_CTRL, lpi_ctrl); release: hw->phy.ops.release(hw); @@ -835,6 +839,94 @@ release: } /** + * e1000_platform_pm_pch_lpt - Set platform power management values + * @hw: pointer to the HW structure + * @link: bool indicating link status + * + * Set the Latency Tolerance Reporting (LTR) values for the "PCIe-like" + * GbE MAC in the Lynx Point PCH based on Rx buffer size and link speed + * when link is up (which must not exceed the maximum latency supported + * by the platform), otherwise specify there is no LTR requirement. 
+ * Unlike true-PCIe devices which set the LTR maximum snoop/no-snoop + * latencies in the LTR Extended Capability Structure in the PCIe Extended + * Capability register set, on this device LTR is set by writing the + * equivalent snoop/no-snoop latencies in the LTRV register in the MAC and + * set the SEND bit to send an Intel On-chip System Fabric sideband (IOSF-SB) + * message to the PMC. + **/ +static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) +{ + u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | + link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; + u16 lat_enc = 0; /* latency encoded */ + + if (link) { + u16 speed, duplex, scale = 0; + u16 max_snoop, max_nosnoop; + u16 max_ltr_enc; /* max LTR latency encoded */ + s64 lat_ns; /* latency (ns) */ + s64 value; + u32 rxa; + + if (!hw->adapter->max_frame_size) { + e_dbg("max_frame_size not set.\n"); + return -E1000_ERR_CONFIG; + } + + hw->mac.ops.get_link_up_info(hw, &speed, &duplex); + if (!speed) { + e_dbg("Speed not set.\n"); + return -E1000_ERR_CONFIG; + } + + /* Rx Packet Buffer Allocation size (KB) */ + rxa = er32(PBA) & E1000_PBA_RXA_MASK; + + /* Determine the maximum latency tolerated by the device. + * + * Per the PCIe spec, the tolerated latencies are encoded as + * a 3-bit encoded scale (only 0-5 are valid) multiplied by + * a 10-bit value (0-1023) to provide a range from 1 ns to + * 2^25*(2^10-1) ns. The scale is encoded as 0=2^0ns, + * 1=2^5ns, 2=2^10ns,...5=2^25ns. 
+ */ + lat_ns = ((s64)rxa * 1024 - + (2 * (s64)hw->adapter->max_frame_size)) * 8 * 1000; + if (lat_ns < 0) + lat_ns = 0; + else + do_div(lat_ns, speed); + + value = lat_ns; + while (value > PCI_LTR_VALUE_MASK) { + scale++; + value = DIV_ROUND_UP(value, (1 << 5)); + } + if (scale > E1000_LTRV_SCALE_MAX) { + e_dbg("Invalid LTR latency scale %d\n", scale); + return -E1000_ERR_CONFIG; + } + lat_enc = (u16)((scale << PCI_LTR_SCALE_SHIFT) | value); + + /* Determine the maximum latency tolerated by the platform */ + pci_read_config_word(hw->adapter->pdev, E1000_PCI_LTR_CAP_LPT, + &max_snoop); + pci_read_config_word(hw->adapter->pdev, + E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop); + max_ltr_enc = max_t(u16, max_snoop, max_nosnoop); + + if (lat_enc > max_ltr_enc) + lat_enc = max_ltr_enc; + } + + /* Set Snoop and No-Snoop latencies the same */ + reg |= lat_enc | (lat_enc << E1000_LTRV_NOSNOOP_SHIFT); + ew32(LTRV, reg); + + return 0; +} + +/** * e1000_check_for_copper_link_ich8lan - Check for link (Copper) * @hw: pointer to the HW structure * @@ -871,6 +963,34 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) return ret_val; } + /* When connected at 10Mbps half-duplex, 82579 parts are excessively + * aggressive resulting in many collisions. To avoid this, increase + * the IPG and reduce Rx latency in the PHY. 
+ */ + if ((hw->mac.type == e1000_pch2lan) && link) { + u32 reg; + reg = er32(STATUS); + if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) { + reg = er32(TIPG); + reg &= ~E1000_TIPG_IPGT_MASK; + reg |= 0xFF; + ew32(TIPG, reg); + + /* Reduce Rx latency in analog PHY */ + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = + e1000_write_emi_reg_locked(hw, I82579_RX_CONFIG, 0); + + hw->phy.ops.release(hw); + + if (ret_val) + return ret_val; + } + } + /* Work-around I218 hang issue */ if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V)) { @@ -879,6 +999,15 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) return ret_val; } + if (hw->mac.type == e1000_pch_lpt) { + /* Set platform power management values for + * Latency Tolerance Reporting (LTR) + */ + ret_val = e1000_platform_pm_pch_lpt(hw, link); + if (ret_val) + return ret_val; + } + /* Clear link partner's EEE ability */ hw->dev_spec.ich8lan.eee_lp_ability = 0; @@ -1002,10 +1131,6 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA; - /* Disable EEE by default until IEEE802.3az spec is finalized */ - if (adapter->flags2 & FLAG2_HAS_EEE) - adapter->hw.dev_spec.ich8lan.eee_disable = true; - return 0; } @@ -1134,9 +1259,9 @@ static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw) u32 fwsm; fwsm = er32(FWSM); - return (fwsm & E1000_ICH_FWSM_FW_VALID) && - ((fwsm & E1000_FWSM_MODE_MASK) == - (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); + return ((fwsm & E1000_ICH_FWSM_FW_VALID) && + ((fwsm & E1000_FWSM_MODE_MASK) == + (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT))); } /** @@ -1153,7 +1278,7 @@ static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw) fwsm = er32(FWSM); return (fwsm & E1000_ICH_FWSM_FW_VALID) && - (fwsm & (E1000_ICH_MNG_IAMT_MODE << 
E1000_FWSM_MODE_SHIFT)); + (fwsm & (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); } /** @@ -1440,8 +1565,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) word_addr = (u16)(cnf_base_addr << 1); for (i = 0; i < cnf_size; i++) { - ret_val = e1000_read_nvm(hw, (word_addr + i * 2), 1, - &reg_data); + ret_val = e1000_read_nvm(hw, (word_addr + i * 2), 1, &reg_data); if (ret_val) goto release; @@ -1501,13 +1625,13 @@ static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link) if (ret_val) goto release; - status_reg &= BM_CS_STATUS_LINK_UP | - BM_CS_STATUS_RESOLVED | - BM_CS_STATUS_SPEED_MASK; + status_reg &= (BM_CS_STATUS_LINK_UP | + BM_CS_STATUS_RESOLVED | + BM_CS_STATUS_SPEED_MASK); if (status_reg == (BM_CS_STATUS_LINK_UP | - BM_CS_STATUS_RESOLVED | - BM_CS_STATUS_SPEED_1000)) + BM_CS_STATUS_RESOLVED | + BM_CS_STATUS_SPEED_1000)) k1_enable = false; } @@ -1516,13 +1640,13 @@ static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link) if (ret_val) goto release; - status_reg &= HV_M_STATUS_LINK_UP | - HV_M_STATUS_AUTONEG_COMPLETE | - HV_M_STATUS_SPEED_MASK; + status_reg &= (HV_M_STATUS_LINK_UP | + HV_M_STATUS_AUTONEG_COMPLETE | + HV_M_STATUS_SPEED_MASK); if (status_reg == (HV_M_STATUS_LINK_UP | - HV_M_STATUS_AUTONEG_COMPLETE | - HV_M_STATUS_SPEED_1000)) + HV_M_STATUS_AUTONEG_COMPLETE | + HV_M_STATUS_SPEED_1000)) k1_enable = false; } @@ -1579,7 +1703,7 @@ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) if (ret_val) return ret_val; - udelay(20); + usleep_range(20, 40); ctrl_ext = er32(CTRL_EXT); ctrl_reg = er32(CTRL); @@ -1589,11 +1713,11 @@ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS); e1e_flush(); - udelay(20); + usleep_range(20, 40); ew32(CTRL, ctrl_reg); ew32(CTRL_EXT, ctrl_ext); e1e_flush(); - udelay(20); + usleep_range(20, 40); return 0; } @@ -1667,7 +1791,6 @@ release: return ret_val; } - /** * e1000_set_mdio_slow_mode_hv - Set slow MDIO
access mode * @hw: pointer to the HW structure @@ -1834,7 +1957,7 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) * SHRAL/H) and initial CRC values to the MAC */ for (i = 0; i < (hw->mac.rar_entry_count + 4); i++) { - u8 mac_addr[ETH_ALEN] = {0}; + u8 mac_addr[ETH_ALEN] = { 0 }; u32 addr_high, addr_low; addr_high = er32(RAH(i)); @@ -1865,8 +1988,8 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) ew32(RCTL, mac_reg); ret_val = e1000e_read_kmrn_reg(hw, - E1000_KMRNCTRLSTA_CTRL_OFFSET, - &data); + E1000_KMRNCTRLSTA_CTRL_OFFSET, + &data); if (ret_val) return ret_val; ret_val = e1000e_write_kmrn_reg(hw, @@ -1875,8 +1998,8 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) if (ret_val) return ret_val; ret_val = e1000e_read_kmrn_reg(hw, - E1000_KMRNCTRLSTA_HD_CTRL, - &data); + E1000_KMRNCTRLSTA_HD_CTRL, + &data); if (ret_val) return ret_val; data &= ~(0xF << 8); @@ -1923,8 +2046,8 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) ew32(RCTL, mac_reg); ret_val = e1000e_read_kmrn_reg(hw, - E1000_KMRNCTRLSTA_CTRL_OFFSET, - &data); + E1000_KMRNCTRLSTA_CTRL_OFFSET, + &data); if (ret_val) return ret_val; ret_val = e1000e_write_kmrn_reg(hw, @@ -1933,8 +2056,8 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) if (ret_val) return ret_val; ret_val = e1000e_read_kmrn_reg(hw, - E1000_KMRNCTRLSTA_HD_CTRL, - &data); + E1000_KMRNCTRLSTA_HD_CTRL, + &data); if (ret_val) return ret_val; data &= ~(0xF << 8); @@ -2100,7 +2223,7 @@ static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw) do { data = er32(STATUS); data &= E1000_STATUS_LAN_INIT_DONE; - udelay(100); + usleep_range(100, 200); } while ((!data) && --loop); /* If basic configuration is incomplete before the above loop @@ -2445,7 +2568,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) /* Check bank 0 */ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset, - &sig_byte); 
+ &sig_byte); if (ret_val) return ret_val; if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == @@ -2456,8 +2579,8 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) /* Check bank 1 */ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset + - bank1_offset, - &sig_byte); + bank1_offset, + &sig_byte); if (ret_val) return ret_val; if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == @@ -2510,8 +2633,8 @@ static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ret_val = 0; for (i = 0; i < words; i++) { - if (dev_spec->shadow_ram[offset+i].modified) { - data[i] = dev_spec->shadow_ram[offset+i].value; + if (dev_spec->shadow_ram[offset + i].modified) { + data[i] = dev_spec->shadow_ram[offset + i].value; } else { ret_val = e1000_read_flash_word_ich8lan(hw, act_offset + i, @@ -2696,8 +2819,8 @@ static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; - flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + - hw->nvm.flash_base_addr; + flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + + hw->nvm.flash_base_addr); do { udelay(1); @@ -2714,8 +2837,9 @@ static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ew32flash(ICH_FLASH_FADDR, flash_linear_addr); - ret_val = e1000_flash_cycle_ich8lan(hw, - ICH_FLASH_READ_COMMAND_TIMEOUT); + ret_val = + e1000_flash_cycle_ich8lan(hw, + ICH_FLASH_READ_COMMAND_TIMEOUT); /* Check if FCERR is set to 1, if set to 1, clear it * and try the whole sequence a few more times, else @@ -2774,8 +2898,8 @@ static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, nvm->ops.acquire(hw); for (i = 0; i < words; i++) { - dev_spec->shadow_ram[offset+i].modified = true; - dev_spec->shadow_ram[offset+i].value = data[i]; + dev_spec->shadow_ram[offset + i].modified = true; + dev_spec->shadow_ram[offset + i].value = data[i]; } nvm->ops.release(hw); @@ -2844,8 
+2968,8 @@ static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) data = dev_spec->shadow_ram[i].value; } else { ret_val = e1000_read_flash_word_ich8lan(hw, i + - old_bank_offset, - &data); + old_bank_offset, + &data); if (ret_val) break; } @@ -2863,7 +2987,7 @@ static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) /* Convert offset to bytes. */ act_offset = (i + new_bank_offset) << 1; - udelay(100); + usleep_range(100, 200); /* Write the bytes to the new bank. */ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, @@ -2871,10 +2995,10 @@ static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) if (ret_val) break; - udelay(100); + usleep_range(100, 200); ret_val = e1000_retry_write_flash_byte_ich8lan(hw, - act_offset + 1, - (u8)(data >> 8)); + act_offset + 1, + (u8)(data >> 8)); if (ret_val) break; } @@ -3050,8 +3174,8 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; - flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + - hw->nvm.flash_base_addr; + flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + + hw->nvm.flash_base_addr); do { udelay(1); @@ -3062,7 +3186,7 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); /* 0b/1b corresponds to 1 or 2 byte size, respectively. 
*/ - hsflctl.hsf_ctrl.fldbcount = size -1; + hsflctl.hsf_ctrl.fldbcount = size - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); @@ -3078,8 +3202,9 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, /* check if FCERR is set to 1 , if set to 1, clear it * and try the whole sequence a few more times else done */ - ret_val = e1000_flash_cycle_ich8lan(hw, - ICH_FLASH_WRITE_COMMAND_TIMEOUT); + ret_val = + e1000_flash_cycle_ich8lan(hw, + ICH_FLASH_WRITE_COMMAND_TIMEOUT); if (!ret_val) break; @@ -3138,7 +3263,7 @@ static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, for (program_retries = 0; program_retries < 100; program_retries++) { e_dbg("Retrying Byte %2.2X at offset %u\n", byte, offset); - udelay(100); + usleep_range(100, 200); ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); if (!ret_val) break; @@ -3209,8 +3334,10 @@ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) flash_linear_addr = hw->nvm.flash_base_addr; flash_linear_addr += (bank) ? 
flash_bank_size : 0; - for (j = 0; j < iteration ; j++) { + for (j = 0; j < iteration; j++) { do { + u32 timeout = ICH_FLASH_ERASE_COMMAND_TIMEOUT; + /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val) @@ -3230,8 +3357,7 @@ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) flash_linear_addr += (j * sector_size); ew32flash(ICH_FLASH_FADDR, flash_linear_addr); - ret_val = e1000_flash_cycle_ich8lan(hw, - ICH_FLASH_ERASE_COMMAND_TIMEOUT); + ret_val = e1000_flash_cycle_ich8lan(hw, timeout); if (!ret_val) break; @@ -3270,8 +3396,7 @@ static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data) return ret_val; } - if (*data == ID_LED_RESERVED_0000 || - *data == ID_LED_RESERVED_FFFF) + if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) *data = ID_LED_DEFAULT_ICH8LAN; return 0; @@ -3511,9 +3636,9 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); + /* An error is not fatal and we should not stop init due to this */ if (ret_val) e_dbg("Error initializing identification LED\n"); - /* This is not fatal and we should not stop init due to this */ /* Setup the receive address. 
*/ e1000e_init_rx_addrs(hw, mac->rar_entry_count); @@ -3541,16 +3666,16 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) /* Set the transmit descriptor write-back policy for both queues */ txdctl = er32(TXDCTL(0)); - txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | - E1000_TXDCTL_FULL_TX_DESC_WB; - txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | - E1000_TXDCTL_MAX_TX_DESC_PREFETCH; + txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) | + E1000_TXDCTL_FULL_TX_DESC_WB); + txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) | + E1000_TXDCTL_MAX_TX_DESC_PREFETCH); ew32(TXDCTL(0), txdctl); txdctl = er32(TXDCTL(1)); - txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | - E1000_TXDCTL_FULL_TX_DESC_WB; - txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | - E1000_TXDCTL_MAX_TX_DESC_PREFETCH; + txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) | + E1000_TXDCTL_FULL_TX_DESC_WB); + txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) | + E1000_TXDCTL_MAX_TX_DESC_PREFETCH); ew32(TXDCTL(1), txdctl); /* ICH8 has opposite polarity of no_snoop bits. @@ -3559,7 +3684,7 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) if (mac->type == e1000_ich8lan) snoop = PCIE_ICH8_SNOOP_ALL; else - snoop = (u32) ~(PCIE_NO_SNOOP_ALL); + snoop = (u32)~(PCIE_NO_SNOOP_ALL); e1000e_set_pcie_no_snoop(hw, snoop); ctrl_ext = er32(CTRL_EXT); @@ -3575,6 +3700,7 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) return ret_val; } + /** * e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits * @hw: pointer to the HW structure @@ -3686,8 +3812,7 @@ static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw) */ hw->fc.current_mode = hw->fc.requested_mode; - e_dbg("After fix-ups FlowControl is now = %x\n", - hw->fc.current_mode); + e_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); /* Continue to configure the copper link. 
*/ ret_val = hw->mac.ops.setup_physical_interface(hw); @@ -3737,12 +3862,12 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) if (ret_val) return ret_val; ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - &reg_data); + &reg_data); if (ret_val) return ret_val; reg_data |= 0x3F; ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - reg_data); + reg_data); if (ret_val) return ret_val; @@ -3760,7 +3885,6 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) break; case e1000_phy_82577: case e1000_phy_82579: - case e1000_phy_i217: ret_val = e1000_copper_link_setup_82577(hw); if (ret_val) return ret_val; @@ -3796,6 +3920,31 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) } /** + * e1000_setup_copper_link_pch_lpt - Configure MAC/PHY interface + * @hw: pointer to the HW structure + * + * Calls the PHY specific link setup function and then calls the + * generic setup_copper_link to finish configuring the link for + * Lynxpoint PCH devices + **/ +static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw) +{ + u32 ctrl; + s32 ret_val; + + ctrl = er32(CTRL); + ctrl |= E1000_CTRL_SLU; + ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + ew32(CTRL, ctrl); + + ret_val = e1000_copper_link_setup_82577(hw); + if (ret_val) + return ret_val; + + return e1000e_setup_copper_link(hw); +} + +/** * e1000_get_link_up_info_ich8lan - Get current link speed and duplex * @hw: pointer to the HW structure * @speed: pointer to store current link speed @@ -3815,8 +3964,7 @@ static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, return ret_val; if ((hw->mac.type == e1000_ich8lan) && - (hw->phy.type == e1000_phy_igp_3) && - (*speed == SPEED_1000)) { + (hw->phy.type == e1000_phy_igp_3) && (*speed == SPEED_1000)) { ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw); } @@ -3899,7 +4047,7 @@ static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw) * /disabled - false). 
**/ void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, - bool state) + bool state) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; @@ -3981,12 +4129,12 @@ void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) return; ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, - &reg_data); + &reg_data); if (ret_val) return; reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK; ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, - reg_data); + reg_data); if (ret_val) return; reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 8bf4655c2e17..80034a2b297c 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -211,7 +211,8 @@ #define I82579_MSE_THRESHOLD 0x084F /* 82579 Mean Square Error Threshold */ #define I82577_MSE_THRESHOLD 0x0887 /* 82577 Mean Square Error Threshold */ #define I82579_MSE_LINK_DOWN 0x2411 /* MSE count before dropping link */ -#define I82579_EEE_PCS_STATUS 0x182D /* IEEE MMD Register 3.1 >> 8 */ +#define I82579_RX_CONFIG 0x3412 /* Receive configuration */ +#define I82579_EEE_PCS_STATUS 0x182E /* IEEE MMD Register 3.1 >> 8 */ #define I82579_EEE_CAPABILITY 0x0410 /* IEEE MMD Register 3.20 */ #define I82579_EEE_ADVERTISEMENT 0x040E /* IEEE MMD Register 7.60 */ #define I82579_EEE_LP_ABILITY 0x040F /* IEEE MMD Register 7.61 */ @@ -249,13 +250,6 @@ /* Proprietary Latency Tolerance Reporting PCI Capability */ #define E1000_PCI_LTR_CAP_LPT 0xA8 -/* OBFF Control & Threshold Defines */ -#define E1000_SVCR_OFF_EN 0x00000001 -#define E1000_SVCR_OFF_MASKINT 0x00001000 -#define E1000_SVCR_OFF_TIMER_MASK 0xFFFF0000 -#define E1000_SVCR_OFF_TIMER_SHIFT 16 -#define E1000_SVT_OFF_HWM_MASK 0x0000001F - void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw); void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state); @@ -267,4 +261,5 
@@ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable); void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw); s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable); s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data); +s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data); #endif /* _E1000E_ICH8LAN_H_ */ diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index b78e02174601..2480c1091873 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -596,7 +596,7 @@ s32 e1000e_check_for_serdes_link(struct e1000_hw *hw) * serdes media type. */ /* SYNCH bit and IV bit are sticky. */ - udelay(10); + usleep_range(10, 20); rxcw = er32(RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { @@ -613,7 +613,7 @@ s32 e1000e_check_for_serdes_link(struct e1000_hw *hw) status = er32(STATUS); if (status & E1000_STATUS_LU) { /* SYNCH bit and IV bit are sticky, so reread rxcw. */ - udelay(10); + usleep_range(10, 20); rxcw = er32(RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { @@ -1382,7 +1382,7 @@ s32 e1000e_get_hw_semaphore(struct e1000_hw *hw) if (!(swsm & E1000_SWSM_SMBI)) break; - udelay(50); + usleep_range(50, 100); i++; } @@ -1400,7 +1400,7 @@ s32 e1000e_get_hw_semaphore(struct e1000_hw *hw) if (er32(SWSM) & E1000_SWSM_SWESMBI) break; - udelay(50); + usleep_range(50, 100); } if (i == timeout) { @@ -1600,15 +1600,28 @@ s32 e1000e_blink_led_generic(struct e1000_hw *hw) ledctl_blink = E1000_LEDCTL_LED0_BLINK | (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); } else { - /* set the blink bit for each LED that's "on" (0x0E) - * in ledctl_mode2 + /* Set the blink bit for each LED that's "on" (0x0E) + * (or "off" if inverted) in ledctl_mode2. 
The blink + * logic in hardware only works when mode is set to "on" + * so it must be changed accordingly when the mode is + * "off" and inverted. */ ledctl_blink = hw->mac.ledctl_mode2; - for (i = 0; i < 4; i++) - if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == - E1000_LEDCTL_MODE_LED_ON) - ledctl_blink |= (E1000_LEDCTL_LED0_BLINK << - (i * 8)); + for (i = 0; i < 32; i += 8) { + u32 mode = (hw->mac.ledctl_mode2 >> i) & + E1000_LEDCTL_LED0_MODE_MASK; + u32 led_default = hw->mac.ledctl_default >> i; + + if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && + (mode == E1000_LEDCTL_MODE_LED_ON)) || + ((led_default & E1000_LEDCTL_LED0_IVRT) && + (mode == E1000_LEDCTL_MODE_LED_OFF))) { + ledctl_blink &= + ~(E1000_LEDCTL_LED0_MODE_MASK << i); + ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | + E1000_LEDCTL_MODE_LED_ON) << i; + } + } } ew32(LEDCTL, ledctl_blink); @@ -1712,7 +1725,7 @@ s32 e1000e_disable_pcie_master(struct e1000_hw *hw) while (timeout) { if (!(er32(STATUS) & E1000_STATUS_GIO_MASTER_ENABLE)) break; - udelay(100); + usleep_range(100, 200); timeout--; } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a177b8b65c44..da7f2fad5ba4 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -55,7 +55,7 @@ #define DRV_EXTRAVERSION "-k" -#define DRV_VERSION "2.2.14" DRV_EXTRAVERSION +#define DRV_VERSION "2.3.2" DRV_EXTRAVERSION char e1000e_driver_name[] = "e1000e"; const char e1000e_driver_version[] = DRV_VERSION; @@ -219,9 +219,8 @@ static void e1000e_dump(struct e1000_adapter *adapter) if (netdev) { dev_info(&adapter->pdev->dev, "Net device Info\n"); pr_info("Device Name state trans_start last_rx\n"); - pr_info("%-15s %016lX %016lX %016lX\n", - netdev->name, netdev->state, netdev->trans_start, - netdev->last_rx); + pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, + netdev->state, netdev->trans_start, netdev->last_rx); } /* Print Registers */ @@ -555,7 +554,7 @@ 
static void e1000_receive_skb(struct e1000_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (staterr & E1000_RXD_STAT_VP) - __vlan_hwaccel_put_tag(skb, tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); napi_gro_receive(&adapter->napi, skb); } @@ -755,8 +754,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_ring *rx_ring, cpu_to_le64(ps_page->dma); } - skb = __netdev_alloc_skb_ip_align(netdev, - adapter->rx_ps_bsize0, + skb = __netdev_alloc_skb_ip_align(netdev, adapter->rx_ps_bsize0, gfp); if (!skb) { @@ -848,11 +846,16 @@ check_page: } } - if (!buffer_info->dma) + if (!buffer_info->dma) { buffer_info->dma = dma_map_page(&pdev->dev, - buffer_info->page, 0, - PAGE_SIZE, + buffer_info->page, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { + adapter->alloc_rx_buff_failed++; + break; + } + } rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma); @@ -937,10 +940,8 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done, cleaned = true; cleaned_count++; - dma_unmap_single(&pdev->dev, - buffer_info->dma, - adapter->rx_buffer_len, - DMA_FROM_DEVICE); + dma_unmap_single(&pdev->dev, buffer_info->dma, + adapter->rx_buffer_len, DMA_FROM_DEVICE); buffer_info->dma = 0; length = le16_to_cpu(rx_desc->wb.upper.length); @@ -1068,8 +1069,8 @@ static void e1000_put_txbuf(struct e1000_ring *tx_ring, static void e1000_print_hw_hang(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, - struct e1000_adapter, - print_hang_task); + struct e1000_adapter, + print_hang_task); struct net_device *netdev = adapter->netdev; struct e1000_ring *tx_ring = adapter->tx_ring; unsigned int i = tx_ring->next_to_clean; @@ -1082,8 +1083,7 @@ static void e1000_print_hw_hang(struct work_struct *work) if (test_bit(__E1000_DOWN, &adapter->state)) return; - if (!adapter->tx_hang_recheck && - (adapter->flags2 & FLAG2_DMA_BURST)) { + if 
(!adapter->tx_hang_recheck && (adapter->flags2 & FLAG2_DMA_BURST)) { /* May be block on write-back, flush and detect again * flush pending descriptor writebacks to memory */ @@ -1125,19 +1125,10 @@ static void e1000_print_hw_hang(struct work_struct *work) "PHY 1000BASE-T Status <%x>\n" "PHY Extended Status <%x>\n" "PCI Status <%x>\n", - readl(tx_ring->head), - readl(tx_ring->tail), - tx_ring->next_to_use, - tx_ring->next_to_clean, - tx_ring->buffer_info[eop].time_stamp, - eop, - jiffies, - eop_desc->upper.fields.status, - er32(STATUS), - phy_status, - phy_1000t_status, - phy_ext_status, - pci_status); + readl(tx_ring->head), readl(tx_ring->tail), tx_ring->next_to_use, + tx_ring->next_to_clean, tx_ring->buffer_info[eop].time_stamp, + eop, jiffies, eop_desc->upper.fields.status, er32(STATUS), + phy_status, phy_1000t_status, phy_ext_status, pci_status); /* Suggest workaround for known h/w issue */ if ((hw->mac.type == e1000_pchlan) && (er32(CTRL) & E1000_CTRL_TFCE)) @@ -1430,7 +1421,7 @@ copydone: e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb); if (rx_desc->wb.upper.header_status & - cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP)) + cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP)) adapter->rx_hdr_split++; e1000_receive_skb(adapter, netdev, skb, staterr, @@ -1468,7 +1459,7 @@ next_desc: * e1000_consume_page - helper function **/ static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb, - u16 length) + u16 length) { bi->page = NULL; skb->len += length; @@ -1495,7 +1486,8 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done, unsigned int i; int cleaned_count = 0; bool cleaned = false; - unsigned int total_rx_bytes=0, total_rx_packets=0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct skb_shared_info *shinfo; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); @@ -1541,7 +1533,6 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done, rx_ring->rx_skb_top = NULL; goto 
next_desc; } - #define rxtop (rx_ring->rx_skb_top) if (!(staterr & E1000_RXD_STAT_EOP)) { /* this descriptor is only the beginning (or middle) */ @@ -1549,12 +1540,13 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done, /* this is the beginning of a chain */ rxtop = skb; skb_fill_page_desc(rxtop, 0, buffer_info->page, - 0, length); + 0, length); } else { /* this is the middle of a chain */ - skb_fill_page_desc(rxtop, - skb_shinfo(rxtop)->nr_frags, - buffer_info->page, 0, length); + shinfo = skb_shinfo(rxtop); + skb_fill_page_desc(rxtop, shinfo->nr_frags, + buffer_info->page, 0, + length); /* re-use the skb, only consumed the page */ buffer_info->skb = skb; } @@ -1563,9 +1555,10 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done, } else { if (rxtop) { /* end of the chain */ - skb_fill_page_desc(rxtop, - skb_shinfo(rxtop)->nr_frags, - buffer_info->page, 0, length); + shinfo = skb_shinfo(rxtop); + skb_fill_page_desc(rxtop, shinfo->nr_frags, + buffer_info->page, 0, + length); /* re-use the current skb, we only consumed the * page */ @@ -1590,10 +1583,10 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done, skb_put(skb, length); } else { skb_fill_page_desc(skb, 0, - buffer_info->page, 0, - length); + buffer_info->page, 0, + length); e1000_consume_page(buffer_info, skb, - length); + length); } } } @@ -1666,8 +1659,7 @@ static void e1000_clean_rx_ring(struct e1000_ring *rx_ring) DMA_FROM_DEVICE); else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq) dma_unmap_page(&pdev->dev, buffer_info->dma, - PAGE_SIZE, - DMA_FROM_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); else if (adapter->clean_rx == e1000_clean_rx_irq_ps) dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_ps_bsize0, @@ -1720,7 +1712,8 @@ static void e1000_clean_rx_ring(struct e1000_ring *rx_ring) static void e1000e_downshift_workaround(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, - 
struct e1000_adapter, downshift_task); + struct e1000_adapter, + downshift_task); if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -1913,7 +1906,6 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused irq, void *data) struct e1000_hw *hw = &adapter->hw; struct e1000_ring *tx_ring = adapter->tx_ring; - adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; @@ -1970,7 +1962,6 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) ew32(RFCTL, rfctl); } -#define E1000_IVAR_INT_ALLOC_VALID 0x8 /* Configure Rx vector */ rx_ring->ims_val = E1000_IMS_RXQ0; adapter->eiac_mask |= rx_ring->ims_val; @@ -2045,8 +2036,9 @@ void e1000e_set_interrupt_capability(struct e1000_adapter *adapter) if (adapter->flags & FLAG_HAS_MSIX) { adapter->num_vectors = 3; /* RxQ0, TxQ0 and other */ adapter->msix_entries = kcalloc(adapter->num_vectors, - sizeof(struct msix_entry), - GFP_KERNEL); + sizeof(struct + msix_entry), + GFP_KERNEL); if (adapter->msix_entries) { for (i = 0; i < adapter->num_vectors; i++) adapter->msix_entries[i].entry = i; @@ -2490,7 +2482,7 @@ static unsigned int e1000_update_itr(u16 itr_setting, int packets, int bytes) switch (itr_setting) { case lowest_latency: /* handle TSO and jumbo frames */ - if (bytes/packets > 8000) + if (bytes / packets > 8000) retval = bulk_latency; else if ((packets < 5) && (bytes > 512)) retval = low_latency; @@ -2498,13 +2490,13 @@ static unsigned int e1000_update_itr(u16 itr_setting, int packets, int bytes) case low_latency: /* 50 usec aka 20000 ints/s */ if (bytes > 10000) { /* this if handles the TSO accounting */ - if (bytes/packets > 8000) + if (bytes / packets > 8000) retval = bulk_latency; - else if ((packets < 10) || ((bytes/packets) > 1200)) + else if ((packets < 10) || ((bytes / packets) > 1200)) retval = bulk_latency; else if ((packets > 35)) retval = lowest_latency; - } else if (bytes/packets > 2000) { + } else if (bytes / packets > 2000) { retval = bulk_latency; } else if (packets <= 2 && bytes < 512) 
{ retval = lowest_latency; @@ -2556,8 +2548,8 @@ static void e1000_set_itr(struct e1000_adapter *adapter) current_itr = max(adapter->rx_itr, adapter->tx_itr); - switch (current_itr) { /* counts and packets in update_itr are dependent on these numbers */ + switch (current_itr) { case lowest_latency: new_itr = 70000; break; @@ -2578,8 +2570,7 @@ set_itr_now: * increasing */ new_itr = new_itr > adapter->itr ? - min(adapter->itr + (new_itr >> 2), new_itr) : - new_itr; + min(adapter->itr + (new_itr >> 2), new_itr) : new_itr; adapter->itr = new_itr; adapter->rx_ring->itr_val = new_itr; if (adapter->msix_entries) @@ -2681,7 +2672,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight) return work_done; } -static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int e1000_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -2706,7 +2698,8 @@ static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int e1000_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -2750,7 +2743,8 @@ static void e1000e_vlan_filter_disable(struct e1000_adapter *adapter) ew32(RCTL, rctl); if (adapter->mng_vlan_id != (u16)E1000_MNG_VLAN_NONE) { - e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + adapter->mng_vlan_id); adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; } } @@ -2810,24 +2804,23 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) u16 vid = adapter->hw.mng_cookie.vlan_id; u16 old_vid = adapter->mng_vlan_id; - if (adapter->hw.mng_cookie.status & - E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { - e1000_vlan_rx_add_vid(netdev, vid); + if (adapter->hw.mng_cookie.status & 
E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { + e1000_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid); adapter->mng_vlan_id = vid; } if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid)) - e1000_vlan_rx_kill_vid(netdev, old_vid); + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), old_vid); } static void e1000_restore_vlan(struct e1000_adapter *adapter) { u16 vid; - e1000_vlan_rx_add_vid(adapter->netdev, 0); + e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - e1000_vlan_rx_add_vid(adapter->netdev, vid); + e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } static void e1000_init_manageability_pt(struct e1000_adapter *adapter) @@ -3002,8 +2995,8 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) rctl = er32(RCTL); rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | - E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | - (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | + (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not Store bad packets */ rctl &= ~E1000_RCTL_SBP; @@ -3089,19 +3082,17 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) /* Enable Packet split descriptors */ rctl |= E1000_RCTL_DTYP_PS; - psrctl |= adapter->rx_ps_bsize0 >> - E1000_PSRCTL_BSIZE0_SHIFT; + psrctl |= adapter->rx_ps_bsize0 >> E1000_PSRCTL_BSIZE0_SHIFT; switch (adapter->rx_ps_pages) { case 3: - psrctl |= PAGE_SIZE << - E1000_PSRCTL_BSIZE3_SHIFT; + psrctl |= PAGE_SIZE << E1000_PSRCTL_BSIZE3_SHIFT; + /* fall-through */ case 2: - psrctl |= PAGE_SIZE << - E1000_PSRCTL_BSIZE2_SHIFT; + psrctl |= PAGE_SIZE << E1000_PSRCTL_BSIZE2_SHIFT; + /* fall-through */ case 1: - psrctl |= PAGE_SIZE >> - E1000_PSRCTL_BSIZE1_SHIFT; + psrctl |= PAGE_SIZE >> E1000_PSRCTL_BSIZE1_SHIFT; break; } @@ -3275,7 +3266,7 @@ static int e1000e_write_mc_addr_list(struct net_device *netdev) /* update_mc_addr_list expects a packed 
array of only addresses. */ i = 0; netdev_for_each_mc_addr(ha, netdev) - memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); + memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); hw->mac.ops.update_mc_addr_list(hw, mta_list, i); kfree(mta_list); @@ -3385,7 +3376,7 @@ static void e1000e_set_rx_mode(struct net_device *netdev) ew32(RCTL, rctl); - if (netdev->features & NETIF_F_HW_VLAN_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) e1000e_vlan_strip_enable(adapter); else e1000e_vlan_strip_disable(adapter); @@ -3752,8 +3743,7 @@ void e1000e_reset(struct e1000_adapter *adapter) * but don't include ethernet FCS because hardware appends it */ min_tx_space = (adapter->max_frame_size + - sizeof(struct e1000_tx_desc) - - ETH_FCS_LEN) * 2; + sizeof(struct e1000_tx_desc) - ETH_FCS_LEN) * 2; min_tx_space = ALIGN(min_tx_space, 1024); min_tx_space >>= 10; /* software strips receive CRC, so leave room for it */ @@ -3856,13 +3846,13 @@ void e1000e_reset(struct e1000_adapter *adapter) if ((adapter->max_frame_size * 2) > (pba << 10)) { if (!(adapter->flags2 & FLAG2_DISABLE_AIM)) { dev_info(&adapter->pdev->dev, - "Interrupt Throttle Rate turned off\n"); + "Interrupt Throttle Rate off\n"); adapter->flags2 |= FLAG2_DISABLE_AIM; e1000e_write_itr(adapter, 0); } } else if (adapter->flags2 & FLAG2_DISABLE_AIM) { dev_info(&adapter->pdev->dev, - "Interrupt Throttle Rate turned on\n"); + "Interrupt Throttle Rate on\n"); adapter->flags2 &= ~FLAG2_DISABLE_AIM; adapter->itr = 20000; e1000e_write_itr(adapter, adapter->itr); @@ -3893,6 +3883,38 @@ void e1000e_reset(struct e1000_adapter *adapter) /* initialize systim and reset the ns time counter */ e1000e_config_hwtstamp(adapter); + /* Set EEE advertisement as appropriate */ + if (adapter->flags2 & FLAG2_HAS_EEE) { + s32 ret_val; + u16 adv_addr; + + switch (hw->phy.type) { + case e1000_phy_82579: + adv_addr = I82579_EEE_ADVERTISEMENT; + break; + case e1000_phy_i217: + adv_addr = I217_EEE_ADVERTISEMENT; + break; + default: + 
dev_err(&adapter->pdev->dev, + "Invalid PHY type setting EEE advertisement\n"); + return; + } + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) { + dev_err(&adapter->pdev->dev, + "EEE advertisement - unable to acquire PHY\n"); + return; + } + + e1000_write_emi_reg_locked(hw, adv_addr, + hw->dev_spec.ich8lan.eee_disable ? + 0 : adapter->eee_advert); + + hw->phy.ops.release(hw); + } + if (!netif_running(adapter->netdev) && !test_bit(__E1000_TESTING, &adapter->state)) { e1000_power_down_phy(adapter); @@ -4261,8 +4283,7 @@ static int e1000_open(struct net_device *netdev) e1000e_power_up_phy(adapter); adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; - if ((adapter->hw.mng_cookie.status & - E1000_MNG_DHCP_COOKIE_STATUS_VLAN)) + if ((adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN)) e1000_update_mng_vlan(adapter); /* DMA latency requirement to workaround jumbo issue */ @@ -4303,6 +4324,7 @@ static int e1000_open(struct net_device *netdev) netif_start_queue(netdev); adapter->idle_check = true; + hw->mac.get_link_status = true; pm_runtime_put(&pdev->dev); /* fire a link status change interrupt to start the watchdog */ @@ -4364,9 +4386,9 @@ static int e1000_close(struct net_device *netdev) /* kill manageability vlan ID if supported, but not if a vlan with * the same ID is registered on the host OS (let 8021q kill it) */ - if (adapter->hw.mng_cookie.status & - E1000_MNG_DHCP_COOKIE_STATUS_VLAN) - e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); + if (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) + e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), + adapter->mng_vlan_id); /* If AMT is enabled, let the firmware know that the network * interface is now closed @@ -4381,6 +4403,7 @@ static int e1000_close(struct net_device *netdev) return 0; } + /** * e1000_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure @@ -4431,7 +4454,8 @@ static int e1000_set_mac(struct net_device *netdev, void *p) 
static void e1000e_update_phy_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, - struct e1000_adapter, update_phy_task); + struct e1000_adapter, + update_phy_task); if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -4448,7 +4472,7 @@ static void e1000e_update_phy_task(struct work_struct *work) **/ static void e1000_update_phy_info(unsigned long data) { - struct e1000_adapter *adapter = (struct e1000_adapter *) data; + struct e1000_adapter *adapter = (struct e1000_adapter *)data; if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -4615,18 +4639,16 @@ static void e1000e_update_stats(struct e1000_adapter *adapter) * our own version based on RUC and ROC */ netdev->stats.rx_errors = adapter->stats.rxerrc + - adapter->stats.crcerrs + adapter->stats.algnerrc + - adapter->stats.ruc + adapter->stats.roc + - adapter->stats.cexterr; + adapter->stats.crcerrs + adapter->stats.algnerrc + + adapter->stats.ruc + adapter->stats.roc + adapter->stats.cexterr; netdev->stats.rx_length_errors = adapter->stats.ruc + - adapter->stats.roc; + adapter->stats.roc; netdev->stats.rx_crc_errors = adapter->stats.crcerrs; netdev->stats.rx_frame_errors = adapter->stats.algnerrc; netdev->stats.rx_missed_errors = adapter->stats.mpc; /* Tx Errors */ - netdev->stats.tx_errors = adapter->stats.ecol + - adapter->stats.latecol; + netdev->stats.tx_errors = adapter->stats.ecol + adapter->stats.latecol; netdev->stats.tx_aborted_errors = adapter->stats.ecol; netdev->stats.tx_window_errors = adapter->stats.latecol; netdev->stats.tx_carrier_errors = adapter->stats.tncrs; @@ -4662,6 +4684,7 @@ static void e1000_phy_read_status(struct e1000_adapter *adapter) (adapter->hw.phy.media_type == e1000_media_type_copper)) { int ret_val; + pm_runtime_get_sync(&adapter->pdev->dev); ret_val = e1e_rphy(hw, MII_BMCR, &phy->bmcr); ret_val |= e1e_rphy(hw, MII_BMSR, &phy->bmsr); ret_val |= e1e_rphy(hw, MII_ADVERTISE, &phy->advertise); @@ -4672,6 +4695,7 @@ static void 
e1000_phy_read_status(struct e1000_adapter *adapter) ret_val |= e1e_rphy(hw, MII_ESTATUS, &phy->estatus); if (ret_val) e_warn("Error reading PHY register\n"); + pm_runtime_put_sync(&adapter->pdev->dev); } else { /* Do not read PHY registers if link is not up * Set values to typical power-on defaults @@ -4782,7 +4806,7 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) **/ static void e1000_watchdog(unsigned long data) { - struct e1000_adapter *adapter = (struct e1000_adapter *) data; + struct e1000_adapter *adapter = (struct e1000_adapter *)data; /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); @@ -4793,7 +4817,8 @@ static void e1000_watchdog(unsigned long data) static void e1000_watchdog_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, - struct e1000_adapter, watchdog_task); + struct e1000_adapter, + watchdog_task); struct net_device *netdev = adapter->netdev; struct e1000_mac_info *mac = &adapter->hw.mac; struct e1000_phy_info *phy = &adapter->hw.phy; @@ -4827,8 +4852,8 @@ static void e1000_watchdog_task(struct work_struct *work) /* update snapshot of PHY registers on LSC */ e1000_phy_read_status(adapter); mac->ops.get_link_up_info(&adapter->hw, - &adapter->link_speed, - &adapter->link_duplex); + &adapter->link_speed, + &adapter->link_duplex); e1000_print_link_info(adapter); /* check if SmartSpeed worked */ @@ -4941,7 +4966,7 @@ static void e1000_watchdog_task(struct work_struct *work) adapter->flags |= FLAG_RESTART_NOW; else pm_schedule_suspend(netdev->dev.parent, - LINK_TIMEOUT); + LINK_TIMEOUT); } } @@ -4976,8 +5001,8 @@ link_up: */ u32 goc = (adapter->gotc + adapter->gorc) / 10000; u32 dif = (adapter->gotc > adapter->gorc ? - adapter->gotc - adapter->gorc : - adapter->gorc - adapter->gotc) / 10000; + adapter->gotc - adapter->gorc : + adapter->gorc - adapter->gotc) / 10000; u32 itr = goc > 0 ? 
(dif * 6000 / goc + 2000) : 8000; e1000e_write_itr(adapter, itr); @@ -5056,14 +5081,14 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) iph->tot_len = 0; iph->check = 0; tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); + 0, IPPROTO_TCP, 0); cmd_length = E1000_TXD_CMD_IP; ipcse = skb_transport_offset(skb) - 1; } else if (skb_is_gso_v6(skb)) { ipv6_hdr(skb)->payload_len = 0; tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - 0, IPPROTO_TCP, 0); + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); ipcse = 0; } ipcss = skb_network_offset(skb); @@ -5072,7 +5097,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | - E1000_TXD_CMD_TCP | (skb->len - (hdr_len))); + E1000_TXD_CMD_TCP | (skb->len - (hdr_len))); i = tx_ring->next_to_use; context_desc = E1000_CONTEXT_DESC(*tx_ring, i); @@ -5142,8 +5167,7 @@ static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb) context_desc->lower_setup.ip_config = 0; context_desc->upper_setup.tcp_fields.tucss = css; - context_desc->upper_setup.tcp_fields.tucso = - css + skb->csum_offset; + context_desc->upper_setup.tcp_fields.tucso = css + skb->csum_offset; context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.data = 0; context_desc->cmd_and_length = cpu_to_le32(cmd_len); @@ -5216,7 +5240,8 @@ static int e1000_tx_map(struct e1000_ring *tx_ring, struct sk_buff *skb, buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, - offset, size, DMA_TO_DEVICE); + offset, size, + DMA_TO_DEVICE); buffer_info->mapped_as_page = true; if (dma_mapping_error(&pdev->dev, buffer_info->dma)) goto dma_error; @@ -5265,7 +5290,7 @@ static void e1000_tx_queue(struct e1000_ring *tx_ring, int tx_flags, int count) if (tx_flags & 
E1000_TX_FLAGS_TSO) { txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | - E1000_TXD_CMD_TSE; + E1000_TXD_CMD_TSE; txd_upper |= E1000_TXD_POPTS_TXSM << 8; if (tx_flags & E1000_TX_FLAGS_IPV4) @@ -5296,8 +5321,8 @@ static void e1000_tx_queue(struct e1000_ring *tx_ring, int tx_flags, int count) buffer_info = &tx_ring->buffer_info[i]; tx_desc = E1000_TX_DESC(*tx_ring, i); tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); - tx_desc->lower.data = - cpu_to_le32(txd_lower | buffer_info->length); + tx_desc->lower.data = cpu_to_le32(txd_lower | + buffer_info->length); tx_desc->upper.data = cpu_to_le32(txd_upper); i++; @@ -5347,11 +5372,11 @@ static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, if (skb->len <= MINIMUM_DHCP_PACKET_SIZE) return 0; - if (((struct ethhdr *) skb->data)->h_proto != htons(ETH_P_IP)) + if (((struct ethhdr *)skb->data)->h_proto != htons(ETH_P_IP)) return 0; { - const struct iphdr *ip = (struct iphdr *)((u8 *)skb->data+14); + const struct iphdr *ip = (struct iphdr *)((u8 *)skb->data + 14); struct udphdr *udp; if (ip->protocol != IPPROTO_UDP) @@ -5576,7 +5601,7 @@ static void e1000_reset_task(struct work_struct *work) * Returns the address of the device statistics structure. 
**/ struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) + struct rtnl_link_stats64 *stats) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -5597,18 +5622,15 @@ struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, * our own version based on RUC and ROC */ stats->rx_errors = adapter->stats.rxerrc + - adapter->stats.crcerrs + adapter->stats.algnerrc + - adapter->stats.ruc + adapter->stats.roc + - adapter->stats.cexterr; - stats->rx_length_errors = adapter->stats.ruc + - adapter->stats.roc; + adapter->stats.crcerrs + adapter->stats.algnerrc + + adapter->stats.ruc + adapter->stats.roc + adapter->stats.cexterr; + stats->rx_length_errors = adapter->stats.ruc + adapter->stats.roc; stats->rx_crc_errors = adapter->stats.crcerrs; stats->rx_frame_errors = adapter->stats.algnerrc; stats->rx_missed_errors = adapter->stats.mpc; /* Tx Errors */ - stats->tx_errors = adapter->stats.ecol + - adapter->stats.latecol; + stats->tx_errors = adapter->stats.ecol + adapter->stats.latecol; stats->tx_aborted_errors = adapter->stats.ecol; stats->tx_window_errors = adapter->stats.latecol; stats->tx_carrier_errors = adapter->stats.tncrs; @@ -5677,9 +5699,9 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) /* adjust allocation if LPE protects us, and we aren't using SBP */ if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || - (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) + (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN - + ETH_FCS_LEN; + + ETH_FCS_LEN; if (netif_running(netdev)) e1000e_up(adapter); @@ -5858,7 +5880,7 @@ static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) phy_reg &= ~(BM_RCTL_MO_MASK); if (mac_reg & E1000_RCTL_MO_3) phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) - << BM_RCTL_MO_SHIFT); + << BM_RCTL_MO_SHIFT); if (mac_reg & E1000_RCTL_BAM) phy_reg |= BM_RCTL_BAM; if 
(mac_reg & E1000_RCTL_PMCF) @@ -5887,8 +5909,7 @@ release: return retval; } -static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, - bool runtime) +static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -5912,10 +5933,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, } e1000e_reset_interrupt_capability(adapter); - retval = pci_save_state(pdev); - if (retval) - return retval; - status = er32(STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; @@ -5932,10 +5949,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, } ctrl = er32(CTRL); - /* advertise wake from D3Cold */ - #define E1000_CTRL_ADVD3WUC 0x00100000 - /* phy power management enable */ - #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ctrl |= E1000_CTRL_ADVD3WUC; if (!(adapter->flags2 & FLAG2_HAS_PHY_WAKEUP)) ctrl |= E1000_CTRL_EN_PHY_PWR_MGMT; @@ -5971,13 +5984,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, ew32(WUFC, 0); } - *enable_wake = !!wufc; - - /* make sure adapter isn't asleep if manageability is enabled */ - if ((adapter->flags & FLAG_MNG_PT_ENABLED) || - (hw->mac.ops.check_mng_mode(hw))) - *enable_wake = true; - if (adapter->hw.phy.type == e1000_phy_igp_3) e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); @@ -5986,28 +5992,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, */ e1000e_release_hw_control(adapter); - pci_disable_device(pdev); - - return 0; -} - -static void e1000_power_off(struct pci_dev *pdev, bool sleep, bool wake) -{ - if (sleep && wake) { - pci_prepare_to_sleep(pdev); - return; - } - - pci_wake_from_d3(pdev, wake); - pci_set_power_state(pdev, PCI_D3hot); -} - -static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, - bool wake) -{ - struct net_device *netdev = pci_get_drvdata(pdev); - struct e1000_adapter *adapter = 
netdev_priv(netdev); - /* The pci-e switch on some quad port adapters will report a * correctable error when the MAC transitions from D0 to D3. To * prevent this we need to mask off the correctable errors on the @@ -6021,12 +6005,13 @@ static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL, (devctl & ~PCI_EXP_DEVCTL_CERE)); - e1000_power_off(pdev, sleep, wake); + pci_save_state(pdev); + pci_prepare_to_sleep(pdev); pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL, devctl); - } else { - e1000_power_off(pdev, sleep, wake); } + + return 0; } #ifdef CONFIG_PCIEASPM @@ -6084,9 +6069,7 @@ static int __e1000_resume(struct pci_dev *pdev) if (aspm_disable_flag) e1000e_disable_aspm(pdev, aspm_disable_flag); - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - pci_save_state(pdev); + pci_set_master(pdev); e1000e_set_interrupt_capability(adapter); if (netif_running(netdev)) { @@ -6107,24 +6090,24 @@ static int __e1000_resume(struct pci_dev *pdev) e1e_rphy(&adapter->hw, BM_WUS, &phy_data); if (phy_data) { e_info("PHY Wakeup cause - %s\n", - phy_data & E1000_WUS_EX ? "Unicast Packet" : - phy_data & E1000_WUS_MC ? "Multicast Packet" : - phy_data & E1000_WUS_BC ? "Broadcast Packet" : - phy_data & E1000_WUS_MAG ? "Magic Packet" : - phy_data & E1000_WUS_LNKC ? - "Link Status Change" : "other"); + phy_data & E1000_WUS_EX ? "Unicast Packet" : + phy_data & E1000_WUS_MC ? "Multicast Packet" : + phy_data & E1000_WUS_BC ? "Broadcast Packet" : + phy_data & E1000_WUS_MAG ? "Magic Packet" : + phy_data & E1000_WUS_LNKC ? + "Link Status Change" : "other"); } e1e_wphy(&adapter->hw, BM_WUS, ~0); } else { u32 wus = er32(WUS); if (wus) { e_info("MAC Wakeup cause - %s\n", - wus & E1000_WUS_EX ? "Unicast Packet" : - wus & E1000_WUS_MC ? "Multicast Packet" : - wus & E1000_WUS_BC ? "Broadcast Packet" : - wus & E1000_WUS_MAG ? "Magic Packet" : - wus & E1000_WUS_LNKC ? "Link Status Change" : - "other"); + wus & E1000_WUS_EX ? 
"Unicast Packet" : + wus & E1000_WUS_MC ? "Multicast Packet" : + wus & E1000_WUS_BC ? "Broadcast Packet" : + wus & E1000_WUS_MAG ? "Magic Packet" : + wus & E1000_WUS_LNKC ? "Link Status Change" : + "other"); } ew32(WUS, ~0); } @@ -6152,14 +6135,8 @@ static int __e1000_resume(struct pci_dev *pdev) static int e1000_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); - int retval; - bool wake; - retval = __e1000_shutdown(pdev, &wake, false); - if (!retval) - e1000_complete_shutdown(pdev, true, wake); - - return retval; + return __e1000_shutdown(pdev, false); } static int e1000_resume(struct device *dev) @@ -6182,13 +6159,10 @@ static int e1000_runtime_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); - if (e1000e_pm_ready(adapter)) { - bool wake; - - __e1000_shutdown(pdev, &wake, true); - } + if (!e1000e_pm_ready(adapter)) + return 0; - return 0; + return __e1000_shutdown(pdev, true); } static int e1000_idle(struct device *dev) @@ -6226,12 +6200,7 @@ static int e1000_runtime_resume(struct device *dev) static void e1000_shutdown(struct pci_dev *pdev) { - bool wake = false; - - __e1000_shutdown(pdev, &wake, false); - - if (system_state == SYSTEM_POWER_OFF) - e1000_complete_shutdown(pdev, false, wake); + __e1000_shutdown(pdev, false); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -6352,9 +6321,9 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) "Cannot re-enable PCI device after reset.\n"); result = PCI_ERS_RESULT_DISCONNECT; } else { - pci_set_master(pdev); pdev->state_saved = true; pci_restore_state(pdev); + pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); @@ -6413,7 +6382,7 @@ static void e1000_print_device_info(struct e1000_adapter *adapter) e_info("(PCI Express:2.5GT/s:%s) %pM\n", /* bus width */ ((hw->bus.width == e1000_bus_width_pcie_x4) ? 
"Width x4" : - "Width x1"), + "Width x1"), /* MAC address */ netdev->dev_addr); e_info("Intel(R) PRO/%s Network Connection\n", @@ -6453,7 +6422,7 @@ static int e1000_set_features(struct net_device *netdev, if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) adapter->flags |= FLAG_TSO_FORCE; - if (!(changed & (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | + if (!(changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_RXFCS | NETIF_F_RXALL))) return 0; @@ -6523,7 +6492,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) resource_size_t flash_start, flash_len; static int cards_found; u16 aspm_disable_flag = 0; - int i, err, pci_using_dac; + int bars, i, err, pci_using_dac; u16 eeprom_data = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; @@ -6550,15 +6519,16 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); goto err_dma; } } } - err = pci_request_selected_regions_exclusive(pdev, - pci_select_bars(pdev, IORESOURCE_MEM), - e1000e_driver_name); + bars = pci_select_bars(pdev, IORESOURCE_MEM); + err = pci_request_selected_regions_exclusive(pdev, bars, + e1000e_driver_name); if (err) goto err_pci_reg; @@ -6611,6 +6581,10 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_flashmap; } + /* Set default EEE advertisement */ + if (adapter->flags2 & FLAG2_HAS_EEE) + adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; + /* construct the net_device struct */ netdev->netdev_ops = &e1000e_netdev_ops; e1000e_set_ethtool_ops(netdev); @@ -6659,8 +6633,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Set initial default active device features */ netdev->features = (NETIF_F_SG | - NETIF_F_HW_VLAN_RX | - 
NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | @@ -6674,7 +6648,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXALL; if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) - netdev->features |= NETIF_F_HW_VLAN_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= (NETIF_F_SG | NETIF_F_TSO | @@ -6727,11 +6701,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) init_timer(&adapter->watchdog_timer); adapter->watchdog_timer.function = e1000_watchdog; - adapter->watchdog_timer.data = (unsigned long) adapter; + adapter->watchdog_timer.data = (unsigned long)adapter; init_timer(&adapter->phy_info_timer); adapter->phy_info_timer.function = e1000_update_phy_info; - adapter->phy_info_timer.data = (unsigned long) adapter; + adapter->phy_info_timer.data = (unsigned long)adapter; INIT_WORK(&adapter->reset_task, e1000_reset_task); INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); @@ -6783,7 +6757,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* initialize the wol settings based on the eeprom settings */ adapter->wol = adapter->eeprom_wol; - device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + /* make sure adapter isn't asleep if manageability is enabled */ + if (adapter->wol || (adapter->flags & FLAG_MNG_PT_ENABLED) || + (hw->mac.ops.check_mng_mode(hw))) + device_wakeup_enable(&pdev->dev); /* save off EEPROM version number */ e1000_read_nvm(&adapter->hw, 5, 1, &adapter->eeprom_vers); @@ -6835,7 +6813,7 @@ err_ioremap: free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -6905,7 +6883,7 @@ static void e1000_remove(struct pci_dev *pdev) if (adapter->hw.flash_address) 
iounmap(adapter->hw.flash_address); pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_select_bars(pdev, IORESOURCE_MEM)); free_netdev(netdev); @@ -6926,7 +6904,8 @@ static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP), board_82571 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP), + board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL), board_82571 }, @@ -7002,8 +6981,8 @@ MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); #ifdef CONFIG_PM static const struct dev_pm_ops e1000_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(e1000_suspend, e1000_resume) - SET_RUNTIME_PM_OPS(e1000_runtime_suspend, - e1000_runtime_resume, e1000_idle) + SET_RUNTIME_PM_OPS(e1000_runtime_suspend, e1000_runtime_resume, + e1000_idle) }; #endif diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c index 84fecc268162..44ddc0a0ee0e 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -630,7 +630,7 @@ void e1000e_reload_nvm_generic(struct e1000_hw *hw) { u32 ctrl_ext; - udelay(10); + usleep_range(10, 20); ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_EE_RST; ew32(CTRL_EXT, ctrl_ext); diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c index 98da75dff936..c16bd75b6caa 100644 --- a/drivers/net/ethernet/intel/e1000e/param.c +++ b/drivers/net/ethernet/intel/e1000e/param.c @@ -45,7 +45,7 @@ unsigned int copybreak = COPYBREAK_DEFAULT; module_param(copybreak, uint, 0644); MODULE_PARM_DESC(copybreak, - "Maximum size of packet that is 
copied to a new buffer on receive"); + "Maximum size of packet that is copied to a new buffer on receive"); /* All parameters are treated the same, as an integer array of values. * This macro just reduces the need to repeat the same declaration code @@ -143,7 +143,8 @@ E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround"); * * Default Value: 1 (enabled) */ -E1000_PARAM(WriteProtectNVM, "Write-protect NVM [WARNING: disabling this can lead to corrupted NVM]"); +E1000_PARAM(WriteProtectNVM, + "Write-protect NVM [WARNING: disabling this can lead to corrupted NVM]"); /* Enable CRC Stripping * @@ -160,13 +161,18 @@ struct e1000_option { const char *err; int def; union { - struct { /* range_option info */ + /* range_option info */ + struct { int min; int max; } r; - struct { /* list_option info */ + /* list_option info */ + struct { int nr; - struct e1000_opt_list { int i; char *str; } *p; + struct e1000_opt_list { + int i; + char *str; + } *p; } l; } arg; }; @@ -246,7 +252,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) "Using defaults for all values\n"); } - { /* Transmit Interrupt Delay */ + /* Transmit Interrupt Delay */ + { static const struct e1000_option opt = { .type = range_option, .name = "Transmit Interrupt Delay", @@ -265,7 +272,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) adapter->tx_int_delay = opt.def; } } - { /* Transmit Absolute Interrupt Delay */ + /* Transmit Absolute Interrupt Delay */ + { static const struct e1000_option opt = { .type = range_option, .name = "Transmit Absolute Interrupt Delay", @@ -284,7 +292,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) adapter->tx_abs_int_delay = opt.def; } } - { /* Receive Interrupt Delay */ + /* Receive Interrupt Delay */ + { static struct e1000_option opt = { .type = range_option, .name = "Receive Interrupt Delay", @@ -303,7 +312,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) adapter->rx_int_delay = opt.def; } } - { /* Receive Absolute 
Interrupt Delay */ + /* Receive Absolute Interrupt Delay */ + { static const struct e1000_option opt = { .type = range_option, .name = "Receive Absolute Interrupt Delay", @@ -322,7 +332,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) adapter->rx_abs_int_delay = opt.def; } } - { /* Interrupt Throttling Rate */ + /* Interrupt Throttling Rate */ + { static const struct e1000_option opt = { .type = range_option, .name = "Interrupt Throttling Rate (ints/sec)", @@ -392,7 +403,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) break; } } - { /* Interrupt Mode */ + /* Interrupt Mode */ + { static struct e1000_option opt = { .type = range_option, .name = "Interrupt Mode", @@ -435,7 +447,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) kfree(opt.err); #endif } - { /* Smart Power Down */ + /* Smart Power Down */ + { static const struct e1000_option opt = { .type = enable_option, .name = "PHY Smart Power Down", @@ -450,7 +463,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) adapter->flags |= FLAG_SMART_POWER_DOWN; } } - { /* CRC Stripping */ + /* CRC Stripping */ + { static const struct e1000_option opt = { .type = enable_option, .name = "CRC Stripping", @@ -470,27 +484,28 @@ void e1000e_check_options(struct e1000_adapter *adapter) adapter->flags2 |= FLAG2_DFLT_CRC_STRIPPING; } } - { /* Kumeran Lock Loss Workaround */ + /* Kumeran Lock Loss Workaround */ + { static const struct e1000_option opt = { .type = enable_option, .name = "Kumeran Lock Loss Workaround", .err = "defaulting to Enabled", .def = OPTION_ENABLED }; + bool enabled = opt.def; if (num_KumeranLockLoss > bd) { unsigned int kmrn_lock_loss = KumeranLockLoss[bd]; e1000_validate_option(&kmrn_lock_loss, &opt, adapter); - if (hw->mac.type == e1000_ich8lan) - e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, - kmrn_lock_loss); - } else { - if (hw->mac.type == e1000_ich8lan) - e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, - opt.def); + enabled = kmrn_lock_loss; } + + if 
(hw->mac.type == e1000_ich8lan) + e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, + enabled); } - { /* Write-protect NVM */ + /* Write-protect NVM */ + { static const struct e1000_option opt = { .type = enable_option, .name = "Write-protect NVM", @@ -500,7 +515,8 @@ void e1000e_check_options(struct e1000_adapter *adapter) if (adapter->flags & FLAG_IS_ICH) { if (num_WriteProtectNVM > bd) { - unsigned int write_protect_nvm = WriteProtectNVM[bd]; + unsigned int write_protect_nvm = + WriteProtectNVM[bd]; e1000_validate_option(&write_protect_nvm, &opt, adapter); if (write_protect_nvm) diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 0930c136aa31..59c76a6815a0 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -37,7 +37,9 @@ static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, /* Cable length tables */ static const u16 e1000_m88_cable_length_table[] = { - 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; + 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED +}; + #define M88E1000_CABLE_LENGTH_TABLE_SIZE \ ARRAY_SIZE(e1000_m88_cable_length_table) @@ -49,7 +51,9 @@ static const u16 e1000_igp_2_cable_length_table[] = { 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, - 124}; + 124 +}; + #define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ ARRAY_SIZE(e1000_igp_2_cable_length_table) @@ -67,8 +71,7 @@ s32 e1000e_check_reset_block_generic(struct e1000_hw *hw) manc = er32(MANC); - return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? - E1000_BLK_PHY_RESET : 0; + return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? 
E1000_BLK_PHY_RESET : 0; } /** @@ -94,7 +97,7 @@ s32 e1000e_get_phy_id(struct e1000_hw *hw) return ret_val; phy->id = (u32)(phy_id << 16); - udelay(20); + usleep_range(20, 40); ret_val = e1e_rphy(hw, MII_PHYSID2, &phy_id); if (ret_val) return ret_val; @@ -175,7 +178,13 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) e_dbg("MDI Error\n"); return -E1000_ERR_PHY; } - *data = (u16) mdic; + if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + e_dbg("MDI Read offset error - requested %d, returned %d\n", + offset, + (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + return -E1000_ERR_PHY; + } + *data = (u16)mdic; /* Allow some time after each MDIC transaction to avoid * reading duplicate data in the next MDIC transaction. @@ -233,6 +242,12 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) e_dbg("MDI Error\n"); return -E1000_ERR_PHY; } + if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + e_dbg("MDI Write offset error - requested %d, returned %d\n", + offset, + (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + return -E1000_ERR_PHY; + } /* Allow some time after each MDIC transaction to avoid * reading duplicate data in the next MDIC transaction. @@ -324,7 +339,7 @@ s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page) * semaphores before exiting. **/ static s32 __e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data, - bool locked) + bool locked) { s32 ret_val = 0; @@ -391,7 +406,7 @@ s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) * at the offset. Release any acquired semaphores before exiting. 
**/ static s32 __e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, - bool locked) + bool locked) { s32 ret_val = 0; @@ -410,8 +425,7 @@ static s32 __e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, (u16)offset); if (!ret_val) ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & - offset, - data); + offset, data); if (!locked) hw->phy.ops.release(hw); @@ -458,7 +472,7 @@ s32 e1000e_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data) * Release any acquired semaphores before exiting. **/ static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, - bool locked) + bool locked) { u32 kmrnctrlsta; @@ -531,7 +545,7 @@ s32 e1000e_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data) * before exiting. **/ static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, - bool locked) + bool locked) { u32 kmrnctrlsta; @@ -772,8 +786,7 @@ s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw) phy_data |= M88E1000_EPSCR_TX_CLK_25; - if ((phy->revision == 2) && - (phy->id == M88E1111_I_PHY_ID)) { + if ((phy->revision == 2) && (phy->id == M88E1111_I_PHY_ID)) { /* 82573L PHY - set the downshift counter to 5x. 
*/ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; @@ -1296,7 +1309,7 @@ s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw) e_dbg("Waiting for forced speed/duplex link on M88 phy.\n"); ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); + 100000, &link); if (ret_val) return ret_val; @@ -1319,7 +1332,7 @@ s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw) /* Try once more */ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, - 100000, &link); + 100000, &link); if (ret_val) return ret_val; } @@ -1609,9 +1622,9 @@ s32 e1000_check_polarity_m88(struct e1000_hw *hw) ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &data); if (!ret_val) - phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal; + phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); return ret_val; } @@ -1653,9 +1666,9 @@ s32 e1000_check_polarity_igp(struct e1000_hw *hw) ret_val = e1e_rphy(hw, offset, &data); if (!ret_val) - phy->cable_polarity = (data & mask) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal; + phy->cable_polarity = ((data & mask) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); return ret_val; } @@ -1685,9 +1698,9 @@ s32 e1000_check_polarity_ife(struct e1000_hw *hw) ret_val = e1e_rphy(hw, offset, &phy_data); if (!ret_val) - phy->cable_polarity = (phy_data & mask) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal; + phy->cable_polarity = ((phy_data & mask) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); return ret_val; } @@ -1733,7 +1746,7 @@ static s32 e1000_wait_autoneg(struct e1000_hw *hw) * Polls the PHY status register for link, 'iterations' number of times. 
**/ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, - u32 usec_interval, bool *success) + u32 usec_interval, bool *success) { s32 ret_val = 0; u16 i, phy_status; @@ -1756,7 +1769,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, if (phy_status & BMSR_LSTATUS) break; if (usec_interval >= 1000) - mdelay(usec_interval/1000); + mdelay(usec_interval / 1000); else udelay(usec_interval); } @@ -1791,8 +1804,8 @@ s32 e1000e_get_cable_length_m88(struct e1000_hw *hw) if (ret_val) return ret_val; - index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT; + index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> + M88E1000_PSSR_CABLE_LENGTH_SHIFT); if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) return -E1000_ERR_PHY; @@ -1824,10 +1837,10 @@ s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw) u16 cur_agc_index, max_agc_index = 0; u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { - IGP02E1000_PHY_AGC_A, - IGP02E1000_PHY_AGC_B, - IGP02E1000_PHY_AGC_C, - IGP02E1000_PHY_AGC_D + IGP02E1000_PHY_AGC_A, + IGP02E1000_PHY_AGC_B, + IGP02E1000_PHY_AGC_C, + IGP02E1000_PHY_AGC_D }; /* Read the AGC registers for all channels */ @@ -1841,8 +1854,8 @@ s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw) * that can be put into the lookup table to obtain the * approximate cable length. */ - cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & - IGP02E1000_AGC_LENGTH_MASK; + cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & + IGP02E1000_AGC_LENGTH_MASK); /* Array index bound check. */ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || @@ -1865,8 +1878,8 @@ s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw) agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); /* Calculate cable length with the error range of +/- 10 meters. */ - phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? 
- (agc_value - IGP02E1000_AGC_RANGE) : 0; + phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ? + (agc_value - IGP02E1000_AGC_RANGE) : 0); phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; @@ -2040,9 +2053,9 @@ s32 e1000_get_phy_info_ife(struct e1000_hw *hw) return ret_val; } else { /* Polarity is forced */ - phy->cable_polarity = (data & IFE_PSC_FORCE_POLARITY) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal; + phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); } ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &data); @@ -2119,7 +2132,7 @@ s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw) ew32(CTRL, ctrl); e1e_flush(); - udelay(150); + usleep_range(150, 300); phy->ops.release(hw); @@ -2375,13 +2388,13 @@ s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data) /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, - (page << page_shift)); + (page << page_shift)); if (ret_val) goto release; } ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, - data); + data); release: hw->phy.ops.release(hw); @@ -2433,13 +2446,13 @@ s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data) /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, - (page << page_shift)); + (page << page_shift)); if (ret_val) goto release; } ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, - data); + data); release: hw->phy.ops.release(hw); return ret_val; @@ -2674,7 +2687,7 @@ static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, if (read) { /* Read the Wakeup register page value using opcode 0x12 */ ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, - data); + data); } else { /* Write the Wakeup register page value using opcode 0x12 
*/ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, @@ -2763,7 +2776,7 @@ static s32 __e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data, if (page > 0 && page < HV_INTC_FC_PAGE_START) { ret_val = e1000_access_phy_debug_regs_hv(hw, offset, - data, true); + data, true); goto out; } @@ -2786,8 +2799,7 @@ static s32 __e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data, e_dbg("reading PHY page %d (or 0x%x shifted) reg 0x%x\n", page, page << IGP_PAGE_SHIFT, reg); - ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, - data); + ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, data); out: if (!locked) hw->phy.ops.release(hw); @@ -2871,7 +2883,7 @@ static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data, if (page > 0 && page < HV_INTC_FC_PAGE_START) { ret_val = e1000_access_phy_debug_regs_hv(hw, offset, - &data, false); + &data, false); goto out; } @@ -2910,7 +2922,7 @@ static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data, page << IGP_PAGE_SHIFT, reg); ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, - data); + data); out: if (!locked) @@ -2988,15 +3000,15 @@ static u32 e1000_get_phy_addr_for_hv_page(u32 page) * These accesses done with PHY address 2 and without using pages. **/ static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, - u16 *data, bool read) + u16 *data, bool read) { s32 ret_val; u32 addr_reg; u32 data_reg; /* This takes care of the difference with desktop vs mobile phy */ - addr_reg = (hw->phy.type == e1000_phy_82578) ? - I82578_ADDR_REG : I82577_ADDR_REG; + addr_reg = ((hw->phy.type == e1000_phy_82578) ? 
+ I82578_ADDR_REG : I82577_ADDR_REG); data_reg = addr_reg + 1; /* All operations in this function are phy address 2 */ @@ -3050,8 +3062,8 @@ s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw) if (ret_val) return ret_val; - data &= BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | - BM_CS_STATUS_SPEED_MASK; + data &= (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | + BM_CS_STATUS_SPEED_MASK); if (data != (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | BM_CS_STATUS_SPEED_1000)) @@ -3086,9 +3098,9 @@ s32 e1000_check_polarity_82577(struct e1000_hw *hw) ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data); if (!ret_val) - phy->cable_polarity = (data & I82577_PHY_STATUS2_REV_POLARITY) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal; + phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); return ret_val; } @@ -3215,8 +3227,8 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw) if (ret_val) return ret_val; - length = (phy_data & I82577_DSTATUS_CABLE_LENGTH) >> - I82577_DSTATUS_CABLE_LENGTH_SHIFT; + length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> + I82577_DSTATUS_CABLE_LENGTH_SHIFT); if (length == E1000_CABLE_LENGTH_UNDEFINED) return -E1000_ERR_PHY; diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index b64542acfa34..c9bba39d50bd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -100,6 +100,7 @@ static bool igb_sgmii_uses_mdio_82575(struct e1000_hw *hw) break; case e1000_82580: case e1000_i350: + case e1000_i354: case e1000_i210: case e1000_i211: reg = rd32(E1000_MDICNFG); @@ -149,6 +150,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) switch (hw->mac.type) { case e1000_82580: case e1000_i350: + case e1000_i354: phy->ops.read_reg = igb_read_phy_reg_82580; phy->ops.write_reg = igb_write_phy_reg_82580; break; @@ -174,13 +176,14 @@ static s32 
igb_init_phy_params_82575(struct e1000_hw *hw) /* Verify phy id and set remaining function pointers */ switch (phy->id) { + case M88E1545_E_PHY_ID: case I347AT4_E_PHY_ID: case M88E1112_E_PHY_ID: case M88E1111_I_PHY_ID: phy->type = e1000_phy_m88; + phy->ops.check_polarity = igb_check_polarity_m88; phy->ops.get_phy_info = igb_get_phy_info_m88; - if (phy->id == I347AT4_E_PHY_ID || - phy->id == M88E1112_E_PHY_ID) + if (phy->id != M88E1111_I_PHY_ID) phy->ops.get_cable_length = igb_get_cable_length_m88_gen2; else @@ -227,7 +230,7 @@ out: * igb_init_nvm_params_82575 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ -s32 igb_init_nvm_params_82575(struct e1000_hw *hw) +static s32 igb_init_nvm_params_82575(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = rd32(E1000_EECD); @@ -287,6 +290,7 @@ s32 igb_init_nvm_params_82575(struct e1000_hw *hw) nvm->ops.read = igb_read_nvm_spi; nvm->ops.write = igb_write_nvm_spi; break; + case e1000_i354: case e1000_i350: nvm->ops.validate = igb_validate_nvm_checksum_i350; nvm->ops.update = igb_update_nvm_checksum_i350; @@ -352,6 +356,7 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) mac->rar_entry_count = E1000_RAR_ENTRIES_82580; break; case e1000_i350: + case e1000_i354: mac->rar_entry_count = E1000_RAR_ENTRIES_I350; break; default: @@ -445,14 +450,18 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) case E1000_DEV_ID_I211_COPPER: mac->type = e1000_i211; break; + case E1000_DEV_ID_I354_BACKPLANE_1GBPS: + case E1000_DEV_ID_I354_SGMII: + case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: + mac->type = e1000_i354; + break; default: return -E1000_ERR_MAC_INIT; break; } /* Set media type */ - /* - * The 82575 uses bits 22:23 for link mode. The mode can be changed + /* The 82575 uses bits 22:23 for link mode. The mode can be changed * based on the EEPROM. We cannot rely upon device ID. There * is no distinguishable difference between fiber and internal * SerDes mode on the 82575. 
There can be an external PHY attached @@ -621,8 +630,7 @@ static s32 igb_get_phy_id_82575(struct e1000_hw *hw) u32 ctrl_ext; u32 mdic; - /* - * For SGMII PHYs, we try the list of possible addresses until + /* For SGMII PHYs, we try the list of possible addresses until * we find one that works. For non-SGMII PHYs * (e.g. integrated copper PHYs), an address of 1 should * work. The result of this function should mean phy->phy_addr @@ -644,6 +652,7 @@ static s32 igb_get_phy_id_82575(struct e1000_hw *hw) break; case e1000_82580: case e1000_i350: + case e1000_i354: case e1000_i210: case e1000_i211: mdic = rd32(E1000_MDICNFG); @@ -665,8 +674,7 @@ static s32 igb_get_phy_id_82575(struct e1000_hw *hw) wrfl(); msleep(300); - /* - * The address field in the I2CCMD register is 3 bits and 0 is invalid. + /* The address field in the I2CCMD register is 3 bits and 0 is invalid. * Therefore, we need to test 1-7 */ for (phy->addr = 1; phy->addr < 8; phy->addr++) { @@ -674,8 +682,7 @@ static s32 igb_get_phy_id_82575(struct e1000_hw *hw) if (ret_val == 0) { hw_dbg("Vendor ID 0x%08X read at address %u\n", phy_id, phy->addr); - /* - * At the time of this writing, The M88 part is + /* At the time of this writing, The M88 part is * the only supported SGMII PHY product. */ if (phy_id == M88_VENDOR) @@ -711,15 +718,13 @@ static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) { s32 ret_val; - /* - * This isn't a true "hard" reset, but is the only reset + /* This isn't a true "hard" reset, but is the only reset * available to us at this time. */ hw_dbg("Soft resetting SGMII attached PHY...\n"); - /* - * SFP documentation requires the following to configure the SPF module + /* SFP documentation requires the following to configure the SPF module * to work on SGMII. No further documentation is given. 
*/ ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); @@ -774,8 +779,7 @@ static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) data &= ~IGP02E1000_PM_D0_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); - /* - * LPLU and SmartSpeed are mutually exclusive. LPLU is used + /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. @@ -838,8 +842,7 @@ static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) } else { data &= ~E1000_82580_PM_D0_LPLU; - /* - * LPLU and SmartSpeed are mutually exclusive. LPLU is used + /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. @@ -867,7 +870,7 @@ static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) * During driver activity, SmartSpeed should be enabled so performance is * maintained. **/ -s32 igb_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) +static s32 igb_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = 0; @@ -877,8 +880,7 @@ s32 igb_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) if (!active) { data &= ~E1000_82580_PM_D3_LPLU; - /* - * LPLU and SmartSpeed are mutually exclusive. LPLU is used + /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. 
@@ -964,8 +966,7 @@ static s32 igb_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) if (!(swfw_sync & (fwmask | swmask))) break; - /* - * Firmware currently using resource (fwmask) + /* Firmware currently using resource (fwmask) * or other software thread using resource (swmask) */ igb_put_hw_semaphore(hw); @@ -1065,8 +1066,7 @@ static s32 igb_check_for_link_82575(struct e1000_hw *hw) if (hw->phy.media_type != e1000_media_type_copper) { ret_val = igb_get_pcs_speed_and_duplex_82575(hw, &speed, &duplex); - /* - * Use this flag to determine if link needs to be checked or + /* Use this flag to determine if link needs to be checked or * not. If we have link clear the flag so that we do not * continue to check for link. */ @@ -1135,15 +1135,13 @@ static s32 igb_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, *speed = 0; *duplex = 0; - /* - * Read the PCS Status register for link state. For non-copper mode, + /* Read the PCS Status register for link state. For non-copper mode, * the status register is not accurate. The PCS status register is * used instead. */ pcs = rd32(E1000_PCS_LSTAT); - /* - * The link up bit determines when link is up on autoneg. The sync ok + /* The link up bit determines when link is up on autoneg. The sync ok * gets set once both sides sync up and agree upon link. Stable link * can be determined by checking for both link up and link sync ok */ @@ -1214,8 +1212,7 @@ static s32 igb_reset_hw_82575(struct e1000_hw *hw) u32 ctrl, icr; s32 ret_val; - /* - * Prevent the PCI-E bus from sticking if there is no TLP connection + /* Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. 
*/ ret_val = igb_disable_pcie_master(hw); @@ -1244,8 +1241,7 @@ static s32 igb_reset_hw_82575(struct e1000_hw *hw) ret_val = igb_get_auto_rd_done(hw); if (ret_val) { - /* - * When auto config read does not complete, do not + /* When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ @@ -1287,7 +1283,7 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw) /* Disabling VLAN filtering */ hw_dbg("Initializing the IEEE VLAN\n"); - if (hw->mac.type == e1000_i350) + if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) igb_clear_vfta_i350(hw); else igb_clear_vfta(hw); @@ -1308,8 +1304,7 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw) /* Setup link and flow control */ ret_val = igb_setup_link(hw); - /* - * Clear all of the statistics registers (clear on read). It is + /* Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. @@ -1364,6 +1359,7 @@ static s32 igb_setup_copper_link_82575(struct e1000_hw *hw) switch (hw->phy.id) { case I347AT4_E_PHY_ID: case M88E1112_E_PHY_ID: + case M88E1545_E_PHY_ID: case I210_I_PHY_ID: ret_val = igb_copper_link_setup_m88_gen2(hw); break; @@ -1412,17 +1408,17 @@ static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) return ret_val; - /* - * On the 82575, SerDes loopback mode persists until it is + /* On the 82575, SerDes loopback mode persists until it is * explicitly turned off or a power cycle is performed. A read to * the register does not indicate its status. Therefore, we ensure * loopback mode is disabled during initialization. 
*/ wr32(E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); - /* power on the sfp cage if present */ + /* power on the sfp cage if present and turn on I2C */ ctrl_ext = rd32(E1000_CTRL_EXT); ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; + ctrl_ext |= E1000_CTRL_I2C_ENA; wr32(E1000_CTRL_EXT, ctrl_ext); ctrl_reg = rd32(E1000_CTRL); @@ -1466,8 +1462,7 @@ static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) pcs_autoneg = false; } - /* - * non-SGMII modes only supports a speed of 1000/Full for the + /* non-SGMII modes only supports a speed of 1000/Full for the * link so it is best to just force the MAC and let the pcs * link either autoneg or be forced to 1000/Full */ @@ -1481,8 +1476,7 @@ static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) wr32(E1000_CTRL, ctrl_reg); - /* - * New SerDes mode allows for forcing speed or autonegotiating speed + /* New SerDes mode allows for forcing speed or autonegotiating speed * at 1gb. Autoneg should be default set by most drivers. This is the * mode that will be compatible with older link partners and switches. * However, both are supported by the hardware and some drivers/tools. @@ -1592,8 +1586,7 @@ static s32 igb_read_mac_addr_82575(struct e1000_hw *hw) { s32 ret_val = 0; - /* - * If there's an alternate MAC address place it in RAR0 + /* If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm * address. 
*/ @@ -1777,8 +1770,7 @@ static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw) if (gcr & E1000_GCR_CMPL_TMOUT_MASK) goto out; - /* - * if capababilities version is type 1 we can write the + /* if capabilities version is type 1 we can write the * timeout of 10ms to 200ms through the GCR register */ if (!(gcr & E1000_GCR_CAP_VER2)) { @@ -1786,8 +1778,7 @@ static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw) goto out; } - /* - * for version 2 capabilities we need to write the config space + /* for version 2 capabilities we need to write the config space * directly in order to set the completion timeout value for * 16ms to 55ms */ @@ -1818,27 +1809,33 @@ out: **/ void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) { - u32 dtxswc; + u32 reg_val, reg_offset; switch (hw->mac.type) { case e1000_82576: + reg_offset = E1000_DTXSWC; + break; case e1000_i350: - dtxswc = rd32(E1000_DTXSWC); - if (enable) { - dtxswc |= (E1000_DTXSWC_MAC_SPOOF_MASK | - E1000_DTXSWC_VLAN_SPOOF_MASK); - /* The PF can spoof - it has to in order to - * support emulation mode NICs */ - dtxswc ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); - } else { - dtxswc &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | - E1000_DTXSWC_VLAN_SPOOF_MASK); - } - wr32(E1000_DTXSWC, dtxswc); + case e1000_i354: + reg_offset = E1000_TXSWC; break; default: - break; + return; } + + reg_val = rd32(reg_offset); + if (enable) { + reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); + /* The PF can spoof - it has to in order to + * support emulation mode NICs + */ + reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); + } else { + reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); + } + wr32(reg_offset, reg_val); } /** @@ -1861,6 +1858,7 @@ void igb_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; wr32(E1000_DTXSWC, dtxswc); break; + case e1000_i354: case e1000_i350: dtxswc = rd32(E1000_TXSWC); if (enable) @@ 
-1874,7 +1872,6 @@ void igb_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) break; } - } /** @@ -1909,7 +1906,6 @@ static s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; - ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; @@ -2011,8 +2007,7 @@ static s32 igb_reset_hw_82580(struct e1000_hw *hw) /* Get current control state. */ ctrl = rd32(E1000_CTRL); - /* - * Prevent the PCI-E bus from sticking if there is no TLP connection + /* Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = igb_disable_pcie_master(hw); @@ -2047,8 +2042,7 @@ static s32 igb_reset_hw_82580(struct e1000_hw *hw) ret_val = igb_get_auto_rd_done(hw); if (ret_val) { - /* - * When auto config read does not complete, do not + /* When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ @@ -2192,7 +2186,8 @@ static s32 igb_validate_nvm_checksum_82580(struct e1000_hw *hw) if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { /* if checksums compatibility bit is set validate checksums - * for all 4 ports. */ + * for all 4 ports. 
+ */ eeprom_regions_count = 4; } @@ -2304,6 +2299,41 @@ out: } /** + * __igb_access_emi_reg - Read/write EMI register + * @hw: pointer to the HW structure + * @addr: EMI address to program + * @data: pointer to value to read/write from/to the EMI address + * @read: boolean flag to indicate read or write + **/ +static s32 __igb_access_emi_reg(struct e1000_hw *hw, u16 address, + u16 *data, bool read) +{ + s32 ret_val = E1000_SUCCESS; + + ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); + else + ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); + + return ret_val; +} + +/** + * igb_read_emi_reg - Read Extended Management Interface register + * @hw: pointer to the HW structure + * @addr: EMI address to program + * @data: value to be read from the EMI address + **/ +s32 igb_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) +{ + return __igb_access_emi_reg(hw, addr, data, true); +} + +/** * igb_set_eee_i350 - Enable/disable EEE support * @hw: pointer to the HW structure * @@ -2333,7 +2363,6 @@ s32 igb_set_eee_i350(struct e1000_hw *hw) if (eee_su & E1000_EEE_SU_LPI_CLK_STP) hw_dbg("LPI Clock Stop Bit should not be set!\n"); - } else { ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); @@ -2350,6 +2379,108 @@ out: return ret_val; } +/** + * igb_set_eee_i354 - Enable/disable EEE support + * @hw: pointer to the HW structure + * + * Enable/disable EEE legacy mode based on setting in dev_spec structure. + * + **/ +s32 igb_set_eee_i354(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u16 phy_data; + + if ((hw->phy.media_type != e1000_media_type_copper) || + (phy->id != M88E1545_E_PHY_ID)) + goto out; + + if (!hw->dev_spec._82575.eee_disable) { + /* Switch to PHY page 18. 
*/ + ret_val = phy->ops.write_reg(hw, E1000_M88E1545_PAGE_ADDR, 18); + if (ret_val) + goto out; + + ret_val = phy->ops.read_reg(hw, E1000_M88E1545_EEE_CTRL_1, + &phy_data); + if (ret_val) + goto out; + + phy_data |= E1000_M88E1545_EEE_CTRL_1_MS; + ret_val = phy->ops.write_reg(hw, E1000_M88E1545_EEE_CTRL_1, + phy_data); + if (ret_val) + goto out; + + /* Return the PHY to page 0. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1545_PAGE_ADDR, 0); + if (ret_val) + goto out; + + /* Turn on EEE advertisement. */ + ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + phy_data |= E1000_EEE_ADV_100_SUPPORTED | + E1000_EEE_ADV_1000_SUPPORTED; + ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + phy_data); + } else { + /* Turn off EEE advertisement. */ + ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | + E1000_EEE_ADV_1000_SUPPORTED); + ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + phy_data); + } + +out: + return ret_val; +} + +/** + * igb_get_eee_status_i354 - Get EEE status + * @hw: pointer to the HW structure + * @status: EEE status + * + * Get EEE status by guessing based on whether Tx or Rx LPI indications have + * been received. + **/ +s32 igb_get_eee_status_i354(struct e1000_hw *hw, bool *status) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u16 phy_data; + + /* Check if EEE is supported on this device. */ + if ((hw->phy.media_type != e1000_media_type_copper) || + (phy->id != M88E1545_E_PHY_ID)) + goto out; + + ret_val = igb_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, + E1000_PCS_STATUS_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | + E1000_PCS_STATUS_RX_LPI_RCVD) ? 
true : false; + +out: + return ret_val; +} + static const u8 e1000_emc_temp_data[4] = { E1000_EMC_INTERNAL_DATA, E1000_EMC_DIODE1_DATA, @@ -2363,11 +2494,12 @@ static const u8 e1000_emc_therm_limit[4] = { E1000_EMC_DIODE3_THERM_LIMIT }; -/* igb_get_thermal_sensor_data_generic - Gathers thermal sensor data +/** + * igb_get_thermal_sensor_data_generic - Gathers thermal sensor data * @hw: pointer to hardware structure * * Updates the temperatures in mac.thermal_sensor_data - */ + **/ s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; @@ -2415,12 +2547,13 @@ s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) return status; } -/* igb_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds +/** + * igb_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds * @hw: pointer to hardware structure * * Sets the thermal sensor thresholds according to the NVM map * and save off the threshold and location values into mac.thermal_sensor_data - */ + **/ s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 73ab41f0e032..74a1506b4235 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -263,7 +263,9 @@ void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *, bool, int); void igb_vmdq_set_loopback_pf(struct e1000_hw *, bool); void igb_vmdq_set_replication_pf(struct e1000_hw *, bool); u16 igb_rxpbs_adjust_82580(u32 data); +s32 igb_read_emi_reg(struct e1000_hw *, u16 addr, u16 *data); s32 igb_set_eee_i350(struct e1000_hw *); +s32 igb_set_eee_i354(struct e1000_hw *); s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *); s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw); diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 
7e13337d3b9d..31a0f82cc650 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -138,8 +138,7 @@ #define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ #define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ -/* - * Use byte values for the following shift parameters +/* Use byte values for the following shift parameters * Usage: * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & * E1000_PSRCTL_BSIZE0_MASK) | @@ -237,11 +236,14 @@ #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* BMC external code execution disabled */ +#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */ +#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */ /* Constants used to intrepret the masked PCI-X bus speed. */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 +#define SPEED_2500 2500 #define HALF_DUPLEX 1 #define FULL_DUPLEX 2 @@ -382,8 +384,7 @@ #define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ /* TCP Timer */ -/* - * This defines the bits that are set in the Interrupt Mask +/* This defines the bits that are set in the Interrupt Mask * Set/Read Register. Each bit is documented below: * o RXT0 = Receiver Timer Interrupt (ring 0) * o TXDW = Transmit Descriptor Written Back @@ -440,8 +441,7 @@ #define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ /* Receive Address */ -/* - * Number of high/low register pairs in the RAR. The RAR (Receive Address +/* Number of high/low register pairs in the RAR. The RAR (Receive Address * Registers) holds the directed and multicast addresses that we monitor. * Technically, we have 16 spots. However, we reserve one of these spots * (RAR[15]) for our directed address used by controllers with @@ -760,8 +760,7 @@ #define MAX_PHY_MULTI_PAGE_REG 0xF /* Bit definitions for valid PHY IDs. 
*/ -/* - * I = Integrated +/* I = Integrated * E = External */ #define M88E1111_I_PHY_ID 0x01410CC0 @@ -772,6 +771,7 @@ #define I350_I_PHY_ID 0x015403B0 #define M88_VENDOR 0x0141 #define I210_I_PHY_ID 0x01410C00 +#define M88E1545_E_PHY_ID 0x01410EA0 /* M88E1000 Specific Registers */ #define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ @@ -791,8 +791,7 @@ #define M88E1000_PSCR_AUTO_X_1000T 0x0040 /* Auto crossover enabled all speeds */ #define M88E1000_PSCR_AUTO_X_MODE 0x0060 -/* - * 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold +/* 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold * 0=Normal 10BASE-T Rx Threshold */ /* 1=5-bit interface in 100BASE-TX, 0=MII interface in 100BASE-TX */ @@ -802,8 +801,7 @@ #define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ #define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ #define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ -/* - * 0 = <50M +/* 0 = <50M * 1 = 50-80M * 2 = 80-110M * 3 = 110-140M @@ -816,20 +814,17 @@ #define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 /* M88E1000 Extended PHY Specific Control Register */ -/* - * 1 = Lost lock detect enabled. +/* 1 = Lost lock detect enabled. 
* Will assert lost lock and bring * link down if idle not seen * within 1ms in 1000BASE-T */ -/* - * Number of times we will attempt to autonegotiate before downshifting if we +/* Number of times we will attempt to autonegotiate before downshifting if we * are the master */ #define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 #define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 -/* - * Number of times we will attempt to autonegotiate before downshifting if we +/* Number of times we will attempt to autonegotiate before downshifting if we * are the slave */ #define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 @@ -844,8 +839,7 @@ /* i347-AT4 Extended PHY Specific Control Register */ -/* - * Number of times we will attempt to autonegotiate before downshifting if we +/* Number of times we will attempt to autonegotiate before downshifting if we * are the master */ #define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800 @@ -895,6 +889,22 @@ #define E1000_EEER_LPI_FC 0x00040000 /* EEE Enable on FC */ #define E1000_EEE_SU_LPI_CLK_STP 0X00800000 /* EEE LPI Clock Stop */ #define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */ +#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */ +#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */ +#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */ +#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ +#define E1000_M88E1545_PAGE_ADDR 0x16 /* Page Offset Register */ +#define E1000_M88E1545_EEE_CTRL_1 0x0 +#define E1000_M88E1545_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ +#define E1000_EEE_ADV_DEV_I354 7 +#define E1000_EEE_ADV_ADDR_I354 60 +#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ +#define E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */ +#define E1000_PCS_STATUS_DEV_I354 3 +#define E1000_PCS_STATUS_ADDR_I354 1 +#define E1000_PCS_STATUS_TX_LPI_IND 0x0200 /* Tx in LPI state */ +#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400 +#define 
E1000_PCS_STATUS_TX_LPI_RCVD 0x0800 /* SerDes Control */ #define E1000_GEN_CTL_READY 0x80000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 0d5cf9c63d0d..1138ccaf95ff 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -38,31 +38,31 @@ struct e1000_hw; -#define E1000_DEV_ID_82576 0x10C9 -#define E1000_DEV_ID_82576_FIBER 0x10E6 -#define E1000_DEV_ID_82576_SERDES 0x10E7 -#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 -#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 -#define E1000_DEV_ID_82576_NS 0x150A -#define E1000_DEV_ID_82576_NS_SERDES 0x1518 -#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D -#define E1000_DEV_ID_82575EB_COPPER 0x10A7 -#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 -#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 -#define E1000_DEV_ID_82580_COPPER 0x150E -#define E1000_DEV_ID_82580_FIBER 0x150F -#define E1000_DEV_ID_82580_SERDES 0x1510 -#define E1000_DEV_ID_82580_SGMII 0x1511 -#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 -#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 -#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 -#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A -#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C -#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 -#define E1000_DEV_ID_I350_COPPER 0x1521 -#define E1000_DEV_ID_I350_FIBER 0x1522 -#define E1000_DEV_ID_I350_SERDES 0x1523 -#define E1000_DEV_ID_I350_SGMII 0x1524 +#define E1000_DEV_ID_82576 0x10C9 +#define E1000_DEV_ID_82576_FIBER 0x10E6 +#define E1000_DEV_ID_82576_SERDES 0x10E7 +#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 +#define E1000_DEV_ID_82576_NS 0x150A +#define E1000_DEV_ID_82576_NS_SERDES 0x1518 +#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D +#define E1000_DEV_ID_82575EB_COPPER 0x10A7 +#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 +#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 +#define E1000_DEV_ID_82580_COPPER 0x150E +#define 
E1000_DEV_ID_82580_FIBER 0x150F +#define E1000_DEV_ID_82580_SERDES 0x1510 +#define E1000_DEV_ID_82580_SGMII 0x1511 +#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 +#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 +#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 +#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A +#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C +#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 +#define E1000_DEV_ID_I350_COPPER 0x1521 +#define E1000_DEV_ID_I350_FIBER 0x1522 +#define E1000_DEV_ID_I350_SERDES 0x1523 +#define E1000_DEV_ID_I350_SGMII 0x1524 #define E1000_DEV_ID_I210_COPPER 0x1533 #define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 #define E1000_DEV_ID_I210_COPPER_IT 0x1535 @@ -70,6 +70,9 @@ struct e1000_hw; #define E1000_DEV_ID_I210_SERDES 0x1537 #define E1000_DEV_ID_I210_SGMII 0x1538 #define E1000_DEV_ID_I211_COPPER 0x1539 +#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 +#define E1000_DEV_ID_I354_SGMII 0x1F41 +#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 #define E1000_REVISION_2 2 #define E1000_REVISION_4 4 @@ -90,6 +93,7 @@ enum e1000_mac_type { e1000_82576, e1000_82580, e1000_i350, + e1000_i354, e1000_i210, e1000_i211, e1000_num_macs /* List is 1-based, so subtract 1 for true count. 
*/ @@ -98,7 +102,8 @@ enum e1000_mac_type { enum e1000_media_type { e1000_media_type_unknown = 0, e1000_media_type_copper = 1, - e1000_media_type_internal_serdes = 2, + e1000_media_type_fiber = 2, + e1000_media_type_internal_serdes = 3, e1000_num_media_types }; diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 6a42344f24f1..9764cd3610e5 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -103,7 +103,7 @@ void igb_release_nvm_i210(struct e1000_hw *hw) * @hw: pointer to the HW structure * * Release hardware semaphore used to access the PHY or NVM - */ + **/ static void igb_put_hw_semaphore_i210(struct e1000_hw *hw) { u32 swsm; @@ -141,9 +141,7 @@ s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) if (!(swfw_sync & fwmask)) break; - /* - * Firmware currently using resource (fwmask) - */ + /* Firmware currently using resource (fwmask) */ igb_put_hw_semaphore_i210(hw); mdelay(5); i++; @@ -203,7 +201,8 @@ s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, /* We cannot hold synchronization semaphores for too long, * because of forceful takeover procedure. However it is more efficient - * to read in bursts than synchronizing access for each word. */ + * to read in bursts than synchronizing access for each word. + */ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? E1000_EERD_EEWR_MAX_COUNT : (words - i); @@ -242,8 +241,7 @@ static s32 igb_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u32 attempts = 100000; s32 ret_val = E1000_SUCCESS; - /* - * A check for invalid values: offset too large, too many words, + /* A check for invalid values: offset too large, too many words, * too many words for the offset, and not enough words. 
*/ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || @@ -294,7 +292,7 @@ out: * * If error code is returned, data and Shadow RAM may be inconsistent - buffer * partially written. - */ + **/ s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { @@ -326,7 +324,7 @@ s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, /** * igb_read_nvm_i211 - Read NVM wrapper function for I211 * @hw: pointer to the HW structure - * @address: the word address (aka eeprom offset) to read + * @words: number of words to read * @data: pointer to the data read * * Wrapper function to return data formerly found in the NVM. @@ -549,8 +547,7 @@ s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw) if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { - /* - * Replace the read function with semaphore grabbing with + /* Replace the read function with semaphore grabbing with * the one that skips this for a while. * We have semaphore taken already here. */ @@ -570,7 +567,6 @@ s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw) return status; } - /** * igb_update_nvm_checksum_i210 - Update EEPROM checksum * @hw: pointer to the HW structure @@ -585,8 +581,7 @@ s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw) u16 checksum = 0; u16 i, nvm_data; - /* - * Read the first word from the EEPROM. If this times out or fails, do + /* Read the first word from the EEPROM. If this times out or fails, do * not continue or we could be in for a very long wait while every * EEPROM read fails */ @@ -597,8 +592,7 @@ s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw) } if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { - /* - * Do not use hw->nvm.ops.write, hw->nvm.ops.read + /* Do not use hw->nvm.ops.write, hw->nvm.ops.read * because we do not want to take the synchronization * semaphores twice here. */ @@ -635,7 +629,7 @@ out: * igb_pool_flash_update_done_i210 - Pool FLUDONE status. 
* @hw: pointer to the HW structure * - */ + **/ static s32 igb_pool_flash_update_done_i210(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_NVM; @@ -714,3 +708,68 @@ s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data) out: return ret_val; } + +/** + * __igb_access_xmdio_reg - Read/write XMDIO register + * @hw: pointer to the HW structure + * @address: XMDIO address to program + * @dev_addr: device address to program + * @data: pointer to value to read/write from/to the XMDIO address + * @read: boolean flag to indicate read or write + **/ +static s32 __igb_access_xmdio_reg(struct e1000_hw *hw, u16 address, + u8 dev_addr, u16 *data, bool read) +{ + s32 ret_val = E1000_SUCCESS; + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | + dev_addr); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); + else + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); + if (ret_val) + return ret_val; + + /* Recalibrate the device back to 0 */ + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); + if (ret_val) + return ret_val; + + return ret_val; +} + +/** + * igb_read_xmdio_reg - Read XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be read from the EMI address + **/ +s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) +{ + return __igb_access_xmdio_reg(hw, addr, dev_addr, data, true); +} + +/** + * igb_write_xmdio_reg - Write XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be written to the XMDIO address + **/ +s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 
dev_addr, u16 data) +{ + return __igb_access_xmdio_reg(hw, addr, dev_addr, &data, false); +} diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index e4e1a73b7c75..bfc08e05c907 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -45,6 +45,10 @@ extern s32 igb_read_nvm_i211(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); extern s32 igb_read_invm_version(struct e1000_hw *hw, struct e1000_fw_version *invm_ver); +extern s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, + u16 *data); +extern s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, + u16 data); #define E1000_STM_OPCODE 0xDB00 #define E1000_EEPROM_FLASH_SIZE_WORD 0x11 diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index a5c7200b9a71..2559d70a2321 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -214,7 +214,7 @@ s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add) else vfta &= ~mask; } - if (hw->mac.type == e1000_i350) + if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) igb_write_vfta_i350(hw, index, vfta); else igb_write_vfta(hw, index, vfta); @@ -230,8 +230,8 @@ s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add) * Checks the nvm for an alternate MAC address. An alternate MAC address * can be setup by pre-boot software and must be treated like a permanent * address and must override the actual permanent MAC address. If an - * alternate MAC address is fopund it is saved in the hw struct and - * prgrammed into RAR0 and the cuntion returns success, otherwise the + * alternate MAC address is found it is saved in the hw struct and + * programmed into RAR0 and the function returns success, otherwise the * function returns an error. 
**/ s32 igb_check_alt_mac_addr(struct e1000_hw *hw) @@ -241,8 +241,7 @@ s32 igb_check_alt_mac_addr(struct e1000_hw *hw) u16 offset, nvm_alt_mac_addr_offset, nvm_data; u8 alt_mac_addr[ETH_ALEN]; - /* - * Alternate MAC address is handled by the option ROM for 82580 + /* Alternate MAC address is handled by the option ROM for 82580 * and newer. SW support not required. */ if (hw->mac.type >= e1000_82580) @@ -285,8 +284,7 @@ s32 igb_check_alt_mac_addr(struct e1000_hw *hw) goto out; } - /* - * We have a valid alternate MAC address, and we want to treat it the + /* We have a valid alternate MAC address, and we want to treat it the * same as the normal permanent MAC address stored by the HW into the * RAR. Do this by mapping this address into RAR0. */ @@ -309,8 +307,7 @@ void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; - /* - * HW expects these in little endian so we reverse the byte order + /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | @@ -323,8 +320,7 @@ void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) if (rar_low || rar_high) rar_high |= E1000_RAH_AV; - /* - * Some bridges will combine consecutive 32-bit writes into + /* Some bridges will combine consecutive 32-bit writes into * a single burst write, which will malfunction on some parts. * The flushes avoid this. */ @@ -348,8 +344,7 @@ void igb_mta_set(struct e1000_hw *hw, u32 hash_value) { u32 hash_bit, hash_reg, mta; - /* - * The MTA is a register array of 32-bit registers. It is + /* The MTA is a register array of 32-bit registers. It is * treated like an array of (32*mta_reg_count) bits. We want to * set bit BitArray[hash_value]. 
So we figure out what register * the bit is in, read it, OR in the new bit, then write @@ -386,15 +381,13 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) /* Register count multiplied by bits per register */ hash_mask = (hw->mac.mta_reg_count * 32) - 1; - /* - * For a mc_filter_type of 0, bit_shift is the number of left-shifts + /* For a mc_filter_type of 0, bit_shift is the number of left-shifts * where 0xFF would still fall within the hash mask. */ while (hash_mask >> bit_shift != 0xFF) bit_shift++; - /* - * The portion of the address that is used for the hash table + /* The portion of the address that is used for the hash table * is determined by the mc_filter_type setting. * The algorithm is such that there is a total of 8 bits of shifting. * The bit_shift for a mc_filter_type of 0 represents the number of @@ -536,8 +529,7 @@ s32 igb_check_for_copper_link(struct e1000_hw *hw) s32 ret_val; bool link; - /* - * We only want to go out to the PHY registers to see if Auto-Neg + /* We only want to go out to the PHY registers to see if Auto-Neg * has completed and/or if our link status has changed. The * get_link_status flag is set upon receiving a Link Status * Change or Rx Sequence Error interrupt. @@ -547,8 +539,7 @@ s32 igb_check_for_copper_link(struct e1000_hw *hw) goto out; } - /* - * First we want to see if the MII Status Register reports + /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ @@ -561,14 +552,12 @@ s32 igb_check_for_copper_link(struct e1000_hw *hw) mac->get_link_status = false; - /* - * Check if there was DownShift, must be checked + /* Check if there was DownShift, must be checked * immediately after link-up */ igb_check_downshift(hw); - /* - * If we are forcing speed/duplex, then we simply return since + /* If we are forcing speed/duplex, then we simply return since * we have already determined whether we have link or not. 
*/ if (!mac->autoneg) { @@ -576,15 +565,13 @@ s32 igb_check_for_copper_link(struct e1000_hw *hw) goto out; } - /* - * Auto-Neg is enabled. Auto Speed Detection takes care + /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to * configure Collision Distance in the MAC. */ igb_config_collision_dist(hw); - /* - * Configure Flow Control now that Auto-Neg has completed. + /* Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. @@ -611,15 +598,13 @@ s32 igb_setup_link(struct e1000_hw *hw) { s32 ret_val = 0; - /* - * In the case of the phy reset being blocked, we already have a link. + /* In the case of the phy reset being blocked, we already have a link. * We do not need to set it up again. */ if (igb_check_reset_block(hw)) goto out; - /* - * If requested flow control is set to default, set flow control + /* If requested flow control is set to default, set flow control * based on the EEPROM flow control settings. */ if (hw->fc.requested_mode == e1000_fc_default) { @@ -628,8 +613,7 @@ s32 igb_setup_link(struct e1000_hw *hw) goto out; } - /* - * We want to save off the original Flow Control configuration just + /* We want to save off the original Flow Control configuration just * in case we get disconnected and then reconnected into a different * hub or switch with different Flow Control capabilities. */ @@ -642,8 +626,7 @@ s32 igb_setup_link(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Initialize the flow control address, type, and PAUSE timer + /* Initialize the flow control address, type, and PAUSE timer * registers to their default values. This is done even if flow * control is disabled, because it does not hurt anything to * initialize these registers. 
@@ -696,16 +679,14 @@ static s32 igb_set_fc_watermarks(struct e1000_hw *hw) s32 ret_val = 0; u32 fcrtl = 0, fcrth = 0; - /* - * Set the flow control receive threshold registers. Normally, + /* Set the flow control receive threshold registers. Normally, * these registers will be set to a default threshold that may be * adjusted later by the driver's runtime code. However, if the * ability to transmit pause frames is not enabled, then these * registers will be set to 0. */ if (hw->fc.current_mode & e1000_fc_tx_pause) { - /* - * We need to set up the Receive Threshold high and low water + /* We need to set up the Receive Threshold high and low water * marks as well as (optionally) enabling the transmission of * XON frames. */ @@ -733,8 +714,7 @@ static s32 igb_set_default_fc(struct e1000_hw *hw) s32 ret_val = 0; u16 nvm_data; - /* - * Read and store word 0x0F of the EEPROM. This word contains bits + /* Read and store word 0x0F of the EEPROM. This word contains bits * that determine the hardware's default PAUSE (flow control) mode, * a bit that determines whether the HW defaults to enabling or * disabling auto-negotiation, and the direction of the @@ -778,8 +758,7 @@ s32 igb_force_mac_fc(struct e1000_hw *hw) ctrl = rd32(E1000_CTRL); - /* - * Because we didn't get link via the internal auto-negotiation + /* Because we didn't get link via the internal auto-negotiation * mechanism (we either forced link or we got link via PHY * auto-neg), we have to manually enable/disable transmit an * receive flow control. @@ -843,8 +822,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; u16 speed, duplex; - /* - * Check for the case where we have fiber media and auto-neg failed + /* Check for the case where we have fiber media and auto-neg failed * so we had to force link. In this case, we need to force the * configuration of the MAC to match the "fc" parameter. 
*/ @@ -861,15 +839,13 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) goto out; } - /* - * Check for the case where we have copper media and auto-neg is + /* Check for the case where we have copper media and auto-neg is * enabled. In this case, we need to check and see if Auto-Neg * has completed, and if so, how the PHY and link partner has * flow control configured. */ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { - /* - * Read the MII Status Register and check to see if AutoNeg + /* Read the MII Status Register and check to see if AutoNeg * has completed. We read this twice because this reg has * some "sticky" (latched) bits. */ @@ -888,8 +864,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) goto out; } - /* - * The AutoNeg process has completed, so we now need to + /* The AutoNeg process has completed, so we now need to * read both the Auto Negotiation Advertisement * Register (Address 4) and the Auto_Negotiation Base * Page Ability Register (Address 5) to determine how @@ -904,8 +879,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Two bits in the Auto Negotiation Advertisement Register + /* Two bits in the Auto Negotiation Advertisement Register * (Address 4) and two bits in the Auto Negotiation Base * Page Ability Register (Address 5) determine flow control * for both the PHY and the link partner. The following @@ -940,8 +914,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) */ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { - /* - * Now we need to check if the user selected RX ONLY + /* Now we need to check if the user selected RX ONLY * of pause frames. In this case, we had to advertise * FULL flow control because we could not advertise RX * ONLY. 
Hence, we must now check to see if we need to @@ -956,8 +929,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) "RX PAUSE frames only.\r\n"); } } - /* - * For receiving PAUSE frames ONLY. + /* For receiving PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result @@ -971,8 +943,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) hw->fc.current_mode = e1000_fc_tx_pause; hw_dbg("Flow Control = TX PAUSE frames only.\r\n"); } - /* - * For transmitting PAUSE frames ONLY. + /* For transmitting PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result @@ -986,8 +957,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) hw->fc.current_mode = e1000_fc_rx_pause; hw_dbg("Flow Control = RX PAUSE frames only.\r\n"); } - /* - * Per the IEEE spec, at this point flow control should be + /* Per the IEEE spec, at this point flow control should be * disabled. However, we want to consider that we could * be connected to a legacy switch that doesn't advertise * desired flow control, but can be forced on the link @@ -1007,9 +977,9 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) * be asked to delay transmission of packets than asking * our link partner to pause transmission of frames. */ - else if ((hw->fc.requested_mode == e1000_fc_none || - hw->fc.requested_mode == e1000_fc_tx_pause) || - hw->fc.strict_ieee) { + else if ((hw->fc.requested_mode == e1000_fc_none) || + (hw->fc.requested_mode == e1000_fc_tx_pause) || + (hw->fc.strict_ieee)) { hw->fc.current_mode = e1000_fc_none; hw_dbg("Flow Control = NONE.\r\n"); } else { @@ -1017,8 +987,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) hw_dbg("Flow Control = RX PAUSE frames only.\r\n"); } - /* - * Now we need to do one last check... If we auto- + /* Now we need to do one last check... If we auto- * negotiated to HALF DUPLEX, flow control should not be * enabled per IEEE 802.3 spec. 
*/ @@ -1031,8 +1000,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) if (duplex == HALF_DUPLEX) hw->fc.current_mode = e1000_fc_none; - /* - * Now we call a subroutine to actually force the MAC + /* Now we call a subroutine to actually force the MAC * controller to use the correct flow control settings. */ ret_val = igb_force_mac_fc(hw); @@ -1203,6 +1171,17 @@ s32 igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, hw_dbg("Half Duplex\n"); } + /* Check if it is an I354 2.5Gb backplane connection. */ + if (hw->mac.type == e1000_i354) { + if ((status & E1000_STATUS_2P5_SKU) && + !(status & E1000_STATUS_2P5_SKU_OVER)) { + *speed = SPEED_2500; + *duplex = FULL_DUPLEX; + hw_dbg("2500 Mbs, "); + hw_dbg("Full Duplex\n"); + } + } + return 0; } @@ -1427,8 +1406,7 @@ s32 igb_blink_led(struct e1000_hw *hw) u32 ledctl_blink = 0; u32 i; - /* - * set the blink bit for each LED that's "on" (0x0E) + /* set the blink bit for each LED that's "on" (0x0E) * in ledctl_mode2 */ ledctl_blink = hw->mac.ledctl_mode2; @@ -1467,7 +1445,7 @@ s32 igb_led_off(struct e1000_hw *hw) * @hw: pointer to the HW structure * * Returns 0 (0) if successful, else returns -10 - * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not casued + * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused * the master requests to be disabled. 
* * Disables PCI-Express master access and verifies there are no pending diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index e6d6ce433261..5e13e83cc608 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -35,8 +35,7 @@ #include "e1000_defines.h" #include "e1000_i210.h" -/* - * Functions that should not be called directly from drivers but can be used +/* Functions that should not be called directly from drivers but can be used * by other files in this 'shared code' */ s32 igb_blink_led(struct e1000_hw *hw); @@ -49,15 +48,15 @@ s32 igb_get_auto_rd_done(struct e1000_hw *hw); s32 igb_get_bus_info_pcie(struct e1000_hw *hw); s32 igb_get_hw_semaphore(struct e1000_hw *hw); s32 igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, - u16 *duplex); + u16 *duplex); s32 igb_id_led_init(struct e1000_hw *hw); s32 igb_led_off(struct e1000_hw *hw); void igb_update_mc_addr_list(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count); + u8 *mc_addr_list, u32 mc_addr_count); s32 igb_setup_link(struct e1000_hw *hw); s32 igb_validate_mdi_setting(struct e1000_hw *hw); s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, - u32 offset, u8 data); + u32 offset, u8 data); void igb_clear_hw_cntrs_base(struct e1000_hw *hw); void igb_clear_vfta(struct e1000_hw *hw); @@ -80,12 +79,12 @@ enum e1000_mng_mode { e1000_mng_mode_host_if_only }; -#define E1000_FACTPS_MNGCG 0x20000000 +#define E1000_FACTPS_MNGCG 0x20000000 -#define E1000_FWSM_MODE_MASK 0xE -#define E1000_FWSM_MODE_SHIFT 1 +#define E1000_FWSM_MODE_MASK 0xE +#define E1000_FWSM_MODE_SHIFT 1 -#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 +#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 extern void e1000_init_function_pointers_82575(struct e1000_hw *hw); diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index 38e0df350904..dac1447fabf7 100644 --- 
a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -196,7 +196,8 @@ out: * returns SUCCESS if it successfully received a message notification and * copied it into the receive buffer. **/ -static s32 igb_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +static s32 igb_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, + u16 mbx_id) { struct e1000_mbx_info *mbx = &hw->mbx; s32 ret_val = -E1000_ERR_MBX; @@ -222,7 +223,8 @@ out: * returns SUCCESS if it successfully copied message into the buffer and * received an ack to that message within delay * timeout period **/ -static s32 igb_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +static s32 igb_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, + u16 mbx_id) { struct e1000_mbx_info *mbx = &hw->mbx; s32 ret_val = -E1000_ERR_MBX; @@ -325,7 +327,6 @@ static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) s32 ret_val = -E1000_ERR_MBX; u32 p2v_mailbox; - /* Take ownership of the buffer */ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); @@ -347,7 +348,7 @@ static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) * returns SUCCESS if it successfully copied message into the buffer **/ static s32 igb_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, - u16 vf_number) + u16 vf_number) { s32 ret_val; u16 i; @@ -388,7 +389,7 @@ out_no_write: * a message due to a VF request so no polling for message is needed. 
**/ static s32 igb_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, - u16 vf_number) + u16 vf_number) { s32 ret_val; u16 i; diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h index c13b56d9edb2..de9bba41acf3 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.h +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h @@ -30,42 +30,42 @@ #include "e1000_hw.h" -#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */ -#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ -#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ -#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ -#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ +#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */ +#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ +#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ +#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ +#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ -#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */ -#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ -#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */ -#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ +#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */ +#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ +#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */ +#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ -#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ +#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ /* If it's a E1000_VF_* msg then it originates in the VF and is sent to the * PF. The reverse is true if it is E1000_PF_*. 
* Message ACK's are the value or'd with 0xF0000000 */ -#define E1000_VT_MSGTYPE_ACK 0x80000000 /* Messages below or'd with - * this are the ACK */ -#define E1000_VT_MSGTYPE_NACK 0x40000000 /* Messages below or'd with - * this are the NACK */ -#define E1000_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still - clear to send requests */ -#define E1000_VT_MSGINFO_SHIFT 16 +/* Messages below or'd with this are the ACK */ +#define E1000_VT_MSGTYPE_ACK 0x80000000 +/* Messages below or'd with this are the NACK */ +#define E1000_VT_MSGTYPE_NACK 0x40000000 +/* Indicates that VF is still clear to send requests */ +#define E1000_VT_MSGTYPE_CTS 0x20000000 +#define E1000_VT_MSGINFO_SHIFT 16 /* bits 23:16 are used for exra info for certain messages */ -#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT) +#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT) -#define E1000_VF_RESET 0x01 /* VF requests reset */ -#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */ -#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */ -#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */ -#define E1000_VF_SET_LPE 0x05 /* VF requests to set VMOLR.LPE */ -#define E1000_VF_SET_PROMISC 0x06 /*VF requests to clear VMOLR.ROPE/MPME*/ -#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_RESET 0x01 /* VF requests reset */ +#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */ +#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */ +#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */ +#define E1000_VF_SET_LPE 0x05 /* VF requests to set VMOLR.LPE */ +#define E1000_VF_SET_PROMISC 0x06 /*VF requests to clear VMOLR.ROPE/MPME*/ +#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT) -#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */ +#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */ s32 igb_read_mbx(struct e1000_hw *, u32 *, u16, u16); 
s32 igb_write_mbx(struct e1000_hw *, u32 *, u16, u16); diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index 5b62adbe134d..7f9cd7cbd353 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -289,15 +289,14 @@ static s32 igb_ready_nvm_eeprom(struct e1000_hw *hw) udelay(1); timeout = NVM_MAX_RETRY_SPI; - /* - * Read "Status Register" repeatedly until the LSB is cleared. + /* Read "Status Register" repeatedly until the LSB is cleared. * The EEPROM will signal that the command has been completed * by clearing bit 0 of the internal status register. If it's * not cleared within 'timeout', then error out. */ while (timeout) { igb_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, - hw->nvm.opcode_bits); + hw->nvm.opcode_bits); spi_stat_reg = (u8)igb_shift_in_eec_bits(hw, 8); if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) break; @@ -335,8 +334,7 @@ s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) u16 word_in; u8 read_opcode = NVM_READ_OPCODE_SPI; - /* - * A check for invalid values: offset too large, too many words, + /* A check for invalid values: offset too large, too many words, * and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || @@ -363,8 +361,7 @@ s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) igb_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); igb_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); - /* - * Read the data. SPI NVMs increment the address with each byte + /* Read the data. SPI NVMs increment the address with each byte * read and will roll over if reading beyond the end. 
This allows * us to read the whole NVM from any offset */ @@ -395,8 +392,7 @@ s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) u32 i, eerd = 0; s32 ret_val = 0; - /* - * A check for invalid values: offset too large, too many words, + /* A check for invalid values: offset too large, too many words, * and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || @@ -408,7 +404,7 @@ s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) for (i = 0; i < words; i++) { eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + - E1000_NVM_RW_REG_START; + E1000_NVM_RW_REG_START; wr32(E1000_EERD, eerd); ret_val = igb_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); @@ -441,8 +437,7 @@ s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) s32 ret_val = -E1000_ERR_NVM; u16 widx = 0; - /* - * A check for invalid values: offset too large, too many words, + /* A check for invalid values: offset too large, too many words, * and not enough words. 
*/ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || @@ -472,8 +467,7 @@ s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) igb_standby_nvm(hw); - /* - * Some SPI eeproms use the 8th address bit embedded in the + /* Some SPI eeproms use the 8th address bit embedded in the * opcode */ if ((nvm->address_bits == 8) && (offset >= 128)) @@ -538,8 +532,7 @@ s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, u32 part_num_size) goto out; } - /* - * if nvm_data is not ptr guard the PBA must be in legacy format which + /* if nvm_data is not ptr guard the PBA must be in legacy format which * means pointer is actually our second data word for the PBA number * and we can decode it into an ascii string */ @@ -728,6 +721,7 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) case e1000_82575: case e1000_82576: case e1000_82580: + case e1000_i354: case e1000_i350: case e1000_i210: break; @@ -746,6 +740,7 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) switch (hw->mac.type) { case e1000_i210: + case e1000_i354: case e1000_i350: /* find combo image version */ hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 2918c979b5bb..fd46add6c4e4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -33,29 +33,29 @@ static s32 igb_phy_setup_autoneg(struct e1000_hw *hw); static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw, - u16 *phy_ctrl); + u16 *phy_ctrl); static s32 igb_wait_autoneg(struct e1000_hw *hw); static s32 igb_set_master_slave_mode(struct e1000_hw *hw); /* Cable length tables */ -static const u16 e1000_m88_cable_length_table[] = - { 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; +static const u16 e1000_m88_cable_length_table[] = { + 0, 50, 80, 110, 140, 140, 
E1000_CABLE_LENGTH_UNDEFINED }; #define M88E1000_CABLE_LENGTH_TABLE_SIZE \ - (sizeof(e1000_m88_cable_length_table) / \ - sizeof(e1000_m88_cable_length_table[0])) - -static const u16 e1000_igp_2_cable_length_table[] = - { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, - 0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, - 6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, - 21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, - 40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, - 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, - 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, - 104, 109, 114, 118, 121, 124}; + (sizeof(e1000_m88_cable_length_table) / \ + sizeof(e1000_m88_cable_length_table[0])) + +static const u16 e1000_igp_2_cable_length_table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, + 0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, + 6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, + 21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, + 40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, + 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, + 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, + 104, 109, 114, 118, 121, 124}; #define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ - (sizeof(e1000_igp_2_cable_length_table) / \ - sizeof(e1000_igp_2_cable_length_table[0])) + (sizeof(e1000_igp_2_cable_length_table) / \ + sizeof(e1000_igp_2_cable_length_table[0])) /** * igb_check_reset_block - Check if PHY reset is blocked @@ -71,8 +71,7 @@ s32 igb_check_reset_block(struct e1000_hw *hw) manc = rd32(E1000_MANC); - return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? - E1000_BLK_PHY_RESET : 0; + return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? 
E1000_BLK_PHY_RESET : 0; } /** @@ -149,8 +148,7 @@ s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) goto out; } - /* - * Set up Op-code, Phy Address, and register offset in the MDI + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ @@ -160,8 +158,7 @@ s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) wr32(E1000_MDIC, mdic); - /* - * Poll the ready bit to see if the MDI read completed + /* Poll the ready bit to see if the MDI read completed * Increasing the time out as testing showed failures with * the lower time out */ @@ -207,8 +204,7 @@ s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) goto out; } - /* - * Set up Op-code, Phy Address, and register offset in the MDI + /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ @@ -219,8 +215,7 @@ s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) wr32(E1000_MDIC, mdic); - /* - * Poll the ready bit to see if the MDI read completed + /* Poll the ready bit to see if the MDI read completed * Increasing the time out as testing showed failures with * the lower time out */ @@ -259,15 +254,13 @@ s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) struct e1000_phy_info *phy = &hw->phy; u32 i, i2ccmd = 0; - - /* - * Set up Op-code, Phy Address, and register address in the I2CCMD + /* Set up Op-code, Phy Address, and register address in the I2CCMD * register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. 
*/ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | - (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | - (E1000_I2CCMD_OPCODE_READ)); + (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | + (E1000_I2CCMD_OPCODE_READ)); wr32(E1000_I2CCMD, i2ccmd); @@ -317,15 +310,14 @@ s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) /* Swap the data bytes for the I2C interface */ phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); - /* - * Set up Op-code, Phy Address, and register address in the I2CCMD + /* Set up Op-code, Phy Address, and register address in the I2CCMD * register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | - (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | - E1000_I2CCMD_OPCODE_WRITE | - phy_data_swapped); + (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_WRITE | + phy_data_swapped); wr32(E1000_I2CCMD, i2ccmd); @@ -371,8 +363,8 @@ s32 igb_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) if (offset > MAX_PHY_MULTI_PAGE_REG) { ret_val = igb_write_phy_reg_mdic(hw, - IGP01E1000_PHY_PAGE_SELECT, - (u16)offset); + IGP01E1000_PHY_PAGE_SELECT, + (u16)offset); if (ret_val) { hw->phy.ops.release(hw); goto out; @@ -410,8 +402,8 @@ s32 igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) if (offset > MAX_PHY_MULTI_PAGE_REG) { ret_val = igb_write_phy_reg_mdic(hw, - IGP01E1000_PHY_PAGE_SELECT, - (u16)offset); + IGP01E1000_PHY_PAGE_SELECT, + (u16)offset); if (ret_val) { hw->phy.ops.release(hw); goto out; @@ -419,7 +411,7 @@ s32 igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) } ret_val = igb_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, - data); + data); hw->phy.ops.release(hw); @@ -439,7 +431,6 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw) s32 ret_val; u16 phy_data; - if (phy->reset_disable) { ret_val = 0; goto out; @@ -472,8 +463,7 @@ s32 igb_copper_link_setup_82580(struct e1000_hw 
*hw) if (ret_val) goto out; phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; - /* - * Options: + /* Options: * 0 - Auto (default) * 1 - MDI mode * 2 - MDI-X mode @@ -520,8 +510,7 @@ s32 igb_copper_link_setup_m88(struct e1000_hw *hw) phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; - /* - * Options: + /* Options: * MDI/MDI-X = 0 (default) * 0 - Auto for all speeds * 1 - MDI mode @@ -546,8 +535,7 @@ s32 igb_copper_link_setup_m88(struct e1000_hw *hw) break; } - /* - * Options: + /* Options: * disable_polarity_correction = 0 (default) * Automatic Correction for Reversed Cable Polarity * 0 - Disabled @@ -562,12 +550,11 @@ s32 igb_copper_link_setup_m88(struct e1000_hw *hw) goto out; if (phy->revision < E1000_REVISION_4) { - /* - * Force TX_CLK in the Extended PHY Specific Control Register + /* Force TX_CLK in the Extended PHY Specific Control Register * to 25MHz clock. */ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, - &phy_data); + &phy_data); if (ret_val) goto out; @@ -630,8 +617,7 @@ s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Options: + /* Options: * MDI/MDI-X = 0 (default) * 0 - Auto for all speeds * 1 - MDI mode @@ -659,8 +645,7 @@ s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw) break; } - /* - * Options: + /* Options: * disable_polarity_correction = 0 (default) * Automatic Correction for Reversed Cable Polarity * 0 - Disabled @@ -714,14 +699,12 @@ s32 igb_copper_link_setup_igp(struct e1000_hw *hw) goto out; } - /* - * Wait 100ms for MAC to configure PHY from NVM settings, to avoid + /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid * timeout issues when LFS is enabled. */ msleep(100); - /* - * The NVM settings will configure LPLU in D3 for + /* The NVM settings will configure LPLU in D3 for * non-IGP1 PHYs. 
*/ if (phy->type == e1000_phy_igp) { @@ -765,8 +748,7 @@ s32 igb_copper_link_setup_igp(struct e1000_hw *hw) /* set auto-master slave resolution settings */ if (hw->mac.autoneg) { - /* - * when autonegotiation advertisement is only 1000Mbps then we + /* when autonegotiation advertisement is only 1000Mbps then we * should disable SmartSpeed and enable Auto MasterSlave * resolution as hardware default. */ @@ -844,14 +826,12 @@ static s32 igb_copper_link_autoneg(struct e1000_hw *hw) s32 ret_val; u16 phy_ctrl; - /* - * Perform some bounds checking on the autoneg advertisement + /* Perform some bounds checking on the autoneg advertisement * parameter. */ phy->autoneg_advertised &= phy->autoneg_mask; - /* - * If autoneg_advertised is zero, we assume it was not defaulted + /* If autoneg_advertised is zero, we assume it was not defaulted * by the calling code so we set to advertise full capability. */ if (phy->autoneg_advertised == 0) @@ -865,8 +845,7 @@ static s32 igb_copper_link_autoneg(struct e1000_hw *hw) } hw_dbg("Restarting Auto-Neg\n"); - /* - * Restart auto-negotiation by setting the Auto Neg Enable bit and + /* Restart auto-negotiation by setting the Auto Neg Enable bit and * the Auto Neg Restart bit in the PHY control register. */ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); @@ -878,8 +857,7 @@ static s32 igb_copper_link_autoneg(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Does the user want to wait for Auto-Neg to complete here, or + /* Does the user want to wait for Auto-Neg to complete here, or * check at a later time (for example, callback routine). */ if (phy->autoneg_wait_to_complete) { @@ -928,16 +906,14 @@ static s32 igb_phy_setup_autoneg(struct e1000_hw *hw) goto out; } - /* - * Need to parse both autoneg_advertised and fc and set up + /* Need to parse both autoneg_advertised and fc and set up * the appropriate PHY registers. First we will parse for * autoneg_advertised software override. 
Since we can advertise * a plethora of combinations, we need to check each bit * individually. */ - /* - * First we clear all the 10/100 mb speed bits in the Auto-Neg + /* First we clear all the 10/100 mb speed bits in the Auto-Neg * Advertisement Register (Address 4) and the 1000 mb speed bits in * the 1000Base-T Control Register (Address 9). */ @@ -983,8 +959,7 @@ static s32 igb_phy_setup_autoneg(struct e1000_hw *hw) mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; } - /* - * Check for a software override of the flow control settings, and + /* Check for a software override of the flow control settings, and * setup the PHY advertisement registers accordingly. If * auto-negotiation is enabled, then software will have to set the * "PAUSE" bits to the correct value in the Auto-Negotiation @@ -1003,15 +978,13 @@ static s32 igb_phy_setup_autoneg(struct e1000_hw *hw) */ switch (hw->fc.current_mode) { case e1000_fc_none: - /* - * Flow control (RX & TX) is completely disabled by a + /* Flow control (RX & TX) is completely disabled by a * software over-ride. */ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; case e1000_fc_rx_pause: - /* - * RX Flow control is enabled, and TX Flow control is + /* RX Flow control is enabled, and TX Flow control is * disabled, by a software over-ride. * * Since there really isn't a way to advertise that we are @@ -1023,16 +996,14 @@ static s32 igb_phy_setup_autoneg(struct e1000_hw *hw) mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; case e1000_fc_tx_pause: - /* - * TX Flow control is enabled, and RX Flow control is + /* TX Flow control is enabled, and RX Flow control is * disabled, by a software over-ride. */ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; break; case e1000_fc_full: - /* - * Flow control (both RX and TX) is enabled by a software + /* Flow control (both RX and TX) is enabled by a software * over-ride. 
*/ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); @@ -1075,18 +1046,15 @@ s32 igb_setup_copper_link(struct e1000_hw *hw) s32 ret_val; bool link; - if (hw->mac.autoneg) { - /* - * Setup autoneg and flow control advertisement and perform + /* Setup autoneg and flow control advertisement and perform * autonegotiation. */ ret_val = igb_copper_link_autoneg(hw); if (ret_val) goto out; } else { - /* - * PHY will be set to 10H, 10F, 100H or 100F + /* PHY will be set to 10H, 10F, 100H or 100F * depending on user settings. */ hw_dbg("Forcing Speed and Duplex\n"); @@ -1097,14 +1065,10 @@ s32 igb_setup_copper_link(struct e1000_hw *hw) } } - /* - * Check link status. Wait up to 100 microseconds for link to become + /* Check link status. Wait up to 100 microseconds for link to become * valid. */ - ret_val = igb_phy_has_link(hw, - COPPER_LINK_UP_LIMIT, - 10, - &link); + ret_val = igb_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link); if (ret_val) goto out; @@ -1145,8 +1109,7 @@ s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Clear Auto-Crossover to force MDI manually. IGP requires MDI + /* Clear Auto-Crossover to force MDI manually. IGP requires MDI * forced whenever speed and duplex are forced. 
*/ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); @@ -1167,10 +1130,7 @@ s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw) if (phy->autoneg_wait_to_complete) { hw_dbg("Waiting for forced speed/duplex link on IGP phy.\n"); - ret_val = igb_phy_has_link(hw, - PHY_FORCE_LIMIT, - 100000, - &link); + ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) goto out; @@ -1178,10 +1138,7 @@ s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw) hw_dbg("Link taking longer than expected.\n"); /* Try once more */ - ret_val = igb_phy_has_link(hw, - PHY_FORCE_LIMIT, - 100000, - &link); + ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) goto out; } @@ -1209,8 +1166,7 @@ s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw) /* I210 and I211 devices support Auto-Crossover in forced operation. */ if (phy->type != e1000_phy_i210) { - /* - * Clear Auto-Crossover to force MDI manually. M88E1000 + /* Clear Auto-Crossover to force MDI manually. M88E1000 * requires MDI forced whenever speed and duplex are forced. */ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, @@ -1266,13 +1222,12 @@ s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw) if (!reset_dsp) hw_dbg("Link taking longer than expected.\n"); else { - /* - * We didn't get link. + /* We didn't get link. * Reset the DSP and cross our fingers. */ ret_val = phy->ops.write_reg(hw, - M88E1000_PHY_PAGE_SELECT, - 0x001d); + M88E1000_PHY_PAGE_SELECT, + 0x001d); if (ret_val) goto out; ret_val = igb_phy_reset_dsp(hw); @@ -1298,8 +1253,7 @@ s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Resetting the phy means we need to re-force TX_CLK in the + /* Resetting the phy means we need to re-force TX_CLK in the * Extended PHY Specific Control Register to 25MHz clock from * the reset value of 2.5MHz. 
*/ @@ -1308,8 +1262,7 @@ s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw) if (ret_val) goto out; - /* - * In addition, we must re-enable CRS on Tx for both half and full + /* In addition, we must re-enable CRS on Tx for both half and full * duplex. */ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); @@ -1336,7 +1289,7 @@ out: * take affect. **/ static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw, - u16 *phy_ctrl) + u16 *phy_ctrl) { struct e1000_mac_info *mac = &hw->mac; u32 ctrl; @@ -1417,8 +1370,7 @@ s32 igb_set_d3_lplu_state(struct e1000_hw *hw, bool active) data); if (ret_val) goto out; - /* - * LPLU and SmartSpeed are mutually exclusive. LPLU is used + /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. @@ -1461,13 +1413,13 @@ s32 igb_set_d3_lplu_state(struct e1000_hw *hw, bool active) /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, - &data); + &data); if (ret_val) goto out; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, - data); + data); } out: @@ -1556,8 +1508,7 @@ static s32 igb_check_polarity_igp(struct e1000_hw *hw) s32 ret_val; u16 data, offset, mask; - /* - * Polarity is determined based on the speed of + /* Polarity is determined based on the speed of * our connection. */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); @@ -1569,8 +1520,7 @@ static s32 igb_check_polarity_igp(struct e1000_hw *hw) offset = IGP01E1000_PHY_PCS_INIT_REG; mask = IGP01E1000_PHY_POLARITY_MASK; } else { - /* - * This really only applies to 10Mbps since + /* This really only applies to 10Mbps since * there is no polarity for 100Mbps (always 0). 
*/ offset = IGP01E1000_PHY_PORT_STATUS; @@ -1589,7 +1539,7 @@ out: } /** - * igb_wait_autoneg - Wait for auto-neg compeletion + * igb_wait_autoneg - Wait for auto-neg completion * @hw: pointer to the HW structure * * Waits for auto-negotiation to complete or for the auto-negotiation time @@ -1613,8 +1563,7 @@ static s32 igb_wait_autoneg(struct e1000_hw *hw) msleep(100); } - /* - * PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation + /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation * has completed. */ return ret_val; @@ -1630,21 +1579,19 @@ static s32 igb_wait_autoneg(struct e1000_hw *hw) * Polls the PHY status register for link, 'iterations' number of times. **/ s32 igb_phy_has_link(struct e1000_hw *hw, u32 iterations, - u32 usec_interval, bool *success) + u32 usec_interval, bool *success) { s32 ret_val = 0; u16 i, phy_status; for (i = 0; i < iterations; i++) { - /* - * Some PHYs require the PHY_STATUS register to be read + /* Some PHYs require the PHY_STATUS register to be read * twice due to the link bit being sticky. No harm doing * it across the board. */ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); if (ret_val) { - /* - * If the first read fails, another entity may have + /* If the first read fails, another entity may have * ownership of the resources, wait and try again to * see if they have relinquished the resources yet. */ @@ -1735,6 +1682,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) phy->max_cable_length = phy_data / (is_cm ? 100 : 1); phy->cable_length = phy_data / (is_cm ? 
100 : 1); break; + case M88E1545_E_PHY_ID: case I347AT4_E_PHY_ID: /* Remember the original page select and set it to 7 */ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, @@ -1834,10 +1782,10 @@ s32 igb_get_cable_length_igp_2(struct e1000_hw *hw) u16 cur_agc_index, max_agc_index = 0; u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { - IGP02E1000_PHY_AGC_A, - IGP02E1000_PHY_AGC_B, - IGP02E1000_PHY_AGC_C, - IGP02E1000_PHY_AGC_D + IGP02E1000_PHY_AGC_A, + IGP02E1000_PHY_AGC_B, + IGP02E1000_PHY_AGC_C, + IGP02E1000_PHY_AGC_D }; /* Read the AGC registers for all channels */ @@ -1846,8 +1794,7 @@ s32 igb_get_cable_length_igp_2(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Getting bits 15:9, which represent the combination of + /* Getting bits 15:9, which represent the combination of * coarse and fine gain values. The result is a number * that can be put into the lookup table to obtain the * approximate cable length. @@ -2167,15 +2114,13 @@ s32 igb_phy_init_script_igp3(struct e1000_hw *hw) hw->phy.ops.write_reg(hw, 0x1796, 0x0008); /* Change cg_icount + enable integbp for channels BCD */ hw->phy.ops.write_reg(hw, 0x1798, 0xD008); - /* - * Change cg_icount + enable integbp + change prop_factor_master + /* Change cg_icount + enable integbp + change prop_factor_master * to 8 for channel A */ hw->phy.ops.write_reg(hw, 0x1898, 0xD918); /* Disable AHT in Slave mode on channel A */ hw->phy.ops.write_reg(hw, 0x187A, 0x0800); - /* - * Enable LPLU and disable AN to 1000 in non-D0a states, + /* Enable LPLU and disable AN to 1000 in non-D0a states, * Enable SPD+B2B */ hw->phy.ops.write_reg(hw, 0x0019, 0x008D); @@ -2257,8 +2202,8 @@ static s32 igb_check_polarity_82580(struct e1000_hw *hw) if (!ret_val) phy->cable_polarity = (data & I82580_PHY_STATUS2_REV_POLARITY) - ? e1000_rev_polarity_reversed - : e1000_rev_polarity_normal; + ? 
e1000_rev_polarity_reversed + : e1000_rev_polarity_normal; return ret_val; } @@ -2278,7 +2223,6 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) u16 phy_data; bool link; - ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); if (ret_val) goto out; @@ -2289,8 +2233,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Clear Auto-Crossover to force MDI manually. 82580 requires MDI + /* Clear Auto-Crossover to force MDI manually. 82580 requires MDI * forced whenever speed and duplex are forced. */ ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); @@ -2310,10 +2253,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) if (phy->autoneg_wait_to_complete) { hw_dbg("Waiting for forced speed/duplex link on 82580 phy\n"); - ret_val = igb_phy_has_link(hw, - PHY_FORCE_LIMIT, - 100000, - &link); + ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) goto out; @@ -2321,10 +2261,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) hw_dbg("Link taking longer than expected.\n"); /* Try once more */ - ret_val = igb_phy_has_link(hw, - PHY_FORCE_LIMIT, - 100000, - &link); + ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) goto out; } @@ -2349,7 +2286,6 @@ s32 igb_get_phy_info_82580(struct e1000_hw *hw) u16 data; bool link; - ret_val = igb_phy_has_link(hw, 1, 0, &link); if (ret_val) goto out; @@ -2383,12 +2319,12 @@ s32 igb_get_phy_info_82580(struct e1000_hw *hw) goto out; phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) - ? e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) - ? e1000_1000t_rx_status_ok - : e1000_1000t_rx_status_not_ok; + ? 
e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; } else { phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; phy->local_rx = e1000_1000t_rx_status_undefined; @@ -2412,13 +2348,12 @@ s32 igb_get_cable_length_82580(struct e1000_hw *hw) s32 ret_val; u16 phy_data, length; - ret_val = phy->ops.read_reg(hw, I82580_PHY_DIAG_STATUS, &phy_data); if (ret_val) goto out; length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >> - I82580_DSTATUS_CABLE_LENGTH_SHIFT; + I82580_DSTATUS_CABLE_LENGTH_SHIFT; if (length == E1000_CABLE_LENGTH_UNDEFINED) ret_val = -E1000_ERR_PHY; diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 15343286082e..82632c6c53af 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -65,6 +65,7 @@ #define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ #define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ #define E1000_LEDCTL 0x00E00 /* LED Control - RW */ +#define E1000_LEDMUX 0x08130 /* LED MUX Control */ #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ @@ -83,6 +84,9 @@ #define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */ #define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */ #define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */ +#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */ +#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */ +#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */ /* IEEE 1588 TIMESYNCH */ #define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ @@ -117,21 +121,21 @@ #define E1000_RQDPC(_n) (0x0C030 + ((_n) * 0x40)) /* DMA Coalescing registers */ -#define E1000_DMACR 0x02508 /* Control Register */ -#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ -#define E1000_DMCTLX 0x02514 /* Time to Lx Request */ -#define E1000_DMCRTRH 
0x05DD0 /* Receive Packet Rate Threshold */ -#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ -#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ -#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ +#define E1000_DMACR 0x02508 /* Control Register */ +#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ +#define E1000_DMCTLX 0x02514 /* Time to Lx Request */ +#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ +#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ +#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ +#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ /* TX Rate Limit Registers */ -#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select - WO */ -#define E1000_RTTBCNRM 0x3690 /* Tx BCN Rate-scheduler MMW */ -#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config - WO */ +#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select - WO */ +#define E1000_RTTBCNRM 0x3690 /* Tx BCN Rate-scheduler MMW */ +#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config - WO */ /* Split and Replication RX Control - RW */ -#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ +#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ /* Thermal sensor configuration and status registers */ #define E1000_THMJT 0x08100 /* Junction Temperature */ @@ -140,8 +144,7 @@ #define E1000_THHIGHTC 0x0810C /* High Threshold Control */ #define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ -/* - * Convenience macros +/* Convenience macros * * Note: "_n" is the queue number of the register to be written to. 
* @@ -287,7 +290,7 @@ #define E1000_RFCTL 0x05008 /* Receive Filter Control*/ #define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ #define E1000_RA 0x05400 /* Receive Address - RW Array */ -#define E1000_RA2 0x054E0 /* 2nd half of receive address array - RW Array */ +#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ #define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) #define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ (0x054E0 + ((_i - 16) * 8))) @@ -360,21 +363,25 @@ (readl(hw->hw_addr + reg + ((offset) << 2))) /* DMA Coalescing registers */ -#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ +#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ /* Energy Efficient Ethernet "EEE" register */ -#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ -#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet */ -#define E1000_EEE_SU 0X0E34 /* EEE Setup */ +#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ +#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet */ +#define E1000_EEE_SU 0X0E34 /* EEE Setup */ +#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */ +#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */ +#define E1000_MMDAC 13 /* MMD Access Control */ +#define E1000_MMDAAD 14 /* MMD Access Address/Data */ /* Thermal Sensor Register */ -#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ +#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ /* OS2BMC Registers */ -#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ -#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ -#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ -#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */ +#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ +#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ +#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ +#define E1000_O2BSPC 
0x0415C /* OS2BMC packets transmitted by host */ #define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */ #define E1000_I210_FLMNGCTL 0x12038 diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 25151401c2ab..9d6c075e232d 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -44,54 +44,54 @@ struct igb_adapter; -#define E1000_PCS_CFG_IGN_SD 1 +#define E1000_PCS_CFG_IGN_SD 1 /* Interrupt defines */ -#define IGB_START_ITR 648 /* ~6000 ints/sec */ -#define IGB_4K_ITR 980 -#define IGB_20K_ITR 196 -#define IGB_70K_ITR 56 +#define IGB_START_ITR 648 /* ~6000 ints/sec */ +#define IGB_4K_ITR 980 +#define IGB_20K_ITR 196 +#define IGB_70K_ITR 56 /* TX/RX descriptor defines */ -#define IGB_DEFAULT_TXD 256 -#define IGB_DEFAULT_TX_WORK 128 -#define IGB_MIN_TXD 80 -#define IGB_MAX_TXD 4096 +#define IGB_DEFAULT_TXD 256 +#define IGB_DEFAULT_TX_WORK 128 +#define IGB_MIN_TXD 80 +#define IGB_MAX_TXD 4096 -#define IGB_DEFAULT_RXD 256 -#define IGB_MIN_RXD 80 -#define IGB_MAX_RXD 4096 +#define IGB_DEFAULT_RXD 256 +#define IGB_MIN_RXD 80 +#define IGB_MAX_RXD 4096 -#define IGB_DEFAULT_ITR 3 /* dynamic */ -#define IGB_MAX_ITR_USECS 10000 -#define IGB_MIN_ITR_USECS 10 -#define NON_Q_VECTORS 1 -#define MAX_Q_VECTORS 8 +#define IGB_DEFAULT_ITR 3 /* dynamic */ +#define IGB_MAX_ITR_USECS 10000 +#define IGB_MIN_ITR_USECS 10 +#define NON_Q_VECTORS 1 +#define MAX_Q_VECTORS 8 /* Transmit and receive queues */ -#define IGB_MAX_RX_QUEUES 8 -#define IGB_MAX_RX_QUEUES_82575 4 -#define IGB_MAX_RX_QUEUES_I211 2 -#define IGB_MAX_TX_QUEUES 8 -#define IGB_MAX_VF_MC_ENTRIES 30 -#define IGB_MAX_VF_FUNCTIONS 8 -#define IGB_MAX_VFTA_ENTRIES 128 -#define IGB_82576_VF_DEV_ID 0x10CA -#define IGB_I350_VF_DEV_ID 0x1520 +#define IGB_MAX_RX_QUEUES 8 +#define IGB_MAX_RX_QUEUES_82575 4 +#define IGB_MAX_RX_QUEUES_I211 2 +#define IGB_MAX_TX_QUEUES 8 +#define IGB_MAX_VF_MC_ENTRIES 30 +#define IGB_MAX_VF_FUNCTIONS 8 +#define 
IGB_MAX_VFTA_ENTRIES 128 +#define IGB_82576_VF_DEV_ID 0x10CA +#define IGB_I350_VF_DEV_ID 0x1520 /* NVM version defines */ -#define IGB_MAJOR_MASK 0xF000 -#define IGB_MINOR_MASK 0x0FF0 -#define IGB_BUILD_MASK 0x000F -#define IGB_COMB_VER_MASK 0x00FF -#define IGB_MAJOR_SHIFT 12 -#define IGB_MINOR_SHIFT 4 -#define IGB_COMB_VER_SHFT 8 -#define IGB_NVM_VER_INVALID 0xFFFF -#define IGB_ETRACK_SHIFT 16 -#define NVM_ETRACK_WORD 0x0042 -#define NVM_COMB_VER_OFF 0x0083 -#define NVM_COMB_VER_PTR 0x003d +#define IGB_MAJOR_MASK 0xF000 +#define IGB_MINOR_MASK 0x0FF0 +#define IGB_BUILD_MASK 0x000F +#define IGB_COMB_VER_MASK 0x00FF +#define IGB_MAJOR_SHIFT 12 +#define IGB_MINOR_SHIFT 4 +#define IGB_COMB_VER_SHFT 8 +#define IGB_NVM_VER_INVALID 0xFFFF +#define IGB_ETRACK_SHIFT 16 +#define NVM_ETRACK_WORD 0x0042 +#define NVM_COMB_VER_OFF 0x0083 +#define NVM_COMB_VER_PTR 0x003d struct vf_data_storage { unsigned char vf_mac_addresses[ETH_ALEN]; @@ -103,6 +103,7 @@ struct vf_data_storage { u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; u16 tx_rate; + bool spoofchk_enabled; }; #define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ @@ -121,14 +122,14 @@ struct vf_data_storage { * descriptors until either it has this many to write back, or the * ITR timer expires. */ -#define IGB_RX_PTHRESH 8 -#define IGB_RX_HTHRESH 8 -#define IGB_TX_PTHRESH 8 -#define IGB_TX_HTHRESH 1 -#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ - adapter->msix_entries) ? 1 : 4) -#define IGB_TX_WTHRESH ((hw->mac.type == e1000_82576 && \ - adapter->msix_entries) ? 1 : 16) +#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8) +#define IGB_RX_HTHRESH 8 +#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) +#define IGB_TX_HTHRESH 1 +#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ + adapter->msix_entries) ? 1 : 4) +#define IGB_TX_WTHRESH ((hw->mac.type == e1000_82576 && \ + adapter->msix_entries) ? 
1 : 16) /* this is the size past which hardware will drop packets when setting LPE=0 */ #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 @@ -140,17 +141,17 @@ struct vf_data_storage { #define IGB_RX_BUFSZ IGB_RXBUFFER_2048 /* How many Rx Buffers do we bundle into one write to the hardware ? */ -#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ -#define AUTO_ALL_MODES 0 -#define IGB_EEPROM_APME 0x0400 +#define AUTO_ALL_MODES 0 +#define IGB_EEPROM_APME 0x0400 #ifndef IGB_MASTER_SLAVE /* Switch to override PHY master/slave setting */ #define IGB_MASTER_SLAVE e1000_ms_hw_default #endif -#define IGB_MNG_VLAN_NONE -1 +#define IGB_MNG_VLAN_NONE -1 enum igb_tx_flags { /* cmd_type flags */ @@ -164,11 +165,10 @@ enum igb_tx_flags { }; /* VLAN info */ -#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGB_TX_FLAGS_VLAN_SHIFT 16 -/* - * The largest size we can write to the descriptor is 65535. In order to +/* The largest size we can write to the descriptor is 65535. In order to * maintain a power of two alignment we have to limit ourselves to 32K. 
*/ #define IGB_MAX_TXD_PWR 15 @@ -178,8 +178,17 @@ enum igb_tx_flags { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) +/* EEPROM byte offsets */ +#define IGB_SFF_8472_SWAP 0x5C +#define IGB_SFF_8472_COMP 0x5E + +/* Bitmasks */ +#define IGB_SFF_ADDRESSING_MODE 0x4 +#define IGB_SFF_8472_UNSUP 0x00 + /* wrapper around a pointer to a socket buffer, - * so a DMA handle can be stored along with the buffer */ + * so a DMA handle can be stored along with the buffer + */ struct igb_tx_buffer { union e1000_adv_tx_desc *next_to_watch; unsigned long time_stamp; @@ -284,25 +293,17 @@ struct igb_q_vector { enum e1000_ring_flags_t { IGB_RING_FLAG_RX_SCTP_CSUM, IGB_RING_FLAG_RX_LB_VLAN_BSWAP, - IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, IGB_RING_FLAG_TX_CTX_IDX, IGB_RING_FLAG_TX_DETECT_HANG }; -#define ring_uses_build_skb(ring) \ - test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) -#define set_ring_build_skb_enabled(ring) \ - set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) -#define clear_ring_build_skb_enabled(ring) \ - clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) - #define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) -#define IGB_RX_DESC(R, i) \ +#define IGB_RX_DESC(R, i) \ (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) -#define IGB_TX_DESC(R, i) \ +#define IGB_TX_DESC(R, i) \ (&(((union e1000_adv_tx_desc *)((R)->desc))[i])) -#define IGB_TX_CTXTDESC(R, i) \ +#define IGB_TX_CTXTDESC(R, i) \ (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i])) /* igb_test_staterr - tests bits within Rx descriptor status and error fields */ @@ -461,12 +462,12 @@ struct igb_adapter { #define IGB_FLAG_WOL_SUPPORTED (1 << 8) /* DMA Coalescing defines */ -#define IGB_MIN_TXPBSIZE 20408 -#define IGB_TX_BUF_4096 4096 -#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */ +#define IGB_MIN_TXPBSIZE 20408 +#define IGB_TX_BUF_4096 4096 +#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* 
Disable DMA Coal Flush */ -#define IGB_82576_TSYNC_SHIFT 19 -#define IGB_TS_HDR_LEN 16 +#define IGB_82576_TSYNC_SHIFT 19 +#define IGB_TS_HDR_LEN 16 enum e1000_state_t { __IGB_TESTING, __IGB_RESETTING, diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index a3830a8ba4c1..48b594701efa 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/highmem.h> +#include <linux/mdio.h> #include "igb.h" @@ -178,44 +179,67 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) ecmd->port = PORT_TP; ecmd->phy_address = hw->phy.addr; + ecmd->transceiver = XCVR_INTERNAL; } else { - ecmd->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_FIBRE | - SUPPORTED_Autoneg); + ecmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full | + SUPPORTED_FIBRE | + SUPPORTED_Autoneg | + SUPPORTED_Pause); + if (hw->mac.type == e1000_i354) + ecmd->supported |= SUPPORTED_2500baseX_Full; - ecmd->advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE | - ADVERTISED_Autoneg | - ADVERTISED_Pause); + ecmd->advertising = ADVERTISED_FIBRE; + + switch (adapter->link_speed) { + case SPEED_2500: + ecmd->advertising = ADVERTISED_2500baseX_Full; + break; + case SPEED_1000: + ecmd->advertising = ADVERTISED_1000baseT_Full; + break; + case SPEED_100: + ecmd->advertising = ADVERTISED_100baseT_Full; + break; + default: + break; + } + + if (hw->mac.autoneg == 1) + ecmd->advertising |= ADVERTISED_Autoneg; ecmd->port = PORT_FIBRE; + ecmd->transceiver = XCVR_EXTERNAL; } - ecmd->transceiver = XCVR_INTERNAL; - status = rd32(E1000_STATUS); if (status & E1000_STATUS_LU) { - - if ((status & E1000_STATUS_SPEED_1000) || - hw->phy.media_type != e1000_media_type_copper) - ethtool_cmd_speed_set(ecmd, SPEED_1000); + if ((hw->mac.type == e1000_i354) && + (status & E1000_STATUS_2P5_SKU) && + 
!(status & E1000_STATUS_2P5_SKU_OVER)) + ecmd->speed = SPEED_2500; + else if (status & E1000_STATUS_SPEED_1000) + ecmd->speed = SPEED_1000; else if (status & E1000_STATUS_SPEED_100) - ethtool_cmd_speed_set(ecmd, SPEED_100); + ecmd->speed = SPEED_100; else - ethtool_cmd_speed_set(ecmd, SPEED_10); - + ecmd->speed = SPEED_10; if ((status & E1000_STATUS_FD) || hw->phy.media_type != e1000_media_type_copper) ecmd->duplex = DUPLEX_FULL; else ecmd->duplex = DUPLEX_HALF; } else { - ethtool_cmd_speed_set(ecmd, -1); + ecmd->speed = -1; ecmd->duplex = -1; } - ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + if ((hw->phy.media_type == e1000_media_type_fiber) || + hw->mac.autoneg) + ecmd->autoneg = AUTONEG_ENABLE; + else + ecmd->autoneg = AUTONEG_DISABLE; /* MDI-X => 2; MDI =>1; Invalid =>0 */ if (hw->phy.media_type == e1000_media_type_copper) @@ -238,15 +262,15 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) struct e1000_hw *hw = &adapter->hw; /* When SoL/IDER sessions are active, autoneg/speed/duplex - * cannot be changed */ + * cannot be changed + */ if (igb_check_reset_block(hw)) { dev_err(&adapter->pdev->dev, "Cannot change link characteristics when SoL/IDER is active.\n"); return -EINVAL; } - /* - * MDI setting is only allowed when autoneg enabled because + /* MDI setting is only allowed when autoneg enabled because * some hardware doesn't allow MDI setting when speed or * duplex is forced. 
*/ @@ -266,9 +290,31 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) if (ecmd->autoneg == AUTONEG_ENABLE) { hw->mac.autoneg = 1; - hw->phy.autoneg_advertised = ecmd->advertising | - ADVERTISED_TP | - ADVERTISED_Autoneg; + if (hw->phy.media_type == e1000_media_type_fiber) { + hw->phy.autoneg_advertised = ecmd->advertising | + ADVERTISED_FIBRE | + ADVERTISED_Autoneg; + switch (adapter->link_speed) { + case SPEED_2500: + hw->phy.autoneg_advertised = + ADVERTISED_2500baseX_Full; + break; + case SPEED_1000: + hw->phy.autoneg_advertised = + ADVERTISED_1000baseT_Full; + break; + case SPEED_100: + hw->phy.autoneg_advertised = + ADVERTISED_100baseT_Full; + break; + default: + break; + } + } else { + hw->phy.autoneg_advertised = ecmd->advertising | + ADVERTISED_TP | + ADVERTISED_Autoneg; + } ecmd->advertising = hw->phy.autoneg_advertised; if (adapter->fc_autoneg) hw->fc.requested_mode = e1000_fc_default; @@ -283,8 +329,7 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) /* MDI-X => 2; MDI => 1; Auto => 3 */ if (ecmd->eth_tp_mdix_ctrl) { - /* - * fix up the value for auto (3 => 0) as zero is mapped + /* fix up the value for auto (3 => 0) as zero is mapped * internally to auto */ if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) @@ -309,8 +354,7 @@ static u32 igb_get_link(struct net_device *netdev) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_mac_info *mac = &adapter->hw.mac; - /* - * If the link is not reported up to netdev, interrupts are disabled, + /* If the link is not reported up to netdev, interrupts are disabled, * and so the physical link state may have changed since we last * looked. 
Set get_link_status to make sure that the true link * state is interrogated, rather than pulling a cached and possibly @@ -430,7 +474,8 @@ static void igb_get_regs(struct net_device *netdev, /* Interrupt */ /* Reading EICS for EICR because they read the - * same but EICS does not clear on read */ + * same but EICS does not clear on read + */ regs_buff[13] = rd32(E1000_EICS); regs_buff[14] = rd32(E1000_EICS); regs_buff[15] = rd32(E1000_EIMS); @@ -438,7 +483,8 @@ static void igb_get_regs(struct net_device *netdev, regs_buff[17] = rd32(E1000_EIAC); regs_buff[18] = rd32(E1000_EIAM); /* Reading ICS for ICR because they read the - * same but ICS does not clear on read */ + * same but ICS does not clear on read + */ regs_buff[19] = rd32(E1000_ICS); regs_buff[20] = rd32(E1000_ICS); regs_buff[21] = rd32(E1000_IMS); @@ -688,12 +734,12 @@ static int igb_get_eeprom(struct net_device *netdev, if (hw->nvm.type == e1000_nvm_eeprom_spi) ret_val = hw->nvm.ops.read(hw, first_word, - last_word - first_word + 1, - eeprom_buff); + last_word - first_word + 1, + eeprom_buff); else { for (i = 0; i < last_word - first_word + 1; i++) { ret_val = hw->nvm.ops.read(hw, first_word + i, 1, - &eeprom_buff[i]); + &eeprom_buff[i]); if (ret_val) break; } @@ -740,15 +786,17 @@ static int igb_set_eeprom(struct net_device *netdev, ptr = (void *)eeprom_buff; if (eeprom->offset & 1) { - /* need read/modify/write of first changed EEPROM word */ - /* only the second byte of the word is being modified */ + /* need read/modify/write of first changed EEPROM word + * only the second byte of the word is being modified + */ ret_val = hw->nvm.ops.read(hw, first_word, 1, &eeprom_buff[0]); ptr++; } if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { - /* need read/modify/write of last changed EEPROM word */ - /* only the first byte of the word is being modified */ + /* need read/modify/write of last changed EEPROM word + * only the first byte of the word is being modified + */ ret_val = 
hw->nvm.ops.read(hw, last_word, 1, &eeprom_buff[last_word - first_word]); } @@ -763,10 +811,11 @@ static int igb_set_eeprom(struct net_device *netdev, eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]); ret_val = hw->nvm.ops.write(hw, first_word, - last_word - first_word + 1, eeprom_buff); + last_word - first_word + 1, eeprom_buff); /* Update the checksum over the first part of the EEPROM if needed - * and flush shadow RAM for 82573 controllers */ + * and flush shadow RAM for 82573 controllers + */ if ((ret_val == 0) && ((first_word <= NVM_CHECKSUM_REG))) hw->nvm.ops.update(hw); @@ -783,8 +832,7 @@ static void igb_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version)); - /* - * EEPROM image version # is reported as firmware version # for + /* EEPROM image version # is reported as firmware version # for * 82575 controllers */ strlcpy(drvinfo->fw_version, adapter->fw_version, @@ -847,9 +895,11 @@ static int igb_set_ringparam(struct net_device *netdev, } if (adapter->num_tx_queues > adapter->num_rx_queues) - temp_ring = vmalloc(adapter->num_tx_queues * sizeof(struct igb_ring)); + temp_ring = vmalloc(adapter->num_tx_queues * + sizeof(struct igb_ring)); else - temp_ring = vmalloc(adapter->num_rx_queues * sizeof(struct igb_ring)); + temp_ring = vmalloc(adapter->num_rx_queues * + sizeof(struct igb_ring)); if (!temp_ring) { err = -ENOMEM; @@ -858,10 +908,9 @@ static int igb_set_ringparam(struct net_device *netdev, igb_down(adapter); - /* - * We can't just free everything and then setup again, + /* We can't just free everything and then setup again, * because the ISRs in MSI-X mode get passed pointers - * to the tx and rx ring structs. + * to the Tx and Rx ring structs. 
*/ if (new_tx_count != adapter->tx_ring_count) { for (i = 0; i < adapter->num_tx_queues; i++) { @@ -1199,6 +1248,7 @@ static int igb_reg_test(struct igb_adapter *adapter, u64 *data) switch (adapter->hw.mac.type) { case e1000_i350: + case e1000_i354: test = reg_test_i350; toggle = 0x7FEFF3FF; break; @@ -1361,6 +1411,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) ics_mask = 0x77DCFED5; break; case e1000_i350: + case e1000_i354: case e1000_i210: case e1000_i211: ics_mask = 0x77DCFED5; @@ -1723,8 +1774,8 @@ static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer, } static int igb_clean_test_rings(struct igb_ring *rx_ring, - struct igb_ring *tx_ring, - unsigned int size) + struct igb_ring *tx_ring, + unsigned int size) { union e1000_adv_rx_desc *rx_desc; struct igb_rx_buffer *rx_buffer_info; @@ -1737,7 +1788,7 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring, rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { - /* check rx buffer */ + /* check Rx buffer */ rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc]; /* sync Rx buffer for CPU read */ @@ -1756,11 +1807,11 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring, IGB_RX_BUFSZ, DMA_FROM_DEVICE); - /* unmap buffer on tx side */ + /* unmap buffer on Tx side */ tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc]; igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - /* increment rx/tx next to clean counters */ + /* increment Rx/Tx next to clean counters */ rx_ntc++; if (rx_ntc == rx_ring->count) rx_ntc = 0; @@ -1801,8 +1852,7 @@ static int igb_run_loopback_test(struct igb_adapter *adapter) igb_create_lbtest_frame(skb, size); skb_put(skb, size); - /* - * Calculate the loop count based on the largest descriptor ring + /* Calculate the loop count based on the largest descriptor ring * The idea is to wrap the largest ring a number of times using 64 * send/receive pairs during each loop */ @@ -1829,7 +1879,7 @@ static int 
igb_run_loopback_test(struct igb_adapter *adapter) break; } - /* allow 200 milliseconds for packets to go from tx to rx */ + /* allow 200 milliseconds for packets to go from Tx to Rx */ msleep(200); good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size); @@ -1848,13 +1898,21 @@ static int igb_run_loopback_test(struct igb_adapter *adapter) static int igb_loopback_test(struct igb_adapter *adapter, u64 *data) { /* PHY loopback cannot be performed if SoL/IDER - * sessions are active */ + * sessions are active + */ if (igb_check_reset_block(&adapter->hw)) { dev_err(&adapter->pdev->dev, "Cannot do PHY loopback test when SoL/IDER is active.\n"); *data = 0; goto out; } + + if (adapter->hw.mac.type == e1000_i354) { + dev_info(&adapter->pdev->dev, + "Loopback test not supported on i354.\n"); + *data = 0; + goto out; + } *data = igb_setup_desc_rings(adapter); if (*data) goto out; @@ -1879,7 +1937,8 @@ static int igb_link_test(struct igb_adapter *adapter, u64 *data) hw->mac.serdes_has_link = false; /* On some blade server designs, link establishment - * could take as long as 2-3 minutes */ + * could take as long as 2-3 minutes + */ do { hw->mac.ops.check_for_link(&adapter->hw); if (hw->mac.serdes_has_link) @@ -1922,7 +1981,8 @@ static void igb_diag_test(struct net_device *netdev, igb_power_up_link(adapter); /* Link test performed before hardware reset so autoneg doesn't - * interfere with test result */ + * interfere with test result + */ if (igb_link_test(adapter, &data[4])) eth_test->flags |= ETH_TEST_FL_FAILED; @@ -1987,8 +2047,8 @@ static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) struct igb_adapter *adapter = netdev_priv(netdev); wol->supported = WAKE_UCAST | WAKE_MCAST | - WAKE_BCAST | WAKE_MAGIC | - WAKE_PHY; + WAKE_BCAST | WAKE_MAGIC | + WAKE_PHY; wol->wolopts = 0; if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) @@ -2263,7 +2323,7 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) sprintf(p, 
"rx_queue_%u_alloc_failed", i); p += ETH_GSTRING_LEN; } -/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */ + /* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */ break; } } @@ -2283,6 +2343,7 @@ static int igb_get_ts_info(struct net_device *dev, case e1000_82576: case e1000_82580: case e1000_i350: + case e1000_i354: case e1000_i210: case e1000_i211: info->so_timestamping = @@ -2362,7 +2423,7 @@ static int igb_get_rss_hash_opts(struct igb_adapter *adapter, } static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, - u32 *rule_locs) + u32 *rule_locs) { struct igb_adapter *adapter = netdev_priv(dev); int ret = -EOPNOTSUPP; @@ -2506,7 +2567,8 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 ipcnfg, eeer; + u32 ipcnfg, eeer, ret_val; + u16 phy_data; if ((hw->mac.type < e1000_i350) || (hw->phy.media_type != e1000_media_type_copper)) @@ -2525,6 +2587,32 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata) if (ipcnfg & E1000_IPCNFG_EEE_100M_AN) edata->advertised |= ADVERTISED_100baseT_Full; + /* EEE Link Partner Advertised */ + switch (hw->mac.type) { + case e1000_i350: + ret_val = igb_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350, + &phy_data); + if (ret_val) + return -ENODATA; + + edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + + break; + case e1000_i210: + case e1000_i211: + ret_val = igb_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210, + E1000_EEE_LP_ADV_DEV_I210, + &phy_data); + if (ret_val) + return -ENODATA; + + edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + + break; + default: + break; + } + if (eeer & E1000_EEER_EEE_NEG) edata->eee_active = true; @@ -2600,6 +2688,85 @@ static int igb_set_eee(struct net_device *netdev, return 0; } +static int igb_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct igb_adapter 
*adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 status = E1000_SUCCESS; + u16 sff8472_rev, addr_mode; + bool page_swap = false; + + if ((hw->phy.media_type == e1000_media_type_copper) || + (hw->phy.media_type == e1000_media_type_unknown)) + return -EOPNOTSUPP; + + /* Check whether we support SFF-8472 or not */ + status = igb_read_phy_reg_i2c(hw, IGB_SFF_8472_COMP, &sff8472_rev); + if (status != E1000_SUCCESS) + return -EIO; + + /* addressing mode is not supported */ + status = igb_read_phy_reg_i2c(hw, IGB_SFF_8472_SWAP, &addr_mode); + if (status != E1000_SUCCESS) + return -EIO; + + /* addressing mode is not supported */ + if ((addr_mode & 0xFF) & IGB_SFF_ADDRESSING_MODE) { + hw_dbg("Address change required to access page 0xA2, but not supported. Please report the module type to the driver maintainers.\n"); + page_swap = true; + } + + if ((sff8472_rev & 0xFF) == IGB_SFF_8472_UNSUP || page_swap) { + /* We have an SFP, but it does not support SFF-8472 */ + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + /* We have an SFP which supports a revision of SFF-8472 */ + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + + return 0; +} + +static int igb_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, u8 *data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 status = E1000_SUCCESS; + u16 *dataword; + u16 first_word, last_word; + int i = 0; + + if (ee->len == 0) + return -EINVAL; + + first_word = ee->offset >> 1; + last_word = (ee->offset + ee->len - 1) >> 1; + + dataword = kmalloc(sizeof(u16) * (last_word - first_word + 1), + GFP_KERNEL); + if (!dataword) + return -ENOMEM; + + /* Read EEPROM block, SFF-8079/SFF-8472, word at a time */ + for (i = 0; i < last_word - first_word + 1; i++) { + status = igb_read_phy_reg_i2c(hw, first_word + i, &dataword[i]); + if (status != E1000_SUCCESS) 
+ /* Error occurred while reading module */ + return -EIO; + + be16_to_cpus(&dataword[i]); + } + + memcpy(data, (u8 *)dataword + (ee->offset & 1), ee->len); + kfree(dataword); + + return 0; +} + static int igb_ethtool_begin(struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -2614,36 +2781,38 @@ static void igb_ethtool_complete(struct net_device *netdev) } static const struct ethtool_ops igb_ethtool_ops = { - .get_settings = igb_get_settings, - .set_settings = igb_set_settings, - .get_drvinfo = igb_get_drvinfo, - .get_regs_len = igb_get_regs_len, - .get_regs = igb_get_regs, - .get_wol = igb_get_wol, - .set_wol = igb_set_wol, - .get_msglevel = igb_get_msglevel, - .set_msglevel = igb_set_msglevel, - .nway_reset = igb_nway_reset, - .get_link = igb_get_link, - .get_eeprom_len = igb_get_eeprom_len, - .get_eeprom = igb_get_eeprom, - .set_eeprom = igb_set_eeprom, - .get_ringparam = igb_get_ringparam, - .set_ringparam = igb_set_ringparam, - .get_pauseparam = igb_get_pauseparam, - .set_pauseparam = igb_set_pauseparam, - .self_test = igb_diag_test, - .get_strings = igb_get_strings, - .set_phys_id = igb_set_phys_id, - .get_sset_count = igb_get_sset_count, - .get_ethtool_stats = igb_get_ethtool_stats, - .get_coalesce = igb_get_coalesce, - .set_coalesce = igb_set_coalesce, - .get_ts_info = igb_get_ts_info, + .get_settings = igb_get_settings, + .set_settings = igb_set_settings, + .get_drvinfo = igb_get_drvinfo, + .get_regs_len = igb_get_regs_len, + .get_regs = igb_get_regs, + .get_wol = igb_get_wol, + .set_wol = igb_set_wol, + .get_msglevel = igb_get_msglevel, + .set_msglevel = igb_set_msglevel, + .nway_reset = igb_nway_reset, + .get_link = igb_get_link, + .get_eeprom_len = igb_get_eeprom_len, + .get_eeprom = igb_get_eeprom, + .set_eeprom = igb_set_eeprom, + .get_ringparam = igb_get_ringparam, + .set_ringparam = igb_set_ringparam, + .get_pauseparam = igb_get_pauseparam, + .set_pauseparam = igb_set_pauseparam, + .self_test = igb_diag_test, + 
.get_strings = igb_get_strings, + .set_phys_id = igb_set_phys_id, + .get_sset_count = igb_get_sset_count, + .get_ethtool_stats = igb_get_ethtool_stats, + .get_coalesce = igb_get_coalesce, + .set_coalesce = igb_set_coalesce, + .get_ts_info = igb_get_ts_info, .get_rxnfc = igb_get_rxnfc, .set_rxnfc = igb_set_rxnfc, .get_eee = igb_get_eee, .set_eee = igb_set_eee, + .get_module_info = igb_get_module_info, + .get_module_eeprom = igb_get_module_eeprom, .begin = igb_ethtool_begin, .complete = igb_ethtool_complete, }; diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c index 4623502054d5..58f1ce967aeb 100644 --- a/drivers/net/ethernet/intel/igb/igb_hwmon.c +++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c @@ -39,27 +39,27 @@ #include <linux/pci.h> #ifdef CONFIG_IGB_HWMON -struct i2c_board_info i350_sensor_info = { +static struct i2c_board_info i350_sensor_info = { I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), }; /* hwmon callback functions */ static ssize_t igb_hwmon_show_location(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); + dev_attr); return sprintf(buf, "loc%u\n", igb_attr->sensor->location); } static ssize_t igb_hwmon_show_temp(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); + dev_attr); unsigned int value; /* reset the temp field */ @@ -74,11 +74,11 @@ static ssize_t igb_hwmon_show_temp(struct device *dev, } static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); + dev_attr); unsigned int value = igb_attr->sensor->caution_thresh; /* display millidegree */ 
@@ -88,11 +88,11 @@ static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, } static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); + dev_attr); unsigned int value = igb_attr->sensor->max_op_thresh; /* display millidegree */ @@ -111,7 +111,8 @@ static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, * the data structures we need to get the data to display. */ static int igb_add_hwmon_attr(struct igb_adapter *adapter, - unsigned int offset, int type) { + unsigned int offset, int type) +{ int rc; unsigned int n_attr; struct hwmon_attr *igb_attr; @@ -217,7 +218,7 @@ int igb_sysfs_init(struct igb_adapter *adapter) */ n_attrs = E1000_MAX_SENSORS * 4; igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr), - GFP_KERNEL); + GFP_KERNEL); if (!igb_hwmon->hwmon_list) { rc = -ENOMEM; goto err; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 4dbd62968c7a..dcaa35481dd7 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -77,6 +77,9 @@ static const struct e1000_info *igb_info_tbl[] = { }; static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 }, @@ -156,8 +159,8 @@ static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); static void igb_tx_timeout(struct net_device *); static void igb_reset_task(struct work_struct *); static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); -static 
int igb_vlan_rx_add_vid(struct net_device *, u16); -static int igb_vlan_rx_kill_vid(struct net_device *, u16); +static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16); static void igb_restore_vlan(struct igb_adapter *); static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); static void igb_ping_all_vfs(struct igb_adapter *); @@ -169,6 +172,8 @@ static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting); static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); static void igb_check_vf_rate_limit(struct igb_adapter *); @@ -292,9 +297,7 @@ static const struct igb_reg_info igb_reg_info_tbl[] = { {} }; -/* - * igb_regdump - register printout routine - */ +/* igb_regdump - register printout routine */ static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) { int n = 0; @@ -360,9 +363,7 @@ static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) regs[2], regs[3]); } -/* - * igb_dump - Print registers, tx-rings and rx-rings - */ +/* igb_dump - Print registers, Tx-rings and Rx-rings */ static void igb_dump(struct igb_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -569,12 +570,13 @@ exit: return; } -/* igb_get_i2c_data - Reads the I2C SDA data bit +/** + * igb_get_i2c_data - Reads the I2C SDA data bit * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * * Returns the I2C data bit value - */ + **/ static int igb_get_i2c_data(void *data) { struct igb_adapter *adapter = (struct igb_adapter *)data; @@ -584,12 +586,13 @@ static int igb_get_i2c_data(void *data) return ((i2cctl & E1000_I2C_DATA_IN) != 0); } 
-/* igb_set_i2c_data - Sets the I2C data bit +/** + * igb_set_i2c_data - Sets the I2C data bit * @data: pointer to hardware structure * @state: I2C data value (0 or 1) to set * * Sets the I2C data bit - */ + **/ static void igb_set_i2c_data(void *data, int state) { struct igb_adapter *adapter = (struct igb_adapter *)data; @@ -608,12 +611,13 @@ static void igb_set_i2c_data(void *data, int state) } -/* igb_set_i2c_clk - Sets the I2C SCL clock +/** + * igb_set_i2c_clk - Sets the I2C SCL clock * @data: pointer to hardware structure * @state: state to set clock * * Sets the I2C clock line to state - */ + **/ static void igb_set_i2c_clk(void *data, int state) { struct igb_adapter *adapter = (struct igb_adapter *)data; @@ -631,11 +635,12 @@ static void igb_set_i2c_clk(void *data, int state) wrfl(); } -/* igb_get_i2c_clk - Gets the I2C SCL clock state +/** + * igb_get_i2c_clk - Gets the I2C SCL clock state * @data: pointer to hardware structure * * Gets the I2C clock state - */ + **/ static int igb_get_i2c_clk(void *data) { struct igb_adapter *adapter = (struct igb_adapter *)data; @@ -655,8 +660,10 @@ static const struct i2c_algo_bit_data igb_i2c_algo = { }; /** - * igb_get_hw_dev - return device - * used by hardware layer to print debugging information + * igb_get_hw_dev - return device + * @hw: pointer to hardware structure + * + * used by hardware layer to print debugging information **/ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) { @@ -665,10 +672,10 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) } /** - * igb_init_module - Driver Registration Routine + * igb_init_module - Driver Registration Routine * - * igb_init_module is the first routine called when the driver is - * loaded. All it does is register with the PCI subsystem. + * igb_init_module is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. 
**/ static int __init igb_init_module(void) { @@ -688,10 +695,10 @@ static int __init igb_init_module(void) module_init(igb_init_module); /** - * igb_exit_module - Driver Exit Cleanup Routine + * igb_exit_module - Driver Exit Cleanup Routine * - * igb_exit_module is called just before the driver is removed - * from memory. + * igb_exit_module is called just before the driver is removed + * from memory. **/ static void __exit igb_exit_module(void) { @@ -705,11 +712,11 @@ module_exit(igb_exit_module); #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) /** - * igb_cache_ring_register - Descriptor ring to register mapping - * @adapter: board private structure to initialize + * igb_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize * - * Once we know the feature-set enabled for the device, we'll cache - * the register offset the descriptor ring is assigned to. + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. **/ static void igb_cache_ring_register(struct igb_adapter *adapter) { @@ -726,11 +733,12 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) if (adapter->vfs_allocated_count) { for (; i < adapter->rss_queues; i++) adapter->rx_ring[i]->reg_idx = rbase_offset + - Q_IDX_82576(i); + Q_IDX_82576(i); } case e1000_82575: case e1000_82580: case e1000_i350: + case e1000_i354: case e1000_i210: case e1000_i211: default: @@ -785,9 +793,10 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) switch (hw->mac.type) { case e1000_82575: /* The 82575 assigns vectors using a bitmask, which matches the - bitmask for the EICR/EIMS/EIMC registers. To assign one - or more queues to a vector, we write the appropriate bits - into the MSIXBM register for that vector. */ + * bitmask for the EICR/EIMS/EIMC registers. 
To assign one + * or more queues to a vector, we write the appropriate bits + * into the MSIXBM register for that vector. + */ if (rx_queue > IGB_N0_QUEUE) msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; if (tx_queue > IGB_N0_QUEUE) @@ -798,8 +807,7 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) q_vector->eims_value = msixbm; break; case e1000_82576: - /* - * 82576 uses a table that essentially consists of 2 columns + /* 82576 uses a table that essentially consists of 2 columns * with 8 rows. The ordering is column-major so we use the * lower 3 bits as the row index, and the 4th bit as the * column offset. @@ -816,10 +824,10 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) break; case e1000_82580: case e1000_i350: + case e1000_i354: case e1000_i210: case e1000_i211: - /* - * On 82580 and newer adapters the scheme is similar to 82576 + /* On 82580 and newer adapters the scheme is similar to 82576 * however instead of ordering column-major we have things * ordered row-major. So we traverse the table by using * bit 0 as the column offset, and the remaining bits as the @@ -848,10 +856,11 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) } /** - * igb_configure_msix - Configure MSI-X hardware + * igb_configure_msix - Configure MSI-X hardware + * @adapter: board private structure to initialize * - * igb_configure_msix sets up the hardware to properly - * generate MSI-X interrupts. + * igb_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. 
**/ static void igb_configure_msix(struct igb_adapter *adapter) { @@ -875,8 +884,7 @@ static void igb_configure_msix(struct igb_adapter *adapter) wr32(E1000_CTRL_EXT, tmp); /* enable msix_other interrupt */ - array_wr32(E1000_MSIXBM(0), vector++, - E1000_EIMS_OTHER); + array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER); adapter->eims_other = E1000_EIMS_OTHER; break; @@ -884,13 +892,15 @@ static void igb_configure_msix(struct igb_adapter *adapter) case e1000_82576: case e1000_82580: case e1000_i350: + case e1000_i354: case e1000_i210: case e1000_i211: /* Turn on MSI-X capability first, or our settings - * won't stick. And it will take days to debug. */ + * won't stick. And it will take days to debug. + */ wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | - E1000_GPIE_PBA | E1000_GPIE_EIAME | - E1000_GPIE_NSICR); + E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_NSICR); /* enable msix_other interrupt */ adapter->eims_other = 1 << vector; @@ -912,10 +922,11 @@ static void igb_configure_msix(struct igb_adapter *adapter) } /** - * igb_request_msix - Initialize MSI-X interrupts + * igb_request_msix - Initialize MSI-X interrupts + * @adapter: board private structure to initialize * - * igb_request_msix allocates MSI-X vectors and requests interrupts from the - * kernel. + * igb_request_msix allocates MSI-X vectors and requests interrupts from the + * kernel. 
**/ static int igb_request_msix(struct igb_adapter *adapter) { @@ -924,7 +935,7 @@ static int igb_request_msix(struct igb_adapter *adapter) int i, err = 0, vector = 0, free_vector = 0; err = request_irq(adapter->msix_entries[vector].vector, - igb_msix_other, 0, netdev->name, adapter); + igb_msix_other, 0, netdev->name, adapter); if (err) goto err_out; @@ -948,8 +959,8 @@ static int igb_request_msix(struct igb_adapter *adapter) sprintf(q_vector->name, "%s-unused", netdev->name); err = request_irq(adapter->msix_entries[vector].vector, - igb_msix_ring, 0, q_vector->name, - q_vector); + igb_msix_ring, 0, q_vector->name, + q_vector); if (err) goto err_free; } @@ -982,13 +993,13 @@ static void igb_reset_interrupt_capability(struct igb_adapter *adapter) } /** - * igb_free_q_vector - Free memory allocated for specific interrupt vector - * @adapter: board private structure to initialize - * @v_idx: Index of vector to be freed + * igb_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed * - * This function frees the memory allocated to the q_vector. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. **/ static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) { @@ -1003,20 +1014,19 @@ static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) adapter->q_vector[v_idx] = NULL; netif_napi_del(&q_vector->napi); - /* - * ixgbe_get_stats64() might access the rings on this vector, + /* ixgbe_get_stats64() might access the rings on this vector, * we must wait a grace period before freeing it. 
*/ kfree_rcu(q_vector, rcu); } /** - * igb_free_q_vectors - Free memory allocated for interrupt vectors - * @adapter: board private structure to initialize + * igb_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize * - * This function frees the memory allocated to the q_vectors. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. **/ static void igb_free_q_vectors(struct igb_adapter *adapter) { @@ -1031,10 +1041,11 @@ static void igb_free_q_vectors(struct igb_adapter *adapter) } /** - * igb_clear_interrupt_scheme - reset the device to a state of no interrupts + * igb_clear_interrupt_scheme - reset the device to a state of no interrupts + * @adapter: board private structure to initialize * - * This function resets the device so that it has 0 rx queues, tx queues, and - * MSI-X interrupts allocated. + * This function resets the device so that it has 0 Rx queues, Tx queues, and + * MSI-X interrupts allocated. */ static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) { @@ -1043,10 +1054,12 @@ static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) } /** - * igb_set_interrupt_capability - set MSI or MSI-X if supported + * igb_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: board private structure to initialize + * @msix: boolean value of MSIX capability * - * Attempt to configure interrupts using the best available - * capabilities of the hardware and kernel. + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. 
**/ static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) { @@ -1063,10 +1076,10 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) else adapter->num_tx_queues = adapter->rss_queues; - /* start with one vector for every rx queue */ + /* start with one vector for every Rx queue */ numvecs = adapter->num_rx_queues; - /* if tx handler is separate add 1 for every tx queue */ + /* if Tx handler is separate add 1 for every Tx queue */ if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) numvecs += adapter->num_tx_queues; @@ -1128,16 +1141,16 @@ static void igb_add_ring(struct igb_ring *ring, } /** - * igb_alloc_q_vector - Allocate memory for a single interrupt vector - * @adapter: board private structure to initialize - * @v_count: q_vectors allocated on adapter, used for ring interleaving - * @v_idx: index of vector in adapter struct - * @txr_count: total number of Tx rings to allocate - * @txr_idx: index of first Tx ring to allocate - * @rxr_count: total number of Rx rings to allocate - * @rxr_idx: index of first Rx ring to allocate + * igb_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate * - * We allocate one q_vector. If allocation fails we return -ENOMEM. + * We allocate one q_vector. If allocation fails we return -ENOMEM. 
**/ static int igb_alloc_q_vector(struct igb_adapter *adapter, int v_count, int v_idx, @@ -1179,6 +1192,17 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, /* initialize pointer to rings */ ring = q_vector->ring; + /* intialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + if (txr_count) { /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; @@ -1221,9 +1245,9 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); /* - * On i350, i210, and i211, loopback VLAN packets + * On i350, i354, i210, and i211, loopback VLAN packets * have the tag byte-swapped. - * */ + */ if (adapter->hw.mac.type >= e1000_i350) set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); @@ -1240,11 +1264,11 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, /** - * igb_alloc_q_vectors - Allocate memory for interrupt vectors - * @adapter: board private structure to initialize + * igb_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. **/ static int igb_alloc_q_vectors(struct igb_adapter *adapter) { @@ -1298,9 +1322,11 @@ err_out: } /** - * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * @adapter: board private structure to initialize + * @msix: boolean value of MSIX capability * - * This function initializes the interrupts and allocates all of the queues. 
+ * This function initializes the interrupts and allocates all of the queues. **/ static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) { @@ -1325,10 +1351,11 @@ err_alloc_q_vectors: } /** - * igb_request_irq - initialize interrupts + * igb_request_irq - initialize interrupts + * @adapter: board private structure to initialize * - * Attempts to configure interrupts using the best available - * capabilities of the hardware and kernel. + * Attempts to configure interrupts using the best available + * capabilities of the hardware and kernel. **/ static int igb_request_irq(struct igb_adapter *adapter) { @@ -1394,15 +1421,14 @@ static void igb_free_irq(struct igb_adapter *adapter) } /** - * igb_irq_disable - Mask off interrupt generation on the NIC - * @adapter: board private structure + * igb_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure **/ static void igb_irq_disable(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - /* - * we need to be careful when disabling interrupts. The VFs are also + /* we need to be careful when disabling interrupts. The VFs are also * mapped into these registers and so clearing the bits can cause * issues on the VF drivers so we only need to clear what we set */ @@ -1427,8 +1453,8 @@ static void igb_irq_disable(struct igb_adapter *adapter) } /** - * igb_irq_enable - Enable default interrupt generation settings - * @adapter: board private structure + * igb_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure **/ static void igb_irq_enable(struct igb_adapter *adapter) { @@ -1477,13 +1503,12 @@ static void igb_update_mng_vlan(struct igb_adapter *adapter) } /** - * igb_release_hw_control - release control of the h/w to f/w - * @adapter: address of board private structure - * - * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 
- * For ASF and Pass Through versions of f/w this means that the - * driver is no longer loaded. + * igb_release_hw_control - release control of the h/w to f/w + * @adapter: address of board private structure * + * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. **/ static void igb_release_hw_control(struct igb_adapter *adapter) { @@ -1497,13 +1522,12 @@ static void igb_release_hw_control(struct igb_adapter *adapter) } /** - * igb_get_hw_control - get control of the h/w from f/w - * @adapter: address of board private structure - * - * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that - * the driver is loaded. + * igb_get_hw_control - get control of the h/w from f/w + * @adapter: address of board private structure * + * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. 
**/ static void igb_get_hw_control(struct igb_adapter *adapter) { @@ -1517,8 +1541,8 @@ static void igb_get_hw_control(struct igb_adapter *adapter) } /** - * igb_configure - configure the hardware for RX and TX - * @adapter: private board structure + * igb_configure - configure the hardware for RX and TX + * @adapter: private board structure **/ static void igb_configure(struct igb_adapter *adapter) { @@ -1541,7 +1565,8 @@ static void igb_configure(struct igb_adapter *adapter) /* call igb_desc_unused which always leaves * at least 1 descriptor unused to make sure - * next_to_use != next_to_clean */ + * next_to_use != next_to_clean + */ for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = adapter->rx_ring[i]; igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); @@ -1549,8 +1574,8 @@ static void igb_configure(struct igb_adapter *adapter) } /** - * igb_power_up_link - Power up the phy/serdes link - * @adapter: address of board private structure + * igb_power_up_link - Power up the phy/serdes link + * @adapter: address of board private structure **/ void igb_power_up_link(struct igb_adapter *adapter) { @@ -1563,8 +1588,8 @@ void igb_power_up_link(struct igb_adapter *adapter) } /** - * igb_power_down_link - Power down the phy/serdes link - * @adapter: address of board private structure + * igb_power_down_link - Power down the phy/serdes link + * @adapter: address of board private structure */ static void igb_power_down_link(struct igb_adapter *adapter) { @@ -1575,8 +1600,8 @@ static void igb_power_down_link(struct igb_adapter *adapter) } /** - * igb_up - Open the interface and prepare it to handle traffic - * @adapter: board private structure + * igb_up - Open the interface and prepare it to handle traffic + * @adapter: board private structure **/ int igb_up(struct igb_adapter *adapter) { @@ -1624,7 +1649,8 @@ void igb_down(struct igb_adapter *adapter) int i; /* signal that we're down so the interrupt handler does not - * reschedule our watchdog timer */ 
+ * reschedule our watchdog timer + */ set_bit(__IGB_DOWN, &adapter->state); /* disable receives in the hardware */ @@ -1694,6 +1720,7 @@ void igb_reset(struct igb_adapter *adapter) */ switch (mac->type) { case e1000_i350: + case e1000_i354: case e1000_82580: pba = rd32(E1000_RXPBS); pba = igb_rxpbs_adjust_82580(pba); @@ -1720,14 +1747,16 @@ void igb_reset(struct igb_adapter *adapter) * rounded up to the next 1KB and expressed in KB. Likewise, * the Rx FIFO should be large enough to accommodate at least * one full receive packet and is similarly rounded up and - * expressed in KB. */ + * expressed in KB. + */ pba = rd32(E1000_PBA); /* upper 16 bits has Tx packet buffer allocation size in KB */ tx_space = pba >> 16; /* lower 16 bits has Rx packet buffer allocation size in KB */ pba &= 0xffff; - /* the tx fifo also stores 16 bytes of information about the tx - * but don't include ethernet FCS because hardware appends it */ + /* the Tx fifo also stores 16 bytes of information about the Tx + * but don't include ethernet FCS because hardware appends it + */ min_tx_space = (adapter->max_frame_size + sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN) * 2; @@ -1740,13 +1769,15 @@ void igb_reset(struct igb_adapter *adapter) /* If current Tx allocation is less than the min Tx FIFO size, * and the min Tx FIFO size is less than the current Rx FIFO - * allocation, take space away from current Rx allocation */ + * allocation, take space away from current Rx allocation + */ if (tx_space < min_tx_space && ((min_tx_space - tx_space) < pba)) { pba = pba - (min_tx_space - tx_space); - /* if short on rx space, rx wins and must trump tx - * adjustment */ + /* if short on Rx space, Rx wins and must trump Tx + * adjustment + */ if (pba < min_rx_space) pba = min_rx_space; } @@ -1758,7 +1789,8 @@ void igb_reset(struct igb_adapter *adapter) * (or the size used for early receive) above it in the Rx FIFO. 
* Set it to the lower of: * - 90% of the Rx FIFO size, or - * - the full Rx FIFO size minus one full frame */ + * - the full Rx FIFO size minus one full frame + */ hwm = min(((pba << 10) * 9 / 10), ((pba << 10) - 2 * adapter->max_frame_size)); @@ -1789,8 +1821,7 @@ void igb_reset(struct igb_adapter *adapter) if (hw->mac.ops.init_hw(hw)) dev_err(&pdev->dev, "Hardware Error\n"); - /* - * Flow control settings reset on hardware reset, so guarantee flow + /* Flow control settings reset on hardware reset, so guarantee flow * control is off when forcing speed. */ if (!hw->mac.autoneg) @@ -1826,14 +1857,13 @@ void igb_reset(struct igb_adapter *adapter) static netdev_features_t igb_fix_features(struct net_device *netdev, netdev_features_t features) { - /* - * Since there is no support for separate rx/tx vlan accel - * enable/disable make sure tx flag is always in same state as rx. + /* Since there is no support for separate Rx/Tx vlan accel + * enable/disable make sure Tx flag is always in same state as Rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -1844,7 +1874,7 @@ static int igb_set_features(struct net_device *netdev, netdev_features_t changed = netdev->features ^ features; struct igb_adapter *adapter = netdev_priv(netdev); - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) igb_vlan_mode(netdev, features); if (!(changed & NETIF_F_RXALL)) @@ -1876,6 +1906,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_vf_mac = igb_ndo_set_vf_mac, .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, + .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, .ndo_get_vf_config = igb_ndo_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = igb_netpoll, @@ -1887,7 +1918,6 @@ static const struct net_device_ops igb_netdev_ops = { /** * igb_set_fw_version - Configure version string for ethtool * @adapter: adapter struct - * **/ void igb_set_fw_version(struct igb_adapter *adapter) { @@ -1923,10 +1953,10 @@ void igb_set_fw_version(struct igb_adapter *adapter) return; } -/* igb_init_i2c - Init I2C interface +/** + * igb_init_i2c - Init I2C interface * @adapter: pointer to adapter structure - * - */ + **/ static s32 igb_init_i2c(struct igb_adapter *adapter) { s32 status = E1000_SUCCESS; @@ -1951,15 +1981,15 @@ static s32 igb_init_i2c(struct igb_adapter *adapter) } /** - * igb_probe - Device Initialization Routine - * @pdev: PCI device information struct - * @ent: entry in igb_pci_tbl + * igb_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igb_pci_tbl * - * Returns 0 on success, negative on failure + * Returns 0 on success, negative on failure * - * igb_probe initializes an adapter identified by a pci_dev structure. 
- * The OS initialization, configuring of the adapter private structure, - * and a hardware reset occur. + * igb_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. **/ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1996,18 +2026,19 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } else { err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "No usable DMA " - "configuration, aborting\n"); + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); goto err_dma; } } } err = pci_request_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM), - igb_driver_name); + IORESOURCE_MEM), + igb_driver_name); if (err) goto err_pci_reg; @@ -2085,8 +2116,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_info(&pdev->dev, "PHY reset is blocked due to SOL/IDER session.\n"); - /* - * features is initialized to 0 in allocation, it might have bits + /* features is initialized to 0 in allocation, it might have bits * set by igb_sw_init so we should use an or instead of an * assignment. 
*/ @@ -2097,15 +2127,15 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; netdev->hw_features |= NETIF_F_RXALL; /* set this bit last since it cannot be part of hw_features */ - netdev->features |= NETIF_F_HW_VLAN_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_TSO | NETIF_F_TSO6 | @@ -2130,11 +2160,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); /* before reading the NVM, reset the controller to put the device in a - * known good starting state */ + * known good starting state + */ hw->mac.ops.reset_hw(hw); - /* - * make sure the NVM is good , i211 parts have special NVM that + /* make sure the NVM is good , i211 parts have special NVM that * doesn't contain a checksum */ if (hw->mac.type != e1000_i211) { @@ -2161,9 +2191,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) igb_set_fw_version(adapter); setup_timer(&adapter->watchdog_timer, igb_watchdog, - (unsigned long) adapter); + (unsigned long) adapter); setup_timer(&adapter->phy_info_timer, igb_update_phy_info, - (unsigned long) adapter); + (unsigned long) adapter); INIT_WORK(&adapter->reset_task, igb_reset_task); INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); @@ -2185,8 +2215,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Check the NVM for wake support on non-port A ports */ if (hw->mac.type >= e1000_82580) hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + - NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, - &eeprom_data); + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &eeprom_data); else if (hw->bus.func == 1) hw->nvm.ops.read(hw, 
NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); @@ -2195,7 +2225,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* now that we have the eeprom settings, apply the special cases where * the eeprom may be wrong or the board simply won't support wake on - * lan on a particular port */ + * lan on a particular port + */ switch (pdev->device) { case E1000_DEV_ID_82575GB_QUAD_COPPER: adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; @@ -2204,7 +2235,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: /* Wake events only supported on port A for dual fiber - * regardless of eeprom setting */ + * regardless of eeprom setting + */ if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; break; @@ -2274,8 +2306,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac.type == e1000_i350 && hw->bus.func == 0) { u16 ets_word; - /* - * Read the NVM to determine if this i350 device supports an + /* Read the NVM to determine if this i350 device supports an * external thermal sensor. */ hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word); @@ -2294,17 +2325,20 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) igb_ptp_init(adapter); dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); - /* print bus type/speed/width info */ - dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", - netdev->name, - ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : - (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : - "unknown"), - ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : - (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : - (hw->bus.width == e1000_bus_width_pcie_x1) ? 
"Width x1" : - "unknown"), - netdev->dev_addr); + /* print bus type/speed/width info, not applicable to i354 */ + if (hw->mac.type != e1000_i354) { + dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", + netdev->name, + ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : + (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : + "unknown"), + ((hw->bus.width == e1000_bus_width_pcie_x4) ? + "Width x4" : + (hw->bus.width == e1000_bus_width_pcie_x2) ? + "Width x2" : + (hw->bus.width == e1000_bus_width_pcie_x1) ? + "Width x1" : "unknown"), netdev->dev_addr); + } ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); if (ret_val) @@ -2321,6 +2355,13 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case e1000_i211: igb_set_eee_i350(hw); break; + case e1000_i354: + if (hw->phy.media_type == e1000_media_type_copper) { + if ((rd32(E1000_CTRL_EXT) & + E1000_CTRL_EXT_LINK_MODE_SGMII)) + igb_set_eee_i354(hw); + } + break; default: break; } @@ -2344,7 +2385,7 @@ err_ioremap: free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -2444,26 +2485,24 @@ out: } #endif -/* +/** * igb_remove_i2c - Cleanup I2C interface * @adapter: pointer to adapter structure - * - */ + **/ static void igb_remove_i2c(struct igb_adapter *adapter) { - /* free the adapter bus structure */ i2c_del_adapter(&adapter->i2c_adap); } /** - * igb_remove - Device Removal Routine - * @pdev: PCI device information struct + * igb_remove - Device Removal Routine + * @pdev: PCI device information struct * - * igb_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a - * Hot-Plug event, or because the driver is going to be removed from - * memory. + * igb_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. 
The could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. **/ static void igb_remove(struct pci_dev *pdev) { @@ -2477,8 +2516,7 @@ static void igb_remove(struct pci_dev *pdev) #endif igb_remove_i2c(adapter); igb_ptp_stop(adapter); - /* - * The watchdog timer may be rescheduled, so explicitly + /* The watchdog timer may be rescheduled, so explicitly * disable watchdog from being rescheduled. */ set_bit(__IGB_DOWN, &adapter->state); @@ -2498,7 +2536,8 @@ static void igb_remove(struct pci_dev *pdev) #endif /* Release control of h/w to f/w. If f/w is AMT enabled, this - * would have already happened in close and is redundant. */ + * would have already happened in close and is redundant. + */ igb_release_hw_control(adapter); unregister_netdev(netdev); @@ -2513,7 +2552,7 @@ static void igb_remove(struct pci_dev *pdev) if (hw->flash_address) iounmap(hw->flash_address); pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_select_bars(pdev, IORESOURCE_MEM)); kfree(adapter->shadow_vfta); free_netdev(netdev); @@ -2524,13 +2563,13 @@ static void igb_remove(struct pci_dev *pdev) } /** - * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space - * @adapter: board private structure to initialize + * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space + * @adapter: board private structure to initialize * - * This function initializes the vf specific data storage and then attempts to - * allocate the VFs. The reason for ordering it this way is because it is much - * mor expensive time wise to disable SR-IOV than it is to allocate and free - * the memory for the VFs. + * This function initializes the vf specific data storage and then attempts to + * allocate the VFs. The reason for ordering it this way is because it is much + * mor expensive time wise to disable SR-IOV than it is to allocate and free + * the memory for the VFs. 
**/ static void igb_probe_vfs(struct igb_adapter *adapter) { @@ -2542,8 +2581,8 @@ static void igb_probe_vfs(struct igb_adapter *adapter) if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) return; - igb_enable_sriov(pdev, max_vfs); pci_sriov_set_totalvfs(pdev, 7); + igb_enable_sriov(pdev, max_vfs); #endif /* CONFIG_PCI_IOV */ } @@ -2576,6 +2615,7 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) } /* fall through */ case e1000_82580: + case e1000_i354: default: max_rss_queues = IGB_MAX_RX_QUEUES; break; @@ -2590,8 +2630,7 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) /* Device supports enough interrupts without queue pairing. */ break; case e1000_82576: - /* - * If VFs are going to be allocated with RSS queues then we + /* If VFs are going to be allocated with RSS queues then we * should pair the queues in order to conserve interrupts due * to limited supply. */ @@ -2601,10 +2640,10 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) /* fall through */ case e1000_82580: case e1000_i350: + case e1000_i354: case e1000_i210: default: - /* - * If rss_queues > half of max_rss_queues, pair the queues in + /* If rss_queues > half of max_rss_queues, pair the queues in * order to conserve interrupts due to limited supply. */ if (adapter->rss_queues > (max_rss_queues / 2)) @@ -2614,12 +2653,12 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) } /** - * igb_sw_init - Initialize general software structures (struct igb_adapter) - * @adapter: board private structure to initialize + * igb_sw_init - Initialize general software structures (struct igb_adapter) + * @adapter: board private structure to initialize * - * igb_sw_init initializes the Adapter private data structure. - * Fields are initialized based on PCI device information and - * OS network device settings (MTU size). + * igb_sw_init initializes the Adapter private data structure. 
+ * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). **/ static int igb_sw_init(struct igb_adapter *adapter) { @@ -2652,7 +2691,7 @@ static int igb_sw_init(struct igb_adapter *adapter) if (max_vfs > 7) { dev_warn(&pdev->dev, "Maximum of 7 VFs per PF, using max\n"); - adapter->vfs_allocated_count = 7; + max_vfs = adapter->vfs_allocated_count = 7; } else adapter->vfs_allocated_count = max_vfs; if (adapter->vfs_allocated_count) @@ -2689,16 +2728,16 @@ static int igb_sw_init(struct igb_adapter *adapter) } /** - * igb_open - Called when a network interface is made active - * @netdev: network interface device structure + * igb_open - Called when a network interface is made active + * @netdev: network interface device structure * - * Returns 0 on success, negative value on failure + * Returns 0 on success, negative value on failure * - * The open entry point is called when a network interface is made - * active by the system (IFF_UP). At this point all resources needed - * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, - * and the stack is notified that the interface is ready. + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. **/ static int __igb_open(struct net_device *netdev, bool resuming) { @@ -2734,7 +2773,8 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* before we allocate an interrupt, we must be ready to handle it. * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt * as soon as we call pci_request_irq, so we have to setup our - * clean_rx handler before we do so. */ + * clean_rx handler before we do so. 
+ */ igb_configure(adapter); err = igb_request_irq(adapter); @@ -2803,15 +2843,15 @@ static int igb_open(struct net_device *netdev) } /** - * igb_close - Disables a network interface - * @netdev: network interface device structure + * igb_close - Disables a network interface + * @netdev: network interface device structure * - * Returns 0, this is not allowed to fail + * Returns 0, this is not allowed to fail * - * The close entry point is called when an interface is de-activated - * by the OS. The hardware is still under the driver's control, but - * needs to be disabled. A global MAC reset is issued to stop the - * hardware, and all transmit and receive resources are freed. + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the driver's control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. **/ static int __igb_close(struct net_device *netdev, bool suspending) { @@ -2840,10 +2880,10 @@ static int igb_close(struct net_device *netdev) } /** - * igb_setup_tx_resources - allocate Tx resources (Descriptors) - * @tx_ring: tx descriptor ring (for a specific queue) to setup + * igb_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup * - * Return 0 on success, negative on failure + * Return 0 on success, negative on failure **/ int igb_setup_tx_resources(struct igb_ring *tx_ring) { @@ -2878,11 +2918,11 @@ err: } /** - * igb_setup_all_tx_resources - wrapper to allocate Tx resources - * (Descriptors) for all queues - * @adapter: board private structure + * igb_setup_all_tx_resources - wrapper to allocate Tx resources + * (Descriptors) for all queues + * @adapter: board private structure * - * Return 0 on success, negative on failure + * Return 0 on success, negative on failure **/ static int igb_setup_all_tx_resources(struct igb_adapter *adapter) { @@ 
-2904,8 +2944,8 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter) } /** - * igb_setup_tctl - configure the transmit control registers - * @adapter: Board private structure + * igb_setup_tctl - configure the transmit control registers + * @adapter: Board private structure **/ void igb_setup_tctl(struct igb_adapter *adapter) { @@ -2930,11 +2970,11 @@ void igb_setup_tctl(struct igb_adapter *adapter) } /** - * igb_configure_tx_ring - Configure transmit ring after Reset - * @adapter: board private structure - * @ring: tx ring to configure + * igb_configure_tx_ring - Configure transmit ring after Reset + * @adapter: board private structure + * @ring: tx ring to configure * - * Configure a transmit ring after a reset. + * Configure a transmit ring after a reset. **/ void igb_configure_tx_ring(struct igb_adapter *adapter, struct igb_ring *ring) @@ -2950,9 +2990,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, mdelay(10); wr32(E1000_TDLEN(reg_idx), - ring->count * sizeof(union e1000_adv_tx_desc)); + ring->count * sizeof(union e1000_adv_tx_desc)); wr32(E1000_TDBAL(reg_idx), - tdba & 0x00000000ffffffffULL); + tdba & 0x00000000ffffffffULL); wr32(E1000_TDBAH(reg_idx), tdba >> 32); ring->tail = hw->hw_addr + E1000_TDT(reg_idx); @@ -2968,10 +3008,10 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, } /** - * igb_configure_tx - Configure transmit Unit after Reset - * @adapter: board private structure + * igb_configure_tx - Configure transmit Unit after Reset + * @adapter: board private structure * - * Configure the Tx unit of the MAC after a reset. + * Configure the Tx unit of the MAC after a reset. 
**/ static void igb_configure_tx(struct igb_adapter *adapter) { @@ -2982,10 +3022,10 @@ static void igb_configure_tx(struct igb_adapter *adapter) } /** - * igb_setup_rx_resources - allocate Rx resources (Descriptors) - * @rx_ring: rx descriptor ring (for a specific queue) to setup + * igb_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: Rx descriptor ring (for a specific queue) to setup * - * Returns 0 on success, negative on failure + * Returns 0 on success, negative on failure **/ int igb_setup_rx_resources(struct igb_ring *rx_ring) { @@ -3021,11 +3061,11 @@ err: } /** - * igb_setup_all_rx_resources - wrapper to allocate Rx resources - * (Descriptors) for all queues - * @adapter: board private structure + * igb_setup_all_rx_resources - wrapper to allocate Rx resources + * (Descriptors) for all queues + * @adapter: board private structure * - * Return 0 on success, negative on failure + * Return 0 on success, negative on failure **/ static int igb_setup_all_rx_resources(struct igb_adapter *adapter) { @@ -3047,8 +3087,8 @@ static int igb_setup_all_rx_resources(struct igb_adapter *adapter) } /** - * igb_setup_mrqc - configure the multiple receive queue control registers - * @adapter: Board private structure + * igb_setup_mrqc - configure the multiple receive queue control registers + * @adapter: Board private structure **/ static void igb_setup_mrqc(struct igb_adapter *adapter) { @@ -3081,8 +3121,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) break; } - /* - * Populate the indirection table 4 entries at a time. To do this + /* Populate the indirection table 4 entries at a time. To do this * we are generating the results for n and n+2 and then interleaving * those with the results with n+1 and n+3. 
*/ @@ -3098,8 +3137,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) wr32(E1000_RETA(j), reta); } - /* - * Disable raw packet checksumming so that RSS hash is placed in + /* Disable raw packet checksumming so that RSS hash is placed in * descriptor on writeback. No need to enable TCP/UDP/IP checksum * offloads as they are enabled by default */ @@ -3129,7 +3167,8 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) /* If VMDq is enabled then we set the appropriate mode for that, else * we default to RSS so that an RSS hash is calculated per packet even - * if we are only using one queue */ + * if we are only using one queue + */ if (adapter->vfs_allocated_count) { if (hw->mac.type > e1000_82575) { /* Set the default pool for the PF's first queue */ @@ -3154,8 +3193,8 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) } /** - * igb_setup_rctl - configure the receive control registers - * @adapter: Board private structure + * igb_setup_rctl - configure the receive control registers + * @adapter: Board private structure **/ void igb_setup_rctl(struct igb_adapter *adapter) { @@ -3170,8 +3209,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - /* - * enable stripping of CRC. It's unlikely this will break BMC + /* enable stripping of CRC. It's unlikely this will break BMC * redirection as it did with e1000. Newer features require * that the HW strips the CRC. */ @@ -3198,7 +3236,8 @@ void igb_setup_rctl(struct igb_adapter *adapter) /* This is useful for sniffing bad packets. 
*/ if (adapter->netdev->features & NETIF_F_RXALL) { /* UPE and MPE will be handled by normal PROMISC logic - * in e1000e_set_rx_mode */ + * in e1000e_set_rx_mode + */ rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ E1000_RCTL_BAM | /* RX All Bcast Pkts */ E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ @@ -3221,7 +3260,8 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, u32 vmolr; /* if it isn't the PF check to see if VFs are enabled and - * increase the size to support vlan tags */ + * increase the size to support vlan tags + */ if (vfn < adapter->vfs_allocated_count && adapter->vf_data[vfn].vlans_enabled) size += VLAN_TAG_SIZE; @@ -3235,10 +3275,10 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, } /** - * igb_rlpml_set - set maximum receive packet size - * @adapter: board private structure + * igb_rlpml_set - set maximum receive packet size + * @adapter: board private structure * - * Configure maximum receivable packet size. + * Configure maximum receivable packet size. **/ static void igb_rlpml_set(struct igb_adapter *adapter) { @@ -3248,8 +3288,7 @@ static void igb_rlpml_set(struct igb_adapter *adapter) if (pf_id) { igb_set_vf_rlpml(adapter, max_frame_size, pf_id); - /* - * If we're in VMDQ or SR-IOV mode, then set global RLPML + /* If we're in VMDQ or SR-IOV mode, then set global RLPML * to our max jumbo frame size, in case we need to enable * jumbo frames on one of the rings later. 
* This will not pass over-length frames into the default @@ -3267,17 +3306,16 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, struct e1000_hw *hw = &adapter->hw; u32 vmolr; - /* - * This register exists only on 82576 and newer so if we are older then + /* This register exists only on 82576 and newer so if we are older then * we should exit and do nothing */ if (hw->mac.type < e1000_82576) return; vmolr = rd32(E1000_VMOLR(vfn)); - vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ + vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ if (aupe) - vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ + vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ else vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ @@ -3286,25 +3324,24 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ - /* - * for VMDq only allow the VFs and pool 0 to accept broadcast and + /* for VMDq only allow the VFs and pool 0 to accept broadcast and * multicast packets */ if (vfn <= adapter->vfs_allocated_count) - vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ + vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ wr32(E1000_VMOLR(vfn), vmolr); } /** - * igb_configure_rx_ring - Configure a receive ring after Reset - * @adapter: board private structure - * @ring: receive ring to be configured + * igb_configure_rx_ring - Configure a receive ring after Reset + * @adapter: board private structure + * @ring: receive ring to be configured * - * Configure the Rx unit of the MAC after a reset. + * Configure the Rx unit of the MAC after a reset. 
**/ void igb_configure_rx_ring(struct igb_adapter *adapter, - struct igb_ring *ring) + struct igb_ring *ring) { struct e1000_hw *hw = &adapter->hw; u64 rdba = ring->dma; @@ -3319,7 +3356,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, rdba & 0x00000000ffffffffULL); wr32(E1000_RDBAH(reg_idx), rdba >> 32); wr32(E1000_RDLEN(reg_idx), - ring->count * sizeof(union e1000_adv_rx_desc)); + ring->count * sizeof(union e1000_adv_rx_desc)); /* initialize head and tail */ ring->tail = hw->hw_addr + E1000_RDT(reg_idx); @@ -3350,25 +3387,11 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, wr32(E1000_RXDCTL(reg_idx), rxdctl); } -static void igb_set_rx_buffer_len(struct igb_adapter *adapter, - struct igb_ring *rx_ring) -{ -#define IGB_MAX_BUILD_SKB_SIZE \ - (SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) - \ - (NET_SKB_PAD + NET_IP_ALIGN + IGB_TS_HDR_LEN)) - - /* set build_skb flag */ - if (adapter->max_frame_size <= IGB_MAX_BUILD_SKB_SIZE) - set_ring_build_skb_enabled(rx_ring); - else - clear_ring_build_skb_enabled(rx_ring); -} - /** - * igb_configure_rx - Configure receive Unit after Reset - * @adapter: board private structure + * igb_configure_rx - Configure receive Unit after Reset + * @adapter: board private structure * - * Configure the Rx unit of the MAC after a reset. + * Configure the Rx unit of the MAC after a reset. 
**/ static void igb_configure_rx(struct igb_adapter *adapter) { @@ -3379,22 +3402,20 @@ static void igb_configure_rx(struct igb_adapter *adapter) /* set the correct pool for the PF default MAC address in entry 0 */ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, - adapter->vfs_allocated_count); + adapter->vfs_allocated_count); /* Setup the HW Rx Head and Tail Descriptor Pointers and - * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct igb_ring *rx_ring = adapter->rx_ring[i]; - igb_set_rx_buffer_len(adapter, rx_ring); - igb_configure_rx_ring(adapter, rx_ring); - } + * the Base and Length of the Rx Descriptor Ring + */ + for (i = 0; i < adapter->num_rx_queues; i++) + igb_configure_rx_ring(adapter, adapter->rx_ring[i]); } /** - * igb_free_tx_resources - Free Tx Resources per Queue - * @tx_ring: Tx descriptor ring for a specific queue + * igb_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue * - * Free all transmit software resources + * Free all transmit software resources **/ void igb_free_tx_resources(struct igb_ring *tx_ring) { @@ -3414,10 +3435,10 @@ void igb_free_tx_resources(struct igb_ring *tx_ring) } /** - * igb_free_all_tx_resources - Free Tx Resources for All Queues - * @adapter: board private structure + * igb_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure * - * Free all transmit software resources + * Free all transmit software resources **/ static void igb_free_all_tx_resources(struct igb_adapter *adapter) { @@ -3450,8 +3471,8 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *ring, } /** - * igb_clean_tx_ring - Free Tx Buffers - * @tx_ring: ring to be cleaned + * igb_clean_tx_ring - Free Tx Buffers + * @tx_ring: ring to be cleaned **/ static void igb_clean_tx_ring(struct igb_ring *tx_ring) { @@ -3481,8 +3502,8 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) } /** - * 
igb_clean_all_tx_rings - Free Tx Buffers for all queues - * @adapter: board private structure + * igb_clean_all_tx_rings - Free Tx Buffers for all queues + * @adapter: board private structure **/ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) { @@ -3493,10 +3514,10 @@ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) } /** - * igb_free_rx_resources - Free Rx Resources - * @rx_ring: ring to clean the resources from + * igb_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from * - * Free all receive software resources + * Free all receive software resources **/ void igb_free_rx_resources(struct igb_ring *rx_ring) { @@ -3516,10 +3537,10 @@ void igb_free_rx_resources(struct igb_ring *rx_ring) } /** - * igb_free_all_rx_resources - Free Rx Resources for All Queues - * @adapter: board private structure + * igb_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure * - * Free all receive software resources + * Free all receive software resources **/ static void igb_free_all_rx_resources(struct igb_adapter *adapter) { @@ -3530,8 +3551,8 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter) } /** - * igb_clean_rx_ring - Free Rx Buffers per Queue - * @rx_ring: ring to free buffers from + * igb_clean_rx_ring - Free Rx Buffers per Queue + * @rx_ring: ring to free buffers from **/ static void igb_clean_rx_ring(struct igb_ring *rx_ring) { @@ -3573,8 +3594,8 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) } /** - * igb_clean_all_rx_rings - Free Rx Buffers for all queues - * @adapter: board private structure + * igb_clean_all_rx_rings - Free Rx Buffers for all queues + * @adapter: board private structure **/ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) { @@ -3585,11 +3606,11 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) } /** - * igb_set_mac - Change the Ethernet Address of the NIC - * @netdev: network interface 
device structure - * @p: pointer to an address structure + * igb_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure * - * Returns 0 on success, negative on failure + * Returns 0 on success, negative on failure **/ static int igb_set_mac(struct net_device *netdev, void *p) { @@ -3605,19 +3626,19 @@ static int igb_set_mac(struct net_device *netdev, void *p) /* set the correct pool for the new PF MAC address in entry 0 */ igb_rar_set_qsel(adapter, hw->mac.addr, 0, - adapter->vfs_allocated_count); + adapter->vfs_allocated_count); return 0; } /** - * igb_write_mc_addr_list - write multicast addresses to MTA - * @netdev: network interface device structure + * igb_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure * - * Writes multicast address list to the MTA hash table. - * Returns: -ENOMEM on failure - * 0 on no addresses written - * X on writing X addresses to MTA + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA **/ static int igb_write_mc_addr_list(struct net_device *netdev) { @@ -3650,13 +3671,13 @@ static int igb_write_mc_addr_list(struct net_device *netdev) } /** - * igb_write_uc_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure + * igb_write_uc_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure * - * Writes unicast address list to the RAR table. - * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table + * Writes unicast address list to the RAR table. 
+ * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table **/ static int igb_write_uc_addr_list(struct net_device *netdev) { @@ -3677,8 +3698,8 @@ static int igb_write_uc_addr_list(struct net_device *netdev) if (!rar_entries) break; igb_rar_set_qsel(adapter, ha->addr, - rar_entries--, - vfn); + rar_entries--, + vfn); count++; } } @@ -3693,13 +3714,13 @@ static int igb_write_uc_addr_list(struct net_device *netdev) } /** - * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set - * @netdev: network interface device structure + * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure * - * The set_rx_mode entry point is called whenever the unicast or multicast - * address lists or the network interface flags are updated. This routine is - * responsible for configuring the hardware for proper unicast, multicast, - * promiscuous mode, and all-multi behavior. + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior. 
**/ static void igb_set_rx_mode(struct net_device *netdev) { @@ -3723,8 +3744,7 @@ static void igb_set_rx_mode(struct net_device *netdev) rctl |= E1000_RCTL_MPE; vmolr |= E1000_VMOLR_MPME; } else { - /* - * Write addresses to the MTA, if the attempt fails + /* Write addresses to the MTA, if the attempt fails * then we should just turn on promiscuous mode so * that we can at least receive multicast traffic */ @@ -3736,8 +3756,7 @@ static void igb_set_rx_mode(struct net_device *netdev) vmolr |= E1000_VMOLR_ROMPE; } } - /* - * Write addresses to available RAR registers, if there is not + /* Write addresses to available RAR registers, if there is not * sufficient space to store all the addresses then enable * unicast promiscuous mode */ @@ -3750,8 +3769,7 @@ static void igb_set_rx_mode(struct net_device *netdev) } wr32(E1000_RCTL, rctl); - /* - * In order to support SR-IOV and eventually VMDq it is necessary to set + /* In order to support SR-IOV and eventually VMDq it is necessary to set * the VMOLR to enable the appropriate modes. 
Without this workaround * we will have issues with VLAN tag stripping not being done for frames * that are only arriving because we are the default pool @@ -3760,7 +3778,7 @@ static void igb_set_rx_mode(struct net_device *netdev) return; vmolr |= rd32(E1000_VMOLR(vfn)) & - ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); wr32(E1000_VMOLR(vfn), vmolr); igb_restore_vf_multicasts(adapter); } @@ -3805,7 +3823,8 @@ static void igb_spoof_check(struct igb_adapter *adapter) } /* Need to wait a few seconds after link up to get diagnostic information from - * the phy */ + * the phy + */ static void igb_update_phy_info(unsigned long data) { struct igb_adapter *adapter = (struct igb_adapter *) data; @@ -3813,8 +3832,8 @@ static void igb_update_phy_info(unsigned long data) } /** - * igb_has_link - check shared code for link and determine up/down - * @adapter: pointer to driver private info + * igb_has_link - check shared code for link and determine up/down + * @adapter: pointer to driver private info **/ bool igb_has_link(struct igb_adapter *adapter) { @@ -3859,17 +3878,16 @@ static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) ctrl_ext = rd32(E1000_CTRL_EXT); if ((hw->phy.media_type == e1000_media_type_copper) && - !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) { + !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) ret = !!(thstat & event); - } } return ret; } /** - * igb_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * igb_watchdog - Timer Call-back + * @data: pointer to adapter cast into an unsigned long **/ static void igb_watchdog(unsigned long data) { @@ -3881,8 +3899,8 @@ static void igb_watchdog(unsigned long data) static void igb_watchdog_task(struct work_struct *work) { struct igb_adapter *adapter = container_of(work, - struct igb_adapter, - watchdog_task); + struct igb_adapter, + watchdog_task); struct e1000_hw *hw = &adapter->hw; struct 
net_device *netdev = adapter->netdev; u32 link; @@ -3896,8 +3914,8 @@ static void igb_watchdog_task(struct work_struct *work) if (!netif_carrier_ok(netdev)) { u32 ctrl; hw->mac.ops.get_speed_and_duplex(hw, - &adapter->link_speed, - &adapter->link_duplex); + &adapter->link_speed, + &adapter->link_duplex); ctrl = rd32(E1000_CTRL); /* Links status message must follow this format */ @@ -3980,7 +3998,8 @@ static void igb_watchdog_task(struct work_struct *work) /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. - * (Do the reset outside of interrupt context). */ + * (Do the reset outside of interrupt context). + */ if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); @@ -3993,7 +4012,7 @@ static void igb_watchdog_task(struct work_struct *work) set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); } - /* Cause software interrupt to ensure rx ring is cleaned */ + /* Cause software interrupt to ensure Rx ring is cleaned */ if (adapter->msix_entries) { u32 eics = 0; for (i = 0; i < adapter->num_q_vectors; i++) @@ -4020,20 +4039,20 @@ enum latency_range { }; /** - * igb_update_ring_itr - update the dynamic ITR value based on packet size + * igb_update_ring_itr - update the dynamic ITR value based on packet size + * @q_vector: pointer to q_vector * - * Stores a new ITR value based on strictly on packet size. This - * algorithm is less sophisticated than that used in igb_update_itr, - * due to the difficulty of synchronizing statistics across multiple - * receive rings. The divisors and thresholds used by this function - * were determined based on theoretical maximum wire speed and testing - * data, in order to minimize response time while increasing bulk - * throughput. 
- * This functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) - * NOTE: This function is called only when operating in a multiqueue - * receive environment. - * @q_vector: pointer to q_vector + * Stores a new ITR value based on strictly on packet size. This + * algorithm is less sophisticated than that used in igb_update_itr, + * due to the difficulty of synchronizing statistics across multiple + * receive rings. The divisors and thresholds used by this function + * were determined based on theoretical maximum wire speed and testing + * data, in order to minimize response time while increasing bulk + * throughput. + * This functionality is controlled by the InterruptThrottleRate module + * parameter (see igb_param.c) + * NOTE: This function is called only when operating in a multiqueue + * receive environment. **/ static void igb_update_ring_itr(struct igb_q_vector *q_vector) { @@ -4094,20 +4113,21 @@ clear_counts: } /** - * igb_update_itr - update the dynamic ITR value based on statistics - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. - * this functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) - * NOTE: These calculations are only valid when operating in a single- - * queue environment. 
- * @q_vector: pointer to q_vector - * @ring_container: ring info to update the itr for + * igb_update_itr - update the dynamic ITR value based on statistics + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * this functionality is controlled by the InterruptThrottleRate module + * parameter (see igb_param.c) + * NOTE: These calculations are only valid when operating in a single- + * queue environment. **/ static void igb_update_itr(struct igb_q_vector *q_vector, struct igb_ring_container *ring_container) @@ -4205,12 +4225,12 @@ set_itr_now: if (new_itr != q_vector->itr_val) { /* this attempts to bias the interrupt rate towards Bulk * by adding intermediate steps when interrupt rate is - * increasing */ + * increasing + */ new_itr = new_itr > q_vector->itr_val ? - max((new_itr * q_vector->itr_val) / - (new_itr + (q_vector->itr_val >> 2)), - new_itr) : - new_itr; + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : new_itr; /* Don't write the value here; it resets the adapter's * internal timer, and causes us to delay far longer than * we should between interrupts. 
Instead, we write the ITR @@ -4337,8 +4357,8 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); + "partial checksum but proto=%x!\n", + first->protocol); } break; } @@ -4361,8 +4381,8 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum but l4 proto=%x!\n", + l4_hdr); } break; } @@ -4514,8 +4534,7 @@ static void igb_tx_map(struct igb_ring *tx_ring, /* set the timestamp */ first->time_stamp = jiffies; - /* - * Force memory writes to complete before letting h/w know there + /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. (Only applicable for weak-ordered * memory model archs, such as IA-64). * @@ -4536,7 +4555,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, writel(i, tx_ring->tail); /* we need this if more than one processor can write to our tail - * at a time, it syncronizes IO on IA64/Altix systems */ + * at a time, it synchronizes IO on IA64/Altix systems + */ mmiowb(); return; @@ -4566,11 +4586,13 @@ static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) /* Herbert's original patch had: * smp_mb__after_netif_stop_queue(); - * but since that doesn't exist yet, just open code it. */ + * but since that doesn't exist yet, just open code it. + */ smp_mb(); /* We need to check again in a case another CPU has just - * made room available. */ + * made room available. 
+ */ if (igb_desc_unused(tx_ring) < size) return -EBUSY; @@ -4594,7 +4616,6 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { - struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; @@ -4629,15 +4650,18 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, skb_tx_timestamp(skb); - if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - !(adapter->ptp_tx_skb))) { - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - tx_flags |= IGB_TX_FLAGS_TSTAMP; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); - adapter->ptp_tx_skb = skb_get(skb); - adapter->ptp_tx_start = jiffies; - if (adapter->hw.mac.type == e1000_82576) - schedule_work(&adapter->ptp_tx_work); + if (!(adapter->ptp_tx_skb)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IGB_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + if (adapter->hw.mac.type == e1000_82576) + schedule_work(&adapter->ptp_tx_work); + } } if (vlan_tx_tag_present(skb)) { @@ -4694,8 +4718,7 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - /* - * The minimum packet size with TCTL.PSP set is 17 so pad the skb + /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. 
*/ if (unlikely(skb->len < 17)) { @@ -4709,8 +4732,8 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, } /** - * igb_tx_timeout - Respond to a Tx Hang - * @netdev: network interface device structure + * igb_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure **/ static void igb_tx_timeout(struct net_device *netdev) { @@ -4739,13 +4762,12 @@ static void igb_reset_task(struct work_struct *work) } /** - * igb_get_stats64 - Get System Network Statistics - * @netdev: network interface device structure - * @stats: rtnl_link_stats64 pointer - * + * igb_get_stats64 - Get System Network Statistics + * @netdev: network interface device structure + * @stats: rtnl_link_stats64 pointer **/ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) + struct rtnl_link_stats64 *stats) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -4758,11 +4780,11 @@ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, } /** - * igb_change_mtu - Change the Maximum Transfer Unit - * @netdev: network interface device structure - * @new_mtu: new value for maximum frame size + * igb_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size * - * Returns 0 on success, negative on failure + * Returns 0 on success, negative on failure **/ static int igb_change_mtu(struct net_device *netdev, int new_mtu) { @@ -4805,10 +4827,9 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) } /** - * igb_update_stats - Update the board statistics counters - * @adapter: board private structure + * igb_update_stats - Update the board statistics counters + * @adapter: board private structure **/ - void igb_update_stats(struct igb_adapter *adapter, struct rtnl_link_stats64 *net_stats) { @@ -4823,8 +4844,7 @@ void igb_update_stats(struct igb_adapter *adapter, #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF - /* 
- * Prevent stats update while adapter is being reset, or if the pci + /* Prevent stats update while adapter is being reset, or if the pci * connection is down. */ if (adapter->link_speed == 0) @@ -4958,7 +4978,8 @@ void igb_update_stats(struct igb_adapter *adapter, /* Rx Errors */ /* RLEC on some newer hardware can be incorrect so build - * our own version based on RUC and ROC */ + * our own version based on RUC and ROC + */ net_stats->rx_errors = adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + @@ -5017,7 +5038,8 @@ static irqreturn_t igb_msix_other(int irq, void *data) adapter->stats.doosync++; /* The DMA Out of Sync is also indication of a spoof event * in IOV mode. Check the Wrong VM Behavior register to - * see if it is really a spoof event. */ + * see if it is really a spoof event. + */ igb_check_wvbr(adapter); } @@ -5091,8 +5113,7 @@ static void igb_update_tx_dca(struct igb_adapter *adapter, if (hw->mac.type != e1000_82575) txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT; - /* - * We can enable relaxed ordering for reads, but not writes when + /* We can enable relaxed ordering for reads, but not writes when * DCA is enabled. This is due to a known issue in some chipsets * which will cause the DCA tag to be cleared. */ @@ -5113,8 +5134,7 @@ static void igb_update_rx_dca(struct igb_adapter *adapter, if (hw->mac.type != e1000_82575) rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT; - /* - * We can enable relaxed ordering for reads, but not writes when + /* We can enable relaxed ordering for reads, but not writes when * DCA is enabled. This is due to a known issue in some chipsets * which will cause the DCA tag to be cleared. 
*/ @@ -5183,7 +5203,8 @@ static int __igb_notify_dca(struct device *dev, void *data) case DCA_PROVIDER_REMOVE: if (adapter->flags & IGB_FLAG_DCA_ENABLED) { /* without this a class_device is left - * hanging around in the sysfs model */ + * hanging around in the sysfs model + */ dca_remove_requester(dev); dev_info(&pdev->dev, "DCA disabled\n"); adapter->flags &= ~IGB_FLAG_DCA_ENABLED; @@ -5196,12 +5217,12 @@ static int __igb_notify_dca(struct device *dev, void *data) } static int igb_notify_dca(struct notifier_block *nb, unsigned long event, - void *p) + void *p) { int ret_val; ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, - __igb_notify_dca); + __igb_notify_dca); return ret_val ? NOTIFY_BAD : NOTIFY_DONE; } @@ -5215,6 +5236,9 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf) eth_zero_addr(mac_addr); igb_set_vf_mac(adapter, vf, mac_addr); + /* By default spoof check is enabled for all VFs */ + adapter->vf_data[vf].spoofchk_enabled = true; + return 0; } @@ -5273,7 +5297,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) struct vf_data_storage *vf_data = &adapter->vf_data[vf]; vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | - IGB_VF_FLAG_MULTI_PROMISC); + IGB_VF_FLAG_MULTI_PROMISC); vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { @@ -5281,8 +5305,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; } else { - /* - * if we have hashes and we are clearing a multicast promisc + /* if we have hashes and we are clearing a multicast promisc * flag we need to write the hashes to the MTA as this step * was previously skipped */ @@ -5303,7 +5326,6 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) return -EINVAL; return 0; - } static int igb_set_vf_multicasts(struct igb_adapter *adapter, 
@@ -5510,22 +5532,20 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); if (test_bit(__IGB_DOWN, &adapter->state)) { dev_warn(&adapter->pdev->dev, - "The VF VLAN has been set," - " but the PF device is not up.\n"); + "The VF VLAN has been set, but the PF device is not up.\n"); dev_warn(&adapter->pdev->dev, - "Bring the PF device up before" - " attempting to use the VF device.\n"); + "Bring the PF device up before attempting to use the VF device.\n"); } } else { igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, - false, vf); + false, vf); igb_set_vmvir(adapter, vlan, vf); igb_set_vmolr(adapter, vf, true); adapter->vf_data[vf].pf_vlan = 0; adapter->vf_data[vf].pf_qos = 0; - } + } out: - return err; + return err; } static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) @@ -5603,8 +5623,7 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) { - /* - * The VF MAC Address is stored in a packed array of bytes + /* The VF MAC Address is stored in a packed array of bytes * starting at the second 32 bit word of the msg array */ unsigned char *addr = (char *)&msg[1]; @@ -5653,11 +5672,9 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) return; - /* - * until the vf completes a reset it should not be + /* until the vf completes a reset it should not be * allowed to start any configuration. 
*/ - if (msgbuf[0] == E1000_VF_RESET) { igb_vf_reset_msg(adapter, vf); return; @@ -5677,9 +5694,8 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); else dev_warn(&pdev->dev, - "VF %d attempted to override administratively " - "set MAC address\nReload the VF driver to " - "resume operations\n", vf); + "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n", + vf); break; case E1000_VF_SET_PROMISC: retval = igb_set_vf_promisc(adapter, msgbuf, vf); @@ -5694,9 +5710,8 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) retval = -1; if (vf_data->pf_vlan) dev_warn(&pdev->dev, - "VF %d attempted to override administratively " - "set VLAN tag\nReload the VF driver to " - "resume operations\n", vf); + "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n", + vf); else retval = igb_set_vf_vlan(adapter, msgbuf, vf); break; @@ -5765,9 +5780,9 @@ static void igb_set_uta(struct igb_adapter *adapter) } /** - * igb_intr_msi - Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure + * igb_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure **/ static irqreturn_t igb_intr_msi(int irq, void *data) { @@ -5810,9 +5825,9 @@ static irqreturn_t igb_intr_msi(int irq, void *data) } /** - * igb_intr - Legacy Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure + * igb_intr - Legacy Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure **/ static irqreturn_t igb_intr(int irq, void *data) { @@ -5820,11 +5835,13 @@ static irqreturn_t igb_intr(int irq, void *data) struct igb_q_vector *q_vector = adapter->q_vector[0]; struct e1000_hw *hw = &adapter->hw; /* Interrupt Auto-Mask...upon reading ICR, 
interrupts are masked. No - * need for the IMC write */ + * need for the IMC write + */ u32 icr = rd32(E1000_ICR); /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is - * not set, then the adapter didn't send an interrupt */ + * not set, then the adapter didn't send an interrupt + */ if (!(icr & E1000_ICR_INT_ASSERTED)) return IRQ_NONE; @@ -5883,15 +5900,15 @@ static void igb_ring_irq_enable(struct igb_q_vector *q_vector) } /** - * igb_poll - NAPI Rx polling callback - * @napi: napi polling structure - * @budget: count of how many packets we should handle + * igb_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle **/ static int igb_poll(struct napi_struct *napi, int budget) { struct igb_q_vector *q_vector = container_of(napi, - struct igb_q_vector, - napi); + struct igb_q_vector, + napi); bool clean_complete = true; #ifdef CONFIG_IGB_DCA @@ -5916,10 +5933,10 @@ static int igb_poll(struct napi_struct *napi, int budget) } /** - * igb_clean_tx_irq - Reclaim resources after transmit completes - * @q_vector: pointer to q_vector containing needed info + * igb_clean_tx_irq - Reclaim resources after transmit completes + * @q_vector: pointer to q_vector containing needed info * - * returns true if ring is completely cleaned + * returns true if ring is completely cleaned **/ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) { @@ -6025,7 +6042,8 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) struct e1000_hw *hw = &adapter->hw; /* Detect a transmit hang in hardware, this serializes the - * check with the clearing of time_stamp and movement of i */ + * check with the clearing of time_stamp and movement of i + */ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); if (tx_buffer->next_to_watch && time_after(jiffies, tx_buffer->time_stamp + @@ -6064,8 +6082,8 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 
if (unlikely(total_packets && - netif_carrier_ok(tx_ring->netdev) && - igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { + netif_carrier_ok(tx_ring->netdev) && + igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -6086,11 +6104,11 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } /** - * igb_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused + * igb_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused * - * Synchronizes page for reuse by the adapter + * Synchronizes page for reuse by the adapter **/ static void igb_reuse_rx_page(struct igb_ring *rx_ring, struct igb_rx_buffer *old_buff) @@ -6150,19 +6168,19 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, } /** - * igb_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware - * @skb: sk_buff to place the data into + * igb_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into * - * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. + * This function will add the data contained in rx_buffer->page to the skb. 
+ * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. **/ static bool igb_add_rx_frag(struct igb_ring *rx_ring, struct igb_rx_buffer *rx_buffer, @@ -6203,78 +6221,6 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, return igb_can_reuse_rx_page(rx_buffer, page, truesize); } -static struct sk_buff *igb_build_rx_buffer(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc) -{ - struct igb_rx_buffer *rx_buffer; - struct sk_buff *skb; - struct page *page; - void *page_addr; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); -#if (PAGE_SIZE < 8192) - unsigned int truesize = IGB_RX_BUFSZ; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(NET_SKB_PAD + - NET_IP_ALIGN + - size); -#endif - - /* If we spanned a buffer we have a huge mess so test for it */ - BUG_ON(unlikely(!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))); - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); - - page_addr = page_address(page) + rx_buffer->page_offset; - - /* prefetch first cache line of first page */ - prefetch(page_addr + NET_SKB_PAD + NET_IP_ALIGN); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES + NET_SKB_PAD + NET_IP_ALIGN); -#endif - - /* build an skb to around the page buffer */ - skb = build_skb(page_addr, truesize); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_failed++; - return NULL; - } - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IGB_RX_BUFSZ, - 
DMA_FROM_DEVICE); - - /* update pointers within the skb to store the data */ - skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); - __skb_put(skb, size); - - /* pull timestamp out of packet data */ - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - __skb_pull(skb, IGB_TS_HDR_LEN); - } - - if (igb_can_reuse_rx_page(rx_buffer, page, truesize)) { - /* hand second half of page back to the ring */ - igb_reuse_rx_page(rx_ring, rx_buffer); - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE); - } - - /* clear contents of buffer_info */ - rx_buffer->dma = 0; - rx_buffer->page = NULL; - - return skb; -} - static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -6305,8 +6251,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, return NULL; } - /* - * we will be copying header into skb->data in + /* we will be copying header into skb->data in * pskb_may_pull so it is in our interest to prefetch * it now to avoid a possible cache miss */ @@ -6354,8 +6299,7 @@ static inline void igb_rx_checksum(struct igb_ring *ring, if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) { - /* - * work around errata with sctp packets where the TCPE aka + /* work around errata with sctp packets where the TCPE aka * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) * packets, (aka let the stack check the crc32c) */ @@ -6386,15 +6330,15 @@ static inline void igb_rx_hash(struct igb_ring *ring, } /** - * igb_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * @skb: current socket buffer containing buffer in progress + * igb_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for 
current buffer + * @skb: current socket buffer containing buffer in progress * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. **/ static bool igb_is_non_eop(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc) @@ -6414,15 +6358,15 @@ static bool igb_is_non_eop(struct igb_ring *rx_ring, } /** - * igb_get_headlen - determine size of header for LRO/GRO - * @data: pointer to the start of the headers - * @max_len: total length of section to find headers in + * igb_get_headlen - determine size of header for LRO/GRO + * @data: pointer to the start of the headers + * @max_len: total length of section to find headers in * - * This function is meant to determine the length of headers that will - * be recognized by hardware for LRO, and GRO offloads. The main - * motivation of doing this is to only perform one pull for IPv4 TCP - * packets so that we can do basic things like calculating the gso_size - * based on the average data per packet. + * This function is meant to determine the length of headers that will + * be recognized by hardware for LRO, and GRO offloads. The main + * motivation of doing this is to only perform one pull for IPv4 TCP + * packets so that we can do basic things like calculating the gso_size + * based on the average data per packet. 
**/ static unsigned int igb_get_headlen(unsigned char *data, unsigned int max_len) @@ -6473,7 +6417,7 @@ static unsigned int igb_get_headlen(unsigned char *data, return hdr.network - data; /* record next protocol if header is present */ - if (!hdr.ipv4->frag_off) + if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) nexthdr = hdr.ipv4->protocol; } else if (protocol == __constant_htons(ETH_P_IPV6)) { if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) @@ -6509,8 +6453,7 @@ static unsigned int igb_get_headlen(unsigned char *data, hdr.network += sizeof(struct udphdr); } - /* - * If everything has gone correctly hdr.network should be the + /* If everything has gone correctly hdr.network should be the * data section of the packet and will be the end of the header. * If not then it probably represents the end of the last recognized * header. @@ -6522,17 +6465,17 @@ static unsigned int igb_get_headlen(unsigned char *data, } /** - * igb_pull_tail - igb specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being adjusted + * igb_pull_tail - igb specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being adjusted * - * This function is an igb specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. + * This function is an igb specific version of __pskb_pull_tail. 
The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. */ static void igb_pull_tail(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc, @@ -6542,8 +6485,7 @@ static void igb_pull_tail(struct igb_ring *rx_ring, unsigned char *va; unsigned int pull_len; - /* - * it is valid to use page_address instead of kmap since we are + /* it is valid to use page_address instead of kmap since we are * working with pages allocated out of the lomem pool per * alloc_page(GFP_ATOMIC) */ @@ -6563,8 +6505,7 @@ static void igb_pull_tail(struct igb_ring *rx_ring, va += IGB_TS_HDR_LEN; } - /* - * we need the header to contain the greater of either ETH_HLEN or + /* we need the header to contain the greater of either ETH_HLEN or * 60 bytes if the skb->len is less than 60 for skb_pad. */ pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN); @@ -6580,24 +6521,23 @@ static void igb_pull_tail(struct igb_ring *rx_ring, } /** - * igb_cleanup_headers - Correct corrupted or empty headers - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being fixed + * igb_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed * - * Address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. + * Address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. 
+ * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. * - * Returns true if an error was encountered and skb was freed. + * Returns true if an error was encountered and skb was freed. **/ static bool igb_cleanup_headers(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) { - if (unlikely((igb_test_staterr(rx_desc, E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { struct net_device *netdev = rx_ring->netdev; @@ -6624,14 +6564,14 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring, } /** - * igb_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated + * igb_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated * - * This function checks the ring, descriptor, and packet information in - * order to populate the hash, checksum, VLAN, timestamp, protocol, and - * other fields within the skb. + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. 
**/ static void igb_process_skb_fields(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc, @@ -6645,7 +6585,7 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, igb_ptp_rx_hwtstamp(rx_ring->q_vector, rx_desc, skb); - if ((dev->features & NETIF_F_HW_VLAN_RX) && + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { u16 vid; if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && @@ -6654,7 +6594,7 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, else vid = le16_to_cpu(rx_desc->wb.upper.vlan); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } skb_record_rx_queue(skb, rx_ring->queue_index); @@ -6690,10 +6630,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) rmb(); /* retrieve a buffer from the ring */ - if (ring_uses_build_skb(rx_ring)) - skb = igb_build_rx_buffer(rx_ring, rx_desc); - else - skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); + skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); /* exit if we failed to retrieve a buffer */ if (!skb) @@ -6762,8 +6699,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, /* map page for use */ dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - /* - * if mapping failed free memory back to system since + /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { @@ -6780,17 +6716,9 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, return true; } -static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring) -{ - if (ring_uses_build_skb(rx_ring)) - return NET_SKB_PAD + NET_IP_ALIGN; - else - return 0; -} - /** - * igb_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * igb_alloc_rx_buffers - Replace used receive buffers; packet split + * @adapter: address of 
board private structure **/ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) { @@ -6810,13 +6738,10 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) if (!igb_alloc_mapped_page(rx_ring, bi)) break; - /* - * Refresh the desc even if buffer_addrs didn't change + /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + - bi->page_offset + - igb_rx_offset(rx_ring)); + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; @@ -6842,8 +6767,7 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) /* update next to alloc since we have filled the ring */ rx_ring->next_to_alloc = i; - /* - * Force memory writes to complete before letting h/w + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). @@ -6928,7 +6852,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl; - bool enable = !!(features & NETIF_F_HW_VLAN_RX); + bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); if (enable) { /* enable VLAN tag insert/strip */ @@ -6950,7 +6874,8 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) igb_rlpml_set(adapter); } -static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int igb_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -6967,7 +6892,8 @@ static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int igb_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct 
igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -6993,7 +6919,7 @@ static void igb_restore_vlan(struct igb_adapter *adapter) igb_vlan_mode(adapter->netdev, adapter->netdev->features); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - igb_vlan_rx_add_vid(adapter->netdev, vid); + igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) @@ -7004,15 +6930,24 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) mac->autoneg = 0; /* Make sure dplx is at most 1 bit and lsb of speed is not set - * for the switch() below to work */ + * for the switch() below to work + */ if ((spd & 1) || (dplx & ~1)) goto err_inval; - /* Fiber NIC's only allow 1000 Gbps Full duplex */ - if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) && - spd != SPEED_1000 && - dplx != DUPLEX_FULL) - goto err_inval; + /* Fiber NIC's only allow 1000 gbps Full duplex + * and 100Mbps Full duplex for 100baseFx sfp + */ + if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { + switch (spd + dplx) { + case SPEED_10 + DUPLEX_HALF: + case SPEED_10 + DUPLEX_FULL: + case SPEED_100 + DUPLEX_HALF: + goto err_inval; + default: + break; + } + } switch (spd + dplx) { case SPEED_10 + DUPLEX_HALF: @@ -7111,7 +7046,8 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, igb_power_up_link(adapter); /* Release control of h/w to f/w. If f/w is AMT enabled, this - * would have already happened in close and is redundant. */ + * would have already happened in close and is redundant. + */ igb_release_hw_control(adapter); pci_disable_device(pdev); @@ -7173,7 +7109,8 @@ static int igb_resume(struct device *dev) igb_reset(adapter); /* let the f/w know that the h/w is now under the control of the - * driver. */ + * driver. 
+ */ igb_get_hw_control(adapter); wr32(E1000_WUS, ~0); @@ -7309,8 +7246,7 @@ static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) } #ifdef CONFIG_NET_POLL_CONTROLLER -/* - * Polling 'interrupt' - used by things like netconsole to send skbs +/* Polling 'interrupt' - used by things like netconsole to send skbs * without having to re-enable interrupts. It's not called while * the interrupt routine is executing. */ @@ -7333,13 +7269,13 @@ static void igb_netpoll(struct net_device *netdev) #endif /* CONFIG_NET_POLL_CONTROLLER */ /** - * igb_io_error_detected - called when PCI error is detected - * @pdev: Pointer to PCI device - * @state: The current pci connection state + * igb_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state * - * This function is called after a PCI bus error affecting - * this device has been detected. - */ + * This function is called after a PCI bus error affecting + * this device has been detected. + **/ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -7360,12 +7296,12 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, } /** - * igb_io_slot_reset - called after the pci bus has been reset. - * @pdev: Pointer to PCI device + * igb_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device * - * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. - */ + * Restart the card from scratch, as if from a cold-boot. Implementation + * resembles the first-half of the igb_resume routine. 
+ **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -7393,8 +7329,9 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) err = pci_cleanup_aer_uncorrect_error_status(pdev); if (err) { - dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status " - "failed 0x%0x\n", err); + dev_err(&pdev->dev, + "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", + err); /* non-fatal, continue */ } @@ -7402,12 +7339,12 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) } /** - * igb_io_resume - called when traffic can start flowing again. - * @pdev: Pointer to PCI device + * igb_io_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device * - * This callback is called when the error recovery driver tells us that - * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. + * This callback is called when the error recovery driver tells us that + * its OK to resume normal operation. Implementation resembles the + * second-half of the igb_resume routine. */ static void igb_io_resume(struct pci_dev *pdev) { @@ -7424,12 +7361,13 @@ static void igb_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); /* let the f/w know that the h/w is now under the control of the - * driver. */ + * driver. + */ igb_get_hw_control(adapter); } static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, - u8 qsel) + u8 qsel) { u32 rar_low, rar_high; struct e1000_hw *hw = &adapter->hw; @@ -7438,7 +7376,7 @@ static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); + ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* Indicate to hardware the Address is Valid. 
*/ @@ -7456,11 +7394,12 @@ static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, } static int igb_set_vf_mac(struct igb_adapter *adapter, - int vf, unsigned char *mac_addr) + int vf, unsigned char *mac_addr) { struct e1000_hw *hw = &adapter->hw; /* VF MAC addresses start at end of receive addresses and moves - * torwards the first, as a result a collision should not be possible */ + * towards the first, as a result a collision should not be possible + */ int rar_entry = hw->mac.rar_entry_count - (vf + 1); memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); @@ -7477,13 +7416,13 @@ static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) return -EINVAL; adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); - dev_info(&adapter->pdev->dev, "Reload the VF driver to make this" - " change effective."); + dev_info(&adapter->pdev->dev, + "Reload the VF driver to make this change effective."); if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, "The VF MAC address has been set," - " but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, "Bring the PF device up before" - " attempting to use the VF device.\n"); + dev_warn(&adapter->pdev->dev, + "The VF MAC address has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); } return igb_set_vf_mac(adapter, vf, mac); } @@ -7510,19 +7449,19 @@ static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, /* Calculate the rate factor values to set */ rf_int = link_speed / tx_rate; rf_dec = (link_speed - (rf_int * tx_rate)); - rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; + rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) / + tx_rate; bcnrc_val = E1000_RTTBCNRC_RS_ENA; - bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) & - 
E1000_RTTBCNRC_RF_INT_MASK); + bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) & + E1000_RTTBCNRC_RF_INT_MASK); bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK); } else { bcnrc_val = 0; } wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */ - /* - * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM + /* Set global transmit compensation time to the MMW_SIZE in RTTBCNRM * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported. */ wr32(E1000_RTTBCNRM, 0x14); @@ -7544,8 +7483,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter) reset_rate = true; adapter->vf_rate_link_speed = 0; dev_info(&adapter->pdev->dev, - "Link speed has been changed. VF Transmit " - "rate is disabled\n"); + "Link speed has been changed. VF Transmit rate is disabled\n"); } for (i = 0; i < adapter->vfs_allocated_count; i++) { @@ -7553,8 +7491,8 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter) adapter->vf_data[i].tx_rate = 0; igb_set_vf_rate_limit(&adapter->hw, i, - adapter->vf_data[i].tx_rate, - actual_link_speed); + adapter->vf_data[i].tx_rate, + actual_link_speed); } } @@ -7580,6 +7518,33 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) return 0; } +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 reg_val, reg_offset; + + if (!adapter->vfs_allocated_count) + return -EOPNOTSUPP; + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + + reg_offset = (hw->mac.type == e1000_82576) ? 
E1000_DTXSWC : E1000_TXSWC; + reg_val = rd32(reg_offset); + if (setting) + reg_val |= ((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + else + reg_val &= ~((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + wr32(reg_offset, reg_val); + + adapter->vf_data[vf].spoofchk_enabled = setting; + return E1000_SUCCESS; +} + static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi) { @@ -7591,6 +7556,7 @@ static int igb_ndo_get_vf_config(struct net_device *netdev, ivi->tx_rate = adapter->vf_data[vf].tx_rate; ivi->vlan = adapter->vf_data[vf].pf_vlan; ivi->qos = adapter->vf_data[vf].pf_qos; + ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; return 0; } @@ -7603,6 +7569,7 @@ static void igb_vmm_control(struct igb_adapter *adapter) case e1000_82575: case e1000_i210: case e1000_i211: + case e1000_i354: default: /* replication is not supported for 82575 */ return; @@ -7625,7 +7592,7 @@ static void igb_vmm_control(struct igb_adapter *adapter) igb_vmdq_set_loopback_pf(hw, true); igb_vmdq_set_replication_pf(hw, true); igb_vmdq_set_anti_spoofing_pf(hw, true, - adapter->vfs_allocated_count); + adapter->vfs_allocated_count); } else { igb_vmdq_set_loopback_pf(hw, false); igb_vmdq_set_replication_pf(hw, false); @@ -7645,8 +7612,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* force threshold to 0. */ wr32(E1000_DMCTXTH, 0); - /* - * DMA Coalescing high water mark needs to be greater + /* DMA Coalescing high water mark needs to be greater * than the Rx threshold. Set hwm to PBA - max frame * size in 16B units, capping it at PBA - 6KB. */ @@ -7659,8 +7625,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) & E1000_FCRTC_RTH_COAL_MASK); wr32(E1000_FCRTC, reg); - /* - * Set the DMA Coalescing Rx threshold to PBA - 2 * max + /* Set the DMA Coalescing Rx threshold to PBA - 2 * max * frame size, capping it at PBA - 10KB. 
*/ dmac_thr = pba - adapter->max_frame_size / 512; @@ -7678,11 +7643,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) reg |= (1000 >> 5); /* Disable BMC-to-OS Watchdog Enable */ - reg &= ~E1000_DMACR_DC_BMC2OSW_EN; + if (hw->mac.type != e1000_i354) + reg &= ~E1000_DMACR_DC_BMC2OSW_EN; + wr32(E1000_DMACR, reg); - /* - * no lower threshold to disable + /* no lower threshold to disable * coalescing(smart fifb)-UTRESH=0 */ wr32(E1000_DMCRTRH, 0); @@ -7691,15 +7657,13 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) wr32(E1000_DMCTLX, reg); - /* - * free space in tx packet buffer to wake from + /* free space in tx packet buffer to wake from * DMA coal */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); - /* - * make low power state decision controlled + /* make low power state decision controlled * by DMA coal */ reg = rd32(E1000_PCIEMISC); @@ -7713,7 +7677,8 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) } } -/* igb_read_i2c_byte - Reads 8 bit word over I2C +/** + * igb_read_i2c_byte - Reads 8 bit word over I2C * @hw: pointer to hardware structure * @byte_offset: byte offset to read * @dev_addr: device address @@ -7721,9 +7686,9 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) * * Performs byte read operation over I2C interface at * a specified device address. 
- */ + **/ s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 *data) + u8 dev_addr, u8 *data) { struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); struct i2c_client *this_client = adapter->i2c_client; @@ -7750,7 +7715,8 @@ s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, } } -/* igb_write_i2c_byte - Writes 8 bit word over I2C +/** + * igb_write_i2c_byte - Writes 8 bit word over I2C * @hw: pointer to hardware structure * @byte_offset: byte offset to write * @dev_addr: device address @@ -7758,9 +7724,9 @@ s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, * * Performs byte write operation over I2C interface at * a specified device address. - */ + **/ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, - u8 dev_addr, u8 data) + u8 dev_addr, u8 data) { struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); struct i2c_client *this_client = adapter->i2c_client; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 0987822359f0..7e8c477b0ab9 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1,5 +1,4 @@ -/* - * PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580 +/* PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580 * * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com> * @@ -27,8 +26,7 @@ #define INCVALUE_MASK 0x7fffffff #define ISGN 0x80000000 -/* - * The 82580 timesync updates the system timer every 8ns by 8ns, +/* The 82580 timesync updates the system timer every 8ns by 8ns, * and this update value cannot be reprogrammed. 
* * Neither the 82576 nor the 82580 offer registers wide enough to hold @@ -77,10 +75,7 @@ #define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) #define IGB_NBITS_82580 40 -/* - * SYSTIM read access for the 82576 - */ - +/* SYSTIM read access for the 82576 */ static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) { struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); @@ -97,10 +92,7 @@ static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) return val; } -/* - * SYSTIM read access for the 82580 - */ - +/* SYSTIM read access for the 82580 */ static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) { struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); @@ -108,8 +100,7 @@ static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) u64 val; u32 lo, hi, jk; - /* - * The timestamp latches on lowest register read. For the 82580 + /* The timestamp latches on lowest register read. For the 82580 * the lowest register is SYSTIMR instead of SYSTIML. However we only * need to provide nanosecond resolution, so we just ignore it. */ @@ -123,17 +114,13 @@ static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) return val; } -/* - * SYSTIM read access for I210/I211 - */ - +/* SYSTIM read access for I210/I211 */ static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) { struct e1000_hw *hw = &adapter->hw; u32 sec, nsec, jk; - /* - * The timestamp latches on lowest register read. For I210/I211, the + /* The timestamp latches on lowest register read. For I210/I211, the * lowest register is SYSTIMR. Since we only need to provide nanosecond * resolution, we can ignore it. */ @@ -150,8 +137,7 @@ static void igb_ptp_write_i210(struct igb_adapter *adapter, { struct e1000_hw *hw = &adapter->hw; - /* - * Writing the SYSTIMR register is not necessary as it only provides + /* Writing the SYSTIMR register is not necessary as it only provides * sub-nanosecond resolution. 
*/ wr32(E1000_SYSTIML, ts->tv_nsec); @@ -185,6 +171,7 @@ static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, switch (adapter->hw.mac.type) { case e1000_82576: case e1000_82580: + case e1000_i354: case e1000_i350: spin_lock_irqsave(&adapter->tmreg_lock, flags); @@ -207,10 +194,7 @@ static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, } } -/* - * PTP clock operations - */ - +/* PTP clock operations */ static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) { struct igb_adapter *igb = container_of(ptp, struct igb_adapter, @@ -387,7 +371,7 @@ static int igb_ptp_enable(struct ptp_clock_info *ptp, * * This work function polls the TSYNCTXCTL valid bit to determine when a * timestamp has been taken for the current stored skb. - */ + **/ void igb_ptp_tx_work(struct work_struct *work) { struct igb_adapter *adapter = container_of(work, struct igb_adapter, @@ -437,7 +421,7 @@ static void igb_ptp_overflow_check(struct work_struct *work) * dropped an Rx packet that was timestamped when the ring is full. The * particular error is rare but leaves the device in a state unable to timestamp * any future packets. - */ + **/ void igb_ptp_rx_hang(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; @@ -481,7 +465,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter) * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. - */ + **/ void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; @@ -506,15 +490,14 @@ void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) * This function is meant to retrieve a timestamp from the first buffer of an * incoming frame. The value is stored in little endian format starting on * byte 8. 
- */ + **/ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va, struct sk_buff *skb) { __le64 *regval = (__le64 *)va; - /* - * The timestamp is recorded in little endian format. + /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ @@ -529,7 +512,7 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, * * This function is meant to retrieve a timestamp from the internal registers * of the adapter and store it in the skb. - */ + **/ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { @@ -537,8 +520,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct e1000_hw *hw = &adapter->hw; u64 regval; - /* - * If this bit is set, then the RX registers contain the time stamp. No + /* If this bit is set, then the RX registers contain the time stamp. No * other packet will be time stamped until we read these registers, so * read the registers to make them available again. Because only one * packet can be time stamped at a time, we know that the register @@ -574,7 +556,6 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, * type has to be specified. Matching the kind of event packet is * not supported, with the exception of "all V2 events regardless of * level 2 or 4". - * **/ int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) @@ -655,10 +636,9 @@ int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, return 0; } - /* - * Per-packet timestamping only works if all packets are + /* Per-packet timestamping only works if all packets are * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. + * long as one Rx filter was configured. 
*/ if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; @@ -740,7 +720,7 @@ void igb_ptp_init(struct igb_adapter *adapter) case e1000_82576: snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 1000000000; + adapter->ptp_caps.max_adj = 999999881; adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.pps = 0; adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; @@ -756,6 +736,7 @@ void igb_ptp_init(struct igb_adapter *adapter) wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); break; case e1000_82580: + case e1000_i354: case e1000_i350: snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); adapter->ptp_caps.owner = THIS_MODULE; @@ -844,6 +825,7 @@ void igb_ptp_stop(struct igb_adapter *adapter) switch (adapter->hw.mac.type) { case e1000_82576: case e1000_82580: + case e1000_i354: case e1000_i350: cancel_delayed_work_sync(&adapter->ptp_overflow_work); break; @@ -888,6 +870,7 @@ void igb_ptp_reset(struct igb_adapter *adapter) wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); break; case e1000_82580: + case e1000_i354: case e1000_i350: case e1000_i210: case e1000_i211: diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index d60cd4393415..93eb7ee06d3e 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -116,7 +116,7 @@ static void igbvf_receive_skb(struct igbvf_adapter *adapter, else vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK; if (test_bit(vid, adapter->active_vlans)) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } napi_gro_receive(&adapter->rx_ring->napi, skb); @@ -447,7 +447,6 @@ int igbvf_setup_tx_resources(struct igbvf_adapter *adapter, tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); - if (!tx_ring->desc) goto err; @@ -488,7 +487,6 @@ int 
igbvf_setup_rx_resources(struct igbvf_adapter *adapter, rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); - if (!rx_ring->desc) goto err; @@ -1232,7 +1230,8 @@ static void igbvf_set_rlpml(struct igbvf_adapter *adapter) e1000_rlpml_set_vf(hw, max_frame_size); } -static int igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int igbvf_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -1245,7 +1244,8 @@ static int igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int igbvf_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -1264,7 +1264,7 @@ static void igbvf_restore_vlan(struct igbvf_adapter *adapter) u16 vid; for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - igbvf_vlan_rx_add_vid(adapter->netdev, vid); + igbvf_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } /** @@ -2724,9 +2724,9 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_RXCSUM; netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index ea4808373435..fce3e92f9d11 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -101,8 +101,10 @@ static void ixgb_tx_timeout_task(struct work_struct *work); static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter); static void ixgb_vlan_strip_disable(struct ixgb_adapter *adapter); 
-static int ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid); -static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); +static int ixgb_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid); +static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid); static void ixgb_restore_vlan(struct ixgb_adapter *adapter); #ifdef CONFIG_NET_POLL_CONTROLLER @@ -332,8 +334,8 @@ ixgb_fix_features(struct net_device *netdev, netdev_features_t features) * Tx VLAN insertion does not work per HW design when Rx stripping is * disabled. */ - if (!(features & NETIF_F_HW_VLAN_RX)) - features &= ~NETIF_F_HW_VLAN_TX; + if (!(features & NETIF_F_HW_VLAN_CTAG_RX)) + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -344,7 +346,7 @@ ixgb_set_features(struct net_device *netdev, netdev_features_t features) struct ixgb_adapter *adapter = netdev_priv(netdev); netdev_features_t changed = features ^ netdev->features; - if (!(changed & (NETIF_F_RXCSUM|NETIF_F_HW_VLAN_RX))) + if (!(changed & (NETIF_F_RXCSUM|NETIF_F_HW_VLAN_CTAG_RX))) return 0; adapter->rx_csum = !!(features & NETIF_F_RXCSUM); @@ -479,10 +481,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_RXCSUM; if (pci_using_dac) { @@ -717,14 +719,11 @@ ixgb_setup_tx_resources(struct ixgb_adapter *adapter) txdr->size = ALIGN(txdr->size, 4096); txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (!txdr->desc) { vfree(txdr->buffer_info); - netif_err(adapter, probe, adapter->netdev, - "Unable to allocate transmit descriptor memory\n"); return -ENOMEM; } - memset(txdr->desc, 0, txdr->size); txdr->next_to_use 
= 0; txdr->next_to_clean = 0; @@ -807,8 +806,6 @@ ixgb_setup_rx_resources(struct ixgb_adapter *adapter) if (!rxdr->desc) { vfree(rxdr->buffer_info); - netif_err(adapter, probe, adapter->netdev, - "Unable to allocate receive descriptors\n"); return -ENOMEM; } memset(rxdr->desc, 0, rxdr->size); @@ -1145,7 +1142,7 @@ ixgb_set_multi(struct net_device *netdev) } alloc_failed: - if (netdev->features & NETIF_F_HW_VLAN_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ixgb_vlan_strip_enable(adapter); else ixgb_vlan_strip_disable(adapter); @@ -2085,8 +2082,8 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do) skb->protocol = eth_type_trans(skb, netdev); if (status & IXGB_RX_DESC_STATUS_VP) - __vlan_hwaccel_put_tag(skb, - le16_to_cpu(rx_desc->special)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rx_desc->special)); netif_receive_skb(skb); @@ -2159,6 +2156,10 @@ map_skb: skb->data, adapter->rx_buffer_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { + adapter->alloc_rx_buff_failed++; + break; + } rx_desc = IXGB_RX_DESC(*rx_ring, i); rx_desc->buff_addr = cpu_to_le64(buffer_info->dma); @@ -2168,7 +2169,8 @@ map_skb: rx_desc->status = 0; - if (++i == rx_ring->count) i = 0; + if (++i == rx_ring->count) + i = 0; buffer_info = &rx_ring->buffer_info[i]; } @@ -2209,7 +2211,7 @@ ixgb_vlan_strip_disable(struct ixgb_adapter *adapter) } static int -ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +ixgb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ixgb_adapter *adapter = netdev_priv(netdev); u32 vfta, index; @@ -2226,7 +2228,7 @@ ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) } static int -ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +ixgb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ixgb_adapter *adapter = netdev_priv(netdev); u32 vfta, index; @@ -2248,7 +2250,7 @@ ixgb_restore_vlan(struct ixgb_adapter *adapter) 
u16 vid; for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - ixgb_vlan_rx_add_vid(adapter->netdev, vid); + ixgb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index a8e10cff7a89..ca932387a80f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -740,6 +740,11 @@ extern void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter); extern void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter); extern void ixgbe_dbg_init(void); extern void ixgbe_dbg_exit(void); +#else +static inline void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter) {} +static inline void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter) {} +static inline void ixgbe_dbg_init(void) {} +static inline void ixgbe_dbg_exit(void) {} #endif /* CONFIG_DEBUG_FS */ static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index d0113fc97b6f..4a5bfb6b3af0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -1305,6 +1305,7 @@ static struct ixgbe_mac_operations mac_ops_82598 = { .release_swfw_sync = &ixgbe_release_swfw_sync, .get_thermal_sensor_data = NULL, .init_thermal_sensor_thresh = NULL, + .mng_fw_enabled = NULL, }; static struct ixgbe_eeprom_operations eeprom_ops_82598 = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 203a00c24330..7946da94b228 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -59,12 +59,34 @@ static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, bool autoneg_wait_to_complete); static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw); +static 
bool ixgbe_mng_enabled(struct ixgbe_hw *hw) +{ + u32 fwsm, manc, factps; + + fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); + if ((fwsm & IXGBE_FWSM_MODE_MASK) != IXGBE_FWSM_FW_MODE_PT) + return false; + + manc = IXGBE_READ_REG(hw, IXGBE_MANC); + if (!(manc & IXGBE_MANC_RCV_TCO_EN)) + return false; + + factps = IXGBE_READ_REG(hw, IXGBE_FACTPS); + if (factps & IXGBE_FACTPS_MNGCG) + return false; + + return true; +} + static void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) { struct ixgbe_mac_info *mac = &hw->mac; - /* enable the laser control functions for SFP+ fiber */ - if (mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) { + /* enable the laser control functions for SFP+ fiber + * and MNG not enabled + */ + if ((mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) && + !hw->mng_fw_enabled) { mac->ops.disable_tx_laser = &ixgbe_disable_tx_laser_multispeed_fiber; mac->ops.enable_tx_laser = @@ -563,7 +585,8 @@ static s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, return status; /* Flap the tx laser if it has not already been done */ - hw->mac.ops.flap_tx_laser(hw); + if (hw->mac.ops.flap_tx_laser) + hw->mac.ops.flap_tx_laser(hw); /* * Wait for the controller to acquire link. 
Per IEEE 802.3ap, @@ -615,7 +638,8 @@ static s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, return status; /* Flap the tx laser if it has not already been done */ - hw->mac.ops.flap_tx_laser(hw); + if (hw->mac.ops.flap_tx_laser) + hw->mac.ops.flap_tx_laser(hw); /* Wait for the link partner to also set speed */ msleep(100); @@ -933,6 +957,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) ixgbe_link_speed link_speed; s32 status; u32 ctrl, i, autoc, autoc2; + u32 curr_lms; bool link_up = false; /* Call adapter stop to disable tx/rx and clear interrupts */ @@ -964,6 +989,9 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL) hw->phy.ops.reset(hw); + /* remember AUTOC LMS from before we reset */ + curr_lms = IXGBE_READ_REG(hw, IXGBE_AUTOC) & IXGBE_AUTOC_LMS_MASK; + mac_reset_top: /* * Issue global reset to the MAC. Needs to be SW reset if link is up. @@ -1019,6 +1047,19 @@ mac_reset_top: hw->mac.orig_autoc2 = autoc2; hw->mac.orig_link_settings_stored = true; } else { + + /* If MNG FW is running on a multi-speed device that + * doesn't autoneg with out driver support we need to + * leave LMS in the state it was before we MAC reset. + * Likewise if we support WoL we don't want change the + * LMS state either. 
+ */ + if ((hw->phy.multispeed_fiber && hw->mng_fw_enabled) || + hw->wol_supported) + hw->mac.orig_autoc = + (hw->mac.orig_autoc & ~IXGBE_AUTOC_LMS_MASK) | + curr_lms; + if (autoc != hw->mac.orig_autoc) { /* Need SW/FW semaphore around AUTOC writes if LESM is * on, likewise reset_pipeline requires us to hold @@ -2216,7 +2257,7 @@ static struct ixgbe_mac_operations mac_ops_82599 = { .release_swfw_sync = &ixgbe_release_swfw_sync, .get_thermal_sensor_data = &ixgbe_get_thermal_sensor_data_generic, .init_thermal_sensor_thresh = &ixgbe_init_thermal_sensor_thresh_generic, - + .mng_fw_enabled = &ixgbe_mng_enabled, }; static struct ixgbe_eeprom_operations eeprom_ops_82599 = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 99e472ebaa75..9bcdeb89af5a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -592,6 +592,36 @@ s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr) return 0; } +enum ixgbe_bus_width ixgbe_convert_bus_width(u16 link_status) +{ + switch (link_status & IXGBE_PCI_LINK_WIDTH) { + case IXGBE_PCI_LINK_WIDTH_1: + return ixgbe_bus_width_pcie_x1; + case IXGBE_PCI_LINK_WIDTH_2: + return ixgbe_bus_width_pcie_x2; + case IXGBE_PCI_LINK_WIDTH_4: + return ixgbe_bus_width_pcie_x4; + case IXGBE_PCI_LINK_WIDTH_8: + return ixgbe_bus_width_pcie_x8; + default: + return ixgbe_bus_width_unknown; + } +} + +enum ixgbe_bus_speed ixgbe_convert_bus_speed(u16 link_status) +{ + switch (link_status & IXGBE_PCI_LINK_SPEED) { + case IXGBE_PCI_LINK_SPEED_2500: + return ixgbe_bus_speed_2500; + case IXGBE_PCI_LINK_SPEED_5000: + return ixgbe_bus_speed_5000; + case IXGBE_PCI_LINK_SPEED_8000: + return ixgbe_bus_speed_8000; + default: + return ixgbe_bus_speed_unknown; + } +} + /** * ixgbe_get_bus_info_generic - Generic set PCI bus info * @hw: pointer to hardware structure @@ -610,35 +640,8 @@ s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw) 
pci_read_config_word(adapter->pdev, IXGBE_PCI_LINK_STATUS, &link_status); - switch (link_status & IXGBE_PCI_LINK_WIDTH) { - case IXGBE_PCI_LINK_WIDTH_1: - hw->bus.width = ixgbe_bus_width_pcie_x1; - break; - case IXGBE_PCI_LINK_WIDTH_2: - hw->bus.width = ixgbe_bus_width_pcie_x2; - break; - case IXGBE_PCI_LINK_WIDTH_4: - hw->bus.width = ixgbe_bus_width_pcie_x4; - break; - case IXGBE_PCI_LINK_WIDTH_8: - hw->bus.width = ixgbe_bus_width_pcie_x8; - break; - default: - hw->bus.width = ixgbe_bus_width_unknown; - break; - } - - switch (link_status & IXGBE_PCI_LINK_SPEED) { - case IXGBE_PCI_LINK_SPEED_2500: - hw->bus.speed = ixgbe_bus_speed_2500; - break; - case IXGBE_PCI_LINK_SPEED_5000: - hw->bus.speed = ixgbe_bus_speed_5000; - break; - default: - hw->bus.speed = ixgbe_bus_speed_unknown; - break; - } + hw->bus.width = ixgbe_convert_bus_width(link_status); + hw->bus.speed = ixgbe_convert_bus_speed(link_status); mac->ops.set_lan_id(hw); @@ -1125,7 +1128,7 @@ s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, } for (i = 0; i < words; i++) { - eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) + + eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | IXGBE_EEPROM_RW_REG_START; IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index bc3948ead6e0..22eee38868f1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -40,6 +40,8 @@ s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw); s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size); s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr); +enum ixgbe_bus_width ixgbe_convert_bus_width(u16 link_status); +enum ixgbe_bus_speed ixgbe_convert_bus_speed(u16 link_status); s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw); void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw); s32 
ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index db5611ae407e..6225f880a3f4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -63,7 +63,7 @@ char ixgbe_default_device_descr[] = static char ixgbe_default_device_descr[] = "Intel(R) 10 Gigabit Network Connection"; #endif -#define DRV_VERSION "3.11.33-k" +#define DRV_VERSION "3.13.10-k" const char ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = "Copyright (c) 1999-2013 Intel Corporation."; @@ -149,6 +149,52 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter, + u32 reg, u16 *value) +{ + int pos = 0; + struct pci_dev *parent_dev; + struct pci_bus *parent_bus; + + parent_bus = adapter->pdev->bus->parent; + if (!parent_bus) + return -1; + + parent_dev = parent_bus->self; + if (!parent_dev) + return -1; + + pos = pci_find_capability(parent_dev, PCI_CAP_ID_EXP); + if (!pos) + return -1; + + pci_read_config_word(parent_dev, pos + reg, value); + return 0; +} + +static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u16 link_status = 0; + int err; + + hw->bus.type = ixgbe_bus_type_pci_express; + + /* Get the negotiated link width and speed from PCI config space of the + * parent, as this device is behind a switch + */ + err = ixgbe_read_pci_cfg_word_parent(adapter, 18, &link_status); + + /* assume caller will handle error case */ + if (err) + return err; + + hw->bus.width = ixgbe_convert_bus_width(link_status); + hw->bus.speed = ixgbe_convert_bus_speed(link_status); + + return 0; +} + static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter) { if (!test_bit(__IXGBE_DOWN, &adapter->state) && @@ -1337,7 +1383,7 @@ 
static unsigned int ixgbe_get_headlen(unsigned char *data, return hdr.network - data; /* record next protocol if header is present */ - if (!hdr.ipv4->frag_off) + if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) nexthdr = hdr.ipv4->protocol; } else if (protocol == __constant_htons(ETH_P_IPV6)) { if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) @@ -1442,10 +1488,10 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); - if ((dev->features & NETIF_F_HW_VLAN_RX) && + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } skb_record_rx_queue(skb, rx_ring->queue_index); @@ -3421,7 +3467,8 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) hw->mac.ops.enable_rx_dma(hw, rxctrl); } -static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -3433,7 +3480,8 @@ static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -3538,10 +3586,10 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) { u16 vid; - ixgbe_vlan_rx_add_vid(adapter->netdev, 0); + ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - ixgbe_vlan_rx_add_vid(adapter->netdev, vid); + ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } /** @@ -3676,7 +3724,7 @@ void ixgbe_set_rx_mode(struct net_device 
*netdev) IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - if (netdev->features & NETIF_F_HW_VLAN_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ixgbe_vlan_strip_enable(adapter); else ixgbe_vlan_strip_disable(adapter); @@ -5077,14 +5125,14 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) netif_device_detach(netdev); + rtnl_lock(); if (netif_running(netdev)) { - rtnl_lock(); ixgbe_down(adapter); ixgbe_free_irq(adapter); ixgbe_free_all_tx_resources(adapter); ixgbe_free_all_rx_resources(adapter); - rtnl_unlock(); } + rtnl_unlock(); ixgbe_clear_interrupt_scheme(adapter); @@ -6425,9 +6473,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, struct ixgbe_tx_buffer *first; int tso; u32 tx_flags = 0; -#if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD unsigned short f; -#endif u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = skb->protocol; u8 hdr_len = 0; @@ -6439,12 +6485,9 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, * + 1 desc for context descriptor, * otherwise try next time */ -#if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); -#else - count += skb_shinfo(skb)->nr_frags; -#endif + if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) { tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; @@ -6983,7 +7026,7 @@ static int ixgbe_set_features(struct net_device *netdev, break; } - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) ixgbe_vlan_strip_enable(adapter); else ixgbe_vlan_strip_disable(adapter); @@ -7007,7 +7050,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], int err; if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) - return -EOPNOTSUPP; + return ndo_dflt_fdb_add(ndm, tb, dev, addr, flags); /* Hardware does not support aging addresses so if a * ndm_state is given only allow permanent addresses @@ -7038,44 +7081,6 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 
return err; } -static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr) -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - int err = -EOPNOTSUPP; - - if (ndm->ndm_state & NUD_PERMANENT) { - pr_info("%s: FDB only supports static addresses\n", - ixgbe_driver_name); - return -EINVAL; - } - - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - if (is_unicast_ether_addr(addr)) - err = dev_uc_del(dev, addr); - else if (is_multicast_ether_addr(addr)) - err = dev_mc_del(dev, addr); - else - err = -EINVAL; - } - - return err; -} - -static int ixgbe_ndo_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - int idx) -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); - - return idx; -} - static int ixgbe_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh) { @@ -7171,8 +7176,6 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_features = ixgbe_set_features, .ndo_fix_features = ixgbe_fix_features, .ndo_fdb_add = ixgbe_ndo_fdb_add, - .ndo_fdb_del = ixgbe_ndo_fdb_del, - .ndo_fdb_dump = ixgbe_ndo_fdb_dump, .ndo_bridge_setlink = ixgbe_ndo_bridge_setlink, .ndo_bridge_getlink = ixgbe_ndo_bridge_getlink, }; @@ -7205,6 +7208,7 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, case IXGBE_SUBDEV_ID_82599_SFP: case IXGBE_SUBDEV_ID_82599_RNDC: case IXGBE_SUBDEV_ID_82599_ECNA_DP: + case IXGBE_SUBDEV_ID_82599_LOM_SFP: is_wol_supported = 1; break; } @@ -7369,6 +7373,10 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; + /* Cache if MNG FW is up so we don't have to read the REG later */ + if (hw->mac.ops.mng_fw_enabled) + hw->mng_fw_enabled = hw->mac.ops.mng_fw_enabled(hw); + /* Make it possible the adapter to be woken up via WOL */ switch (adapter->hw.mac.type) { case 
ixgbe_mac_82599EB: @@ -7425,9 +7433,9 @@ skip_sriov: netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | @@ -7521,7 +7529,9 @@ skip_sriov: /* WOL not supported for all devices */ adapter->wol = 0; hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap); - if (ixgbe_wol_supported(adapter, pdev->device, pdev->subsystem_device)) + hw->wol_supported = ixgbe_wol_supported(adapter, pdev->device, + pdev->subsystem_device); + if (hw->wol_supported) adapter->wol = IXGBE_WUFC_MAG; device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); @@ -7532,10 +7542,13 @@ skip_sriov: /* pick up the PCI bus settings for reporting later */ hw->mac.ops.get_bus_info(hw); + if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) + ixgbe_get_parent_bus_info(adapter); /* print bus type/speed/width info */ e_dev_info("(PCI Express:%s:%s) %pM\n", - (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" : + (hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" : + hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" : hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown"), (hw->bus.width == ixgbe_bus_width_pcie_x8 ? 
"Width x8" : @@ -7615,9 +7628,13 @@ skip_sriov: e_err(probe, "failed to allocate sysfs resources\n"); #endif /* CONFIG_IXGBE_HWMON */ -#ifdef CONFIG_DEBUG_FS ixgbe_dbg_adapter_init(adapter); -#endif /* CONFIG_DEBUG_FS */ + + /* Need link setup for MNG FW, else wait for IXGBE_UP */ + if (hw->mng_fw_enabled && hw->mac.ops.setup_link) + hw->mac.ops.setup_link(hw, + IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL, + true); return 0; @@ -7653,9 +7670,7 @@ static void ixgbe_remove(struct pci_dev *pdev) struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; -#ifdef CONFIG_DEBUG_FS ixgbe_dbg_adapter_exit(adapter); -#endif /*CONFIG_DEBUG_FS */ set_bit(__IXGBE_DOWN, &adapter->state); cancel_work_sync(&adapter->service_task); @@ -7918,16 +7933,19 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); -#ifdef CONFIG_DEBUG_FS ixgbe_dbg_init(); -#endif /* CONFIG_DEBUG_FS */ + + ret = pci_register_driver(&ixgbe_driver); + if (ret) { + ixgbe_dbg_exit(); + return ret; + } #ifdef CONFIG_IXGBE_DCA dca_register_notify(&dca_notifier); #endif - ret = pci_register_driver(&ixgbe_driver); - return ret; + return 0; } module_init(ixgbe_init_module); @@ -7945,9 +7963,7 @@ static void __exit ixgbe_exit_module(void) #endif pci_unregister_driver(&ixgbe_driver); -#ifdef CONFIG_DEBUG_FS ixgbe_dbg_exit(); -#endif /* CONFIG_DEBUG_FS */ rcu_barrier(); /* Wait for completion of call_rcu()'s */ } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index d44b4d21268c..1e7d587c4e57 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -35,7 +35,7 @@ #include <linux/ip.h> #include <linux/tcp.h> #include <linux/ipv6.h> -#ifdef NETIF_F_HW_VLAN_TX +#ifdef NETIF_F_HW_VLAN_CTAG_TX #include <linux/if_vlan.h> #endif @@ -661,13 +661,7 @@ int 
ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask) bool enable = ((event_mask & 0x10000000U) != 0); if (enable) { - eth_random_addr(vf_mac_addr); - e_info(probe, "IOV: VF %d is enabled MAC %pM\n", - vfn, vf_mac_addr); - /* - * Store away the VF "permananet" MAC address, it will ask - * for it later. - */ + eth_zero_addr(vf_mac_addr); memcpy(adapter->vfinfo[vfn].vf_mac_addresses, vf_mac_addr, 6); } @@ -688,7 +682,8 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) ixgbe_vf_reset_event(adapter, vf); /* set vf mac address */ - ixgbe_set_vf_mac(adapter, vf, vf_mac); + if (!is_zero_ether_addr(vf_mac)) + ixgbe_set_vf_mac(adapter, vf, vf_mac); vf_shift = vf % 32; reg_offset = vf / 32; @@ -729,8 +724,16 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) IXGBE_WRITE_REG(hw, IXGBE_VMECM(reg_offset), reg); /* reply to reset with ack and vf mac address */ - msgbuf[0] = IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK; - memcpy(addr, vf_mac, ETH_ALEN); + msgbuf[0] = IXGBE_VF_RESET; + if (!is_zero_ether_addr(vf_mac)) { + msgbuf[0] |= IXGBE_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, ETH_ALEN); + } else { + msgbuf[0] |= IXGBE_VT_MSGTYPE_NACK; + dev_warn(&adapter->pdev->dev, + "VF %d has no MAC address assigned, you may have to assign one manually\n", + vf); + } /* * Piggyback the multicast filter type so VF can compute the @@ -1049,6 +1052,12 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; if (vlan || qos) { + if (adapter->vfinfo[vf].pf_vlan) + err = ixgbe_set_vf_vlan(adapter, false, + adapter->vfinfo[vf].pf_vlan, + vf); + if (err) + goto out; err = ixgbe_set_vf_vlan(adapter, true, vlan, vf); if (err) goto out; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 6652e96c352d..402f1a2ada3e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -57,6 +57,7 @@ #define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 #define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 #define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 +#define IXGBE_SUBDEV_ID_82599_LOM_SFP 0x8976 #define IXGBE_DEV_ID_82599_SFP_EM 0x1507 #define IXGBE_DEV_ID_82599_SFP_SF2 0x154D #define IXGBE_DEV_ID_82599EN_SFP 0x1557 @@ -729,6 +730,13 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */ #define IXGBE_LSWFW 0x15014 +/* Management Bit Fields and Masks */ +#define IXGBE_MANC_RCV_TCO_EN 0x00020000 /* Rcv TCO packet enable */ + +/* Firmware Semaphore Register */ +#define IXGBE_FWSM_MODE_MASK 0xE +#define IXGBE_FWSM_FW_MODE_PT 0x4 + /* ARC Subsystem registers */ #define IXGBE_HICR 0x15F00 #define IXGBE_FWSTS 0x15F0C @@ -1019,6 +1027,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST) /* FACTPS */ +#define IXGBE_FACTPS_MNGCG 0x20000000 /* Manageblility Clock Gated */ #define IXGBE_FACTPS_LFS 0x40000000 /* LAN Function Select */ /* MHADD Bit Masks */ @@ -1827,6 +1836,7 @@ enum { #define IXGBE_PCI_LINK_SPEED 0xF #define IXGBE_PCI_LINK_SPEED_2500 0x1 #define IXGBE_PCI_LINK_SPEED_5000 0x2 +#define IXGBE_PCI_LINK_SPEED_8000 0x3 #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E #define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 #define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 @@ -2650,6 +2660,7 @@ enum ixgbe_bus_speed { ixgbe_bus_speed_133 = 133, ixgbe_bus_speed_2500 = 2500, ixgbe_bus_speed_5000 = 5000, + ixgbe_bus_speed_8000 = 8000, ixgbe_bus_speed_reserved }; @@ -2859,6 +2870,7 @@ struct ixgbe_mac_operations { s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); s32 (*get_thermal_sensor_data)(struct ixgbe_hw *); s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw); + bool (*mng_fw_enabled)(struct ixgbe_hw *hw); }; struct ixgbe_phy_operations { @@ -2986,6 +2998,8 @@ struct ixgbe_hw { bool adapter_stopped; bool force_full_reset; 
bool allow_unsupported_sfp; + bool mng_fw_enabled; + bool wol_supported; }; struct ixgbe_info { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index 66c5e946284e..389324f5929a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -854,6 +854,7 @@ static struct ixgbe_mac_operations mac_ops_X540 = { .enable_rx_buff = &ixgbe_enable_rx_buff_generic, .get_thermal_sensor_data = NULL, .init_thermal_sensor_thresh = NULL, + .mng_fw_enabled = NULL, }; static struct ixgbe_eeprom_operations eeprom_ops_X540 = { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index fc0af9a3bb35..fff0d9867529 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -44,8 +44,8 @@ struct ixgbevf_tx_buffer { struct sk_buff *skb; dma_addr_t dma; unsigned long time_stamp; + union ixgbe_adv_tx_desc *next_to_watch; u16 length; - u16 next_to_watch; u16 mapped_as_page; }; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index c3db6cd69b68..1f5166ad6bb5 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -76,12 +76,9 @@ static const struct ixgbevf_info *ixgbevf_info_tbl[] = { * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, * Class, Class Mask, private data (not used) } */ -static struct pci_device_id ixgbevf_pci_tbl[] = { - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF), - board_82599_vf}, - {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF), - board_X540_vf}, - +static DEFINE_PCI_DEVICE_TABLE(ixgbevf_pci_tbl) = { + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF), board_82599_vf }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF), board_X540_vf }, /* required last entry */ {0, } }; @@ -190,28 +187,37 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector 
*q_vector, struct ixgbevf_adapter *adapter = q_vector->adapter; union ixgbe_adv_tx_desc *tx_desc, *eop_desc; struct ixgbevf_tx_buffer *tx_buffer_info; - unsigned int i, eop, count = 0; + unsigned int i, count = 0; unsigned int total_bytes = 0, total_packets = 0; if (test_bit(__IXGBEVF_DOWN, &adapter->state)) return true; i = tx_ring->next_to_clean; - eop = tx_ring->tx_buffer_info[i].next_to_watch; - eop_desc = IXGBEVF_TX_DESC(tx_ring, eop); + tx_buffer_info = &tx_ring->tx_buffer_info[i]; + eop_desc = tx_buffer_info->next_to_watch; - while ((eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)) && - (count < tx_ring->count)) { + do { bool cleaned = false; - rmb(); /* read buffer_info after eop_desc */ - /* eop could change between read and DD-check */ - if (unlikely(eop != tx_ring->tx_buffer_info[i].next_to_watch)) - goto cont_loop; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + + /* if DD is not set pending work has not been completed */ + if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buffer_info->next_to_watch = NULL; + for ( ; !cleaned; count++) { struct sk_buff *skb; tx_desc = IXGBEVF_TX_DESC(tx_ring, i); - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - cleaned = (i == eop); + cleaned = (tx_desc == eop_desc); skb = tx_buffer_info->skb; if (cleaned && skb) { @@ -234,12 +240,12 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, i++; if (i == tx_ring->count) i = 0; + + tx_buffer_info = &tx_ring->tx_buffer_info[i]; } -cont_loop: - eop = tx_ring->tx_buffer_info[i].next_to_watch; - eop_desc = IXGBEVF_TX_DESC(tx_ring, eop); - } + eop_desc = tx_buffer_info->next_to_watch; + } while (count < tx_ring->count); tx_ring->next_to_clean = i; @@ -285,7 +291,7 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, u16 tag = 
le16_to_cpu(rx_desc->wb.upper.vlan); if (is_vlan && test_bit(tag & VLAN_VID_MASK, adapter->active_vlans)) - __vlan_hwaccel_put_tag(skb, tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) napi_gro_receive(&q_vector->napi, skb); @@ -944,9 +950,17 @@ free_queue_irqs: free_irq(adapter->msix_entries[vector].vector, adapter->q_vector[vector]); } - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; + /* This failure is non-recoverable - it indicates the system is + * out of MSIX vector resources and the VF driver cannot run + * without them. Set the number of msix vectors to zero + * indicating that not enough can be allocated. The error + * will be returned to the user indicating device open failed. + * Any further attempts to force the driver to open will also + * fail. The only way to recover is to unload the driver and + * reload it again. If the system has recovered some MSIX + * vectors then it may succeed. 
+ */ + adapter->num_msix_vectors = 0; return err; } @@ -1165,7 +1179,8 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) } } -static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -1190,7 +1205,8 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return err; } -static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -1213,7 +1229,8 @@ static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter) u16 vid; for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - ixgbevf_vlan_rx_add_vid(adapter->netdev, vid); + ixgbevf_vlan_rx_add_vid(adapter->netdev, + htons(ETH_P_8021Q), vid); } static int ixgbevf_write_uc_addr_list(struct net_device *netdev) @@ -2038,6 +2055,7 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; + struct net_device *netdev = adapter->netdev; int err; /* PCI config space info */ @@ -2057,18 +2075,26 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) err = hw->mac.ops.reset_hw(hw); if (err) { dev_info(&pdev->dev, - "PF still in reset state, assigning new address\n"); - eth_hw_addr_random(adapter->netdev); - memcpy(adapter->hw.mac.addr, adapter->netdev->dev_addr, - adapter->netdev->addr_len); + "PF still in reset state. 
Is the PF interface up?\n"); } else { err = hw->mac.ops.init_hw(hw); if (err) { pr_err("init_shared_code failed: %d\n", err); goto out; } - memcpy(adapter->netdev->dev_addr, adapter->hw.mac.addr, - adapter->netdev->addr_len); + err = hw->mac.ops.get_mac_addr(hw, hw->mac.addr); + if (err) + dev_info(&pdev->dev, "Error reading MAC address\n"); + else if (is_zero_ether_addr(adapter->hw.mac.addr)) + dev_info(&pdev->dev, + "MAC address not assigned by administrator.\n"); + memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + } + + if (!is_valid_ether_addr(netdev->dev_addr)) { + dev_info(&pdev->dev, "Assigning random MAC address\n"); + eth_hw_addr_random(netdev); + memcpy(hw->mac.addr, netdev->dev_addr, netdev->addr_len); } /* lock to protect mailbox accesses */ @@ -2417,9 +2443,6 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { - hw_dbg(&adapter->hw, - "Unable to allocate memory for " - "the receive descriptor ring\n"); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; goto alloc_failed; @@ -2572,6 +2595,15 @@ static int ixgbevf_open(struct net_device *netdev) struct ixgbe_hw *hw = &adapter->hw; int err; + /* A previous failure to open the device because of a lack of + * available MSIX vector resources may have reset the number + * of msix vectors variable to zero. The only way to recover + * is to unload/reload the driver and hope that the system has + * been able to recover some MSIX vector resources. 
+ */ + if (!adapter->num_msix_vectors) + return -ENOMEM; + /* disallow open during test */ if (test_bit(__IXGBEVF_TESTING, &adapter->state)) return -EBUSY; @@ -2628,7 +2660,6 @@ static int ixgbevf_open(struct net_device *netdev) err_req_irq: ixgbevf_down(adapter); - ixgbevf_free_irq(adapter); err_setup_rx: ixgbevf_free_all_rx_resources(adapter); err_setup_tx: @@ -2806,8 +2837,7 @@ static bool ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, } static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, - unsigned int first) + struct sk_buff *skb, u32 tx_flags) { struct ixgbevf_tx_buffer *tx_buffer_info; unsigned int len; @@ -2832,7 +2862,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, size, DMA_TO_DEVICE); if (dma_mapping_error(tx_ring->dev, tx_buffer_info->dma)) goto dma_error; - tx_buffer_info->next_to_watch = i; len -= size; total -= size; @@ -2862,7 +2891,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, tx_buffer_info->dma)) goto dma_error; tx_buffer_info->mapped_as_page = true; - tx_buffer_info->next_to_watch = i; len -= size; total -= size; @@ -2881,8 +2909,6 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, else i = i - 1; tx_ring->tx_buffer_info[i].skb = skb; - tx_ring->tx_buffer_info[first].next_to_watch = i; - tx_ring->tx_buffer_info[first].time_stamp = jiffies; return count; @@ -2891,7 +2917,6 @@ dma_error: /* clear timestamp and dma mappings for failed tx_buffer_info map */ tx_buffer_info->dma = 0; - tx_buffer_info->next_to_watch = 0; count--; /* clear timestamp and dma mappings for remaining portion of packet */ @@ -2908,7 +2933,8 @@ dma_error: } static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags, - int count, u32 paylen, u8 hdr_len) + int count, unsigned int first, u32 paylen, + u8 hdr_len) { union ixgbe_adv_tx_desc *tx_desc = NULL; struct ixgbevf_tx_buffer *tx_buffer_info; @@ -2959,6 +2985,16 @@ static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags, 
tx_desc->read.cmd_type_len |= cpu_to_le32(txd_cmd); + tx_ring->tx_buffer_info[first].time_stamp = jiffies; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + + tx_ring->tx_buffer_info[first].next_to_watch = tx_desc; tx_ring->next_to_use = i; } @@ -3050,15 +3086,8 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) tx_flags |= IXGBE_TX_FLAGS_CSUM; ixgbevf_tx_queue(tx_ring, tx_flags, - ixgbevf_tx_map(tx_ring, skb, tx_flags, first), - skb->len, hdr_len); - /* - * Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); + ixgbevf_tx_map(tx_ring, skb, tx_flags), + first, skb->len, hdr_len); writel(tx_ring->next_to_use, adapter->hw.hw_addr + tx_ring->tail); @@ -3384,9 +3413,9 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_RXCSUM; netdev->features = netdev->hw_features | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 0c94557b53df..387b52635bc0 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -109,7 +109,12 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) if (ret_val) return ret_val; - if (msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK)) + /* New versions of the PF may NACK the reset return message + * to indicate that no MAC address has yet been assigned for + * the VF. 
+ */ + if (msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK) && + msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_NACK)) return IXGBE_ERR_INVALID_MAC_ADDR; memcpy(hw->mac.perm_addr, addr, ETH_ALEN); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 0519afa413d2..070a6f1a0577 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1059,7 +1059,7 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx) if (rxdesc->descwb.flags & cpu_to_le16(RXWBFLAG_TAGON)) { u16 vid = le16_to_cpu(rxdesc->descwb.vlan); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); NET_STAT(jme).rx_bytes += 4; } jme->jme_rx(skb); @@ -3030,8 +3030,8 @@ jme_init_one(struct pci_dev *pdev, NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; if (using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 6a2127489af7..bfdb06860397 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -769,7 +769,7 @@ ltq_etop_probe(struct platform_device *pdev) return 0; err_free: - kfree(dev); + free_netdev(dev); err_out: return err; } diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index edfba9370922..a49e81bdf8e8 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -21,8 +21,8 @@ if NET_VENDOR_MARVELL config MV643XX_ETH tristate "Marvell Discovery (643XX) and Orion ethernet support" depends on (MV64X60 || PPC32 || PLAT_ORION) && INET - select INET_LRO select PHYLIB + select MVMDIO ---help--- This driver supports the gigabit ethernet MACs in the Marvell Discovery PPC/MIPS chipset family (MV643XX) and @@ -33,19 +33,17 @@ config MV643XX_ETH config MVMDIO tristate "Marvell MDIO interface support" + select PHYLIB ---help--- This driver 
supports the MDIO interface found in the network interface units of the Marvell EBU SoCs (Kirkwood, Orion5x, Dove, Armada 370 and Armada XP). - For now, this driver is only needed for the MVNETA driver - (used on Armada 370 and XP), but it could be used in the - future by the MV643XX_ETH driver. + This driver is used by the MV643XX_ETH and MVNETA drivers. config MVNETA tristate "Marvell Armada 370/XP network interface support" depends on MACH_ARMADA_370_XP - select PHYLIB select MVMDIO ---help--- This driver supports the network interface units in the diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index 7f63b4aac434..5c4a7765ff0e 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -2,8 +2,8 @@ # Makefile for the Marvell device drivers. # -obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o obj-$(CONFIG_MVMDIO) += mvmdio.o +obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o obj-$(CONFIG_MVNETA) += mvneta.o obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 29140502b71a..d0afeea181fb 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -20,6 +20,8 @@ * Copyright (C) 2007-2008 Marvell Semiconductor * Lennert Buytenhek <buytenh@marvell.com> * + * Copyright (C) 2013 Michael Stapelberg <michael@stapelberg.de> + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 @@ -54,8 +56,8 @@ #include <linux/phy.h> #include <linux/mv643xx_eth.h> #include <linux/io.h> +#include <linux/interrupt.h> #include <linux/types.h> -#include <linux/inet_lro.h> #include <linux/slab.h> #include <linux/clk.h> @@ -67,14 +69,6 @@ static char mv643xx_eth_driver_version[] = "1.4"; * Registers shared between all 
ports. */ #define PHY_ADDR 0x0000 -#define SMI_REG 0x0004 -#define SMI_BUSY 0x10000000 -#define SMI_READ_VALID 0x08000000 -#define SMI_OPCODE_READ 0x04000000 -#define SMI_OPCODE_WRITE 0x00000000 -#define ERR_INT_CAUSE 0x0080 -#define ERR_INT_SMI_DONE 0x00000010 -#define ERR_INT_MASK 0x0084 #define WINDOW_BASE(w) (0x0200 + ((w) << 3)) #define WINDOW_SIZE(w) (0x0204 + ((w) << 3)) #define WINDOW_REMAP_HIGH(w) (0x0280 + ((w) << 2)) @@ -264,25 +258,6 @@ struct mv643xx_eth_shared_private { void __iomem *base; /* - * Points at the right SMI instance to use. - */ - struct mv643xx_eth_shared_private *smi; - - /* - * Provides access to local SMI interface. - */ - struct mii_bus *smi_bus; - - /* - * If we have access to the error interrupt pin (which is - * somewhat misnamed as it not only reflects internal errors - * but also reflects SMI completion), use that to wait for - * SMI access completion instead of polling the SMI busy bit. - */ - int err_interrupt; - wait_queue_head_t smi_busy_wait; - - /* * Per-port MBUS window access register value. */ u32 win_protect; @@ -293,7 +268,7 @@ struct mv643xx_eth_shared_private { int extended_rx_coal_limit; int tx_bw_control; int tx_csum_limit; - + struct clk *clk; }; #define TX_BW_CONTROL_ABSENT 0 @@ -341,12 +316,6 @@ struct mib_counters { u32 rx_overrun; }; -struct lro_counters { - u32 lro_aggregated; - u32 lro_flushed; - u32 lro_no_desc; -}; - struct rx_queue { int index; @@ -360,9 +329,6 @@ struct rx_queue { dma_addr_t rx_desc_dma; int rx_desc_area_size; struct sk_buff **rx_skb; - - struct net_lro_mgr lro_mgr; - struct net_lro_desc lro_arr[8]; }; struct tx_queue { @@ -398,8 +364,6 @@ struct mv643xx_eth_private { spinlock_t mib_counters_lock; struct mib_counters mib_counters; - struct lro_counters lro_counters; - struct work_struct tx_timeout_task; struct napi_struct napi; @@ -435,9 +399,7 @@ struct mv643xx_eth_private { /* * Hardware-specific parameters. 
*/ -#if defined(CONFIG_HAVE_CLK) struct clk *clk; -#endif unsigned int t_clk; }; @@ -530,42 +492,12 @@ static void txq_maybe_wake(struct tx_queue *txq) } } - -/* rx napi ******************************************************************/ -static int -mv643xx_get_skb_header(struct sk_buff *skb, void **iphdr, void **tcph, - u64 *hdr_flags, void *priv) -{ - unsigned long cmd_sts = (unsigned long)priv; - - /* - * Make sure that this packet is Ethernet II, is not VLAN - * tagged, is IPv4, has a valid IP header, and is TCP. - */ - if ((cmd_sts & (RX_IP_HDR_OK | RX_PKT_IS_IPV4 | - RX_PKT_IS_ETHERNETV2 | RX_PKT_LAYER4_TYPE_MASK | - RX_PKT_IS_VLAN_TAGGED)) != - (RX_IP_HDR_OK | RX_PKT_IS_IPV4 | - RX_PKT_IS_ETHERNETV2 | RX_PKT_LAYER4_TYPE_TCP_IPV4)) - return -1; - - skb_reset_network_header(skb); - skb_set_transport_header(skb, ip_hdrlen(skb)); - *iphdr = ip_hdr(skb); - *tcph = tcp_hdr(skb); - *hdr_flags = LRO_IPV4 | LRO_TCP; - - return 0; -} - static int rxq_process(struct rx_queue *rxq, int budget) { struct mv643xx_eth_private *mp = rxq_to_mp(rxq); struct net_device_stats *stats = &mp->dev->stats; - int lro_flush_needed; int rx; - lro_flush_needed = 0; rx = 0; while (rx < budget && rxq->rx_desc_count) { struct rx_desc *rx_desc; @@ -626,12 +558,7 @@ static int rxq_process(struct rx_queue *rxq, int budget) skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, mp->dev); - if (skb->dev->features & NETIF_F_LRO && - skb->ip_summed == CHECKSUM_UNNECESSARY) { - lro_receive_skb(&rxq->lro_mgr, skb, (void *)cmd_sts); - lro_flush_needed = 1; - } else - netif_receive_skb(skb); + napi_gro_receive(&mp->napi, skb); continue; @@ -651,9 +578,6 @@ err: dev_kfree_skb(skb); } - if (lro_flush_needed) - lro_flush_all(&rxq->lro_mgr); - if (rx < budget) mp->work_rx &= ~(1 << rxq->index); @@ -1081,97 +1005,45 @@ static void txq_set_fixed_prio_mode(struct tx_queue *txq) /* mii management interface *************************************************/ -static irqreturn_t 
mv643xx_eth_err_irq(int irq, void *dev_id) -{ - struct mv643xx_eth_shared_private *msp = dev_id; - - if (readl(msp->base + ERR_INT_CAUSE) & ERR_INT_SMI_DONE) { - writel(~ERR_INT_SMI_DONE, msp->base + ERR_INT_CAUSE); - wake_up(&msp->smi_busy_wait); - return IRQ_HANDLED; - } - - return IRQ_NONE; -} - -static int smi_is_done(struct mv643xx_eth_shared_private *msp) -{ - return !(readl(msp->base + SMI_REG) & SMI_BUSY); -} - -static int smi_wait_ready(struct mv643xx_eth_shared_private *msp) -{ - if (msp->err_interrupt == NO_IRQ) { - int i; - - for (i = 0; !smi_is_done(msp); i++) { - if (i == 10) - return -ETIMEDOUT; - msleep(10); - } - - return 0; - } - - if (!smi_is_done(msp)) { - wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp), - msecs_to_jiffies(100)); - if (!smi_is_done(msp)) - return -ETIMEDOUT; - } - - return 0; -} - -static int smi_bus_read(struct mii_bus *bus, int addr, int reg) +static void mv643xx_adjust_pscr(struct mv643xx_eth_private *mp) { - struct mv643xx_eth_shared_private *msp = bus->priv; - void __iomem *smi_reg = msp->base + SMI_REG; - int ret; + u32 pscr = rdlp(mp, PORT_SERIAL_CONTROL); + u32 autoneg_disable = FORCE_LINK_PASS | + DISABLE_AUTO_NEG_SPEED_GMII | + DISABLE_AUTO_NEG_FOR_FLOW_CTRL | + DISABLE_AUTO_NEG_FOR_DUPLEX; - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; + if (mp->phy->autoneg == AUTONEG_ENABLE) { + /* enable auto negotiation */ + pscr &= ~autoneg_disable; + goto out_write; } - writel(SMI_OPCODE_READ | (reg << 21) | (addr << 16), smi_reg); + pscr |= autoneg_disable; - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; + if (mp->phy->speed == SPEED_1000) { + /* force gigabit, half duplex not supported */ + pscr |= SET_GMII_SPEED_TO_1000; + pscr |= SET_FULL_DUPLEX_MODE; + goto out_write; } - ret = readl(smi_reg); - if (!(ret & SMI_READ_VALID)) { - pr_warn("SMI bus read not valid\n"); - return -ENODEV; - } - - return ret & 0xffff; -} - -static int 
smi_bus_write(struct mii_bus *bus, int addr, int reg, u16 val) -{ - struct mv643xx_eth_shared_private *msp = bus->priv; - void __iomem *smi_reg = msp->base + SMI_REG; + pscr &= ~SET_GMII_SPEED_TO_1000; - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } - - writel(SMI_OPCODE_WRITE | (reg << 21) | - (addr << 16) | (val & 0xffff), smi_reg); + if (mp->phy->speed == SPEED_100) + pscr |= SET_MII_SPEED_TO_100; + else + pscr &= ~SET_MII_SPEED_TO_100; - if (smi_wait_ready(msp)) { - pr_warn("SMI bus busy timeout\n"); - return -ETIMEDOUT; - } + if (mp->phy->duplex == DUPLEX_FULL) + pscr |= SET_FULL_DUPLEX_MODE; + else + pscr &= ~SET_FULL_DUPLEX_MODE; - return 0; +out_write: + wrlp(mp, PORT_SERIAL_CONTROL, pscr); } - /* statistics ***************************************************************/ static struct net_device_stats *mv643xx_eth_get_stats(struct net_device *dev) { @@ -1197,26 +1069,6 @@ static struct net_device_stats *mv643xx_eth_get_stats(struct net_device *dev) return stats; } -static void mv643xx_eth_grab_lro_stats(struct mv643xx_eth_private *mp) -{ - u32 lro_aggregated = 0; - u32 lro_flushed = 0; - u32 lro_no_desc = 0; - int i; - - for (i = 0; i < mp->rxq_count; i++) { - struct rx_queue *rxq = mp->rxq + i; - - lro_aggregated += rxq->lro_mgr.stats.aggregated; - lro_flushed += rxq->lro_mgr.stats.flushed; - lro_no_desc += rxq->lro_mgr.stats.no_desc; - } - - mp->lro_counters.lro_aggregated = lro_aggregated; - mp->lro_counters.lro_flushed = lro_flushed; - mp->lro_counters.lro_no_desc = lro_no_desc; -} - static inline u32 mib_read(struct mv643xx_eth_private *mp, int offset) { return rdl(mp, MIB_COUNTERS(mp->port_num) + offset); @@ -1380,10 +1232,6 @@ struct mv643xx_eth_stats { { #m, FIELD_SIZEOF(struct mib_counters, m), \ -1, offsetof(struct mv643xx_eth_private, mib_counters.m) } -#define LROSTAT(m) \ - { #m, FIELD_SIZEOF(struct lro_counters, m), \ - -1, offsetof(struct mv643xx_eth_private, lro_counters.m) } - static const struct 
mv643xx_eth_stats mv643xx_eth_stats[] = { SSTAT(rx_packets), SSTAT(tx_packets), @@ -1425,9 +1273,6 @@ static const struct mv643xx_eth_stats mv643xx_eth_stats[] = { MIBSTAT(late_collision), MIBSTAT(rx_discard), MIBSTAT(rx_overrun), - LROSTAT(lro_aggregated), - LROSTAT(lro_flushed), - LROSTAT(lro_no_desc), }; static int @@ -1484,6 +1329,34 @@ mv643xx_eth_get_settings_phyless(struct mv643xx_eth_private *mp, return 0; } +static void +mv643xx_eth_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct mv643xx_eth_private *mp = netdev_priv(dev); + wol->supported = 0; + wol->wolopts = 0; + if (mp->phy) + phy_ethtool_get_wol(mp->phy, wol); +} + +static int +mv643xx_eth_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct mv643xx_eth_private *mp = netdev_priv(dev); + int err; + + if (mp->phy == NULL) + return -EOPNOTSUPP; + + err = phy_ethtool_set_wol(mp->phy, wol); + /* Given that mv643xx_eth works without the marvell-specific PHY driver, + * this debugging hint is useful to have. 
+ */ + if (err == -EOPNOTSUPP) + netdev_info(dev, "The PHY does not support set_wol, was CONFIG_MARVELL_PHY enabled?\n"); + return err; +} + static int mv643xx_eth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -1499,6 +1372,7 @@ static int mv643xx_eth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mv643xx_eth_private *mp = netdev_priv(dev); + int ret; if (mp->phy == NULL) return -EINVAL; @@ -1508,7 +1382,10 @@ mv643xx_eth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) */ cmd->advertising &= ~ADVERTISED_1000baseT_Half; - return phy_ethtool_sset(mp->phy, cmd); + ret = phy_ethtool_sset(mp->phy, cmd); + if (!ret) + mv643xx_adjust_pscr(mp); + return ret; } static void mv643xx_eth_get_drvinfo(struct net_device *dev, @@ -1625,7 +1502,6 @@ static void mv643xx_eth_get_ethtool_stats(struct net_device *dev, mv643xx_eth_get_stats(dev); mib_counters_update(mp); - mv643xx_eth_grab_lro_stats(mp); for (i = 0; i < ARRAY_SIZE(mv643xx_eth_stats); i++) { const struct mv643xx_eth_stats *stat; @@ -1665,6 +1541,8 @@ static const struct ethtool_ops mv643xx_eth_ethtool_ops = { .get_ethtool_stats = mv643xx_eth_get_ethtool_stats, .get_sset_count = mv643xx_eth_get_sset_count, .get_ts_info = ethtool_op_get_ts_info, + .get_wol = mv643xx_eth_get_wol, + .set_wol = mv643xx_eth_set_wol, }; @@ -1896,19 +1774,6 @@ static int rxq_init(struct mv643xx_eth_private *mp, int index) nexti * sizeof(struct rx_desc); } - rxq->lro_mgr.dev = mp->dev; - memset(&rxq->lro_mgr.stats, 0, sizeof(rxq->lro_mgr.stats)); - rxq->lro_mgr.features = LRO_F_NAPI; - rxq->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; - rxq->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; - rxq->lro_mgr.max_desc = ARRAY_SIZE(rxq->lro_arr); - rxq->lro_mgr.max_aggr = 32; - rxq->lro_mgr.frag_align_pad = 0; - rxq->lro_mgr.lro_arr = rxq->lro_arr; - rxq->lro_mgr.get_skb_header = mv643xx_get_skb_header; - - memset(&rxq->lro_arr, 0, sizeof(rxq->lro_arr)); - return 0; @@ -2442,11 +2307,15 @@ static 
int mv643xx_eth_stop(struct net_device *dev) static int mv643xx_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mv643xx_eth_private *mp = netdev_priv(dev); + int ret; - if (mp->phy != NULL) - return phy_mii_ioctl(mp->phy, ifr, cmd); + if (mp->phy == NULL) + return -ENOTSUPP; - return -EOPNOTSUPP; + ret = phy_mii_ioctl(mp->phy, ifr, cmd); + if (!ret) + mv643xx_adjust_pscr(mp); + return ret; } static int mv643xx_eth_change_mtu(struct net_device *dev, int new_mtu) @@ -2588,66 +2457,26 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) struct mv643xx_eth_shared_private *msp; const struct mbus_dram_target_info *dram; struct resource *res; - int ret; if (!mv643xx_eth_version_printed++) pr_notice("MV-643xx 10/100/1000 ethernet driver version %s\n", mv643xx_eth_driver_version); - ret = -EINVAL; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) - goto out; + return -EINVAL; - ret = -ENOMEM; - msp = kzalloc(sizeof(*msp), GFP_KERNEL); + msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL); if (msp == NULL) - goto out; + return -ENOMEM; msp->base = ioremap(res->start, resource_size(res)); if (msp->base == NULL) - goto out_free; - - /* - * Set up and register SMI bus. - */ - if (pd == NULL || pd->shared_smi == NULL) { - msp->smi_bus = mdiobus_alloc(); - if (msp->smi_bus == NULL) - goto out_unmap; - - msp->smi_bus->priv = msp; - msp->smi_bus->name = "mv643xx_eth smi"; - msp->smi_bus->read = smi_bus_read; - msp->smi_bus->write = smi_bus_write, - snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%s-%d", - pdev->name, pdev->id); - msp->smi_bus->parent = &pdev->dev; - msp->smi_bus->phy_mask = 0xffffffff; - if (mdiobus_register(msp->smi_bus) < 0) - goto out_free_mii_bus; - msp->smi = msp; - } else { - msp->smi = platform_get_drvdata(pd->shared_smi); - } - - msp->err_interrupt = NO_IRQ; - init_waitqueue_head(&msp->smi_busy_wait); + return -ENOMEM; - /* - * Check whether the error interrupt is hooked up. 
- */ - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (res != NULL) { - int err; - - err = request_irq(res->start, mv643xx_eth_err_irq, - IRQF_SHARED, "mv643xx_eth", msp); - if (!err) { - writel(ERR_INT_SMI_DONE, msp->base + ERR_INT_MASK); - msp->err_interrupt = res->start; - } - } + msp->clk = devm_clk_get(&pdev->dev, NULL); + if (!IS_ERR(msp->clk)) + clk_prepare_enable(msp->clk); /* * (Re-)program MBUS remapping windows if we are asked to. @@ -2663,30 +2492,15 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) platform_set_drvdata(pdev, msp); return 0; - -out_free_mii_bus: - mdiobus_free(msp->smi_bus); -out_unmap: - iounmap(msp->base); -out_free: - kfree(msp); -out: - return ret; } static int mv643xx_eth_shared_remove(struct platform_device *pdev) { struct mv643xx_eth_shared_private *msp = platform_get_drvdata(pdev); - struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data; - if (pd == NULL || pd->shared_smi == NULL) { - mdiobus_unregister(msp->smi_bus); - mdiobus_free(msp->smi_bus); - } - if (msp->err_interrupt != NO_IRQ) - free_irq(msp->err_interrupt, msp); iounmap(msp->base); - kfree(msp); + if (!IS_ERR(msp->clk)) + clk_disable_unprepare(msp->clk); return 0; } @@ -2747,14 +2561,21 @@ static void set_params(struct mv643xx_eth_private *mp, mp->txq_count = pd->tx_queue_count ? 
: 1; } +static void mv643xx_eth_adjust_link(struct net_device *dev) +{ + struct mv643xx_eth_private *mp = netdev_priv(dev); + + mv643xx_adjust_pscr(mp); +} + static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, int phy_addr) { - struct mii_bus *bus = mp->shared->smi->smi_bus; struct phy_device *phydev; int start; int num; int i; + char phy_id[MII_BUS_ID_SIZE + 3]; if (phy_addr == MV643XX_ETH_PHY_ADDR_DEFAULT) { start = phy_addr_get(mp) & 0x1f; @@ -2764,17 +2585,19 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, num = 1; } - phydev = NULL; + /* Attempt to connect to the PHY using orion-mdio */ + phydev = ERR_PTR(-ENODEV); for (i = 0; i < num; i++) { int addr = (start + i) & 0x1f; - if (bus->phy_map[addr] == NULL) - mdiobus_scan(bus, addr); + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, + "orion-mdio-mii", addr); - if (phydev == NULL) { - phydev = bus->phy_map[addr]; - if (phydev != NULL) - phy_addr_set(mp, addr); + phydev = phy_connect(mp->dev, phy_id, mv643xx_eth_adjust_link, + PHY_INTERFACE_MODE_GMII); + if (!IS_ERR(phydev)) { + phy_addr_set(mp, addr); + break; } } @@ -2787,8 +2610,6 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex) phy_reset(mp); - phy_attach(mp->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_GMII); - if (speed == 0) { phy->autoneg = AUTONEG_ENABLE; phy->speed = 0; @@ -2885,22 +2706,27 @@ static int mv643xx_eth_probe(struct platform_device *pdev) * it to override the default. */ mp->t_clk = 133000000; -#if defined(CONFIG_HAVE_CLK) - mp->clk = clk_get(&pdev->dev, (pdev->id ? 
"1" : "0")); + mp->clk = devm_clk_get(&pdev->dev, NULL); if (!IS_ERR(mp->clk)) { clk_prepare_enable(mp->clk); mp->t_clk = clk_get_rate(mp->clk); } -#endif + set_params(mp, pd); netif_set_real_num_tx_queues(dev, mp->txq_count); netif_set_real_num_rx_queues(dev, mp->rxq_count); - if (pd->phy_addr != MV643XX_ETH_PHY_NONE) + if (pd->phy_addr != MV643XX_ETH_PHY_NONE) { mp->phy = phy_scan(mp, pd->phy_addr); - if (mp->phy != NULL) + if (IS_ERR(mp->phy)) { + err = PTR_ERR(mp->phy); + if (err == -ENODEV) + err = -EPROBE_DEFER; + goto out; + } phy_init(mp, pd->speed, pd->duplex); + } SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops); @@ -2935,8 +2761,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) dev->watchdog_timeo = 2 * HZ; dev->base_addr = 0; - dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_RXCSUM | NETIF_F_LRO; + dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM; dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM; dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM; @@ -2967,12 +2792,8 @@ static int mv643xx_eth_probe(struct platform_device *pdev) return 0; out: -#if defined(CONFIG_HAVE_CLK) - if (!IS_ERR(mp->clk)) { + if (!IS_ERR(mp->clk)) clk_disable_unprepare(mp->clk); - clk_put(mp->clk); - } -#endif free_netdev(dev); return err; @@ -2987,12 +2808,8 @@ static int mv643xx_eth_remove(struct platform_device *pdev) phy_detach(mp->phy); cancel_work_sync(&mp->tx_timeout_task); -#if defined(CONFIG_HAVE_CLK) - if (!IS_ERR(mp->clk)) { + if (!IS_ERR(mp->clk)) clk_disable_unprepare(mp->clk); - clk_put(mp->clk); - } -#endif free_netdev(mp->dev); diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 77b7c80262f4..e2f662660313 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -24,10 +24,14 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/phy.h> -#include <linux/of_address.h> -#include <linux/of_mdio.h> +#include 
<linux/interrupt.h> #include <linux/platform_device.h> #include <linux/delay.h> +#include <linux/io.h> +#include <linux/clk.h> +#include <linux/of_mdio.h> +#include <linux/sched.h> +#include <linux/wait.h> #define MVMDIO_SMI_DATA_SHIFT 0 #define MVMDIO_SMI_PHY_ADDR_SHIFT 16 @@ -36,33 +40,59 @@ #define MVMDIO_SMI_WRITE_OPERATION 0 #define MVMDIO_SMI_READ_VALID BIT(27) #define MVMDIO_SMI_BUSY BIT(28) +#define MVMDIO_ERR_INT_CAUSE 0x007C +#define MVMDIO_ERR_INT_SMI_DONE 0x00000010 +#define MVMDIO_ERR_INT_MASK 0x0080 struct orion_mdio_dev { struct mutex lock; - void __iomem *smireg; + void __iomem *regs; + struct clk *clk; + /* + * If we have access to the error interrupt pin (which is + * somewhat misnamed as it not only reflects internal errors + * but also reflects SMI completion), use that to wait for + * SMI access completion instead of polling the SMI busy bit. + */ + int err_interrupt; + wait_queue_head_t smi_busy_wait; }; +static int orion_mdio_smi_is_done(struct orion_mdio_dev *dev) +{ + return !(readl(dev->regs) & MVMDIO_SMI_BUSY); +} + /* Wait for the SMI unit to be ready for another operation */ static int orion_mdio_wait_ready(struct mii_bus *bus) { struct orion_mdio_dev *dev = bus->priv; int count; - u32 val; - count = 0; - while (1) { - val = readl(dev->smireg); - if (!(val & MVMDIO_SMI_BUSY)) - break; + if (dev->err_interrupt <= 0) { + count = 0; + while (1) { + if (orion_mdio_smi_is_done(dev)) + break; - if (count > 100) { - dev_err(bus->parent, "Timeout: SMI busy for too long\n"); - return -ETIMEDOUT; - } + if (count > 100) { + dev_err(bus->parent, + "Timeout: SMI busy for too long\n"); + return -ETIMEDOUT; + } - udelay(10); - count++; + udelay(10); + count++; + } + } else { + if (!orion_mdio_smi_is_done(dev)) { + wait_event_timeout(dev->smi_busy_wait, + orion_mdio_smi_is_done(dev), + msecs_to_jiffies(100)); + if (!orion_mdio_smi_is_done(dev)) + return -ETIMEDOUT; + } } return 0; @@ -87,12 +117,12 @@ static int orion_mdio_read(struct mii_bus *bus, int 
mii_id, writel(((mii_id << MVMDIO_SMI_PHY_ADDR_SHIFT) | (regnum << MVMDIO_SMI_PHY_REG_SHIFT) | MVMDIO_SMI_READ_OPERATION), - dev->smireg); + dev->regs); /* Wait for the value to become available */ count = 0; while (1) { - val = readl(dev->smireg); + val = readl(dev->regs); if (val & MVMDIO_SMI_READ_VALID) break; @@ -129,7 +159,7 @@ static int orion_mdio_write(struct mii_bus *bus, int mii_id, (regnum << MVMDIO_SMI_PHY_REG_SHIFT) | MVMDIO_SMI_WRITE_OPERATION | (value << MVMDIO_SMI_DATA_SHIFT)), - dev->smireg); + dev->regs); mutex_unlock(&dev->lock); @@ -141,13 +171,34 @@ static int orion_mdio_reset(struct mii_bus *bus) return 0; } +static irqreturn_t orion_mdio_err_irq(int irq, void *dev_id) +{ + struct orion_mdio_dev *dev = dev_id; + + if (readl(dev->regs + MVMDIO_ERR_INT_CAUSE) & + MVMDIO_ERR_INT_SMI_DONE) { + writel(~MVMDIO_ERR_INT_SMI_DONE, + dev->regs + MVMDIO_ERR_INT_CAUSE); + wake_up(&dev->smi_busy_wait); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + static int orion_mdio_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; + struct resource *r; struct mii_bus *bus; struct orion_mdio_dev *dev; int i, ret; + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "No SMI register address given\n"); + return -ENODEV; + } + bus = mdiobus_alloc_size(sizeof(struct orion_mdio_dev)); if (!bus) { dev_err(&pdev->dev, "Cannot allocate MDIO bus\n"); @@ -172,36 +223,66 @@ static int orion_mdio_probe(struct platform_device *pdev) bus->irq[i] = PHY_POLL; dev = bus->priv; - dev->smireg = of_iomap(pdev->dev.of_node, 0); - if (!dev->smireg) { - dev_err(&pdev->dev, "No SMI register address given in DT\n"); - kfree(bus->irq); - mdiobus_free(bus); - return -ENODEV; + dev->regs = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!dev->regs) { + dev_err(&pdev->dev, "Unable to remap SMI register\n"); + ret = -ENODEV; + goto out_mdio; + } + + init_waitqueue_head(&dev->smi_busy_wait); + + dev->clk = 
devm_clk_get(&pdev->dev, NULL); + if (!IS_ERR(dev->clk)) + clk_prepare_enable(dev->clk); + + dev->err_interrupt = platform_get_irq(pdev, 0); + if (dev->err_interrupt != -ENXIO) { + ret = devm_request_irq(&pdev->dev, dev->err_interrupt, + orion_mdio_err_irq, + IRQF_SHARED, pdev->name, dev); + if (ret) + goto out_mdio; + + writel(MVMDIO_ERR_INT_SMI_DONE, + dev->regs + MVMDIO_ERR_INT_MASK); } mutex_init(&dev->lock); - ret = of_mdiobus_register(bus, np); + if (pdev->dev.of_node) + ret = of_mdiobus_register(bus, pdev->dev.of_node); + else + ret = mdiobus_register(bus); if (ret < 0) { dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret); - iounmap(dev->smireg); - kfree(bus->irq); - mdiobus_free(bus); - return ret; + goto out_mdio; } platform_set_drvdata(pdev, bus); return 0; + +out_mdio: + if (!IS_ERR(dev->clk)) + clk_disable_unprepare(dev->clk); + kfree(bus->irq); + mdiobus_free(bus); + return ret; } static int orion_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); + struct orion_mdio_dev *dev = bus->priv; + + writel(0, dev->regs + MVMDIO_ERR_INT_MASK); mdiobus_unregister(bus); kfree(bus->irq); mdiobus_free(bus); + if (!IS_ERR(dev->clk)) + clk_disable_unprepare(dev->clk); + return 0; } @@ -225,3 +306,4 @@ module_platform_driver(orion_mdio_driver); MODULE_DESCRIPTION("Marvell MDIO interface driver"); MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:orion-mdio"); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index cd345b8969bc..c96678555233 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -374,7 +374,6 @@ static int rxq_number = 8; static int txq_number = 8; static int rxq_def; -static int txq_def; #define MVNETA_DRIVER_NAME "mvneta" #define MVNETA_DRIVER_VERSION "1.0" @@ -1475,7 +1474,8 @@ error: static int mvneta_tx(struct sk_buff *skb, struct net_device 
*dev) { struct mvneta_port *pp = netdev_priv(dev); - struct mvneta_tx_queue *txq = &pp->txqs[txq_def]; + u16 txq_id = skb_get_queue_mapping(skb); + struct mvneta_tx_queue *txq = &pp->txqs[txq_id]; struct mvneta_tx_desc *tx_desc; struct netdev_queue *nq; int frags = 0; @@ -1485,7 +1485,7 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) goto out; frags = skb_shinfo(skb)->nr_frags + 1; - nq = netdev_get_tx_queue(dev, txq_def); + nq = netdev_get_tx_queue(dev, txq_id); /* Get a descriptor for the first part of the packet */ tx_desc = mvneta_txq_next_desc_get(txq); @@ -1969,13 +1969,8 @@ static int mvneta_rxq_init(struct mvneta_port *pp, rxq->descs = dma_alloc_coherent(pp->dev->dev.parent, rxq->size * MVNETA_DESC_ALIGNED_SIZE, &rxq->descs_phys, GFP_KERNEL); - if (rxq->descs == NULL) { - netdev_err(pp->dev, - "rxq=%d: Can't allocate %d bytes for %d RX descr\n", - rxq->id, rxq->size * MVNETA_DESC_ALIGNED_SIZE, - rxq->size); + if (rxq->descs == NULL) return -ENOMEM; - } BUG_ON(rxq->descs != PTR_ALIGN(rxq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE)); @@ -2029,13 +2024,8 @@ static int mvneta_txq_init(struct mvneta_port *pp, txq->descs = dma_alloc_coherent(pp->dev->dev.parent, txq->size * MVNETA_DESC_ALIGNED_SIZE, &txq->descs_phys, GFP_KERNEL); - if (txq->descs == NULL) { - netdev_err(pp->dev, - "txQ=%d: Can't allocate %d bytes for %d TX descr\n", - txq->id, txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->size); + if (txq->descs == NULL) return -ENOMEM; - } /* Make sure descriptor address is cache line size aligned */ BUG_ON(txq->descs != @@ -2689,7 +2679,7 @@ static int mvneta_probe(struct platform_device *pdev) return -EINVAL; } - dev = alloc_etherdev_mq(sizeof(struct mvneta_port), 8); + dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, rxq_number); if (!dev) return -ENOMEM; @@ -2771,16 +2761,17 @@ static int mvneta_probe(struct platform_device *pdev) netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight); + dev->features = NETIF_F_SG | 
NETIF_F_IP_CSUM; + dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM; + dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM; + dev->priv_flags |= IFF_UNICAST_FLT; + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register\n"); goto err_deinit; } - dev->features = NETIF_F_SG | NETIF_F_IP_CSUM; - dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM; - dev->priv_flags |= IFF_UNICAST_FLT; - netdev_info(dev, "mac: %pM\n", dev->dev_addr); platform_set_drvdata(pdev, pp->dev); @@ -2843,4 +2834,3 @@ module_param(rxq_number, int, S_IRUGO); module_param(txq_number, int, S_IRUGO); module_param(rxq_def, int, S_IRUGO); -module_param(txq_def, int, S_IRUGO); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 037ed866c22f..339bb323cb0c 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -584,12 +584,14 @@ static int init_hash_table(struct pxa168_eth_private *pep) */ if (pep->htpr == NULL) { pep->htpr = dma_alloc_coherent(pep->dev->dev.parent, - HASH_ADDR_TABLE_SIZE, - &pep->htpr_dma, GFP_KERNEL); + HASH_ADDR_TABLE_SIZE, + &pep->htpr_dma, + GFP_KERNEL | __GFP_ZERO); if (pep->htpr == NULL) return -ENOMEM; + } else { + memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); } - memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); wrl(pep, HTPR, pep->htpr_dma); return 0; } @@ -1023,13 +1025,11 @@ static int rxq_init(struct net_device *dev) size = pep->rx_ring_size * sizeof(struct rx_desc); pep->rx_desc_area_size = size; pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, - &pep->rx_desc_dma, GFP_KERNEL); - if (!pep->p_rx_desc_area) { - printk(KERN_ERR "%s: Cannot alloc RX ring (size %d bytes)\n", - dev->name, size); + &pep->rx_desc_dma, + GFP_KERNEL | __GFP_ZERO); + if (!pep->p_rx_desc_area) goto out; - } - memset((void *)pep->p_rx_desc_area, 0, size); + /* initialize the next_desc_ptr links in the Rx descriptors ring */ p_rx_desc = pep->p_rx_desc_area; for (i = 0; 
i < rx_desc_num; i++) { @@ -1086,13 +1086,10 @@ static int txq_init(struct net_device *dev) size = pep->tx_ring_size * sizeof(struct tx_desc); pep->tx_desc_area_size = size; pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, - &pep->tx_desc_dma, GFP_KERNEL); - if (!pep->p_tx_desc_area) { - printk(KERN_ERR "%s: Cannot allocate Tx Ring (size %d bytes)\n", - dev->name, size); + &pep->tx_desc_dma, + GFP_KERNEL | __GFP_ZERO); + if (!pep->p_tx_desc_area) goto out; - } - memset((void *)pep->p_tx_desc_area, 0, pep->tx_desc_area_size); /* Initialize the next_desc_ptr links in the Tx descriptors ring */ p_tx_desc = pep->p_tx_desc_area; for (i = 0; i < tx_desc_num; i++) { diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index fc07ca35721b..256ae789c143 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -1067,7 +1067,7 @@ static void sky2_ramset(struct sky2_hw *hw, u16 q, u32 start, u32 space) sky2_write32(hw, RB_ADDR(q, RB_RX_UTHP), tp); sky2_write32(hw, RB_ADDR(q, RB_RX_LTHP), space/2); - tp = space - 2048/8; + tp = space - 8192/8; sky2_write32(hw, RB_ADDR(q, RB_RX_UTPP), tp); sky2_write32(hw, RB_ADDR(q, RB_RX_LTPP), space/4); } else { @@ -1421,14 +1421,14 @@ static void sky2_vlan_mode(struct net_device *dev, netdev_features_t features) struct sky2_hw *hw = sky2->hw; u16 port = sky2->port; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON); else sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T), RX_VLAN_STRIP_OFF); - if (features & NETIF_F_HW_VLAN_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) { sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), TX_VLAN_TAG_ON); @@ -2713,7 +2713,7 @@ static void sky2_rx_tag(struct sky2_port *sky2, u16 length) struct sk_buff *skb; skb = sky2->rx_ring[sky2->rx_next].skb; - __vlan_hwaccel_put_tag(skb, be16_to_cpu(length)); + __vlan_hwaccel_put_tag(skb, 
htons(ETH_P_8021Q), be16_to_cpu(length)); } static void sky2_rx_hash(struct sky2_port *sky2, u32 status) @@ -4406,7 +4406,7 @@ static int sky2_set_features(struct net_device *dev, netdev_features_t features) if (changed & NETIF_F_RXHASH) rx_set_rss(dev, features); - if (changed & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX)) + if (changed & (NETIF_F_HW_VLAN_CTAG_TX|NETIF_F_HW_VLAN_CTAG_RX)) sky2_vlan_mode(dev, features); return 0; @@ -4793,7 +4793,8 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, dev->hw_features |= NETIF_F_RXHASH; if (!(hw->flags & SKY2_HW_VLAN_BROKEN)) { - dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features |= SKY2_VLAN_OFFLOADS; } diff --git a/drivers/net/ethernet/marvell/sky2.h b/drivers/net/ethernet/marvell/sky2.h index 615ac63ea860..ec6dcd80152b 100644 --- a/drivers/net/ethernet/marvell/sky2.h +++ b/drivers/net/ethernet/marvell/sky2.h @@ -2074,7 +2074,7 @@ enum { GM_IS_RX_FF_OR = 1<<1, /* Receive FIFO Overrun */ GM_IS_RX_COMPL = 1<<0, /* Frame Reception Complete */ -#define GMAC_DEF_MSK GM_IS_TX_FF_UR +#define GMAC_DEF_MSK (GM_IS_TX_FF_UR | GM_IS_RX_FF_OR) }; /* GMAC_LINK_CTRL 16 bit GMAC Link Control Reg (YUKON only) */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index fdc5f23d8e9f..05267d716e86 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1837,10 +1837,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); - if (!priv->mfunc.vhcr) { - mlx4_err(dev, "Couldn't allocate VHCR.\n"); + if (!priv->mfunc.vhcr) goto err_hcr; - } } priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 7e64033d7de3..0706623cfb96 100644 
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -226,7 +226,7 @@ void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) { - u64 in_param; + u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index b799ab12a291..0f91222ea3d7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -186,7 +186,7 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev, static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev) { - return DCB_CAP_DCBX_VER_IEEE; + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; } static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) @@ -253,3 +253,11 @@ const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = { .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, }; + +const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = { + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .getdcbx = mlx4_en_dcbnl_getdcbx, + .setdcbx = mlx4_en_dcbnl_setdcbx, +}; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bb4d8d99f36d..e7e27842d8d4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -356,7 +356,8 @@ static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) } #endif -static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -381,7 +382,8 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short 
vid) +static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -411,8 +413,8 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac) { - unsigned int i; - for (i = ETH_ALEN - 1; i; --i) { + int i; + for (i = ETH_ALEN - 1; i >= 0; --i) { dst_mac[i] = src_mac & 0xff; src_mac >>= 8; } @@ -565,34 +567,38 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv) struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int qpn = priv->base_qpn; - u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr); - - en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", - priv->dev->dev_addr); - mlx4_unregister_mac(dev, priv->port, mac); + u64 mac; - if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { + mac = mlx4_en_mac_to_u64(priv->dev->dev_addr); + en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", + priv->dev->dev_addr); + mlx4_unregister_mac(dev, priv->port, mac); + } else { struct mlx4_mac_entry *entry; struct hlist_node *tmp; struct hlist_head *bucket; - unsigned int mac_hash; + unsigned int i; - mac_hash = priv->dev->dev_addr[MLX4_EN_MAC_HASH_IDX]; - bucket = &priv->mac_hash[mac_hash]; - hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { - if (ether_addr_equal_64bits(entry->mac, - priv->dev->dev_addr)) { - en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n", - priv->port, priv->dev->dev_addr, qpn); + for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { + bucket = &priv->mac_hash[i]; + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { + mac = mlx4_en_mac_to_u64(entry->mac); + en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", + entry->mac); mlx4_en_uc_steer_release(priv, entry->mac, qpn, entry->reg_id); - mlx4_qp_release_range(dev, qpn, 1); + 
mlx4_unregister_mac(dev, priv->port, mac); hlist_del_rcu(&entry->hlist); kfree_rcu(entry, rcu); - break; } } + + en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", + priv->port, qpn); + mlx4_qp_release_range(dev, qpn, 1); + priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; } } @@ -650,28 +656,10 @@ u64 mlx4_en_mac_to_u64(u8 *addr) return mac; } -static int mlx4_en_set_mac(struct net_device *dev, void *addr) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; - struct sockaddr *saddr = addr; - - if (!is_valid_ether_addr(saddr->sa_data)) - return -EADDRNOTAVAIL; - - memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); - queue_work(mdev->workqueue, &priv->mac_task); - return 0; -} - -static void mlx4_en_do_set_mac(struct work_struct *work) +static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv) { - struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - mac_task); - struct mlx4_en_dev *mdev = priv->mdev; int err = 0; - mutex_lock(&mdev->state_lock); if (priv->port_up) { /* Remove old MAC and insert the new one */ err = mlx4_en_replace_mac(priv, priv->base_qpn, @@ -683,7 +671,26 @@ static void mlx4_en_do_set_mac(struct work_struct *work) } else en_dbg(HW, priv, "Port is down while registering mac, exiting...\n"); + return err; +} + +static int mlx4_en_set_mac(struct net_device *dev, void *addr) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct sockaddr *saddr = addr; + int err; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + + mutex_lock(&mdev->state_lock); + err = mlx4_en_do_set_mac(priv); mutex_unlock(&mdev->state_lock); + + return err; } static void mlx4_en_clear_list(struct net_device *dev) @@ -1348,7 +1355,7 @@ static void mlx4_en_do_get_stats(struct work_struct *work) queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } if (mdev->mac_removed[MLX4_MAX_PORTS + 1 - 
priv->port]) { - queue_work(mdev->workqueue, &priv->mac_task); + mlx4_en_do_set_mac(priv); mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port] = 0; } mutex_unlock(&mdev->state_lock); @@ -1632,6 +1639,17 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); + /* Remove flow steering rules for the port*/ + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ASSERT_RTNL(); + list_for_each_entry_safe(flow, tmp_flow, + &priv->ethtool_list, list) { + mlx4_flow_detach(mdev->dev, flow->id); + list_del(&flow->list); + } + } + mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ @@ -1652,17 +1670,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN)) mdev->mac_removed[priv->port] = 1; - /* Remove flow steering rules for the port*/ - if (mdev->dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - ASSERT_RTNL(); - list_for_each_entry_safe(flow, tmp_flow, - &priv->ethtool_list, list) { - mlx4_flow_detach(mdev->dev, flow->id); - list_del(&flow->list); - } - } - /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); @@ -1828,9 +1835,11 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) } #ifdef CONFIG_RFS_ACCEL - priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->mdev->dev->caps.comp_pool); - if (!priv->dev->rx_cpu_rmap) - goto err; + if (priv->mdev->dev->caps.comp_pool) { + priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->mdev->dev->caps.comp_pool); + if (!priv->dev->rx_cpu_rmap) + goto err; + } #endif return 0; @@ -1924,79 +1933,6 @@ static int mlx4_en_set_features(struct net_device *netdev, } -static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 flags) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_dev 
*mdev = priv->mdev->dev; - int err; - - if (!mlx4_is_mfunc(mdev)) - return -EOPNOTSUPP; - - /* Hardware does not support aging addresses, allow only - * permanent addresses if ndm_state is given - */ - if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { - en_info(priv, "Add FDB only supports static addresses\n"); - return -EINVAL; - } - - if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) - err = dev_uc_add_excl(dev, addr); - else if (is_multicast_ether_addr(addr)) - err = dev_mc_add_excl(dev, addr); - else - err = -EINVAL; - - /* Only return duplicate errors if NLM_F_EXCL is set */ - if (err == -EEXIST && !(flags & NLM_F_EXCL)) - err = 0; - - return err; -} - -static int mlx4_en_fdb_del(struct ndmsg *ndm, - struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_dev *mdev = priv->mdev->dev; - int err; - - if (!mlx4_is_mfunc(mdev)) - return -EOPNOTSUPP; - - if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { - en_info(priv, "Del FDB only supports static addresses\n"); - return -EINVAL; - } - - if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) - err = dev_uc_del(dev, addr); - else if (is_multicast_ether_addr(addr)) - err = dev_mc_del(dev, addr); - else - err = -EINVAL; - - return err; -} - -static int mlx4_en_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, int idx) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_dev *mdev = priv->mdev->dev; - - if (mlx4_is_mfunc(mdev)) - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); - - return idx; -} - static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2018,9 +1954,6 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif - .ndo_fdb_add = mlx4_en_fdb_add, - .ndo_fdb_del = mlx4_en_fdb_del, - .ndo_fdb_dump = 
mlx4_en_fdb_dump, }; int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, @@ -2078,13 +2011,18 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->msg_enable = MLX4_EN_MSG_LEVEL; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); - INIT_WORK(&priv->mac_task, mlx4_en_do_set_mac); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); #ifdef CONFIG_MLX4_EN_DCB - if (!mlx4_is_slave(priv->mdev->dev)) - dev->dcbnl_ops = &mlx4_en_dcbnl_ops; + if (!mlx4_is_slave(priv->mdev->dev)) { + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { + dev->dcbnl_ops = &mlx4_en_dcbnl_ops; + } else { + en_info(priv, "enabling only PFC DCB ops\n"); + dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; + } + } #endif for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) @@ -2146,8 +2084,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH; dev->features = dev->hw_features | NETIF_F_HIGHDMA | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; dev->hw_features |= NETIF_F_LOOPBACK; if (mdev->dev->caps.steering_mode == diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c7f856308e1a..4006f8857cb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -673,7 +673,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) { u16 vid = be16_to_cpu(cqe->sl_vid); - __vlan_hwaccel_put_tag(gro_skb, vid); + __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); } if (dev->features & NETIF_F_RXHASH) @@ -716,7 +716,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if 
(be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) - __vlan_hwaccel_put_tag(skb, be16_to_cpu(cqe->sl_vid)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); /* Push it up the stack */ netif_receive_skb(skb); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 3488c6d9e6b5..2448f0d669e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -58,10 +58,9 @@ static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) /* build the pkt before xmit */ skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); - if (!skb) { - en_err(priv, "-LOOPBACK_TEST_XMIT- failed to create skb for xmit\n"); + if (!skb) return -ENOMEM; - } + skb_reserve(skb, NET_IP_ALIGN); ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 251ae2f93116..8e3123a1df88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -771,7 +771,7 @@ int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_slave_event_eq_info *event_eq = priv->mfunc.master.slave_state[slave].event_eq; u32 in_modifier = vhcr->in_modifier; - u32 eqn = in_modifier & 0x1FF; + u32 eqn = in_modifier & 0x3FF; u64 in_param = vhcr->in_param; int err = 0; int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 50917eb3013e..ab470d991ade 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -91,7 +91,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [ 8] = "P_Key violation counter", [ 9] = "Q_Key violation counter", [10] = "VMM", - [12] = "DPDP", + [12] = "Dual Port Different Protocol (DPDP) support", [15] = "Big LSO headers", [16] = "MW support", [17] = "APM 
support", @@ -109,6 +109,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [48] = "Counters support", + [53] = "Port ETS Scheduler support", + [55] = "Port link type sensing support", [59] = "Port management change event support", [61] = "64 byte EQE support", [62] = "64 byte CQE support", @@ -787,6 +789,14 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + /* turn off device-managed steering capability if not enabled */ + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { + MLX4_GET(field, outbox->buf, + QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + field &= 0x7f; + MLX4_PUT(outbox->buf, field, + QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d180bc46826a..16abde20e1fc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1555,7 +1555,7 @@ void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) { - u64 in_param; + u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, idx); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 52685524708d..ffc78d2cb0cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1125,28 +1125,11 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, return err; } -int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - u8 port, int block_mcast_loopback, - enum mlx4_protocol prot, u64 *reg_id) +int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], u8 port, + int block_mcast_loopback, + enum 
mlx4_protocol prot, u64 *reg_id) { - - switch (dev->caps.steering_mode) { - case MLX4_STEERING_MODE_A0: - if (prot == MLX4_PROT_ETH) - return 0; - - case MLX4_STEERING_MODE_B0: - if (prot == MLX4_PROT_ETH) - gid[7] |= (MLX4_MC_STEER << 1); - - if (mlx4_is_mfunc(dev)) - return mlx4_QP_ATTACH(dev, qp, gid, 1, - block_mcast_loopback, prot); - return mlx4_qp_attach_common(dev, qp, gid, - block_mcast_loopback, prot, - MLX4_MC_STEER); - - case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); @@ -1180,8 +1163,32 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], list_add_tail(&spec.list, &rule.list); return mlx4_flow_attach(dev, &rule, reg_id); - } +} +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + u8 port, int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; + + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_MC_STEER << 1); + + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 1, + block_mcast_loopback, prot); + return mlx4_qp_attach_common(dev, qp, gid, + block_mcast_loopback, prot, + MLX4_MC_STEER); + + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, + block_mcast_loopback, + prot, reg_id); default: return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index cf883345af88..252f4ba7f32c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1190,6 +1190,10 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer); +int 
mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], u8 port, + int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id); int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1235,7 +1239,7 @@ int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); static inline void set_param_l(u64 *arg, u32 val) { - *((u32 *)arg) = val; + *arg = (*arg & 0xffffffff00000000ULL) | (u64) val; } static inline void set_param_h(u64 *arg, u32 val) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index c313d7e943a9..d4cb5d3b28a2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -509,7 +509,6 @@ struct mlx4_en_priv { struct mlx4_en_cq rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; - struct work_struct mac_task; struct work_struct watchdog_task; struct work_struct linkstate_task; struct delayed_work stats_task; @@ -625,6 +624,7 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); #ifdef CONFIG_MLX4_EN_DCB extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; +extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; #endif int mlx4_en_setup_tc(struct net_device *dev, u8 up); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 602ca9bf78e4..f91719a08cba 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -183,7 +183,7 @@ u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) { - u64 in_param; + u64 in_param = 0; u64 out_param; int err; @@ -240,7 +240,7 @@ void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) { - u64 in_param; + u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) 
{ @@ -351,7 +351,7 @@ void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index) static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { - u64 in_param; + u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, index); @@ -374,7 +374,7 @@ int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { - u64 param; + u64 param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(¶m, index); @@ -395,7 +395,7 @@ void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) { - u64 in_param; + u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, index); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 1ac88637ad9d..00f223acada7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -101,7 +101,7 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) { - u64 in_param; + u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 719ead15e491..4b6aad39e72c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -32,6 +32,7 @@ #include <linux/errno.h> #include <linux/if_ether.h> +#include <linux/if_vlan.h> #include <linux/export.h> #include <linux/mlx4/cmd.h> @@ -175,7 +176,7 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { - u64 out_param; + u64 out_param = 0; int err; if (mlx4_is_mfunc(dev)) { @@ -222,7 +223,7 @@ EXPORT_SYMBOL_GPL(__mlx4_unregister_mac); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) { - u64 out_param; + u64 out_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&out_param, port); @@ -361,7 +362,7 @@ out: int mlx4_register_vlan(struct mlx4_dev 
*dev, u8 port, u16 vlan, int *index) { - u64 out_param; + u64 out_param = 0; int err; if (mlx4_is_mfunc(dev)) { @@ -406,7 +407,7 @@ out: void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) { - u64 in_param; + u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { @@ -517,7 +518,8 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, /* Mtu is configured as the max MTU among all the * the functions on the port. */ mtu = be16_to_cpu(gen_context->mtu); - mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port]); + mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] + + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); prev_mtu = slave_st->mtu[port]; slave_st->mtu[port] = mtu; if (mtu > master->max_mtu[port]) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 81e2abe07bbb..e891b058c1be 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -222,7 +222,7 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base) { - u64 in_param; + u64 in_param = 0; u64 out_param; int err; @@ -255,7 +255,7 @@ void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) { - u64 in_param; + u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { @@ -319,7 +319,7 @@ err_out: static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { - u64 param; + u64 param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(¶m, qpn); @@ -344,7 +344,7 @@ void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) { - u64 in_param; + u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, qpn); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 083fb48dc3d7..f2d64435d8ef 100644 --- 
a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -75,6 +75,7 @@ struct res_gid { u8 gid[16]; enum mlx4_protocol prot; enum mlx4_steer_type steer; + u64 reg_id; }; enum res_qp_states { @@ -99,6 +100,7 @@ struct res_qp { struct list_head mcg_list; spinlock_t mcg_spl; int local_qpn; + atomic_t ref_count; }; enum res_mtt_states { @@ -197,6 +199,7 @@ enum res_fs_rule_states { struct res_fs_rule { struct res_common com; + int qpn; }; static void *res_tracker_lookup(struct rb_root *root, u64 res_id) @@ -355,7 +358,7 @@ static int mpt_mask(struct mlx4_dev *dev) return dev->caps.num_mpts - 1; } -static void *find_res(struct mlx4_dev *dev, int res_id, +static void *find_res(struct mlx4_dev *dev, u64 res_id, enum mlx4_resource type) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -447,6 +450,7 @@ static struct res_common *alloc_qp_tr(int id) ret->local_qpn = id; INIT_LIST_HEAD(&ret->mcg_list); spin_lock_init(&ret->mcg_spl); + atomic_set(&ret->ref_count, 0); return &ret->com; } @@ -554,7 +558,7 @@ static struct res_common *alloc_xrcdn_tr(int id) return &ret->com; } -static struct res_common *alloc_fs_rule_tr(u64 id) +static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) { struct res_fs_rule *ret; @@ -564,7 +568,7 @@ static struct res_common *alloc_fs_rule_tr(u64 id) ret->com.res_id = id; ret->com.state = RES_FS_RULE_ALLOCATED; - + ret->qpn = qpn; return &ret->com; } @@ -602,7 +606,7 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, ret = alloc_xrcdn_tr(id); break; case RES_FS_RULE: - ret = alloc_fs_rule_tr(id); + ret = alloc_fs_rule_tr(id, extra); break; default: return NULL; @@ -671,10 +675,14 @@ undo: static int remove_qp_ok(struct res_qp *res) { - if (res->com.state == RES_QP_BUSY) + if (res->com.state == RES_QP_BUSY || atomic_read(&res->ref_count) || + !list_empty(&res->mcg_list)) { + pr_err("resource tracker: fail to remove qp, state %d, ref_count %d\n", + 
res->com.state, atomic_read(&res->ref_count)); return -EBUSY; - else if (res->com.state != RES_QP_RESERVED) + } else if (res->com.state != RES_QP_RESERVED) { return -EPERM; + } return 0; } @@ -2927,7 +2935,7 @@ static struct res_gid *find_gid(struct mlx4_dev *dev, int slave, static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, - enum mlx4_steer_type steer) + enum mlx4_steer_type steer, u64 reg_id) { struct res_gid *res; int err; @@ -2944,6 +2952,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, memcpy(res->gid, gid, 16); res->prot = prot; res->steer = steer; + res->reg_id = reg_id; list_add_tail(&res->list, &rqp->mcg_list); err = 0; } @@ -2954,7 +2963,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, - enum mlx4_steer_type steer) + enum mlx4_steer_type steer, u64 *reg_id) { struct res_gid *res; int err; @@ -2964,6 +2973,7 @@ static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, if (!res || res->prot != prot || res->steer != steer) err = -EINVAL; else { + *reg_id = res->reg_id; list_del(&res->list); kfree(res); err = 0; @@ -2973,6 +2983,37 @@ static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, return err; } +static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_loopback, enum mlx4_protocol prot, + enum mlx4_steer_type type, u64 *reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_trans_to_dmfs_attach(dev, qp, gid, gid[5], + block_loopback, prot, + reg_id); + case MLX4_STEERING_MODE_B0: + return mlx4_qp_attach_common(dev, qp, gid, + block_loopback, prot, type); + default: + return -EINVAL; + } +} + +static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol prot, enum 
mlx4_steer_type type, + u64 reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_flow_detach(dev, reg_id); + case MLX4_STEERING_MODE_B0: + return mlx4_qp_detach_common(dev, qp, gid, prot, type); + default: + return -EINVAL; + } +} + int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -2985,6 +3026,7 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, int err; int qpn; struct res_qp *rqp; + u64 reg_id = 0; int attach = vhcr->op_modifier; int block_loopback = vhcr->in_modifier >> 31; u8 steer_type_mask = 2; @@ -2997,30 +3039,32 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, qp.qpn = qpn; if (attach) { - err = add_mcg_res(dev, slave, rqp, gid, prot, type); - if (err) + err = qp_attach(dev, &qp, gid, block_loopback, prot, + type, ®_id); + if (err) { + pr_err("Fail to attach rule to qp 0x%x\n", qpn); goto ex_put; - - err = mlx4_qp_attach_common(dev, &qp, gid, - block_loopback, prot, type); + } + err = add_mcg_res(dev, slave, rqp, gid, prot, type, reg_id); if (err) - goto ex_rem; + goto ex_detach; } else { - err = rem_mcg_res(dev, slave, rqp, gid, prot, type); + err = rem_mcg_res(dev, slave, rqp, gid, prot, type, ®_id); if (err) goto ex_put; - err = mlx4_qp_detach_common(dev, &qp, gid, prot, type); - } + err = qp_detach(dev, &qp, gid, prot, type, reg_id); + if (err) + pr_err("Fail to detach rule from qp 0x%x reg_id = 0x%llx\n", + qpn, reg_id); + } put_res(dev, slave, qpn, RES_QP); - return 0; + return err; -ex_rem: - /* ignore error return below, already in error */ - (void) rem_mcg_res(dev, slave, rqp, gid, prot, type); +ex_detach: + qp_detach(dev, &qp, gid, prot, type, reg_id); ex_put: put_res(dev, slave, qpn, RES_QP); - return err; } @@ -3121,6 +3165,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; int qpn; + struct 
res_qp *rqp; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; @@ -3131,7 +3176,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; - err = get_res(dev, slave, qpn, RES_QP, NULL); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); return err; @@ -3172,14 +3217,16 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, if (err) goto err_put; - err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + goto err_put; } + atomic_inc(&rqp->ref_count); err_put: put_res(dev, slave, qpn, RES_QP); return err; @@ -3192,20 +3239,35 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { int err; + struct res_qp *rqp; + struct res_fs_rule *rrule; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; + err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); + if (err) + return err; + /* Release the rule form busy state before removal */ + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + if (err) + return err; + err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to remove flow steering resources.\n "); - return err; + goto out; } err = mlx4_cmd(dev, vhcr->in_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (!err) + atomic_dec(&rqp->ref_count); +out: + put_res(dev, slave, rrule->qpn, RES_QP); return err; } @@ -3238,9 +3300,16 @@ 
static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp) struct mlx4_qp qp; /* dummy for calling attach/detach */ list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) { - qp.qpn = rqp->local_qpn; - (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, rgid->prot, - rgid->steer); + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + mlx4_flow_detach(dev, rgid->reg_id); + break; + case MLX4_STEERING_MODE_B0: + qp.qpn = rqp->local_qpn; + (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, + rgid->prot, rgid->steer); + break; + } list_del(&rgid->list); kfree(rgid); } @@ -3803,6 +3872,7 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); /*VLAN*/ rem_slave_macs(dev, slave); + rem_slave_fs_rule(dev, slave); rem_slave_qps(dev, slave); rem_slave_srqs(dev, slave); rem_slave_cqs(dev, slave); @@ -3811,6 +3881,5 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) rem_slave_mtts(dev, slave); rem_slave_counters(dev, slave); rem_slave_xrcdns(dev, slave); - rem_slave_fs_rule(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index feda6c00829f..e329fe1f11b7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -149,7 +149,7 @@ void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) { - u64 in_param; + u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, srqn); diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c index 07a6ebc47c92..b6c60fdef4ff 100644 --- a/drivers/net/ethernet/micrel/ks8695net.c +++ b/drivers/net/ethernet/micrel/ks8695net.c @@ -1622,25 +1622,7 @@ static struct platform_driver ks8695_driver = { .resume = 
ks8695_drv_resume, }; -/* Module interface */ - -static int __init -ks8695_init(void) -{ - printk(KERN_INFO "%s Ethernet driver, V%s\n", - MODULENAME, MODULEVERSION); - - return platform_driver_register(&ks8695_driver); -} - -static void __exit -ks8695_cleanup(void) -{ - platform_driver_unregister(&ks8695_driver); -} - -module_init(ks8695_init); -module_exit(ks8695_cleanup); +module_platform_driver(ks8695_driver); MODULE_AUTHOR("Simtec Electronics"); MODULE_DESCRIPTION("Micrel KS8695 (Centaur) Ethernet driver"); diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 33bcb63d56a2..727b546a9eb8 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -528,7 +528,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) for (; rxfc != 0; rxfc--) { rxh = ks8851_rdreg32(ks, KS_RXFHSR); rxstat = rxh & 0xffff; - rxlen = rxh >> 16; + rxlen = (rxh >> 16) & 0xfff; netif_dbg(ks, rx_status, ks->netdev, "rx: stat 0x%04x, len 0x%04x\n", rxstat, rxlen); @@ -1364,37 +1364,37 @@ static int ks8851_read_selftest(struct ks8851_net *ks) /* driver bus management functions */ -#ifdef CONFIG_PM -static int ks8851_suspend(struct spi_device *spi, pm_message_t state) +#ifdef CONFIG_PM_SLEEP + +static int ks8851_suspend(struct device *dev) { - struct ks8851_net *ks = dev_get_drvdata(&spi->dev); - struct net_device *dev = ks->netdev; + struct ks8851_net *ks = dev_get_drvdata(dev); + struct net_device *netdev = ks->netdev; - if (netif_running(dev)) { - netif_device_detach(dev); - ks8851_net_stop(dev); + if (netif_running(netdev)) { + netif_device_detach(netdev); + ks8851_net_stop(netdev); } return 0; } -static int ks8851_resume(struct spi_device *spi) +static int ks8851_resume(struct device *dev) { - struct ks8851_net *ks = dev_get_drvdata(&spi->dev); - struct net_device *dev = ks->netdev; + struct ks8851_net *ks = dev_get_drvdata(dev); + struct net_device *netdev = ks->netdev; - if (netif_running(dev)) { - 
ks8851_net_open(dev); - netif_device_attach(dev); + if (netif_running(netdev)) { + ks8851_net_open(netdev); + netif_device_attach(netdev); } return 0; } -#else -#define ks8851_suspend NULL -#define ks8851_resume NULL #endif +static SIMPLE_DEV_PM_OPS(ks8851_pm_ops, ks8851_suspend, ks8851_resume); + static int ks8851_probe(struct spi_device *spi) { struct net_device *ndev; @@ -1456,7 +1456,7 @@ static int ks8851_probe(struct spi_device *spi) SET_ETHTOOL_OPS(ndev, &ks8851_ethtool_ops); SET_NETDEV_DEV(ndev, &spi->dev); - dev_set_drvdata(&spi->dev, ks); + spi_set_drvdata(spi, ks); ndev->if_port = IF_PORT_100BASET; ndev->netdev_ops = &ks8851_netdev_ops; @@ -1516,7 +1516,7 @@ err_irq: static int ks8851_remove(struct spi_device *spi) { - struct ks8851_net *priv = dev_get_drvdata(&spi->dev); + struct ks8851_net *priv = spi_get_drvdata(spi); if (netif_msg_drv(priv)) dev_info(&spi->dev, "remove\n"); @@ -1532,25 +1532,12 @@ static struct spi_driver ks8851_driver = { .driver = { .name = "ks8851", .owner = THIS_MODULE, + .pm = &ks8851_pm_ops, }, .probe = ks8851_probe, .remove = ks8851_remove, - .suspend = ks8851_suspend, - .resume = ks8851_resume, }; - -static int __init ks8851_init(void) -{ - return spi_register_driver(&ks8851_driver); -} - -static void __exit ks8851_exit(void) -{ - spi_unregister_driver(&ks8851_driver); -} - -module_init(ks8851_init); -module_exit(ks8851_exit); +module_spi_driver(ks8851_driver); MODULE_DESCRIPTION("KS8851 Network driver"); MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>"); diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index a343066f7b43..ddaf138ce0d4 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -792,20 +792,35 @@ static void ks_rcv(struct ks_net *ks, struct net_device *netdev) frame_hdr = ks->frame_head_info; while (ks->frame_cnt--) { + if (unlikely(!(frame_hdr->sts & RXFSHR_RXFV) || + frame_hdr->len >= RX_BUF_SIZE || + frame_hdr->len <= 
0)) { + + /* discard an invalid packet */ + ks_wrreg16(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_RRXEF)); + netdev->stats.rx_dropped++; + if (!(frame_hdr->sts & RXFSHR_RXFV)) + netdev->stats.rx_frame_errors++; + else + netdev->stats.rx_length_errors++; + frame_hdr++; + continue; + } + skb = netdev_alloc_skb(netdev, frame_hdr->len + 16); - if (likely(skb && (frame_hdr->sts & RXFSHR_RXFV) && - (frame_hdr->len < RX_BUF_SIZE) && frame_hdr->len)) { + if (likely(skb)) { skb_reserve(skb, 2); /* read data block including CRC 4 bytes */ ks_read_qmu(ks, (u16 *)skb->data, frame_hdr->len); - skb_put(skb, frame_hdr->len); + skb_put(skb, frame_hdr->len - 4); skb->protocol = eth_type_trans(skb, netdev); netif_rx(skb); + /* exclude CRC size */ + netdev->stats.rx_bytes += frame_hdr->len - 4; + netdev->stats.rx_packets++; } else { - pr_err("%s: err:skb alloc\n", __func__); ks_wrreg16(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_RRXEF)); - if (skb) - dev_kfree_skb_irq(skb); + netdev->stats.rx_dropped++; } frame_hdr++; } @@ -877,6 +892,8 @@ static irqreturn_t ks_irq(int irq, void *pw) ks_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK); } + if (unlikely(status & IRQ_RXOI)) + ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); return IRQ_HANDLED; @@ -1015,6 +1032,9 @@ static int ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) if (likely(ks_tx_fifo_space(ks) >= skb->len + 12)) { ks_write_qmu(ks, skb->data, skb->len); + /* add tx statistics */ + netdev->stats.tx_bytes += skb->len; + netdev->stats.tx_packets++; dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 5d98a9f7bfc7..c7b40aa21f22 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1566,7 +1566,7 @@ static int enc28j60_probe(struct spi_device *spi) INIT_WORK(&priv->setrx_work, enc28j60_setrx_work_handler); 
INIT_WORK(&priv->irq_work, enc28j60_irq_work_handler); INIT_WORK(&priv->restart_work, enc28j60_restart_work_handler); - dev_set_drvdata(&spi->dev, priv); /* spi to priv reference */ + spi_set_drvdata(spi, priv); /* spi to priv reference */ SET_NETDEV_DEV(dev, &spi->dev); if (!enc28j60_chipset_init(dev)) { @@ -1618,7 +1618,7 @@ error_alloc: static int enc28j60_remove(struct spi_device *spi) { - struct enc28j60_net *priv = dev_get_drvdata(&spi->dev); + struct enc28j60_net *priv = spi_get_drvdata(spi); if (netif_msg_drv(priv)) printk(KERN_DEBUG DRV_NAME ": remove\n"); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 4f9937e026e5..7be9788ed0f6 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1281,7 +1281,8 @@ myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb) va = addr; va += MXGEFW_PAD; veh = (struct vlan_ethhdr *)va; - if ((dev->features & NETIF_F_HW_VLAN_RX) == NETIF_F_HW_VLAN_RX && + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) == + NETIF_F_HW_VLAN_CTAG_RX && veh->h_vlan_proto == htons(ETH_P_8021Q)) { /* fixup csum if needed */ if (skb->ip_summed == CHECKSUM_COMPLETE) { @@ -1289,7 +1290,7 @@ myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb) skb->csum = csum_sub(skb->csum, vsum); } /* pop tag */ - __vlan_hwaccel_put_tag(skb, ntohs(veh->h_vlan_TCI)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(veh->h_vlan_TCI)); memmove(va + VLAN_HLEN, va, 2 * ETH_ALEN); skb->len -= VLAN_HLEN; skb->data_len -= VLAN_HLEN; @@ -3592,10 +3593,9 @@ static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry); ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes, &ss->rx_done.bus, - GFP_KERNEL); + GFP_KERNEL | __GFP_ZERO); if (ss->rx_done.entry == NULL) goto abort; - memset(ss->rx_done.entry, 0, bytes); bytes = 
sizeof(*ss->fw_stats); ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes, &ss->fw_stats_bus, @@ -3888,8 +3888,8 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->mtu = myri10ge_initial_mtu; netdev->hw_features = mgp->features | NETIF_F_RXCSUM; - /* fake NETIF_F_HW_VLAN_RX for good GRO performance */ - netdev->hw_features |= NETIF_F_HW_VLAN_RX; + /* fake NETIF_F_HW_VLAN_CTAG_RX for good GRO performance */ + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->features = netdev->hw_features; diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c index b0b361546365..c20766c2f65b 100644 --- a/drivers/net/ethernet/natsemi/jazzsonic.c +++ b/drivers/net/ethernet/natsemi/jazzsonic.c @@ -175,13 +175,13 @@ static int sonic_probe1(struct net_device *dev) /* Allocate the entire chunk of memory for the descriptors. Note that this cannot cross a 64K boundary. */ - if ((lp->descriptors = dma_alloc_coherent(lp->device, - SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), - &lp->descriptors_laddr, GFP_KERNEL)) == NULL) { - printk(KERN_ERR "%s: couldn't alloc DMA memory for descriptors.\n", - dev_name(lp->device)); + lp->descriptors = dma_alloc_coherent(lp->device, + SIZEOF_SONIC_DESC * + SONIC_BUS_SCALE(lp->dma_bitmode), + &lp->descriptors_laddr, + GFP_KERNEL); + if (lp->descriptors == NULL) goto out; - } /* Now set up the pointers to point to the appropriate places */ lp->cda = lp->descriptors; diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 0ffde69c8d01..346a4e025c34 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -202,13 +202,13 @@ static int macsonic_init(struct net_device *dev) /* Allocate the entire chunk of memory for the descriptors. Note that this cannot cross a 64K boundary. 
*/ - if ((lp->descriptors = dma_alloc_coherent(lp->device, - SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), - &lp->descriptors_laddr, GFP_KERNEL)) == NULL) { - printk(KERN_ERR "%s: couldn't alloc DMA memory for descriptors.\n", - dev_name(lp->device)); + lp->descriptors = dma_alloc_coherent(lp->device, + SIZEOF_SONIC_DESC * + SONIC_BUS_SCALE(lp->dma_bitmode), + &lp->descriptors_laddr, + GFP_KERNEL); + if (lp->descriptors == NULL) return -ENOMEM; - } /* Now set up the pointers to point to the appropriate places */ lp->cda = lp->descriptors; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 77c070de621e..d3b47003a575 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -911,7 +911,7 @@ static void rx_irq(struct net_device *ndev) unsigned short tag; tag = ntohs(extsts & EXTSTS_VTG_MASK); - __vlan_hwaccel_put_tag(skb, tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); } #endif rx_rc = netif_rx(skb); @@ -2193,7 +2193,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, #ifdef NS83820_VLAN_ACCEL_SUPPORT /* We also support hardware vlan acceleration */ - ndev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + ndev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; #endif if (using_dac) { diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 46795e403467..1bd419dbda6d 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -424,7 +424,6 @@ static void sonic_rx(struct net_device *dev) /* Malloc up new buffer. 
*/ new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (new_skb == NULL) { - printk(KERN_ERR "%s: Memory squeeze, dropping packet.\n", dev->name); lp->stats.rx_dropped++; break; } diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 5e4748e855f6..c2e0256fe3df 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -197,14 +197,12 @@ static int __init sonic_probe1(struct net_device *dev) * We also allocate extra space for a pointer to allow freeing * this structure later on (in xtsonic_cleanup_module()). */ - lp->descriptors = - dma_alloc_coherent(lp->device, - SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), - &lp->descriptors_laddr, GFP_KERNEL); - + lp->descriptors = dma_alloc_coherent(lp->device, + SIZEOF_SONIC_DESC * + SONIC_BUS_SCALE(lp->dma_bitmode), + &lp->descriptors_laddr, + GFP_KERNEL); if (lp->descriptors == NULL) { - printk(KERN_ERR "%s: couldn't alloc DMA memory for " - " descriptors.\n", dev_name(lp->device)); err = -ENOMEM; goto out; } diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index bfd887382e19..51b00941302c 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -80,6 +80,7 @@ #include <linux/slab.h> #include <linux/prefetch.h> #include <net/tcp.h> +#include <net/checksum.h> #include <asm/div64.h> #include <asm/irq.h> @@ -7919,7 +7920,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO; dev->features |= dev->hw_features | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (sp->device_type & XFRAME_II_DEVICE) { dev->hw_features |= NETIF_F_UFO; if (ufo) @@ -8337,16 +8338,13 @@ static void update_L3L4_header(struct s2io_nic *sp, struct lro *lro) { struct iphdr *ip = lro->iph; struct tcphdr *tcp = lro->tcph; - __sum16 nchk; struct swStat *swstats 
= &sp->mac_control.stats_info->sw_stat; DBG_PRINT(INFO_DBG, "%s: Been here...\n", __func__); /* Update L3 header */ + csum_replace2(&ip->check, ip->tot_len, htons(lro->total_len)); ip->tot_len = htons(lro->total_len); - ip->check = 0; - nchk = ip_fast_csum((u8 *)lro->iph, ip->ihl); - ip->check = nchk; /* Update L4 header */ tcp->ack_seq = lro->tcp_ack; @@ -8557,7 +8555,7 @@ static void queue_rx_frame(struct sk_buff *skb, u16 vlan_tag) skb->protocol = eth_type_trans(skb, dev); if (vlan_tag && sp->vlan_strip_flag) - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); if (sp->config.napi) netif_receive_skb(skb); else diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 794444e09492..cbfaed5f2f8d 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -312,7 +312,7 @@ vxge_rx_complete(struct vxge_ring *ring, struct sk_buff *skb, u16 vlan, if (ext_info->vlan && ring->vlan_tag_strip == VXGE_HW_VPATH_RPA_STRIP_VLAN_TAG_ENABLE) - __vlan_hwaccel_put_tag(skb, ext_info->vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ext_info->vlan); napi_gro_receive(ring->napi_p, skb); vxge_debug_entryexit(VXGE_TRACE, @@ -3300,12 +3300,13 @@ static void vxge_tx_watchdog(struct net_device *dev) /** * vxge_vlan_rx_add_vid * @dev: net device pointer. + * @proto: vlan protocol * @vid: vid * * Add the vlan id to the devices vlan id table */ static int -vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +vxge_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct vxgedev *vdev = netdev_priv(dev); struct vxge_vpath *vpath; @@ -3323,14 +3324,15 @@ vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) } /** - * vxge_vlan_rx_add_vid + * vxge_vlan_rx_kill_vid * @dev: net device pointer. 
+ * @proto: vlan protocol * @vid: vid * * Remove the vlan id from the device's vlan id table */ static int -vxge_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +vxge_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct vxgedev *vdev = netdev_priv(dev); struct vxge_vpath *vpath; @@ -3415,12 +3417,12 @@ static int vxge_device_register(struct __vxge_hw_device *hldev, ndev->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; if (vdev->config.rth_steering != NO_STEERING) ndev->hw_features |= NETIF_F_RXHASH; ndev->features |= ndev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &vxge_netdev_ops; diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c index 63e7af44366f..cb9e63831500 100644 --- a/drivers/net/ethernet/netx-eth.c +++ b/drivers/net/ethernet/netx-eth.c @@ -152,8 +152,6 @@ static void netx_eth_receive(struct net_device *ndev) skb = netdev_alloc_skb(ndev, len); if (unlikely(skb == NULL)) { - printk(KERN_NOTICE "%s: Low memory, packet dropped.\n", - ndev->name); ndev->stats.rx_dropped++; return; } diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 162da8975b05..3df8287b7452 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -287,23 +287,16 @@ static int w90p910_init_desc(struct net_device *dev) ether = netdev_priv(dev); pdev = ether->pdev; - ether->tdesc = (struct tran_pdesc *) - dma_alloc_coherent(&pdev->dev, sizeof(struct tran_pdesc), - &ether->tdesc_phys, GFP_KERNEL); - - if (!ether->tdesc) { - dev_err(&pdev->dev, "Failed to allocate memory for tx desc\n"); + ether->tdesc = dma_alloc_coherent(&pdev->dev, sizeof(struct tran_pdesc), + &ether->tdesc_phys, GFP_KERNEL); + if (!ether->tdesc) return 
-ENOMEM; - } - - ether->rdesc = (struct recv_pdesc *) - dma_alloc_coherent(&pdev->dev, sizeof(struct recv_pdesc), - &ether->rdesc_phys, GFP_KERNEL); + ether->rdesc = dma_alloc_coherent(&pdev->dev, sizeof(struct recv_pdesc), + &ether->rdesc_phys, GFP_KERNEL); if (!ether->rdesc) { - dev_err(&pdev->dev, "Failed to allocate memory for rx desc\n"); dma_free_coherent(&pdev->dev, sizeof(struct tran_pdesc), - ether->tdesc, ether->tdesc_phys); + ether->tdesc, ether->tdesc_phys); return -ENOMEM; } @@ -737,7 +730,6 @@ static void netdev_rx(struct net_device *dev) data = ether->rdesc->recv_buf[ether->cur_rx]; skb = netdev_alloc_skb(dev, length + 2); if (!skb) { - dev_err(&pdev->dev, "get skb buffer error\n"); ether->stats.rx_dropped++; return; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 0b8de12bcbca..b003fe53c8e2 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2200,6 +2200,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ring_desc *start_tx; struct ring_desc *prev_tx; struct nv_skb_map *prev_tx_ctx; + struct nv_skb_map *tmp_tx_ctx = NULL, *start_tx_ctx = NULL; unsigned long flags; /* add fragments to entries count */ @@ -2261,12 +2262,31 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) do { prev_tx = put_tx; prev_tx_ctx = np->put_tx_ctx; + if (!start_tx_ctx) + start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx; + bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? 
NV_TX2_TSO_MAX_SIZE : frag_size; np->put_tx_ctx->dma = skb_frag_dma_map( &np->pci_dev->dev, frag, offset, bcnt, DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + + /* Unwind the mapped fragments */ + do { + nv_unmap_txskb(np, start_tx_ctx); + if (unlikely(tmp_tx_ctx++ == np->last_tx_ctx)) + tmp_tx_ctx = np->first_tx_ctx; + } while (tmp_tx_ctx != np->put_tx_ctx); + kfree_skb(skb); + np->put_tx_ctx = start_tx_ctx; + u64_stats_update_begin(&np->swstats_tx_syncp); + np->stat_tx_dropped++; + u64_stats_update_end(&np->swstats_tx_syncp); + return NETDEV_TX_OK; + } + np->put_tx_ctx->dma_len = bcnt; np->put_tx_ctx->dma_single = 0; put_tx->buf = cpu_to_le32(np->put_tx_ctx->dma); @@ -2327,7 +2347,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, struct ring_desc_ex *start_tx; struct ring_desc_ex *prev_tx; struct nv_skb_map *prev_tx_ctx; - struct nv_skb_map *start_tx_ctx; + struct nv_skb_map *start_tx_ctx = NULL; + struct nv_skb_map *tmp_tx_ctx = NULL; unsigned long flags; /* add fragments to entries count */ @@ -2392,11 +2413,29 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, prev_tx = put_tx; prev_tx_ctx = np->put_tx_ctx; bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? 
NV_TX2_TSO_MAX_SIZE : frag_size; + if (!start_tx_ctx) + start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx; np->put_tx_ctx->dma = skb_frag_dma_map( &np->pci_dev->dev, frag, offset, bcnt, DMA_TO_DEVICE); + + if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + + /* Unwind the mapped fragments */ + do { + nv_unmap_txskb(np, start_tx_ctx); + if (unlikely(tmp_tx_ctx++ == np->last_tx_ctx)) + tmp_tx_ctx = np->first_tx_ctx; + } while (tmp_tx_ctx != np->put_tx_ctx); + kfree_skb(skb); + np->put_tx_ctx = start_tx_ctx; + u64_stats_update_begin(&np->swstats_tx_syncp); + np->stat_tx_dropped++; + u64_stats_update_end(&np->swstats_tx_syncp); + return NETDEV_TX_OK; + } np->put_tx_ctx->dma_len = bcnt; np->put_tx_ctx->dma_single = 0; put_tx->bufhigh = cpu_to_le32(dma_high(np->put_tx_ctx->dma)); @@ -2922,15 +2961,15 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) vlanflags = le32_to_cpu(np->get_rx.ex->buflow); /* - * There's need to check for NETIF_F_HW_VLAN_RX here. - * Even if vlan rx accel is disabled, + * There's need to check for NETIF_F_HW_VLAN_CTAG_RX + * here. Even if vlan rx accel is disabled, * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set. 
*/ - if (dev->features & NETIF_F_HW_VLAN_RX && + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX && vlanflags & NV_RX3_VLAN_TAG_PRESENT) { u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK; - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } napi_gro_receive(&np->napi, skb); u64_stats_update_begin(&np->swstats_rx_syncp); @@ -4777,7 +4816,7 @@ static netdev_features_t nv_fix_features(struct net_device *dev, netdev_features_t features) { /* vlan is dependent on rx checksum offload */ - if (features & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX)) + if (features & (NETIF_F_HW_VLAN_CTAG_TX|NETIF_F_HW_VLAN_CTAG_RX)) features |= NETIF_F_RXCSUM; return features; @@ -4789,12 +4828,12 @@ static void nv_vlan_mode(struct net_device *dev, netdev_features_t features) spin_lock_irq(&np->lock); - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP; else np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP; - if (features & NETIF_F_HW_VLAN_TX) + if (features & NETIF_F_HW_VLAN_CTAG_TX) np->txrxctl_bits |= NVREG_TXRXCTL_VLANINS; else np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS; @@ -4831,7 +4870,7 @@ static int nv_set_features(struct net_device *dev, netdev_features_t features) spin_unlock_irq(&np->lock); } - if (changed & (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX)) + if (changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX)) nv_vlan_mode(dev, features); return 0; @@ -5025,7 +5064,6 @@ static int nv_loopback_test(struct net_device *dev) pkt_len = ETH_DATA_LEN; tx_skb = netdev_alloc_skb(dev, pkt_len); if (!tx_skb) { - netdev_err(dev, "netdev_alloc_skb() failed during loopback test\n"); ret = 0; goto out; } @@ -5667,7 +5705,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) np->vlanctl_bits = 0; if (id->driver_data & DEV_HAS_VLAN) { np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE; - dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; + dev->hw_features |= 
NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; } dev->features |= dev->hw_features; @@ -5958,7 +5997,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) dev->features & NETIF_F_HIGHDMA ? "highdma " : "", dev->features & (NETIF_F_IP_CSUM | NETIF_F_SG) ? "csum " : "", - dev->features & (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX) ? + dev->features & (NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX) ? "vlan " : "", dev->features & (NETIF_F_LOOPBACK) ? "loopback " : "", diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index c4122c86f829..55a5548d6add 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1409,9 +1409,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) dma_alloc_coherent(&pldat->pdev->dev, pldat->dma_buff_size, &dma_handle, GFP_KERNEL); - if (pldat->dma_buff_base_v == NULL) { - dev_err(&pdev->dev, "error getting DMA region.\n"); ret = -ENOMEM; goto err_out_free_irq; } @@ -1434,13 +1432,11 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Get MAC address from current HW setting (POR state is all zeros) */ __lpc_get_mac(pldat, ndev->dev_addr); -#ifdef CONFIG_OF_NET if (!is_valid_ether_addr(ndev->dev_addr)) { const char *macaddr = of_get_mac_address(pdev->dev.of_node); if (macaddr) memcpy(ndev->dev_addr, macaddr, ETH_ALEN); } -#endif if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); @@ -1472,7 +1468,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, ndev); - if (lpc_mii_init(pldat) != 0) + ret = lpc_mii_init(pldat); + if (ret) goto err_out_unregister_netdev; netdev_info(ndev, "LPC mac at 0x%08x irq %d\n", diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 39ab4d09faaa..0c1c65a9ce5e 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ 
b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1469,13 +1469,11 @@ pch_gbe_alloc_rx_buffers_pool(struct pch_gbe_adapter *adapter, size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY; rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size, - &rx_ring->rx_buff_pool_logic, - GFP_KERNEL); - if (!rx_ring->rx_buff_pool) { - pr_err("Unable to allocate memory for the receive pool buffer\n"); + &rx_ring->rx_buff_pool_logic, + GFP_KERNEL | __GFP_ZERO); + if (!rx_ring->rx_buff_pool) return -ENOMEM; - } - memset(rx_ring->rx_buff_pool, 0, size); + rx_ring->rx_buff_pool_size = size; for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; @@ -1726,9 +1724,9 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK) - skb->ip_summed = CHECKSUM_NONE; - else skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; napi_gro_receive(&adapter->napi, skb); (*work_done)++; @@ -1774,13 +1772,12 @@ int pch_gbe_setup_tx_resources(struct pch_gbe_adapter *adapter, tx_ring->size = tx_ring->count * (int)sizeof(struct pch_gbe_tx_desc); tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size, - &tx_ring->dma, GFP_KERNEL); + &tx_ring->dma, + GFP_KERNEL | __GFP_ZERO); if (!tx_ring->desc) { vfree(tx_ring->buffer_info); - pr_err("Unable to allocate memory for the transmit descriptor ring\n"); return -ENOMEM; } - memset(tx_ring->desc, 0, tx_ring->size); tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -1820,14 +1817,12 @@ int pch_gbe_setup_rx_resources(struct pch_gbe_adapter *adapter, rx_ring->size = rx_ring->count * (int)sizeof(struct pch_gbe_rx_desc); rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - + &rx_ring->dma, + GFP_KERNEL | __GFP_ZERO); if (!rx_ring->desc) { - pr_err("Unable to allocate memory for the receive descriptor ring\n"); vfree(rx_ring->buffer_info); return -ENOMEM; } - 
memset(rx_ring->desc, 0, rx_ring->size); rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; for (desNo = 0; desNo < rx_ring->count; desNo++) { @@ -2268,7 +2263,7 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu) if (err) { adapter->rx_buffer_len = old_rx_buffer_len; pch_gbe_up(adapter); - return -ENOMEM; + return err; } else { netdev->mtu = new_mtu; adapter->hw.mac.max_frame_size = max_frame; diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index b1cfbb75ff1e..a5f0b5da6149 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -441,12 +441,11 @@ static int pasemi_mac_setup_rx_resources(const struct net_device *dev) ring->buffers = dma_alloc_coherent(&mac->dma_pdev->dev, RX_RING_SIZE * sizeof(u64), - &ring->buf_dma, GFP_KERNEL); + &ring->buf_dma, + GFP_KERNEL | __GFP_ZERO); if (!ring->buffers) goto out_ring_desc; - memset(ring->buffers, 0, RX_RING_SIZE * sizeof(u64)); - write_dma_reg(PAS_DMA_RXCHAN_BASEL(chno), PAS_DMA_RXCHAN_BASEL_BRBL(ring->chan.ring_dma)); diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index a8669adecc97..0e1797295a48 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -35,6 +35,16 @@ config QLCNIC This driver supports QLogic QLE8240 and QLE8242 Converged Ethernet devices. +config QLCNIC_SRIOV + bool "QLOGIC QLCNIC 83XX family SR-IOV Support" + depends on QLCNIC && PCI_IOV + default y + ---help--- + This configuration parameter enables Single Root Input Output + Virtualization support for QLE83XX Converged Ethernet devices. + This allows for virtual function acceleration in virtualized + environments. 
+ config QLGE tristate "QLogic QLGE 10Gb Ethernet Driver Support" depends on PCI diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index eb3dfdbb642b..322a36b76727 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -955,9 +955,10 @@ typedef struct nx_mac_list_s { uint8_t mac_addr[ETH_ALEN+2]; } nx_mac_list_t; -struct nx_vlan_ip_list { +struct nx_ip_list { struct list_head list; __be32 ip_addr; + bool master; }; /* @@ -1605,7 +1606,7 @@ struct netxen_adapter { struct net_device *netdev; struct pci_dev *pdev; struct list_head mac_list; - struct list_head vlan_ip_list; + struct list_head ip_list; spinlock_t tx_clean_lock; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 4782dcfde736..7692dfd4f262 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -27,6 +27,7 @@ #include <linux/delay.h> #include <linux/slab.h> #include <linux/if_vlan.h> +#include <net/checksum.h> #include "netxen_nic.h" #include "netxen_nic_hw.h" @@ -1641,9 +1642,8 @@ netxen_process_lro(struct netxen_adapter *adapter, th = (struct tcphdr *)((skb->data + vhdr_len) + (iph->ihl << 2)); length = (iph->ihl << 2) + (th->doff << 2) + lro_length; + csum_replace2(&iph->check, iph->tot_len, htons(length)); iph->tot_len = htons(length); - iph->check = 0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); th->psh = push; th->seq = htonl(seq_number); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 501f49207da5..af951f343ff6 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -90,7 +90,7 @@ static irqreturn_t netxen_intr(int irq, void *data); static irqreturn_t 
netxen_msi_intr(int irq, void *data); static irqreturn_t netxen_msix_intr(int irq, void *data); -static void netxen_free_vlan_ip_list(struct netxen_adapter *); +static void netxen_free_ip_list(struct netxen_adapter *, bool); static void netxen_restore_indev_addr(struct net_device *dev, unsigned long); static struct rtnl_link_stats64 *netxen_nic_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); @@ -1345,7 +1345,7 @@ netxen_setup_netdev(struct netxen_adapter *adapter, } if (adapter->capabilities & NX_FW_CAPABILITY_FVLANTX) - netdev->hw_features |= NETIF_F_HW_VLAN_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; if (adapter->capabilities & NX_FW_CAPABILITY_HW_LRO) netdev->hw_features |= NETIF_F_LRO; @@ -1450,7 +1450,7 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->tx_clean_lock); INIT_LIST_HEAD(&adapter->mac_list); - INIT_LIST_HEAD(&adapter->vlan_ip_list); + INIT_LIST_HEAD(&adapter->ip_list); err = netxen_setup_pci_map(adapter); if (err) @@ -1585,7 +1585,7 @@ static void netxen_nic_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->tx_timeout_task); - netxen_free_vlan_ip_list(adapter); + netxen_free_ip_list(adapter, false); netxen_nic_detach(adapter); nx_decr_dev_ref_cnt(adapter); @@ -3137,62 +3137,77 @@ netxen_destip_supported(struct netxen_adapter *adapter) } static void -netxen_free_vlan_ip_list(struct netxen_adapter *adapter) +netxen_free_ip_list(struct netxen_adapter *adapter, bool master) { - struct nx_vlan_ip_list *cur; - struct list_head *head = &adapter->vlan_ip_list; + struct nx_ip_list *cur, *tmp_cur; - while (!list_empty(head)) { - cur = list_entry(head->next, struct nx_vlan_ip_list, list); - netxen_config_ipaddr(adapter, cur->ip_addr, NX_IP_DOWN); - list_del(&cur->list); - kfree(cur); + list_for_each_entry_safe(cur, tmp_cur, &adapter->ip_list, list) { + if (master) { + if (cur->master) { + netxen_config_ipaddr(adapter, cur->ip_addr, + NX_IP_DOWN); + list_del(&cur->list); + 
kfree(cur); + } + } else { + netxen_config_ipaddr(adapter, cur->ip_addr, NX_IP_DOWN); + list_del(&cur->list); + kfree(cur); + } } - } -static void -netxen_list_config_vlan_ip(struct netxen_adapter *adapter, + +static bool +netxen_list_config_ip(struct netxen_adapter *adapter, struct in_ifaddr *ifa, unsigned long event) { struct net_device *dev; - struct nx_vlan_ip_list *cur, *tmp_cur; + struct nx_ip_list *cur, *tmp_cur; struct list_head *head; + bool ret = false; dev = ifa->ifa_dev ? ifa->ifa_dev->dev : NULL; if (dev == NULL) - return; - - if (!is_vlan_dev(dev)) - return; + goto out; switch (event) { case NX_IP_UP: - list_for_each(head, &adapter->vlan_ip_list) { - cur = list_entry(head, struct nx_vlan_ip_list, list); + list_for_each(head, &adapter->ip_list) { + cur = list_entry(head, struct nx_ip_list, list); if (cur->ip_addr == ifa->ifa_address) - return; + goto out; } - cur = kzalloc(sizeof(struct nx_vlan_ip_list), GFP_ATOMIC); + cur = kzalloc(sizeof(struct nx_ip_list), GFP_ATOMIC); if (cur == NULL) - return; - + goto out; + if (dev->priv_flags & IFF_802_1Q_VLAN) + dev = vlan_dev_real_dev(dev); + cur->master = !!netif_is_bond_master(dev); cur->ip_addr = ifa->ifa_address; - list_add_tail(&cur->list, &adapter->vlan_ip_list); + list_add_tail(&cur->list, &adapter->ip_list); + netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP); + ret = true; break; case NX_IP_DOWN: list_for_each_entry_safe(cur, tmp_cur, - &adapter->vlan_ip_list, list) { + &adapter->ip_list, list) { if (cur->ip_addr == ifa->ifa_address) { list_del(&cur->list); kfree(cur); + netxen_config_ipaddr(adapter, ifa->ifa_address, + NX_IP_DOWN); + ret = true; break; } } } +out: + return ret; } + static void netxen_config_indev_addr(struct netxen_adapter *adapter, struct net_device *dev, unsigned long event) @@ -3209,14 +3224,10 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, for_ifa(indev) { switch (event) { case NETDEV_UP: - netxen_config_ipaddr(adapter, - ifa->ifa_address, NX_IP_UP); - 
netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP); + netxen_list_config_ip(adapter, ifa, NX_IP_UP); break; case NETDEV_DOWN: - netxen_config_ipaddr(adapter, - ifa->ifa_address, NX_IP_DOWN); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN); + netxen_list_config_ip(adapter, ifa, NX_IP_DOWN); break; default: break; @@ -3231,23 +3242,78 @@ netxen_restore_indev_addr(struct net_device *netdev, unsigned long event) { struct netxen_adapter *adapter = netdev_priv(netdev); - struct nx_vlan_ip_list *pos, *tmp_pos; + struct nx_ip_list *pos, *tmp_pos; unsigned long ip_event; ip_event = (event == NETDEV_UP) ? NX_IP_UP : NX_IP_DOWN; netxen_config_indev_addr(adapter, netdev, event); - list_for_each_entry_safe(pos, tmp_pos, &adapter->vlan_ip_list, list) { + list_for_each_entry_safe(pos, tmp_pos, &adapter->ip_list, list) { netxen_config_ipaddr(adapter, pos->ip_addr, ip_event); } } +static inline bool +netxen_config_checkdev(struct net_device *dev) +{ + struct netxen_adapter *adapter; + + if (!is_netxen_netdev(dev)) + return false; + adapter = netdev_priv(dev); + if (!adapter) + return false; + if (!netxen_destip_supported(adapter)) + return false; + if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) + return false; + + return true; +} + +/** + * netxen_config_master - configure addresses based on master + * @dev: netxen device + * @event: netdev event + */ +static void netxen_config_master(struct net_device *dev, unsigned long event) +{ + struct net_device *master, *slave; + struct netxen_adapter *adapter = netdev_priv(dev); + + rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + /* + * This is the case where the netxen nic is being + * enslaved and is dev_open()ed in bond_enslave() + * Now we should program the bond's (and its vlans') + * addresses in the netxen NIC. 
+ */ + if (master && netif_is_bond_master(master) && + !netif_is_bond_slave(dev)) { + netxen_config_indev_addr(adapter, master, event); + for_each_netdev_rcu(&init_net, slave) + if (slave->priv_flags & IFF_802_1Q_VLAN && + vlan_dev_real_dev(slave) == master) + netxen_config_indev_addr(adapter, slave, event); + } + rcu_read_unlock(); + /* + * This is the case where the netxen nic is being + * released and is dev_close()ed in bond_release() + * just before IFF_BONDING is stripped. + */ + if (!master && dev->priv_flags & IFF_BONDING) + netxen_free_ip_list(adapter, true); +} + static int netxen_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netxen_adapter *adapter; struct net_device *dev = (struct net_device *)ptr; struct net_device *orig_dev = dev; + struct net_device *slave; recheck: if (dev == NULL) @@ -3257,19 +3323,28 @@ recheck: dev = vlan_dev_real_dev(dev); goto recheck; } - - if (!is_netxen_netdev(dev)) - goto done; - - adapter = netdev_priv(dev); - - if (!adapter) - goto done; - - if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) - goto done; - - netxen_config_indev_addr(adapter, orig_dev, event); + if (event == NETDEV_UP || event == NETDEV_DOWN) { + /* If this is a bonding device, look for netxen-based slaves*/ + if (netif_is_bond_master(dev)) { + rcu_read_lock(); + for_each_netdev_in_bond_rcu(dev, slave) { + if (!netxen_config_checkdev(slave)) + continue; + adapter = netdev_priv(slave); + netxen_config_indev_addr(adapter, + orig_dev, event); + } + rcu_read_unlock(); + } else { + if (!netxen_config_checkdev(dev)) + goto done; + adapter = netdev_priv(dev); + /* Act only if the actual netxen is the target */ + if (orig_dev == dev) + netxen_config_master(dev, event); + netxen_config_indev_addr(adapter, orig_dev, event); + } + } done: return NOTIFY_DONE; } @@ -3279,12 +3354,12 @@ netxen_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netxen_adapter *adapter; - struct net_device *dev; - + struct 
net_device *dev, *slave; struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + unsigned long ip_event; dev = ifa->ifa_dev ? ifa->ifa_dev->dev : NULL; - + ip_event = (event == NETDEV_UP) ? NX_IP_UP : NX_IP_DOWN; recheck: if (dev == NULL) goto done; @@ -3293,31 +3368,24 @@ recheck: dev = vlan_dev_real_dev(dev); goto recheck; } - - if (!is_netxen_netdev(dev)) - goto done; - - adapter = netdev_priv(dev); - - if (!adapter || !netxen_destip_supported(adapter)) - goto done; - - if (adapter->is_up != NETXEN_ADAPTER_UP_MAGIC) - goto done; - - switch (event) { - case NETDEV_UP: - netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_UP); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_UP); - break; - case NETDEV_DOWN: - netxen_config_ipaddr(adapter, ifa->ifa_address, NX_IP_DOWN); - netxen_list_config_vlan_ip(adapter, ifa, NX_IP_DOWN); - break; - default: - break; + if (event == NETDEV_UP || event == NETDEV_DOWN) { + /* If this is a bonding device, look for netxen-based slaves*/ + if (netif_is_bond_master(dev)) { + rcu_read_lock(); + for_each_netdev_in_bond_rcu(dev, slave) { + if (!netxen_config_checkdev(slave)) + continue; + adapter = netdev_priv(slave); + netxen_list_config_ip(adapter, ifa, ip_event); + } + rcu_read_unlock(); + } else { + if (!netxen_config_checkdev(dev)) + goto done; + adapter = netdev_priv(dev); + netxen_list_config_ip(adapter, ifa, ip_event); + } } - done: return NOTIFY_DONE; } @@ -3334,7 +3402,7 @@ static void netxen_restore_indev_addr(struct net_device *dev, unsigned long event) { } static void -netxen_free_vlan_ip_list(struct netxen_adapter *adapter) +netxen_free_ip_list(struct netxen_adapter *adapter, bool master) { } #endif diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 8fd38cb6d26a..91a8fcd6c246 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -312,7 +312,6 @@ static void ql_release_to_lrg_buf_free_list(struct ql3_adapter *qdev, lrg_buf_cb->skb = 
netdev_alloc_skb(qdev->ndev, qdev->lrg_buffer_len); if (unlikely(!lrg_buf_cb->skb)) { - netdev_err(qdev->ndev, "failed netdev_alloc_skb()\n"); qdev->lrg_buf_skb_check++; } else { /* diff --git a/drivers/net/ethernet/qlogic/qlcnic/Makefile b/drivers/net/ethernet/qlogic/qlcnic/Makefile index 7722a203e388..4b1fb3faa3b7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/Makefile +++ b/drivers/net/ethernet/qlogic/qlcnic/Makefile @@ -8,4 +8,6 @@ qlcnic-y := qlcnic_hw.o qlcnic_main.o qlcnic_init.o \ qlcnic_ethtool.o qlcnic_ctx.o qlcnic_io.o \ qlcnic_sysfs.o qlcnic_minidump.o qlcnic_83xx_hw.o \ qlcnic_83xx_init.o qlcnic_83xx_vnic.o \ - qlcnic_minidump.o + qlcnic_minidump.o qlcnic_sriov_common.o + +qlcnic-$(CONFIG_QLCNIC_SRIOV) += qlcnic_sriov_pf.o diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index ba3c72fce1f2..8d02dd75c9a2 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -37,9 +37,9 @@ #include "qlcnic_83xx_hw.h" #define _QLCNIC_LINUX_MAJOR 5 -#define _QLCNIC_LINUX_MINOR 1 -#define _QLCNIC_LINUX_SUBVERSION 35 -#define QLCNIC_LINUX_VERSIONID "5.1.35" +#define _QLCNIC_LINUX_MINOR 2 +#define _QLCNIC_LINUX_SUBVERSION 41 +#define QLCNIC_LINUX_VERSIONID "5.2.41" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) @@ -449,6 +449,7 @@ struct qlcnic_hardware_context { struct qlc_83xx_idc idc; struct qlc_83xx_fw_info fw_info; struct qlcnic_intrpt_config *intr_tbl; + struct qlcnic_sriov *sriov; u32 *reg_tbl; u32 *ext_reg_tbl; u32 mbox_aen[QLC_83XX_MBX_AEN_CNT]; @@ -896,6 +897,7 @@ struct qlcnic_ipaddr { #define QLCNIC_FW_RESET_OWNER 0x2000 #define QLCNIC_FW_HANG 0x4000 #define QLCNIC_FW_LRO_MSS_CAP 0x8000 +#define QLCNIC_TX_INTR_SHARED 0x10000 #define QLCNIC_IS_MSI_FAMILY(adapter) \ ((adapter)->flags & (QLCNIC_MSI_ENABLED | QLCNIC_MSIX_ENABLED)) @@ -914,7 +916,10 @@ 
struct qlcnic_ipaddr { #define __QLCNIC_AER 5 #define __QLCNIC_DIAG_RES_ALLOC 6 #define __QLCNIC_LED_ENABLE 7 -#define __QLCNIC_ELB_INPROGRESS 8 +#define __QLCNIC_ELB_INPROGRESS 8 +#define __QLCNIC_SRIOV_ENABLE 10 +#define __QLCNIC_SRIOV_CAPABLE 11 +#define __QLCNIC_MBX_POLL_ENABLE 12 #define QLCNIC_INTERRUPT_TEST 1 #define QLCNIC_LOOPBACK_TEST 2 @@ -935,7 +940,7 @@ struct qlcnic_ipaddr { struct qlcnic_filter { struct hlist_node fnode; u8 faddr[ETH_ALEN]; - __le16 vlan_id; + u16 vlan_id; unsigned long ftime; }; @@ -972,9 +977,11 @@ struct qlcnic_adapter { u8 fw_fail_cnt; u8 tx_timeo_cnt; u8 need_fw_reset; + u8 reset_ctx_cnt; u16 is_up; - u16 pvid; + u16 rx_pvid; + u16 tx_pvid; u32 irq; u32 heartbeat; @@ -1006,9 +1013,11 @@ struct qlcnic_adapter { struct workqueue_struct *qlcnic_wq; struct delayed_work fw_work; struct delayed_work idc_aen_work; + struct delayed_work mbx_poll_work; struct qlcnic_filter_hash fhash; struct qlcnic_filter_hash rx_fhash; + struct list_head vf_mc_list; spinlock_t tx_clean_lock; spinlock_t mac_learn_lock; @@ -1051,7 +1060,11 @@ struct qlcnic_info_le { u8 total_pf; u8 total_rss_engines; __le16 max_vports; - u8 reserved2[64]; + __le16 linkstate_reg_offset; + __le16 bit_offsets; + __le16 max_local_ipv6_addrs; + __le16 max_remote_ipv6_addrs; + u8 reserved2[56]; } __packed; struct qlcnic_info { @@ -1083,6 +1096,10 @@ struct qlcnic_info { u8 total_pf; u8 total_rss_engines; u16 max_vports; + u16 linkstate_reg_offset; + u16 bit_offsets; + u16 max_local_ipv6_addrs; + u16 max_remote_ipv6_addrs; }; struct qlcnic_pci_info_le { @@ -1348,6 +1365,7 @@ struct _cdrp_cmd { struct qlcnic_cmd_args { struct _cdrp_cmd req; struct _cdrp_cmd rsp; + int op_type; }; int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter); @@ -1430,9 +1448,10 @@ void qlcnic_post_rx_buffers(struct qlcnic_adapter *adapter, struct qlcnic_host_rds_ring *rds_ring, u8 ring_id); int qlcnic_process_rcv_ring(struct qlcnic_host_sds_ring *sds_ring, int max); void 
qlcnic_set_multi(struct net_device *netdev); -int qlcnic_nic_add_mac(struct qlcnic_adapter *, const u8 *); +void __qlcnic_set_multi(struct net_device *, u16); +int qlcnic_nic_add_mac(struct qlcnic_adapter *, const u8 *, u16); int qlcnic_nic_del_mac(struct qlcnic_adapter *, const u8 *); -void qlcnic_free_mac_list(struct qlcnic_adapter *adapter); +void qlcnic_82xx_free_mac_list(struct qlcnic_adapter *adapter); int qlcnic_fw_cmd_set_mtu(struct qlcnic_adapter *adapter, int mtu); int qlcnic_fw_cmd_set_drv_version(struct qlcnic_adapter *); @@ -1509,8 +1528,13 @@ int qlcnic_init_pci_info(struct qlcnic_adapter *); int qlcnic_set_default_offload_settings(struct qlcnic_adapter *); int qlcnic_reset_npar_config(struct qlcnic_adapter *); int qlcnic_set_eswitch_port_config(struct qlcnic_adapter *); -void qlcnic_add_lb_filter(struct qlcnic_adapter *, struct sk_buff *, int, - __le16); +void qlcnic_add_lb_filter(struct qlcnic_adapter *, struct sk_buff *, int, u16); +int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter); +int qlcnic_read_mac_addr(struct qlcnic_adapter *); +int qlcnic_setup_netdev(struct qlcnic_adapter *, struct net_device *, int); +void qlcnic_sriov_vf_schedule_multi(struct net_device *); +void qlcnic_vf_add_mc_list(struct net_device *, u16); + /* * QLOGIC Board information */ @@ -1567,11 +1591,14 @@ struct qlcnic_hardware_ops { int (*create_rx_ctx) (struct qlcnic_adapter *); int (*create_tx_ctx) (struct qlcnic_adapter *, struct qlcnic_host_tx_ring *, int); + void (*del_rx_ctx) (struct qlcnic_adapter *); + void (*del_tx_ctx) (struct qlcnic_adapter *, + struct qlcnic_host_tx_ring *); int (*setup_link_event) (struct qlcnic_adapter *, int); int (*get_nic_info) (struct qlcnic_adapter *, struct qlcnic_info *, u8); int (*get_pci_info) (struct qlcnic_adapter *, struct qlcnic_pci_info *); int (*set_nic_info) (struct qlcnic_adapter *, struct qlcnic_info *); - int (*change_macvlan) (struct qlcnic_adapter *, u8*, __le16, u8); + int (*change_macvlan) (struct 
qlcnic_adapter *, u8*, u16, u8); void (*napi_enable) (struct qlcnic_adapter *); void (*napi_disable) (struct qlcnic_adapter *); void (*config_intr_coal) (struct qlcnic_adapter *); @@ -1580,8 +1607,9 @@ struct qlcnic_hardware_ops { int (*config_loopback) (struct qlcnic_adapter *, u8); int (*clear_loopback) (struct qlcnic_adapter *, u8); int (*config_promisc_mode) (struct qlcnic_adapter *, u32); - void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, __le16); + void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); int (*get_board_info) (struct qlcnic_adapter *); + void (*free_mac_list) (struct qlcnic_adapter *); }; extern struct qlcnic_nic_template qlcnic_vf_ops; @@ -1635,7 +1663,10 @@ static inline int qlcnic_alloc_mbx_args(struct qlcnic_cmd_args *mbx, static inline int qlcnic_issue_cmd(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *cmd) { - return adapter->ahw->hw_ops->mbx_cmd(adapter, cmd); + if (adapter->ahw->hw_ops->mbx_cmd) + return adapter->ahw->hw_ops->mbx_cmd(adapter, cmd); + + return -EIO; } static inline void qlcnic_get_func_no(struct qlcnic_adapter *adapter) @@ -1655,12 +1686,14 @@ static inline void qlcnic_api_unlock(struct qlcnic_adapter *adapter) static inline void qlcnic_add_sysfs(struct qlcnic_adapter *adapter) { - adapter->ahw->hw_ops->add_sysfs(adapter); + if (adapter->ahw->hw_ops->add_sysfs) + adapter->ahw->hw_ops->add_sysfs(adapter); } static inline void qlcnic_remove_sysfs(struct qlcnic_adapter *adapter) { - adapter->ahw->hw_ops->remove_sysfs(adapter); + if (adapter->ahw->hw_ops->remove_sysfs) + adapter->ahw->hw_ops->remove_sysfs(adapter); } static inline void @@ -1681,6 +1714,17 @@ static inline int qlcnic_fw_cmd_create_tx_ctx(struct qlcnic_adapter *adapter, return adapter->ahw->hw_ops->create_tx_ctx(adapter, ptr, ring); } +static inline void qlcnic_fw_cmd_del_rx_ctx(struct qlcnic_adapter *adapter) +{ + return adapter->ahw->hw_ops->del_rx_ctx(adapter); +} + +static inline void qlcnic_fw_cmd_del_tx_ctx(struct 
qlcnic_adapter *adapter, + struct qlcnic_host_tx_ring *ptr) +{ + return adapter->ahw->hw_ops->del_tx_ctx(adapter, ptr); +} + static inline int qlcnic_linkevent_request(struct qlcnic_adapter *adapter, int enable) { @@ -1706,7 +1750,7 @@ static inline int qlcnic_set_nic_info(struct qlcnic_adapter *adapter, } static inline int qlcnic_sre_macaddr_change(struct qlcnic_adapter *adapter, - u8 *addr, __le16 id, u8 cmd) + u8 *addr, u16 id, u8 cmd) { return adapter->ahw->hw_ops->change_macvlan(adapter, addr, id, cmd); } @@ -1765,7 +1809,7 @@ static inline int qlcnic_nic_set_promisc(struct qlcnic_adapter *adapter, } static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, - u64 *addr, __le16 id) + u64 *addr, u16 id) { adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); } @@ -1775,15 +1819,22 @@ static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) return adapter->ahw->hw_ops->get_board_info(adapter); } +static inline void qlcnic_free_mac_list(struct qlcnic_adapter *adapter) +{ + return adapter->ahw->hw_ops->free_mac_list(adapter); +} + static inline void qlcnic_dev_request_reset(struct qlcnic_adapter *adapter, u32 key) { - adapter->nic_ops->request_reset(adapter, key); + if (adapter->nic_ops->request_reset) + adapter->nic_ops->request_reset(adapter, key); } static inline void qlcnic_cancel_idc_work(struct qlcnic_adapter *adapter) { - adapter->nic_ops->cancel_idc_work(adapter); + if (adapter->nic_ops->cancel_idc_work) + adapter->nic_ops->cancel_idc_work(adapter); } static inline irqreturn_t @@ -1819,6 +1870,7 @@ static inline void qlcnic_enable_int(struct qlcnic_host_sds_ring *sds_ring) writel(0xfbff, adapter->tgt_mask_reg); } +extern const struct ethtool_ops qlcnic_sriov_vf_ethtool_ops; extern const struct ethtool_ops qlcnic_ethtool_ops; extern const struct ethtool_ops qlcnic_ethtool_failed_ops; @@ -1830,7 +1882,9 @@ extern const struct ethtool_ops qlcnic_ethtool_failed_ops; } while (0) #define PCI_DEVICE_ID_QLOGIC_QLE834X 0x8030 
+#define PCI_DEVICE_ID_QLOGIC_VF_QLE834X 0x8430 #define PCI_DEVICE_ID_QLOGIC_QLE824X 0x8020 + static inline bool qlcnic_82xx_check(struct qlcnic_adapter *adapter) { unsigned short device = adapter->pdev->device; @@ -1840,8 +1894,23 @@ static inline bool qlcnic_82xx_check(struct qlcnic_adapter *adapter) static inline bool qlcnic_83xx_check(struct qlcnic_adapter *adapter) { unsigned short device = adapter->pdev->device; - return (device == PCI_DEVICE_ID_QLOGIC_QLE834X) ? true : false; + bool status; + + status = ((device == PCI_DEVICE_ID_QLOGIC_QLE834X) || + (device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X)) ? true : false; + + return status; +} + +static inline bool qlcnic_sriov_pf_check(struct qlcnic_adapter *adapter) +{ + return (adapter->ahw->op_mode == QLCNIC_SRIOV_PF_FUNC) ? true : false; } +static inline bool qlcnic_sriov_vf_check(struct qlcnic_adapter *adapter) +{ + unsigned short device = adapter->pdev->device; + return (device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X) ? true : false; +} #endif /* __QLCNIC_H_ */ diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index cd5ae8813cb3..fd0829c2839d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -6,6 +6,7 @@ */ #include "qlcnic.h" +#include "qlcnic_sriov.h" #include <linux/if_vlan.h> #include <linux/ipv6.h> #include <linux/ethtool.h> @@ -13,100 +14,7 @@ #define QLCNIC_MAX_TX_QUEUES 1 #define RSS_HASHTYPE_IP_TCP 0x3 - -/* status descriptor mailbox data - * @phy_addr: physical address of buffer - * @sds_ring_size: buffer size - * @intrpt_id: interrupt id - * @intrpt_val: source of interrupt - */ -struct qlcnic_sds_mbx { - u64 phy_addr; - u8 rsvd1[16]; - u16 sds_ring_size; - u16 rsvd2[3]; - u16 intrpt_id; - u8 intrpt_val; - u8 rsvd3[5]; -} __packed; - -/* receive descriptor buffer data - * phy_addr_reg: physical address of regular buffer - * phy_addr_jmb: physical address of jumbo 
buffer - * reg_ring_sz: size of regular buffer - * reg_ring_len: no. of entries in regular buffer - * jmb_ring_len: no. of entries in jumbo buffer - * jmb_ring_sz: size of jumbo buffer - */ -struct qlcnic_rds_mbx { - u64 phy_addr_reg; - u64 phy_addr_jmb; - u16 reg_ring_sz; - u16 reg_ring_len; - u16 jmb_ring_sz; - u16 jmb_ring_len; -} __packed; - -/* host producers for regular and jumbo rings */ -struct __host_producer_mbx { - u32 reg_buf; - u32 jmb_buf; -} __packed; - -/* Receive context mailbox data outbox registers - * @state: state of the context - * @vport_id: virtual port id - * @context_id: receive context id - * @num_pci_func: number of pci functions of the port - * @phy_port: physical port id - */ -struct qlcnic_rcv_mbx_out { - u8 rcv_num; - u8 sts_num; - u16 ctx_id; - u8 state; - u8 num_pci_func; - u8 phy_port; - u8 vport_id; - u32 host_csmr[QLCNIC_MAX_RING_SETS]; - struct __host_producer_mbx host_prod[QLCNIC_MAX_RING_SETS]; -} __packed; - -struct qlcnic_add_rings_mbx_out { - u8 rcv_num; - u8 sts_num; - u16 ctx_id; - u32 host_csmr[QLCNIC_MAX_RING_SETS]; - struct __host_producer_mbx host_prod[QLCNIC_MAX_RING_SETS]; -} __packed; - -/* Transmit context mailbox inbox registers - * @phys_addr: DMA address of the transmit buffer - * @cnsmr_index: host consumer index - * @size: legth of transmit buffer ring - * @intr_id: interrput id - * @src: src of interrupt - */ -struct qlcnic_tx_mbx { - u64 phys_addr; - u64 cnsmr_index; - u16 size; - u16 intr_id; - u8 src; - u8 rsvd[3]; -} __packed; - -/* Transmit context mailbox outbox registers - * @host_prod: host producer index - * @ctx_id: transmit context id - * @state: state of the transmit context - */ -struct qlcnic_tx_mbx_out { - u32 host_prod; - u16 ctx_id; - u8 state; - u8 rsvd; -} __packed; +#define QLC_83XX_FW_MBX_CMD 0 static const struct qlcnic_mailbox_metadata qlcnic_83xx_mbx_tbl[] = { {QLCNIC_CMD_CONFIGURE_IP_ADDR, 6, 1}, @@ -156,9 +64,11 @@ static const struct qlcnic_mailbox_metadata qlcnic_83xx_mbx_tbl[] = 
{ {QLCNIC_CMD_SET_LED_CONFIG, 5, 1}, {QLCNIC_CMD_GET_LED_CONFIG, 1, 5}, {QLCNIC_CMD_ADD_RCV_RINGS, 130, 26}, + {QLCNIC_CMD_CONFIG_VPORT, 4, 4}, + {QLCNIC_CMD_BC_EVENT_SETUP, 2, 1}, }; -static const u32 qlcnic_83xx_ext_reg_tbl[] = { +const u32 qlcnic_83xx_ext_reg_tbl[] = { 0x38CC, /* Global Reset */ 0x38F0, /* Wildcard */ 0x38FC, /* Informant */ @@ -204,7 +114,7 @@ static const u32 qlcnic_83xx_ext_reg_tbl[] = { 0x34A4, /* QLC_83XX_ASIC_TEMP */ }; -static const u32 qlcnic_83xx_reg_tbl[] = { +const u32 qlcnic_83xx_reg_tbl[] = { 0x34A8, /* PEG_HALT_STAT1 */ 0x34AC, /* PEG_HALT_STAT2 */ 0x34B0, /* FW_HEARTBEAT */ @@ -247,6 +157,8 @@ static struct qlcnic_hardware_ops qlcnic_83xx_hw_ops = { .process_lb_rcv_ring_diag = qlcnic_83xx_process_rcv_ring_diag, .create_rx_ctx = qlcnic_83xx_create_rx_ctx, .create_tx_ctx = qlcnic_83xx_create_tx_ctx, + .del_rx_ctx = qlcnic_83xx_del_rx_ctx, + .del_tx_ctx = qlcnic_83xx_del_tx_ctx, .setup_link_event = qlcnic_83xx_setup_link_event, .get_nic_info = qlcnic_83xx_get_nic_info, .get_pci_info = qlcnic_83xx_get_pci_info, @@ -260,6 +172,7 @@ static struct qlcnic_hardware_ops qlcnic_83xx_hw_ops = { .config_promisc_mode = qlcnic_83xx_nic_set_promisc, .change_l2_filter = qlcnic_83xx_change_l2_filter, .get_board_info = qlcnic_83xx_get_port_info, + .free_mac_list = qlcnic_82xx_free_mac_list, }; static struct qlcnic_nic_template qlcnic_83xx_ops = { @@ -355,14 +268,20 @@ int qlcnic_83xx_setup_intr(struct qlcnic_adapter *adapter, u8 num_intr) num_intr)); /* account for AEN interrupt MSI-X based interrupts */ num_msix += 1; - num_msix += adapter->max_drv_tx_rings; + + if (!(adapter->flags & QLCNIC_TX_INTR_SHARED)) + num_msix += adapter->max_drv_tx_rings; + err = qlcnic_enable_msix(adapter, num_msix); if (err == -ENOMEM) return err; if (adapter->flags & QLCNIC_MSIX_ENABLED) num_msix = adapter->ahw->num_msix; - else + else { + if (qlcnic_sriov_vf_check(adapter)) + return -EINVAL; num_msix = 1; + } /* setup interrupt mapping table for fw */ ahw->intr_tbl = 
vzalloc(num_msix * sizeof(struct qlcnic_intrpt_config)); @@ -421,12 +340,13 @@ inline void qlcnic_83xx_enable_legacy_msix_mbx_intr(struct qlcnic_adapter writel(0, adapter->ahw->pci_base0 + mask); } -inline void qlcnic_83xx_disable_mbx_intr(struct qlcnic_adapter *adapter) +void qlcnic_83xx_disable_mbx_intr(struct qlcnic_adapter *adapter) { u32 mask; mask = QLCRDX(adapter->ahw, QLCNIC_DEF_INT_MASK); writel(1, adapter->ahw->pci_base0 + mask); + QLCWRX(adapter->ahw, QLCNIC_MBX_INTR_ENBL, 0); } static inline void qlcnic_83xx_get_mbx_data(struct qlcnic_adapter *adapter, @@ -482,7 +402,8 @@ static void qlcnic_83xx_poll_process_aen(struct qlcnic_adapter *adapter) event = readl(QLCNIC_MBX_FW(adapter->ahw, 0)); if (event & QLCNIC_MBX_ASYNC_EVENT) - qlcnic_83xx_process_aen(adapter); + __qlcnic_83xx_process_aen(adapter); + out: qlcnic_83xx_enable_legacy_msix_mbx_intr(adapter); spin_unlock_irqrestore(&adapter->ahw->mbx_lock, flags); @@ -535,17 +456,15 @@ done: void qlcnic_83xx_free_mbx_intr(struct qlcnic_adapter *adapter) { - u32 val = 0, num_msix = adapter->ahw->num_msix - 1; + u32 num_msix; + + qlcnic_83xx_disable_mbx_intr(adapter); if (adapter->flags & QLCNIC_MSIX_ENABLED) num_msix = adapter->ahw->num_msix - 1; else num_msix = 0; - QLCWRX(adapter->ahw, QLCNIC_MBX_INTR_ENBL, val); - - qlcnic_83xx_disable_mbx_intr(adapter); - msleep(20); synchronize_irq(adapter->msix_entries[num_msix].vector); free_irq(adapter->msix_entries[num_msix].vector, adapter); @@ -595,7 +514,7 @@ int qlcnic_83xx_setup_mbx_intr(struct qlcnic_adapter *adapter) void qlcnic_83xx_get_func_no(struct qlcnic_adapter *adapter) { u32 val = QLCRDX(adapter->ahw, QLCNIC_INFORMANT); - adapter->ahw->pci_func = val & 0xf; + adapter->ahw->pci_func = (val >> 24) & 0xff; } int qlcnic_83xx_cam_lock(struct qlcnic_adapter *adapter) @@ -707,6 +626,11 @@ void qlcnic_83xx_check_vf(struct qlcnic_adapter *adapter, ahw->fw_hal_version = 2; qlcnic_get_func_no(adapter); + if (qlcnic_sriov_vf_check(adapter)) { + 
qlcnic_sriov_vf_set_ops(adapter); + return; + } + /* Determine function privilege level */ op_mode = QLCRDX(adapter->ahw, QLC_83XX_DRV_OP_MODE); if (op_mode == QLC_83XX_DEFAULT_OPMODE) @@ -722,6 +646,9 @@ void qlcnic_83xx_check_vf(struct qlcnic_adapter *adapter, ahw->fw_hal_version); adapter->nic_ops = &qlcnic_vf_ops; } else { + if (pci_find_ext_capability(adapter->pdev, + PCI_EXT_CAP_ID_SRIOV)) + set_bit(__QLCNIC_SRIOV_CAPABLE, &adapter->state); adapter->nic_ops = &qlcnic_83xx_ops; } } @@ -755,7 +682,7 @@ static void qlcnic_dump_mbx(struct qlcnic_adapter *adapter, } /* Mailbox response for mac rcode */ -static u32 qlcnic_83xx_mac_rcode(struct qlcnic_adapter *adapter) +u32 qlcnic_83xx_mac_rcode(struct qlcnic_adapter *adapter) { u32 fw_data; u8 mac_cmd_rcode; @@ -769,7 +696,7 @@ static u32 qlcnic_83xx_mac_rcode(struct qlcnic_adapter *adapter) return 1; } -static u32 qlcnic_83xx_mbx_poll(struct qlcnic_adapter *adapter) +u32 qlcnic_83xx_mbx_poll(struct qlcnic_adapter *adapter) { u32 data; unsigned long wait_time = 0; @@ -832,7 +759,7 @@ poll: /* Get the FW response data */ fw_data = readl(QLCNIC_MBX_FW(ahw, 0)); if (fw_data & QLCNIC_MBX_ASYNC_EVENT) { - qlcnic_83xx_process_aen(adapter); + __qlcnic_83xx_process_aen(adapter); mbx_val = QLCRDX(ahw, QLCNIC_HOST_MBX_CTRL); if (mbx_val) goto poll; @@ -884,6 +811,7 @@ int qlcnic_83xx_alloc_mbx_args(struct qlcnic_cmd_args *mbx, size = ARRAY_SIZE(qlcnic_83xx_mbx_tbl); for (i = 0; i < size; i++) { if (type == mbx_tbl[i].cmd) { + mbx->op_type = QLC_83XX_FW_MBX_CMD; mbx->req.num = mbx_tbl[i].in_args; mbx->rsp.num = mbx_tbl[i].out_args; mbx->req.arg = kcalloc(mbx->req.num, sizeof(u32), @@ -901,10 +829,10 @@ int qlcnic_83xx_alloc_mbx_args(struct qlcnic_cmd_args *mbx, memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num); temp = adapter->ahw->fw_hal_version << 29; mbx->req.arg[0] = (type | (mbx->req.num << 16) | temp); - break; + return 0; } } - return 0; + return -EINVAL; } void qlcnic_83xx_idc_aen_work(struct work_struct *work) @@ 
-935,7 +863,7 @@ static void qlcnic_83xx_handle_idc_comp_aen(struct qlcnic_adapter *adapter, return; } -void qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter) +void __qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter) { u32 event[QLC_83XX_MBX_AEN_CNT]; int i; @@ -960,6 +888,9 @@ void qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter) break; case QLCNIC_MBX_TIME_EXTEND_EVENT: break; + case QLCNIC_MBX_BC_EVENT: + qlcnic_sriov_handle_bc_event(adapter, event[1]); + break; case QLCNIC_MBX_SFP_INSERT_EVENT: dev_info(&adapter->pdev->dev, "SFP+ Insert AEN:0x%x.\n", QLCNIC_MBX_RSP(event[0])); @@ -977,6 +908,53 @@ void qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter) QLCWRX(ahw, QLCNIC_FW_MBX_CTRL, QLCNIC_CLR_OWNER); } +static void qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + u32 resp, event; + unsigned long flags; + + spin_lock_irqsave(&ahw->mbx_lock, flags); + + resp = QLCRDX(ahw, QLCNIC_FW_MBX_CTRL); + if (resp & QLCNIC_SET_OWNER) { + event = readl(QLCNIC_MBX_FW(ahw, 0)); + if (event & QLCNIC_MBX_ASYNC_EVENT) + __qlcnic_83xx_process_aen(adapter); + } + + spin_unlock_irqrestore(&ahw->mbx_lock, flags); +} + +static void qlcnic_83xx_mbx_poll_work(struct work_struct *work) +{ + struct qlcnic_adapter *adapter; + + adapter = container_of(work, struct qlcnic_adapter, mbx_poll_work.work); + + if (!test_bit(__QLCNIC_MBX_POLL_ENABLE, &adapter->state)) + return; + + qlcnic_83xx_process_aen(adapter); + queue_delayed_work(adapter->qlcnic_wq, &adapter->mbx_poll_work, + (HZ / 10)); +} + +void qlcnic_83xx_enable_mbx_poll(struct qlcnic_adapter *adapter) +{ + if (test_and_set_bit(__QLCNIC_MBX_POLL_ENABLE, &adapter->state)) + return; + + INIT_DELAYED_WORK(&adapter->mbx_poll_work, qlcnic_83xx_mbx_poll_work); +} + +void qlcnic_83xx_disable_mbx_poll(struct qlcnic_adapter *adapter) +{ + if (!test_and_clear_bit(__QLCNIC_MBX_POLL_ENABLE, &adapter->state)) + return; + 
cancel_delayed_work_sync(&adapter->mbx_poll_work); +} + static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) { int index, i, err, sds_mbx_size; @@ -1004,7 +982,8 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) sds = &recv_ctx->sds_rings[i]; sds->consumer = 0; memset(sds->desc_head, 0, STATUS_DESC_RINGSIZE(sds)); - sds_mbx.phy_addr = sds->phys_addr; + sds_mbx.phy_addr_low = LSD(sds->phys_addr); + sds_mbx.phy_addr_high = MSD(sds->phys_addr); sds_mbx.sds_ring_size = sds->num_desc; if (adapter->flags & QLCNIC_MSIX_ENABLED) @@ -1050,6 +1029,32 @@ out: return err; } +void qlcnic_83xx_del_rx_ctx(struct qlcnic_adapter *adapter) +{ + int err; + u32 temp = 0; + struct qlcnic_cmd_args cmd; + struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DESTROY_RX_CTX)) + return; + + if (qlcnic_sriov_pf_check(adapter) || qlcnic_sriov_vf_check(adapter)) + cmd.req.arg[0] |= (0x3 << 29); + + if (qlcnic_sriov_pf_check(adapter)) + qlcnic_pf_set_interface_id_del_rx_ctx(adapter, &temp); + + cmd.req.arg[1] = recv_ctx->context_id | temp; + err = qlcnic_issue_cmd(adapter, &cmd); + if (err) + dev_err(&adapter->pdev->dev, + "Failed to destroy rx ctx in firmware\n"); + + recv_ctx->state = QLCNIC_HOST_CTX_STATE_FREED; + qlcnic_free_mbx_args(&cmd); +} + int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *adapter) { int i, err, index, sds_mbx_size, rds_mbx_size; @@ -1080,9 +1085,17 @@ int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *adapter) /* set mailbox hdr and capabilities */ qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CREATE_RX_CTX); + + if (qlcnic_sriov_pf_check(adapter) || qlcnic_sriov_vf_check(adapter)) + cmd.req.arg[0] |= (0x3 << 29); + cmd.req.arg[1] = cap; cmd.req.arg[5] = 1 | (num_rds << 5) | (num_sds << 8) | (QLC_83XX_HOST_RDS_MODE_UNIQUE << 16); + + if (qlcnic_sriov_pf_check(adapter)) + qlcnic_pf_set_interface_id_create_rx_ctx(adapter, + &cmd.req.arg[6]); /* set up status rings, mbx 
8-57/87 */ index = QLC_83XX_HOST_SDS_MBX_IDX; for (i = 0; i < num_sds; i++) { @@ -1090,7 +1103,8 @@ int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *adapter) sds = &recv_ctx->sds_rings[i]; sds->consumer = 0; memset(sds->desc_head, 0, STATUS_DESC_RINGSIZE(sds)); - sds_mbx.phy_addr = sds->phys_addr; + sds_mbx.phy_addr_low = LSD(sds->phys_addr); + sds_mbx.phy_addr_high = MSD(sds->phys_addr); sds_mbx.sds_ring_size = sds->num_desc; if (adapter->flags & QLCNIC_MSIX_ENABLED) intrpt_id = ahw->intr_tbl[i].id; @@ -1110,13 +1124,15 @@ int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *adapter) rds = &recv_ctx->rds_rings[0]; rds->producer = 0; memset(&rds_mbx, 0, rds_mbx_size); - rds_mbx.phy_addr_reg = rds->phys_addr; + rds_mbx.phy_addr_reg_low = LSD(rds->phys_addr); + rds_mbx.phy_addr_reg_high = MSD(rds->phys_addr); rds_mbx.reg_ring_sz = rds->dma_size; rds_mbx.reg_ring_len = rds->num_desc; /* Jumbo ring */ rds = &recv_ctx->rds_rings[1]; rds->producer = 0; - rds_mbx.phy_addr_jmb = rds->phys_addr; + rds_mbx.phy_addr_jmb_low = LSD(rds->phys_addr); + rds_mbx.phy_addr_jmb_high = MSD(rds->phys_addr); rds_mbx.jmb_ring_sz = rds->dma_size; rds_mbx.jmb_ring_len = rds->num_desc; buf = &cmd.req.arg[index]; @@ -1163,16 +1179,39 @@ out: return err; } +void qlcnic_83xx_del_tx_ctx(struct qlcnic_adapter *adapter, + struct qlcnic_host_tx_ring *tx_ring) +{ + struct qlcnic_cmd_args cmd; + u32 temp = 0; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DESTROY_TX_CTX)) + return; + + if (qlcnic_sriov_pf_check(adapter) || qlcnic_sriov_vf_check(adapter)) + cmd.req.arg[0] |= (0x3 << 29); + + if (qlcnic_sriov_pf_check(adapter)) + qlcnic_pf_set_interface_id_del_tx_ctx(adapter, &temp); + + cmd.req.arg[1] = tx_ring->ctx_id | temp; + if (qlcnic_issue_cmd(adapter, &cmd)) + dev_err(&adapter->pdev->dev, + "Failed to destroy tx ctx in firmware\n"); + qlcnic_free_mbx_args(&cmd); +} + int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *adapter, struct qlcnic_host_tx_ring *tx, int ring) { int err; 
u16 msix_id; - u32 *buf, intr_mask; + u32 *buf, intr_mask, temp = 0; struct qlcnic_cmd_args cmd; struct qlcnic_tx_mbx mbx; struct qlcnic_tx_mbx_out *mbx_out; struct qlcnic_hardware_context *ahw = adapter->ahw; + u32 msix_vector; /* Reset host resources */ tx->producer = 0; @@ -1182,13 +1221,21 @@ int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *adapter, memset(&mbx, 0, sizeof(struct qlcnic_tx_mbx)); /* setup mailbox inbox registerss */ - mbx.phys_addr = tx->phys_addr; - mbx.cnsmr_index = tx->hw_cons_phys_addr; + mbx.phys_addr_low = LSD(tx->phys_addr); + mbx.phys_addr_high = MSD(tx->phys_addr); + mbx.cnsmr_index_low = LSD(tx->hw_cons_phys_addr); + mbx.cnsmr_index_high = MSD(tx->hw_cons_phys_addr); mbx.size = tx->num_desc; - if (adapter->flags & QLCNIC_MSIX_ENABLED) - msix_id = ahw->intr_tbl[adapter->max_sds_rings + ring].id; - else + if (adapter->flags & QLCNIC_MSIX_ENABLED) { + if (!(adapter->flags & QLCNIC_TX_INTR_SHARED)) + msix_vector = adapter->max_sds_rings + ring; + else + msix_vector = adapter->max_sds_rings - 1; + msix_id = ahw->intr_tbl[msix_vector].id; + } else { msix_id = QLCRDX(ahw, QLCNIC_DEF_INT_ID); + } + if (adapter->ahw->diag_test != QLCNIC_LOOPBACK_TEST) mbx.intr_id = msix_id; else @@ -1196,8 +1243,15 @@ int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *adapter, mbx.src = 0; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CREATE_TX_CTX); + + if (qlcnic_sriov_pf_check(adapter) || qlcnic_sriov_vf_check(adapter)) + cmd.req.arg[0] |= (0x3 << 29); + + if (qlcnic_sriov_pf_check(adapter)) + qlcnic_pf_set_interface_id_create_tx_ctx(adapter, &temp); + cmd.req.arg[1] = QLCNIC_CAP0_LEGACY_CONTEXT; - cmd.req.arg[5] = QLCNIC_MAX_TX_QUEUES; + cmd.req.arg[5] = QLCNIC_MAX_TX_QUEUES | temp; buf = &cmd.req.arg[6]; memcpy(buf, &mbx, sizeof(struct qlcnic_tx_mbx)); /* send the mailbox command*/ @@ -1210,7 +1264,8 @@ int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *adapter, mbx_out = (struct qlcnic_tx_mbx_out *)&cmd.rsp.arg[2]; tx->crb_cmd_producer = 
ahw->pci_base0 + mbx_out->host_prod; tx->ctx_id = mbx_out->ctx_id; - if (adapter->flags & QLCNIC_MSIX_ENABLED) { + if ((adapter->flags & QLCNIC_MSIX_ENABLED) && + !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { intr_mask = ahw->intr_tbl[adapter->max_sds_rings + ring].src; tx->crb_intr_mask = ahw->pci_base0 + intr_mask; } @@ -1267,7 +1322,8 @@ static int qlcnic_83xx_diag_alloc_res(struct net_device *netdev, int test) if (adapter->ahw->diag_test == QLCNIC_LOOPBACK_TEST) { /* disable and free mailbox interrupt */ - qlcnic_83xx_free_mbx_intr(adapter); + if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) + qlcnic_83xx_free_mbx_intr(adapter); adapter->ahw->loopback_state = 0; adapter->ahw->hw_ops->setup_link_event(adapter, 1); } @@ -1295,12 +1351,14 @@ static void qlcnic_83xx_diag_free_res(struct net_device *netdev, qlcnic_detach(adapter); if (adapter->ahw->diag_test == QLCNIC_LOOPBACK_TEST) { - err = qlcnic_83xx_setup_mbx_intr(adapter); - if (err) { - dev_err(&adapter->pdev->dev, - "%s: failed to setup mbx interrupt\n", - __func__); - goto out; + if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) { + err = qlcnic_83xx_setup_mbx_intr(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "%s: failed to setup mbx interrupt\n", + __func__); + goto out; + } } } adapter->ahw->diag_test = 0; @@ -1373,12 +1431,60 @@ mbx_err: } } +int qlcnic_83xx_set_led(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err = -EIO, active = 1; + + if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC) { + netdev_warn(netdev, + "LED test is not supported in non-privileged mode\n"); + return -EOPNOTSUPP; + } + + switch (state) { + case ETHTOOL_ID_ACTIVE: + if (test_and_set_bit(__QLCNIC_LED_ENABLE, &adapter->state)) + return -EBUSY; + + if (test_bit(__QLCNIC_RESETTING, &adapter->state)) + break; + + err = qlcnic_83xx_config_led(adapter, active, 0); + if (err) + netdev_err(netdev, "Failed to set LED blink state\n"); + break; + case 
ETHTOOL_ID_INACTIVE: + active = 0; + + if (test_bit(__QLCNIC_RESETTING, &adapter->state)) + break; + + err = qlcnic_83xx_config_led(adapter, active, 0); + if (err) + netdev_err(netdev, "Failed to reset LED blink state\n"); + break; + + default: + return -EINVAL; + } + + if (!active || err) + clear_bit(__QLCNIC_LED_ENABLE, &adapter->state); + + return err; +} + void qlcnic_83xx_register_nic_idc_func(struct qlcnic_adapter *adapter, int enable) { struct qlcnic_cmd_args cmd; int status; + if (qlcnic_sriov_vf_check(adapter)) + return; + if (enable) { qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_INIT_NIC_FUNC); cmd.req.arg[1] = BIT_0 | BIT_31; @@ -1441,24 +1547,35 @@ int qlcnic_83xx_setup_link_event(struct qlcnic_adapter *adapter, int enable) return err; } +static void qlcnic_83xx_set_interface_id_promisc(struct qlcnic_adapter *adapter, + u32 *interface_id) +{ + if (qlcnic_sriov_pf_check(adapter)) { + qlcnic_pf_set_interface_id_promisc(adapter, interface_id); + } else { + if (!qlcnic_sriov_vf_check(adapter)) + *interface_id = adapter->recv_ctx->context_id << 16; + } +} + int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *adapter, u32 mode) { int err; - u32 temp; + u32 temp = 0; struct qlcnic_cmd_args cmd; if (adapter->recv_ctx->state == QLCNIC_HOST_CTX_STATE_FREED) return -EIO; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIGURE_MAC_RX_MODE); - temp = adapter->recv_ctx->context_id << 16; + qlcnic_83xx_set_interface_id_promisc(adapter, &temp); cmd.req.arg[1] = (mode ? 
1 : 0) | temp; err = qlcnic_issue_cmd(adapter, &cmd); if (err) dev_info(&adapter->pdev->dev, "Promiscous mode config failed\n"); - qlcnic_free_mbx_args(&cmd); + qlcnic_free_mbx_args(&cmd); return err; } @@ -1490,7 +1607,9 @@ int qlcnic_83xx_loopback_test(struct net_device *netdev, u8 mode) /* Poll for link up event before running traffic */ do { msleep(500); - qlcnic_83xx_process_aen(adapter); + if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) + qlcnic_83xx_process_aen(adapter); + if (loop++ > QLCNIC_ILB_MAX_RCV_LOOP) { dev_info(&adapter->pdev->dev, "Firmware didn't sent link up event to loopback request\n"); @@ -1500,6 +1619,12 @@ int qlcnic_83xx_loopback_test(struct net_device *netdev, u8 mode) } } while ((adapter->ahw->linkup && ahw->has_link_events) != 1); + /* Make sure carrier is off and queue is stopped during loopback */ + if (netif_running(netdev)) { + netif_carrier_off(netdev); + netif_stop_queue(netdev); + } + ret = qlcnic_do_lb_test(adapter, mode); qlcnic_83xx_clear_lb_mode(adapter, mode); @@ -1544,7 +1669,9 @@ int qlcnic_83xx_set_lb_mode(struct qlcnic_adapter *adapter, u8 mode) /* Wait for Link and IDC Completion AEN */ do { msleep(300); - qlcnic_83xx_process_aen(adapter); + if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) + qlcnic_83xx_process_aen(adapter); + if (loop++ > QLCNIC_ILB_MAX_RCV_LOOP) { dev_err(&adapter->pdev->dev, "FW did not generate IDC completion AEN\n"); @@ -1584,7 +1711,9 @@ int qlcnic_83xx_clear_lb_mode(struct qlcnic_adapter *adapter, u8 mode) /* Wait for Link and IDC Completion AEN */ do { msleep(300); - qlcnic_83xx_process_aen(adapter); + if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) + qlcnic_83xx_process_aen(adapter); + if (loop++ > QLCNIC_ILB_MAX_RCV_LOOP) { dev_err(&adapter->pdev->dev, "Firmware didn't sent IDC completion AEN\n"); @@ -1598,21 +1727,31 @@ int qlcnic_83xx_clear_lb_mode(struct qlcnic_adapter *adapter, u8 mode) return status; } +static void qlcnic_83xx_set_interface_id_ipaddr(struct qlcnic_adapter *adapter, + u32 
*interface_id) +{ + if (qlcnic_sriov_pf_check(adapter)) { + qlcnic_pf_set_interface_id_ipaddr(adapter, interface_id); + } else { + if (!qlcnic_sriov_vf_check(adapter)) + *interface_id = adapter->recv_ctx->context_id << 16; + } +} + void qlcnic_83xx_config_ipaddr(struct qlcnic_adapter *adapter, __be32 ip, int mode) { int err; - u32 temp, temp_ip; + u32 temp = 0, temp_ip; struct qlcnic_cmd_args cmd; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIGURE_IP_ADDR); - if (mode == QLCNIC_IP_UP) { - temp = adapter->recv_ctx->context_id << 16; + qlcnic_83xx_set_interface_id_ipaddr(adapter, &temp); + + if (mode == QLCNIC_IP_UP) cmd.req.arg[1] = 1 | temp; - } else { - temp = adapter->recv_ctx->context_id << 16; + else cmd.req.arg[1] = 2 | temp; - } /* * Adapter needs IP address in network byte order. @@ -1629,6 +1768,7 @@ void qlcnic_83xx_config_ipaddr(struct qlcnic_adapter *adapter, __be32 ip, dev_err(&adapter->netdev->dev, "could not notify %s IP 0x%x request\n", (mode == QLCNIC_IP_UP) ? "Add" : "Remove", ip); + qlcnic_free_mbx_args(&cmd); } @@ -1695,11 +1835,22 @@ int qlcnic_83xx_config_rss(struct qlcnic_adapter *adapter, int enable) } +static void qlcnic_83xx_set_interface_id_macaddr(struct qlcnic_adapter *adapter, + u32 *interface_id) +{ + if (qlcnic_sriov_pf_check(adapter)) { + qlcnic_pf_set_interface_id_macaddr(adapter, interface_id); + } else { + if (!qlcnic_sriov_vf_check(adapter)) + *interface_id = adapter->recv_ctx->context_id << 16; + } +} + int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, - __le16 vlan_id, u8 op) + u16 vlan_id, u8 op) { int err; - u32 *buf; + u32 *buf, temp = 0; struct qlcnic_cmd_args cmd; struct qlcnic_macvlan_mbx mv; @@ -1709,11 +1860,21 @@ int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIG_MAC_VLAN); if (err) return err; - cmd.req.arg[1] = op | (1 << 8) | - (adapter->recv_ctx->context_id << 16); - mv.vlan = 
le16_to_cpu(vlan_id); - memcpy(&mv.mac, addr, ETH_ALEN); + if (vlan_id) + op = (op == QLCNIC_MAC_ADD || op == QLCNIC_MAC_VLAN_ADD) ? + QLCNIC_MAC_VLAN_ADD : QLCNIC_MAC_VLAN_DEL; + + cmd.req.arg[1] = op | (1 << 8); + qlcnic_83xx_set_interface_id_macaddr(adapter, &temp); + cmd.req.arg[1] |= temp; + mv.vlan = vlan_id; + mv.mac_addr0 = addr[0]; + mv.mac_addr1 = addr[1]; + mv.mac_addr2 = addr[2]; + mv.mac_addr3 = addr[3]; + mv.mac_addr4 = addr[4]; + mv.mac_addr5 = addr[5]; buf = &cmd.req.arg[2]; memcpy(buf, &mv, sizeof(struct qlcnic_macvlan_mbx)); err = qlcnic_issue_cmd(adapter, &cmd); @@ -1726,7 +1887,7 @@ int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, } void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, - __le16 vlan_id) + u16 vlan_id) { u8 mac[ETH_ALEN]; memcpy(&mac, addr, ETH_ALEN); @@ -1826,7 +1987,7 @@ irqreturn_t qlcnic_83xx_handle_aen(int irq, void *data) event = readl(QLCNIC_MBX_FW(adapter->ahw, 0)); if (event & QLCNIC_MBX_ASYNC_EVENT) - qlcnic_83xx_process_aen(adapter); + __qlcnic_83xx_process_aen(adapter); out: mask = QLCRDX(adapter->ahw, QLCNIC_DEF_INT_MASK); writel(0, adapter->ahw->pci_base0 + mask); @@ -2002,14 +2163,17 @@ int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *adapter, int qlcnic_83xx_config_intrpt(struct qlcnic_adapter *adapter, bool op_type) { int i, index, err; - bool type; u8 max_ints; - u32 val, temp; + u32 val, temp, type; struct qlcnic_cmd_args cmd; max_ints = adapter->ahw->num_msix - 1; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIG_INTRPT); cmd.req.arg[1] = max_ints; + + if (qlcnic_sriov_vf_check(adapter)) + cmd.req.arg[1] |= (adapter->ahw->pci_func << 8) | BIT_16; + for (i = 0, index = 2; i < max_ints; i++) { type = op_type ? 
QLCNIC_INTRPT_ADD : QLCNIC_INTRPT_DEL; val = type | (adapter->ahw->intr_tbl[i].type << 4); @@ -2163,7 +2327,7 @@ static int qlcnic_83xx_poll_flash_status_reg(struct qlcnic_adapter *adapter) return 0; } -static int qlcnic_83xx_enable_flash_write_op(struct qlcnic_adapter *adapter) +int qlcnic_83xx_enable_flash_write(struct qlcnic_adapter *adapter) { int ret; u32 cmd; @@ -2181,7 +2345,7 @@ static int qlcnic_83xx_enable_flash_write_op(struct qlcnic_adapter *adapter) return 0; } -static int qlcnic_83xx_disable_flash_write_op(struct qlcnic_adapter *adapter) +int qlcnic_83xx_disable_flash_write(struct qlcnic_adapter *adapter) { int ret; @@ -2255,7 +2419,7 @@ int qlcnic_83xx_erase_flash_sector(struct qlcnic_adapter *adapter, return -EIO; if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { - ret = qlcnic_83xx_enable_flash_write_op(adapter); + ret = qlcnic_83xx_enable_flash_write(adapter); if (ret) { qlcnic_83xx_unlock_flash(adapter); dev_err(&adapter->pdev->dev, @@ -2297,7 +2461,7 @@ int qlcnic_83xx_erase_flash_sector(struct qlcnic_adapter *adapter, } if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { - ret = qlcnic_83xx_disable_flash_write_op(adapter); + ret = qlcnic_83xx_disable_flash_write(adapter); if (ret) { qlcnic_83xx_unlock_flash(adapter); dev_err(&adapter->pdev->dev, @@ -2337,8 +2501,8 @@ int qlcnic_83xx_flash_bulk_write(struct qlcnic_adapter *adapter, u32 addr, u32 temp; int ret = -EIO; - if ((count < QLC_83XX_FLASH_BULK_WRITE_MIN) || - (count > QLC_83XX_FLASH_BULK_WRITE_MAX)) { + if ((count < QLC_83XX_FLASH_WRITE_MIN) || + (count > QLC_83XX_FLASH_WRITE_MAX)) { dev_err(&adapter->pdev->dev, "%s: Invalid word count\n", __func__); return -EIO; @@ -2616,13 +2780,19 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr, int qlcnic_83xx_test_link(struct qlcnic_adapter *adapter) { + u8 pci_func; int err; u32 config = 0, state; struct qlcnic_cmd_args cmd; struct qlcnic_hardware_context *ahw = adapter->ahw; - state = readl(ahw->pci_base0 
+ QLC_83XX_LINK_STATE(ahw->pci_func)); - if (!QLC_83xx_FUNC_VAL(state, ahw->pci_func)) { + if (qlcnic_sriov_vf_check(adapter)) + pci_func = adapter->portnum; + else + pci_func = ahw->pci_func; + + state = readl(ahw->pci_base0 + QLC_83XX_LINK_STATE(pci_func)); + if (!QLC_83xx_FUNC_VAL(state, pci_func)) { dev_info(&adapter->pdev->dev, "link state down\n"); return config; } @@ -2780,6 +2950,7 @@ static u64 *qlcnic_83xx_fill_stats(struct qlcnic_adapter *adapter, void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) { struct qlcnic_cmd_args cmd; + struct net_device *netdev = adapter->netdev; int ret = 0; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_STATISTICS); @@ -2789,7 +2960,7 @@ void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) data = qlcnic_83xx_fill_stats(adapter, &cmd, data, QLC_83XX_STAT_TX, &ret); if (ret) { - dev_info(&adapter->pdev->dev, "Error getting MAC stats\n"); + netdev_err(netdev, "Error getting Tx stats\n"); goto out; } /* Get MAC stats */ @@ -2799,8 +2970,7 @@ void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) data = qlcnic_83xx_fill_stats(adapter, &cmd, data, QLC_83XX_STAT_MAC, &ret); if (ret) { - dev_info(&adapter->pdev->dev, - "Error getting Rx stats\n"); + netdev_err(netdev, "Error getting MAC stats\n"); goto out; } /* Get Rx stats */ @@ -2810,8 +2980,7 @@ void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) data = qlcnic_83xx_fill_stats(adapter, &cmd, data, QLC_83XX_STAT_RX, &ret); if (ret) - dev_info(&adapter->pdev->dev, - "Error getting Tx stats\n"); + netdev_err(netdev, "Error getting Rx stats\n"); out: qlcnic_free_mbx_args(&cmd); } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index 61f81f6c84a9..4be411c2628a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -12,6 +12,8 @@ #include <linux/etherdevice.h> #include 
"qlcnic_hw.h" +#define QLCNIC_83XX_BAR0_LENGTH 0x4000 + /* Directly mapped registers */ #define QLC_83XX_CRB_WIN_BASE 0x3800 #define QLC_83XX_CRB_WIN_FUNC(f) (QLC_83XX_CRB_WIN_BASE+((f)*4)) @@ -86,6 +88,153 @@ #define QLC_83XX_MAX_RESET_SEQ_ENTRIES 16 +/* status descriptor mailbox data + * @phy_addr_{low|high}: physical address of buffer + * @sds_ring_size: buffer size + * @intrpt_id: interrupt id + * @intrpt_val: source of interrupt + */ +struct qlcnic_sds_mbx { + u32 phy_addr_low; + u32 phy_addr_high; + u32 rsvd1[4]; +#if defined(__LITTLE_ENDIAN) + u16 sds_ring_size; + u16 rsvd2; + u16 rsvd3[2]; + u16 intrpt_id; + u8 intrpt_val; + u8 rsvd4; +#elif defined(__BIG_ENDIAN) + u16 rsvd2; + u16 sds_ring_size; + u16 rsvd3[2]; + u8 rsvd4; + u8 intrpt_val; + u16 intrpt_id; +#endif + u32 rsvd5; +} __packed; + +/* receive descriptor buffer data + * phy_addr_reg_{low|high}: physical address of regular buffer + * phy_addr_jmb_{low|high}: physical address of jumbo buffer + * reg_ring_sz: size of regular buffer + * reg_ring_len: no. of entries in regular buffer + * jmb_ring_len: no. 
of entries in jumbo buffer + * jmb_ring_sz: size of jumbo buffer + */ +struct qlcnic_rds_mbx { + u32 phy_addr_reg_low; + u32 phy_addr_reg_high; + u32 phy_addr_jmb_low; + u32 phy_addr_jmb_high; +#if defined(__LITTLE_ENDIAN) + u16 reg_ring_sz; + u16 reg_ring_len; + u16 jmb_ring_sz; + u16 jmb_ring_len; +#elif defined(__BIG_ENDIAN) + u16 reg_ring_len; + u16 reg_ring_sz; + u16 jmb_ring_len; + u16 jmb_ring_sz; +#endif +} __packed; + +/* host producers for regular and jumbo rings */ +struct __host_producer_mbx { + u32 reg_buf; + u32 jmb_buf; +} __packed; + +/* Receive context mailbox data outbox registers + * @state: state of the context + * @vport_id: virtual port id + * @context_id: receive context id + * @num_pci_func: number of pci functions of the port + * @phy_port: physical port id + */ +struct qlcnic_rcv_mbx_out { +#if defined(__LITTLE_ENDIAN) + u8 rcv_num; + u8 sts_num; + u16 ctx_id; + u8 state; + u8 num_pci_func; + u8 phy_port; + u8 vport_id; +#elif defined(__BIG_ENDIAN) + u16 ctx_id; + u8 sts_num; + u8 rcv_num; + u8 vport_id; + u8 phy_port; + u8 num_pci_func; + u8 state; +#endif + u32 host_csmr[QLCNIC_MAX_RING_SETS]; + struct __host_producer_mbx host_prod[QLCNIC_MAX_RING_SETS]; +} __packed; + +struct qlcnic_add_rings_mbx_out { +#if defined(__LITTLE_ENDIAN) + u8 rcv_num; + u8 sts_num; + u16 ctx_id; +#elif defined(__BIG_ENDIAN) + u16 ctx_id; + u8 sts_num; + u8 rcv_num; +#endif + u32 host_csmr[QLCNIC_MAX_RING_SETS]; + struct __host_producer_mbx host_prod[QLCNIC_MAX_RING_SETS]; +} __packed; + +/* Transmit context mailbox inbox registers + * @phys_addr_{low|high}: DMA address of the transmit buffer + * @cnsmr_index_{low|high}: host consumer index + * @size: legth of transmit buffer ring + * @intr_id: interrput id + * @src: src of interrupt + */ +struct qlcnic_tx_mbx { + u32 phys_addr_low; + u32 phys_addr_high; + u32 cnsmr_index_low; + u32 cnsmr_index_high; +#if defined(__LITTLE_ENDIAN) + u16 size; + u16 intr_id; + u8 src; + u8 rsvd[3]; +#elif defined(__BIG_ENDIAN) + 
u16 intr_id; + u16 size; + u8 rsvd[3]; + u8 src; +#endif +} __packed; + +/* Transmit context mailbox outbox registers + * @host_prod: host producer index + * @ctx_id: transmit context id + * @state: state of the transmit context + */ + +struct qlcnic_tx_mbx_out { + u32 host_prod; +#if defined(__LITTLE_ENDIAN) + u16 ctx_id; + u8 state; + u8 rsvd; +#elif defined(__BIG_ENDIAN) + u8 rsvd; + u8 state; + u16 ctx_id; +#endif +} __packed; + struct qlcnic_intrpt_config { u8 type; u8 enabled; @@ -94,8 +243,23 @@ struct qlcnic_intrpt_config { }; struct qlcnic_macvlan_mbx { - u8 mac[ETH_ALEN]; +#if defined(__LITTLE_ENDIAN) + u8 mac_addr0; + u8 mac_addr1; + u8 mac_addr2; + u8 mac_addr3; + u8 mac_addr4; + u8 mac_addr5; u16 vlan; +#elif defined(__BIG_ENDIAN) + u8 mac_addr3; + u8 mac_addr2; + u8 mac_addr1; + u8 mac_addr0; + u16 vlan; + u8 mac_addr5; + u8 mac_addr4; +#endif }; struct qlc_83xx_fw_info { @@ -153,6 +317,18 @@ struct qlc_83xx_idc { char **name; }; +/* Device States */ +enum qlcnic_83xx_states { + QLC_83XX_IDC_DEV_UNKNOWN, + QLC_83XX_IDC_DEV_COLD, + QLC_83XX_IDC_DEV_INIT, + QLC_83XX_IDC_DEV_READY, + QLC_83XX_IDC_DEV_NEED_RESET, + QLC_83XX_IDC_DEV_NEED_QUISCENT, + QLC_83XX_IDC_DEV_FAILED, + QLC_83XX_IDC_DEV_QUISCENT +}; + #define QLCNIC_MBX_RSP(reg) LSW(reg) #define QLCNIC_MBX_NUM_REGS(reg) (MSW(reg) & 0x1FF) #define QLCNIC_MBX_STATUS(reg) (((reg) >> 25) & 0x7F) @@ -226,6 +402,7 @@ struct qlc_83xx_idc { #define QLC_83XX_GET_FW_LRO_MSS_CAPABILITY(val) (val & 0x20000) #define QLC_83XX_VIRTUAL_NIC_MODE 0xFF #define QLC_83XX_DEFAULT_MODE 0x0 +#define QLC_83XX_SRIOV_MODE 0x1 #define QLCNIC_BRDTYPE_83XX_10G 0x0083 #define QLC_83XX_FLASH_SPI_STATUS 0x2808E010 @@ -242,8 +419,8 @@ struct qlc_83xx_idc { #define QLC_83XX_FLASH_BULK_WRITE_CMD 0xcadcadca #define QLC_83XX_FLASH_READ_RETRY_COUNT 5000 #define QLC_83XX_FLASH_STATUS_READY 0x6 -#define QLC_83XX_FLASH_BULK_WRITE_MIN 2 -#define QLC_83XX_FLASH_BULK_WRITE_MAX 64 +#define QLC_83XX_FLASH_WRITE_MIN 2 +#define 
QLC_83XX_FLASH_WRITE_MAX 64 #define QLC_83XX_FLASH_STATUS_REG_POLL_DELAY 1 #define QLC_83XX_ERASE_MODE 1 #define QLC_83XX_WRITE_MODE 2 @@ -336,7 +513,7 @@ int qlcnic_83xx_clear_lb_mode(struct qlcnic_adapter *, u8); int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); int qlcnic_83xx_config_intr_coalesce(struct qlcnic_adapter *); -void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, __le16); +void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); void qlcnic_83xx_register_nic_idc_func(struct qlcnic_adapter *, int); @@ -351,11 +528,14 @@ int qlcnic_ind_rd(struct qlcnic_adapter *, u32); int qlcnic_83xx_create_rx_ctx(struct qlcnic_adapter *); int qlcnic_83xx_create_tx_ctx(struct qlcnic_adapter *, struct qlcnic_host_tx_ring *, int); +void qlcnic_83xx_del_rx_ctx(struct qlcnic_adapter *); +void qlcnic_83xx_del_tx_ctx(struct qlcnic_adapter *, + struct qlcnic_host_tx_ring *); int qlcnic_83xx_get_nic_info(struct qlcnic_adapter *, struct qlcnic_info *, u8); int qlcnic_83xx_setup_link_event(struct qlcnic_adapter *, int); void qlcnic_83xx_process_rcv_ring_diag(struct qlcnic_host_sds_ring *); int qlcnic_83xx_config_intrpt(struct qlcnic_adapter *, bool); -int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *, u8 *, __le16, u8); +int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *, u8 *, u16, u8); int qlcnic_83xx_get_mac_address(struct qlcnic_adapter *, u8 *); void qlcnic_83xx_configure_mac(struct qlcnic_adapter *, u8 *, u8, struct qlcnic_cmd_args *); @@ -368,6 +548,7 @@ void qlcnic_83xx_config_intr_coal(struct qlcnic_adapter *); irqreturn_t qlcnic_83xx_handle_aen(int, void *); int qlcnic_83xx_get_port_info(struct qlcnic_adapter *); void qlcnic_83xx_enable_mbx_intrpt(struct qlcnic_adapter *); +void 
qlcnic_83xx_disable_mbx_intr(struct qlcnic_adapter *); irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *); irqreturn_t qlcnic_83xx_intr(int, void *); irqreturn_t qlcnic_83xx_tmp_intr(int, void *); @@ -377,7 +558,7 @@ void qlcnic_83xx_disable_intr(struct qlcnic_adapter *, struct qlcnic_host_sds_ring *); void qlcnic_83xx_check_vf(struct qlcnic_adapter *, const struct pci_device_id *); -void qlcnic_83xx_process_aen(struct qlcnic_adapter *); +void __qlcnic_83xx_process_aen(struct qlcnic_adapter *); int qlcnic_83xx_get_port_config(struct qlcnic_adapter *); int qlcnic_83xx_set_port_config(struct qlcnic_adapter *); int qlcnic_enable_eswitch(struct qlcnic_adapter *, u8, u8); @@ -401,7 +582,7 @@ int qlcnic_83xx_read_flash_descriptor_table(struct qlcnic_adapter *); int qlcnic_83xx_flash_read32(struct qlcnic_adapter *, u32, u8 *, int); int qlcnic_83xx_lockless_flash_read32(struct qlcnic_adapter *, u32, u8 *, int); -int qlcnic_83xx_init(struct qlcnic_adapter *); +int qlcnic_83xx_init(struct qlcnic_adapter *, int); int qlcnic_83xx_idc_ready_state_entry(struct qlcnic_adapter *); int qlcnic_83xx_check_hw_status(struct qlcnic_adapter *p_dev); void qlcnic_83xx_idc_poll_dev_state(struct work_struct *); @@ -434,5 +615,12 @@ int qlcnic_83xx_get_regs_len(struct qlcnic_adapter *); int qlcnic_83xx_get_registers(struct qlcnic_adapter *, u32 *); int qlcnic_83xx_loopback_test(struct net_device *, u8); int qlcnic_83xx_interrupt_test(struct net_device *); +int qlcnic_83xx_set_led(struct net_device *, enum ethtool_phys_id_state); int qlcnic_83xx_flash_test(struct qlcnic_adapter *); +int qlcnic_83xx_enable_flash_write(struct qlcnic_adapter *); +int qlcnic_83xx_disable_flash_write(struct qlcnic_adapter *); +u32 qlcnic_83xx_mac_rcode(struct qlcnic_adapter *); +u32 qlcnic_83xx_mbx_poll(struct qlcnic_adapter *); +void qlcnic_83xx_enable_mbx_poll(struct qlcnic_adapter *); +void qlcnic_83xx_disable_mbx_poll(struct qlcnic_adapter *); #endif diff --git 
a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 5c033f268ca5..6ea3a096054c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -5,6 +5,7 @@ * See LICENSE.qlcnic for copyright and licensing details. */ +#include "qlcnic_sriov.h" #include "qlcnic.h" #include "qlcnic_hw.h" @@ -25,12 +26,12 @@ #define QLC_83XX_OPCODE_POLL_READ_LIST 0x0100 static int qlcnic_83xx_init_default_driver(struct qlcnic_adapter *adapter); -static int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter); static int qlcnic_83xx_check_heartbeat(struct qlcnic_adapter *p_dev); static int qlcnic_83xx_restart_hw(struct qlcnic_adapter *adapter); /* Template header */ struct qlc_83xx_reset_hdr { +#if defined(__LITTLE_ENDIAN) u16 version; u16 signature; u16 size; @@ -39,14 +40,31 @@ struct qlc_83xx_reset_hdr { u16 checksum; u16 init_offset; u16 start_offset; +#elif defined(__BIG_ENDIAN) + u16 signature; + u16 version; + u16 entries; + u16 size; + u16 checksum; + u16 hdr_size; + u16 start_offset; + u16 init_offset; +#endif } __packed; /* Command entry header. 
*/ struct qlc_83xx_entry_hdr { - u16 cmd; - u16 size; - u16 count; - u16 delay; +#if defined(__LITTLE_ENDIAN) + u16 cmd; + u16 size; + u16 count; + u16 delay; +#elif defined(__BIG_ENDIAN) + u16 size; + u16 cmd; + u16 delay; + u16 count; +#endif } __packed; /* Generic poll command */ @@ -60,10 +78,17 @@ struct qlc_83xx_rmw { u32 mask; u32 xor_value; u32 or_value; +#if defined(__LITTLE_ENDIAN) u8 shl; u8 shr; u8 index_a; u8 rsvd; +#elif defined(__BIG_ENDIAN) + u8 rsvd; + u8 index_a; + u8 shr; + u8 shl; +#endif } __packed; /* Generic command with 2 DWORD */ @@ -90,18 +115,6 @@ static const char *const qlc_83xx_idc_states[] = { "Quiesce" }; -/* Device States */ -enum qlcnic_83xx_states { - QLC_83XX_IDC_DEV_UNKNOWN, - QLC_83XX_IDC_DEV_COLD, - QLC_83XX_IDC_DEV_INIT, - QLC_83XX_IDC_DEV_READY, - QLC_83XX_IDC_DEV_NEED_RESET, - QLC_83XX_IDC_DEV_NEED_QUISCENT, - QLC_83XX_IDC_DEV_FAILED, - QLC_83XX_IDC_DEV_QUISCENT -}; - static int qlcnic_83xx_idc_check_driver_presence_reg(struct qlcnic_adapter *adapter) { @@ -137,7 +150,8 @@ static int qlcnic_83xx_idc_update_audit_reg(struct qlcnic_adapter *adapter, return -EBUSY; } - val = adapter->portnum & 0xf; + val = QLCRDX(adapter->ahw, QLC_83XX_IDC_DRV_AUDIT); + val |= (adapter->portnum & 0xf); val |= mode << 7; if (mode) seconds = jiffies / HZ - adapter->ahw->idc.sec_counter; @@ -376,14 +390,18 @@ static void qlcnic_83xx_idc_detach_driver(struct qlcnic_adapter *adapter) struct net_device *netdev = adapter->netdev; netif_device_detach(netdev); + /* Disable mailbox interrupt */ - QLCWRX(adapter->ahw, QLCNIC_MBX_INTR_ENBL, 0); + qlcnic_83xx_disable_mbx_intr(adapter); qlcnic_down(adapter, netdev); for (i = 0; i < adapter->ahw->num_msix; i++) { adapter->ahw->intr_tbl[i].id = i; adapter->ahw->intr_tbl[i].enabled = 0; adapter->ahw->intr_tbl[i].src = 0; } + + if (qlcnic_sriov_pf_check(adapter)) + qlcnic_sriov_pf_reset(adapter); } /** @@ -585,9 +603,15 @@ static int qlcnic_83xx_idc_check_fan_failure(struct qlcnic_adapter *adapter) static int 
qlcnic_83xx_idc_reattach_driver(struct qlcnic_adapter *adapter) { + int err; + /* register for NIC IDC AEN Events */ qlcnic_83xx_register_nic_idc_func(adapter, 1); + err = qlcnic_sriov_pf_reinit(adapter); + if (err) + return err; + qlcnic_83xx_enable_mbx_intrpt(adapter); if (qlcnic_83xx_configure_opmode(adapter)) { @@ -1893,6 +1917,9 @@ int qlcnic_83xx_config_default_opmode(struct qlcnic_adapter *adapter) qlcnic_get_func_no(adapter); op_mode = QLCRDX(ahw, QLC_83XX_DRV_OP_MODE); + if (test_bit(__QLCNIC_SRIOV_CAPABLE, &adapter->state)) + op_mode = QLC_83XX_DEFAULT_OPMODE; + if (op_mode == QLC_83XX_DEFAULT_OPMODE) { adapter->nic_ops->init_driver = qlcnic_83xx_init_default_driver; ahw->idc.state_entry = qlcnic_83xx_idc_ready_state_entry; @@ -1922,6 +1949,16 @@ int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) ahw->max_mac_filters = nic_info.max_mac_filters; ahw->max_mtu = nic_info.max_mtu; + /* VNIC mode is detected by BIT_23 in capabilities. This bit is also + * set in case device is SRIOV capable. VNIC and SRIOV are mutually + * exclusive. 
So in case of sriov capable device load driver in + * default mode + */ + if (test_bit(__QLCNIC_SRIOV_CAPABLE, &adapter->state)) { + ahw->nic_mode = QLC_83XX_DEFAULT_MODE; + return ahw->nic_mode; + } + if (ahw->capabilities & BIT_23) ahw->nic_mode = QLC_83XX_VIRTUAL_NIC_MODE; else @@ -1930,7 +1967,7 @@ int qlcnic_83xx_get_nic_configuration(struct qlcnic_adapter *adapter) return ahw->nic_mode; } -static int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter) +int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter) { int ret; @@ -2008,10 +2045,13 @@ static void qlcnic_83xx_clear_function_resources(struct qlcnic_adapter *adapter) } } -int qlcnic_83xx_init(struct qlcnic_adapter *adapter) +int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) { struct qlcnic_hardware_context *ahw = adapter->ahw; + if (qlcnic_sriov_vf_check(adapter)) + return qlcnic_sriov_vf_init(adapter, pci_using_dac); + if (qlcnic_83xx_check_hw_status(adapter)) return -EIO; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index a69097c6b84d..43562c256379 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -382,8 +382,7 @@ out_free_rq: return err; } -static void -qlcnic_fw_cmd_destroy_rx_ctx(struct qlcnic_adapter *adapter) +void qlcnic_82xx_fw_cmd_del_rx_ctx(struct qlcnic_adapter *adapter) { int err; struct qlcnic_cmd_args cmd; @@ -422,22 +421,20 @@ int qlcnic_82xx_fw_cmd_create_tx_ctx(struct qlcnic_adapter *adapter, rq_size = SIZEOF_HOSTRQ_TX(struct qlcnic_hostrq_tx_ctx); rq_addr = dma_alloc_coherent(&adapter->pdev->dev, rq_size, - &rq_phys_addr, GFP_KERNEL); + &rq_phys_addr, GFP_KERNEL | __GFP_ZERO); if (!rq_addr) return -ENOMEM; rsp_size = SIZEOF_CARDRSP_TX(struct qlcnic_cardrsp_tx_ctx); rsp_addr = dma_alloc_coherent(&adapter->pdev->dev, rsp_size, - &rsp_phys_addr, GFP_KERNEL); + &rsp_phys_addr, GFP_KERNEL | __GFP_ZERO); if (!rsp_addr) { 
err = -ENOMEM; goto out_free_rq; } - memset(rq_addr, 0, rq_size); prq = rq_addr; - memset(rsp_addr, 0, rsp_size); prsp = rsp_addr; prq->host_rsp_dma_addr = cpu_to_le64(rsp_phys_addr); @@ -486,13 +483,13 @@ out_free_rq: return err; } -static void -qlcnic_fw_cmd_destroy_tx_ctx(struct qlcnic_adapter *adapter, - struct qlcnic_host_tx_ring *tx_ring) +void qlcnic_82xx_fw_cmd_del_tx_ctx(struct qlcnic_adapter *adapter, + struct qlcnic_host_tx_ring *tx_ring) { struct qlcnic_cmd_args cmd; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DESTROY_TX_CTX); + cmd.req.arg[1] = tx_ring->ctx_id; if (qlcnic_issue_cmd(adapter, &cmd)) dev_err(&adapter->pdev->dev, @@ -532,20 +529,15 @@ int qlcnic_alloc_hw_resources(struct qlcnic_adapter *adapter) ptr = (__le32 *)dma_alloc_coherent(&pdev->dev, sizeof(u32), &tx_ring->hw_cons_phys_addr, GFP_KERNEL); - - if (ptr == NULL) { - dev_err(&pdev->dev, "failed to allocate tx consumer\n"); + if (ptr == NULL) return -ENOMEM; - } + tx_ring->hw_consumer = ptr; /* cmd desc ring */ addr = dma_alloc_coherent(&pdev->dev, TX_DESC_RINGSIZE(tx_ring), &tx_ring->phys_addr, GFP_KERNEL); - if (addr == NULL) { - dev_err(&pdev->dev, - "failed to allocate tx desc ring\n"); err = -ENOMEM; goto err_out_free; } @@ -556,11 +548,9 @@ int qlcnic_alloc_hw_resources(struct qlcnic_adapter *adapter) for (ring = 0; ring < adapter->max_rds_rings; ring++) { rds_ring = &recv_ctx->rds_rings[ring]; addr = dma_alloc_coherent(&adapter->pdev->dev, - RCV_DESC_RINGSIZE(rds_ring), - &rds_ring->phys_addr, GFP_KERNEL); + RCV_DESC_RINGSIZE(rds_ring), + &rds_ring->phys_addr, GFP_KERNEL); if (addr == NULL) { - dev_err(&pdev->dev, - "failed to allocate rds ring [%d]\n", ring); err = -ENOMEM; goto err_out_free; } @@ -572,11 +562,9 @@ int qlcnic_alloc_hw_resources(struct qlcnic_adapter *adapter) sds_ring = &recv_ctx->sds_rings[ring]; addr = dma_alloc_coherent(&adapter->pdev->dev, - STATUS_DESC_RINGSIZE(sds_ring), - &sds_ring->phys_addr, GFP_KERNEL); + STATUS_DESC_RINGSIZE(sds_ring), + 
&sds_ring->phys_addr, GFP_KERNEL); if (addr == NULL) { - dev_err(&pdev->dev, - "failed to allocate sds ring [%d]\n", ring); err = -ENOMEM; goto err_out_free; } @@ -616,13 +604,12 @@ int qlcnic_fw_create_ctx(struct qlcnic_adapter *dev) &dev->tx_ring[ring], ring); if (err) { - qlcnic_fw_cmd_destroy_rx_ctx(dev); + qlcnic_fw_cmd_del_rx_ctx(dev); if (ring == 0) goto err_out; for (i = 0; i < ring; i++) - qlcnic_fw_cmd_destroy_tx_ctx(dev, - &dev->tx_ring[i]); + qlcnic_fw_cmd_del_tx_ctx(dev, &dev->tx_ring[i]); goto err_out; } @@ -644,10 +631,10 @@ void qlcnic_fw_destroy_ctx(struct qlcnic_adapter *adapter) int ring; if (test_and_clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state)) { - qlcnic_fw_cmd_destroy_rx_ctx(adapter); + qlcnic_fw_cmd_del_rx_ctx(adapter); for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) - qlcnic_fw_cmd_destroy_tx_ctx(adapter, - &adapter->tx_ring[ring]); + qlcnic_fw_cmd_del_tx_ctx(adapter, + &adapter->tx_ring[ring]); if (qlcnic_83xx_check(adapter) && (adapter->flags & QLCNIC_MSIX_ENABLED)) { @@ -655,7 +642,7 @@ void qlcnic_fw_destroy_ctx(struct qlcnic_adapter *adapter) qlcnic_83xx_config_intrpt(adapter, 0); } /* Allow dma queues to drain after context reset */ - mdelay(20); + msleep(20); } } @@ -753,10 +740,9 @@ int qlcnic_82xx_get_nic_info(struct qlcnic_adapter *adapter, size_t nic_size = sizeof(struct qlcnic_info_le); nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size, - &nic_dma_t, GFP_KERNEL); + &nic_dma_t, GFP_KERNEL | __GFP_ZERO); if (!nic_info_addr) return -ENOMEM; - memset(nic_info_addr, 0, nic_size); nic_info = nic_info_addr; @@ -804,11 +790,10 @@ int qlcnic_82xx_set_nic_info(struct qlcnic_adapter *adapter, return err; nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size, - &nic_dma_t, GFP_KERNEL); + &nic_dma_t, GFP_KERNEL | __GFP_ZERO); if (!nic_info_addr) return -ENOMEM; - memset(nic_info_addr, 0, nic_size); nic_info = nic_info_addr; nic_info->pci_func = cpu_to_le16(nic->pci_func); @@ -854,10 +839,10 @@ int 
qlcnic_82xx_get_pci_info(struct qlcnic_adapter *adapter, size_t pci_size = npar_size * QLCNIC_MAX_PCI_FUNC; pci_info_addr = dma_alloc_coherent(&adapter->pdev->dev, pci_size, - &pci_info_dma_t, GFP_KERNEL); + &pci_info_dma_t, + GFP_KERNEL | __GFP_ZERO); if (!pci_info_addr) return -ENOMEM; - memset(pci_info_addr, 0, pci_size); npar = pci_info_addr; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_PCI_INFO); @@ -949,12 +934,9 @@ int qlcnic_get_port_stats(struct qlcnic_adapter *adapter, const u8 func, } stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size, - &stats_dma_t, GFP_KERNEL); - if (!stats_addr) { - dev_err(&adapter->pdev->dev, "Unable to allocate memory\n"); + &stats_dma_t, GFP_KERNEL | __GFP_ZERO); + if (!stats_addr) return -ENOMEM; - } - memset(stats_addr, 0, stats_size); arg1 = func | QLCNIC_STATS_VERSION << 8 | QLCNIC_STATS_PORT << 12; arg1 |= rx_tx << 15 | stats_size << 16; @@ -1003,13 +985,10 @@ int qlcnic_get_mac_stats(struct qlcnic_adapter *adapter, return -ENOMEM; stats_addr = dma_alloc_coherent(&adapter->pdev->dev, stats_size, - &stats_dma_t, GFP_KERNEL); - if (!stats_addr) { - dev_err(&adapter->pdev->dev, - "%s: Unable to allocate memory.\n", __func__); + &stats_dma_t, GFP_KERNEL | __GFP_ZERO); + if (!stats_addr) return -ENOMEM; - } - memset(stats_addr, 0, stats_size); + qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_MAC_STATS); cmd.req.arg[1] = stats_size << 16; cmd.req.arg[2] = MSD(stats_dma_t); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 5641f8ec49ab..9f7aade4667c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -149,7 +149,8 @@ static const char qlcnic_gstrings_test[][ETH_GSTRING_LEN] = { static inline int qlcnic_82xx_statistics(void) { - return QLCNIC_STATS_LEN + ARRAY_SIZE(qlcnic_83xx_mac_stats_strings); + return ARRAY_SIZE(qlcnic_device_gstrings_stats) + + 
ARRAY_SIZE(qlcnic_83xx_mac_stats_strings); } static inline int qlcnic_83xx_statistics(void) @@ -858,9 +859,11 @@ clear_diag_irq: return ret; } -#define QLCNIC_ILB_PKT_SIZE 64 -#define QLCNIC_NUM_ILB_PKT 16 -#define QLCNIC_ILB_MAX_RCV_LOOP 10 +#define QLCNIC_ILB_PKT_SIZE 64 +#define QLCNIC_NUM_ILB_PKT 16 +#define QLCNIC_ILB_MAX_RCV_LOOP 10 +#define QLCNIC_LB_PKT_POLL_DELAY_MSEC 1 +#define QLCNIC_LB_PKT_POLL_COUNT 20 static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[]) { @@ -897,9 +900,9 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) loop = 0; do { - msleep(1); + msleep(QLCNIC_LB_PKT_POLL_DELAY_MSEC); qlcnic_process_rcv_ring_diag(sds_ring); - if (loop++ > QLCNIC_ILB_MAX_RCV_LOOP) + if (loop++ > QLCNIC_LB_PKT_POLL_COUNT) break; } while (!adapter->ahw->diag_cnt); @@ -1070,8 +1073,7 @@ qlcnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) } } -static void -qlcnic_fill_stats(u64 *data, void *stats, int type) +static u64 *qlcnic_fill_stats(u64 *data, void *stats, int type) { if (type == QLCNIC_MAC_STATS) { struct qlcnic_mac_statistics *mac_stats = @@ -1120,6 +1122,7 @@ qlcnic_fill_stats(u64 *data, void *stats, int type) *data++ = QLCNIC_FILL_STATS(esw_stats->local_frames); *data++ = QLCNIC_FILL_STATS(esw_stats->numbytes); } + return data; } static void qlcnic_get_ethtool_stats(struct net_device *dev, @@ -1147,7 +1150,7 @@ static void qlcnic_get_ethtool_stats(struct net_device *dev, /* Retrieve MAC statistics from firmware */ memset(&mac_stats, 0, sizeof(struct qlcnic_mac_statistics)); qlcnic_get_mac_stats(adapter, &mac_stats); - qlcnic_fill_stats(data, &mac_stats, QLCNIC_MAC_STATS); + data = qlcnic_fill_stats(data, &mac_stats, QLCNIC_MAC_STATS); } if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED)) @@ -1159,7 +1162,7 @@ static void qlcnic_get_ethtool_stats(struct net_device *dev, if (ret) return; - qlcnic_fill_stats(data, &port_stats.rx, QLCNIC_ESW_STATS); + data = qlcnic_fill_stats(data, &port_stats.rx, QLCNIC_ESW_STATS); 
ret = qlcnic_get_port_stats(adapter, adapter->ahw->pci_func, QLCNIC_QUERY_TX_COUNTER, &port_stats.tx); if (ret) @@ -1176,7 +1179,8 @@ static int qlcnic_set_led(struct net_device *dev, int err = -EIO, active = 1; if (qlcnic_83xx_check(adapter)) - return -EOPNOTSUPP; + return qlcnic_83xx_set_led(dev, state); + if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC) { netdev_warn(dev, "LED test not supported for non " "privilege function\n"); @@ -1537,3 +1541,25 @@ const struct ethtool_ops qlcnic_ethtool_ops = { .get_dump_data = qlcnic_get_dump_data, .set_dump = qlcnic_set_dump, }; + +const struct ethtool_ops qlcnic_sriov_vf_ethtool_ops = { + .get_settings = qlcnic_get_settings, + .get_drvinfo = qlcnic_get_drvinfo, + .get_regs_len = qlcnic_get_regs_len, + .get_regs = qlcnic_get_regs, + .get_link = ethtool_op_get_link, + .get_eeprom_len = qlcnic_get_eeprom_len, + .get_eeprom = qlcnic_get_eeprom, + .get_ringparam = qlcnic_get_ringparam, + .set_ringparam = qlcnic_set_ringparam, + .get_channels = qlcnic_get_channels, + .get_pauseparam = qlcnic_get_pauseparam, + .get_wol = qlcnic_get_wol, + .get_strings = qlcnic_get_strings, + .get_ethtool_stats = qlcnic_get_ethtool_stats, + .get_sset_count = qlcnic_get_sset_count, + .get_coalesce = qlcnic_get_intr_coalesce, + .set_coalesce = qlcnic_set_intr_coalesce, + .set_msglevel = qlcnic_set_msglevel, + .get_msglevel = qlcnic_get_msglevel, +}; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h index 44197ca1456c..c0f0c0d0a790 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h @@ -669,7 +669,7 @@ enum { #define QLCNIC_CMDPEG_CHECK_RETRY_COUNT 60 #define QLCNIC_CMDPEG_CHECK_DELAY 500 #define QLCNIC_HEARTBEAT_PERIOD_MSECS 200 -#define QLCNIC_HEARTBEAT_CHECK_RETRY_COUNT 45 +#define QLCNIC_HEARTBEAT_CHECK_RETRY_COUNT 10 #define QLCNIC_MAX_MC_COUNT 38 #define QLCNIC_WATCHDOG_TIMEOUTVALUE 5 @@ -714,7 +714,9 @@ enum { 
QLCNIC_MGMT_FUNC = 0, QLCNIC_PRIV_FUNC = 1, QLCNIC_NON_PRIV_FUNC = 2, - QLCNIC_UNKNOWN_FUNC_MODE = 3 + QLCNIC_SRIOV_PF_FUNC = 3, + QLCNIC_SRIOV_VF_FUNC = 4, + QLCNIC_UNKNOWN_FUNC_MODE = 5 }; enum { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index f89cc7a3fe6c..6a6512ba9f38 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -423,7 +423,7 @@ qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter, } int qlcnic_82xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, - __le16 vlan_id, u8 op) + u16 vlan_id, u8 op) { struct qlcnic_nic_req req; struct qlcnic_mac_req *mac_req; @@ -441,7 +441,7 @@ int qlcnic_82xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, memcpy(mac_req->mac_addr, addr, 6); vlan_req = (struct qlcnic_vlan_req *)&req.words[1]; - vlan_req->vlan_id = vlan_id; + vlan_req->vlan_id = cpu_to_le16(vlan_id); return qlcnic_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1); } @@ -468,7 +468,7 @@ int qlcnic_nic_del_mac(struct qlcnic_adapter *adapter, const u8 *addr) return err; } -int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, const u8 *addr) +int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, const u8 *addr, u16 vlan) { struct list_head *head; struct qlcnic_mac_list_s *cur; @@ -487,7 +487,7 @@ int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, const u8 *addr) memcpy(cur->mac_addr, addr, ETH_ALEN); if (qlcnic_sre_macaddr_change(adapter, - cur->mac_addr, 0, QLCNIC_MAC_ADD)) { + cur->mac_addr, vlan, QLCNIC_MAC_ADD)) { kfree(cur); return -EIO; } @@ -496,7 +496,7 @@ int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, const u8 *addr) return 0; } -void qlcnic_set_multi(struct net_device *netdev) +void __qlcnic_set_multi(struct net_device *netdev, u16 vlan) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct netdev_hw_addr *ha; @@ -508,8 +508,9 @@ void qlcnic_set_multi(struct net_device 
*netdev) if (!test_bit(__QLCNIC_FW_ATTACHED, &adapter->state)) return; - qlcnic_nic_add_mac(adapter, adapter->mac_addr); - qlcnic_nic_add_mac(adapter, bcast_addr); + if (!qlcnic_sriov_vf_check(adapter)) + qlcnic_nic_add_mac(adapter, adapter->mac_addr, vlan); + qlcnic_nic_add_mac(adapter, bcast_addr, vlan); if (netdev->flags & IFF_PROMISC) { if (!(adapter->flags & QLCNIC_PROMISC_DISABLED)) @@ -523,23 +524,55 @@ void qlcnic_set_multi(struct net_device *netdev) goto send_fw_cmd; } - if (!netdev_mc_empty(netdev)) { + if (!netdev_mc_empty(netdev) && !qlcnic_sriov_vf_check(adapter)) { netdev_for_each_mc_addr(ha, netdev) { - qlcnic_nic_add_mac(adapter, ha->addr); + qlcnic_nic_add_mac(adapter, ha->addr, vlan); } } + if (qlcnic_sriov_vf_check(adapter)) + qlcnic_vf_add_mc_list(netdev, vlan); + send_fw_cmd: - if (mode == VPORT_MISS_MODE_ACCEPT_ALL && !adapter->fdb_mac_learn) { - qlcnic_alloc_lb_filters_mem(adapter); - adapter->drv_mac_learn = true; - } else { - adapter->drv_mac_learn = false; + if (!qlcnic_sriov_vf_check(adapter)) { + if (mode == VPORT_MISS_MODE_ACCEPT_ALL && + !adapter->fdb_mac_learn) { + qlcnic_alloc_lb_filters_mem(adapter); + adapter->drv_mac_learn = true; + } else { + adapter->drv_mac_learn = false; + } } qlcnic_nic_set_promisc(adapter, mode); } +void qlcnic_set_multi(struct net_device *netdev) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct netdev_hw_addr *ha; + struct qlcnic_mac_list_s *cur; + + if (!test_bit(__QLCNIC_FW_ATTACHED, &adapter->state)) + return; + if (qlcnic_sriov_vf_check(adapter)) { + if (!netdev_mc_empty(netdev)) { + netdev_for_each_mc_addr(ha, netdev) { + cur = kzalloc(sizeof(struct qlcnic_mac_list_s), + GFP_ATOMIC); + if (cur == NULL) + break; + memcpy(cur->mac_addr, + ha->addr, ETH_ALEN); + list_add_tail(&cur->list, &adapter->vf_mc_list); + } + } + qlcnic_sriov_vf_schedule_multi(adapter->netdev); + return; + } + __qlcnic_set_multi(netdev, 0); +} + int qlcnic_82xx_nic_set_promisc(struct qlcnic_adapter *adapter, u32 
mode) { struct qlcnic_nic_req req; @@ -559,7 +592,7 @@ int qlcnic_82xx_nic_set_promisc(struct qlcnic_adapter *adapter, u32 mode) (struct cmd_desc_type0 *)&req, 1); } -void qlcnic_free_mac_list(struct qlcnic_adapter *adapter) +void qlcnic_82xx_free_mac_list(struct qlcnic_adapter *adapter) { struct qlcnic_mac_list_s *cur; struct list_head *head = &adapter->mac_list; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h index 5b8749eda11f..95b1b5732838 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h @@ -83,6 +83,8 @@ enum qlcnic_regs { #define QLCNIC_CMD_CONFIG_PORT 0x2e #define QLCNIC_CMD_TEMP_SIZE 0x2f #define QLCNIC_CMD_GET_TEMP_HDR 0x30 +#define QLCNIC_CMD_BC_EVENT_SETUP 0x31 +#define QLCNIC_CMD_CONFIG_VPORT 0x32 #define QLCNIC_CMD_GET_MAC_STATS 0x37 #define QLCNIC_CMD_SET_DRV_VER 0x38 #define QLCNIC_CMD_CONFIGURE_RSS 0x41 @@ -114,6 +116,7 @@ enum qlcnic_regs { #define QLCNIC_SET_FAC_DEF_MAC 5 #define QLCNIC_MBX_LINK_EVENT 0x8001 +#define QLCNIC_MBX_BC_EVENT 0x8002 #define QLCNIC_MBX_COMP_EVENT 0x8100 #define QLCNIC_MBX_REQUEST_EVENT 0x8101 #define QLCNIC_MBX_TIME_EXTEND_EVENT 0x8102 @@ -156,7 +159,7 @@ int qlcnic_82xx_nic_set_promisc(struct qlcnic_adapter *adapter, u32); int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, struct net_device *netdev); void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, - u64 *uaddr, __le16 vlan_id); + u64 *uaddr, u16 vlan_id); void qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *adapter); int qlcnic_82xx_config_rss(struct qlcnic_adapter *adapter, int); void qlcnic_82xx_config_ipaddr(struct qlcnic_adapter *adapter, @@ -175,7 +178,10 @@ int qlcnic_82xx_issue_cmd(struct qlcnic_adapter *adapter, int qlcnic_82xx_fw_cmd_create_rx_ctx(struct qlcnic_adapter *); int qlcnic_82xx_fw_cmd_create_tx_ctx(struct qlcnic_adapter *, struct qlcnic_host_tx_ring *tx_ring, int); -int 
qlcnic_82xx_sre_macaddr_change(struct qlcnic_adapter *, u8 *, __le16, u8); +void qlcnic_82xx_fw_cmd_del_rx_ctx(struct qlcnic_adapter *); +void qlcnic_82xx_fw_cmd_del_tx_ctx(struct qlcnic_adapter *, + struct qlcnic_host_tx_ring *); +int qlcnic_82xx_sre_macaddr_change(struct qlcnic_adapter *, u8 *, u16, u8); int qlcnic_82xx_get_mac_address(struct qlcnic_adapter *, u8*); int qlcnic_82xx_get_nic_info(struct qlcnic_adapter *, struct qlcnic_info *, u8); int qlcnic_82xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 0e630061bff3..d3f8797efcc3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -9,6 +9,7 @@ #include <linux/if_vlan.h> #include <net/ip.h> #include <linux/ipv6.h> +#include <net/checksum.h> #include "qlcnic.h" @@ -146,7 +147,10 @@ static inline u8 qlcnic_mac_hash(u64 mac) static inline u32 qlcnic_get_ref_handle(struct qlcnic_adapter *adapter, u16 handle, u8 ring_id) { - if (adapter->pdev->device == PCI_DEVICE_ID_QLOGIC_QLE834X) + unsigned short device = adapter->pdev->device; + + if ((device == PCI_DEVICE_ID_QLOGIC_QLE834X) || + (device == PCI_DEVICE_ID_QLOGIC_VF_QLE834X)) return handle | (ring_id << 15); else return handle; @@ -158,7 +162,7 @@ static inline int qlcnic_82xx_is_lb_pkt(u64 sts_data) } void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, - int loopback_pkt, __le16 vlan_id) + int loopback_pkt, u16 vlan_id) { struct ethhdr *phdr = (struct ethhdr *)(skb->data); struct qlcnic_filter *fil, *tmp_fil; @@ -236,7 +240,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, } void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, - __le16 vlan_id) + u16 vlan_id) { struct cmd_desc_type0 *hwdesc; struct qlcnic_nic_req *req; @@ -261,7 +265,7 @@ void qlcnic_82xx_change_filter(struct qlcnic_adapter 
*adapter, u64 *uaddr, memcpy(mac_req->mac_addr, &uaddr, ETH_ALEN); vlan_req = (struct qlcnic_vlan_req *)&req->words[1]; - vlan_req->vlan_id = vlan_id; + vlan_req->vlan_id = cpu_to_le16(vlan_id); tx_ring->producer = get_next_index(producer, tx_ring->num_desc); smp_mb(); @@ -277,7 +281,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct net_device *netdev = adapter->netdev; struct ethhdr *phdr = (struct ethhdr *)(skb->data); u64 src_addr = 0; - __le16 vlan_id = 0; + u16 vlan_id = 0; u8 hindex; if (ether_addr_equal(phdr->h_source, adapter->mac_addr)) @@ -340,14 +344,14 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter, flags = FLAGS_VLAN_OOB; vlan_tci = vlan_tx_tag_get(skb); } - if (unlikely(adapter->pvid)) { + if (unlikely(adapter->tx_pvid)) { if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) return -EIO; if (vlan_tci && (adapter->flags & QLCNIC_TAGGING_ENABLED)) goto set_flags; flags = FLAGS_VLAN_OOB; - vlan_tci = adapter->pvid; + vlan_tci = adapter->tx_pvid; } set_flags: qlcnic_set_tx_vlan_tci(first_desc, vlan_tci); @@ -358,8 +362,7 @@ set_flags: memcpy(&first_desc->eth_addr, skb->data, ETH_ALEN); } opcode = TX_ETHER_PKT; - if ((adapter->netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && - skb_shinfo(skb)->gso_size > 0) { + if (skb_is_gso(skb)) { hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); first_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); first_desc->total_hdr_length = hdr_len; @@ -976,10 +979,10 @@ static inline int qlcnic_check_rx_tagging(struct qlcnic_adapter *adapter, memmove(skb->data + VLAN_HLEN, eth_hdr, ETH_ALEN * 2); skb_pull(skb, VLAN_HLEN); } - if (!adapter->pvid) + if (!adapter->rx_pvid) return 0; - if (*vlan_tag == adapter->pvid) { + if (*vlan_tag == adapter->rx_pvid) { /* Outer vlan tag. 
Packet should follow non-vlan path */ *vlan_tag = 0xffff; return 0; @@ -1025,8 +1028,7 @@ qlcnic_process_rcv(struct qlcnic_adapter *adapter, (adapter->flags & QLCNIC_ESWITCH_ENABLED)) { t_vid = 0; is_lb_pkt = qlcnic_82xx_is_lb_pkt(sts_data0); - qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, - cpu_to_le16(t_vid)); + qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, t_vid); } if (length > rds_ring->skb_size) @@ -1046,7 +1048,7 @@ qlcnic_process_rcv(struct qlcnic_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); napi_gro_receive(&sds_ring->napi, skb); @@ -1103,8 +1105,7 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, (adapter->flags & QLCNIC_ESWITCH_ENABLED)) { t_vid = 0; is_lb_pkt = qlcnic_82xx_is_lb_pkt(sts_data0); - qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, - cpu_to_le16(t_vid)); + qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, t_vid); } if (timestamp) @@ -1132,9 +1133,8 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, iph = (struct iphdr *)skb->data; th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); length = (iph->ihl << 2) + (th->doff << 2) + lro_length; + csum_replace2(&iph->check, iph->tot_len, htons(length)); iph->tot_len = htons(length); - iph->check = 0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } th->psh = push; @@ -1150,7 +1150,7 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, } if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); netif_receive_skb(skb); adapter->stats.lro_pkts++; @@ -1497,8 +1497,7 @@ qlcnic_83xx_process_rcv(struct qlcnic_adapter *adapter, (adapter->flags & QLCNIC_ESWITCH_ENABLED)) { t_vid = 0; is_lb_pkt = qlcnic_83xx_is_lb_pkt(sts_data[1], 0); - qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, - cpu_to_le16(t_vid)); + qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, t_vid); } if (length > rds_ring->skb_size) @@ -1515,7 +1514,7 
@@ qlcnic_83xx_process_rcv(struct qlcnic_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); napi_gro_receive(&sds_ring->napi, skb); @@ -1567,8 +1566,7 @@ qlcnic_83xx_process_lro(struct qlcnic_adapter *adapter, (adapter->flags & QLCNIC_ESWITCH_ENABLED)) { t_vid = 0; is_lb_pkt = qlcnic_83xx_is_lb_pkt(sts_data[1], 1); - qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, - cpu_to_le16(t_vid)); + qlcnic_add_lb_filter(adapter, skb, is_lb_pkt, t_vid); } if (qlcnic_83xx_is_tstamp(sts_data[1])) data_offset = l4_hdr_offset + QLCNIC_TCP_TS_HDR_SIZE; @@ -1595,9 +1593,8 @@ qlcnic_83xx_process_lro(struct qlcnic_adapter *adapter, iph = (struct iphdr *)skb->data; th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); length = (iph->ihl << 2) + (th->doff << 2) + lro_length; + csum_replace2(&iph->check, iph->tot_len, htons(length)); iph->tot_len = htons(length); - iph->check = 0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } th->psh = push; @@ -1613,7 +1610,7 @@ qlcnic_83xx_process_lro(struct qlcnic_adapter *adapter, } if (vid != 0xffff) - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); netif_receive_skb(skb); @@ -1692,6 +1689,29 @@ skip: return count; } +static int qlcnic_83xx_msix_sriov_vf_poll(struct napi_struct *napi, int budget) +{ + int tx_complete; + int work_done; + struct qlcnic_host_sds_ring *sds_ring; + struct qlcnic_adapter *adapter; + struct qlcnic_host_tx_ring *tx_ring; + + sds_ring = container_of(napi, struct qlcnic_host_sds_ring, napi); + adapter = sds_ring->adapter; + /* tx ring count = 1 */ + tx_ring = adapter->tx_ring; + + tx_complete = qlcnic_process_cmd_ring(adapter, tx_ring, budget); + work_done = qlcnic_83xx_process_rcv_ring(sds_ring, budget); + if ((work_done < budget) && tx_complete) { + napi_complete(&sds_ring->napi); + qlcnic_83xx_enable_intr(adapter, sds_ring); + } + + return 
work_done; +} + static int qlcnic_83xx_poll(struct napi_struct *napi, int budget) { int tx_complete; @@ -1769,7 +1789,8 @@ void qlcnic_83xx_napi_enable(struct qlcnic_adapter *adapter) qlcnic_83xx_enable_intr(adapter, sds_ring); } - if (adapter->flags & QLCNIC_MSIX_ENABLED) { + if ((adapter->flags & QLCNIC_MSIX_ENABLED) && + !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; napi_enable(&tx_ring->napi); @@ -1796,7 +1817,8 @@ void qlcnic_83xx_napi_disable(struct qlcnic_adapter *adapter) napi_disable(&sds_ring->napi); } - if (adapter->flags & QLCNIC_MSIX_ENABLED) { + if ((adapter->flags & QLCNIC_MSIX_ENABLED) && + !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; qlcnic_83xx_disable_tx_intr(adapter, tx_ring); @@ -1809,7 +1831,7 @@ void qlcnic_83xx_napi_disable(struct qlcnic_adapter *adapter) int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, struct net_device *netdev) { - int ring, max_sds_rings; + int ring, max_sds_rings, temp; struct qlcnic_host_sds_ring *sds_ring; struct qlcnic_host_tx_ring *tx_ring; struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx; @@ -1820,14 +1842,23 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, max_sds_rings = adapter->max_sds_rings; for (ring = 0; ring < adapter->max_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; - if (adapter->flags & QLCNIC_MSIX_ENABLED) - netif_napi_add(netdev, &sds_ring->napi, - qlcnic_83xx_rx_poll, - QLCNIC_NETDEV_WEIGHT * 2); - else + if (adapter->flags & QLCNIC_MSIX_ENABLED) { + if (!(adapter->flags & QLCNIC_TX_INTR_SHARED)) { + netif_napi_add(netdev, &sds_ring->napi, + qlcnic_83xx_rx_poll, + QLCNIC_NETDEV_WEIGHT * 2); + } else { + temp = QLCNIC_NETDEV_WEIGHT / max_sds_rings; + netif_napi_add(netdev, &sds_ring->napi, + qlcnic_83xx_msix_sriov_vf_poll, + temp); + } + + } else { netif_napi_add(netdev, 
&sds_ring->napi, qlcnic_83xx_poll, QLCNIC_NETDEV_WEIGHT / max_sds_rings); + } } if (qlcnic_alloc_tx_rings(adapter, netdev)) { @@ -1835,7 +1866,8 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, return -ENOMEM; } - if (adapter->flags & QLCNIC_MSIX_ENABLED) { + if ((adapter->flags & QLCNIC_MSIX_ENABLED) && + !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; netif_napi_add(netdev, &tx_ring->napi, @@ -1861,7 +1893,8 @@ void qlcnic_83xx_napi_del(struct qlcnic_adapter *adapter) qlcnic_free_sds_rings(adapter->recv_ctx); - if ((adapter->flags & QLCNIC_MSIX_ENABLED)) { + if ((adapter->flags & QLCNIC_MSIX_ENABLED) && + !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; netif_napi_del(&tx_ring->napi); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 28a6d4838364..247a9f9b7bdc 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include "qlcnic.h" +#include "qlcnic_sriov.h" #include "qlcnic_hw.h" #include <linux/swab.h> @@ -85,8 +86,8 @@ static void qlcnic_dev_set_npar_ready(struct qlcnic_adapter *); static int qlcnicvf_start_firmware(struct qlcnic_adapter *); static void qlcnic_set_netdev_features(struct qlcnic_adapter *, struct qlcnic_esw_func_cfg *); -static int qlcnic_vlan_rx_add(struct net_device *, u16); -static int qlcnic_vlan_rx_del(struct net_device *, u16); +static int qlcnic_vlan_rx_add(struct net_device *, __be16, u16); +static int qlcnic_vlan_rx_del(struct net_device *, __be16, u16); #define QLCNIC_IS_TSO_CAPABLE(adapter) \ ((adapter)->ahw->capabilities & QLCNIC_FW_CAPABILITY_TSO) @@ -109,6 +110,7 @@ static u32 qlcnic_vlan_tx_check(struct qlcnic_adapter *adapter) static 
DEFINE_PCI_DEVICE_TABLE(qlcnic_pci_tbl) = { ENTRY(PCI_DEVICE_ID_QLOGIC_QLE824X), ENTRY(PCI_DEVICE_ID_QLOGIC_QLE834X), + ENTRY(PCI_DEVICE_ID_QLOGIC_VF_QLE834X), {0,} }; @@ -198,8 +200,7 @@ void qlcnic_free_sds_rings(struct qlcnic_recv_context *recv_ctx) recv_ctx->sds_rings = NULL; } -static int -qlcnic_read_mac_addr(struct qlcnic_adapter *adapter) +int qlcnic_read_mac_addr(struct qlcnic_adapter *adapter) { u8 mac_addr[ETH_ALEN]; struct net_device *netdev = adapter->netdev; @@ -225,6 +226,9 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) struct qlcnic_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; + if (qlcnic_sriov_vf_check(adapter)) + return -EINVAL; + if ((adapter->flags & QLCNIC_MAC_OVERRIDE_DISABLED)) return -EOPNOTSUPP; @@ -253,11 +257,8 @@ static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = -EOPNOTSUPP; - if (!adapter->fdb_mac_learn) { - pr_info("%s: Driver mac learn is enabled, FDB operation not allowed\n", - __func__); - return err; - } + if (!adapter->fdb_mac_learn) + return ndo_dflt_fdb_del(ndm, tb, netdev, addr); if (adapter->flags & QLCNIC_ESWITCH_ENABLED) { if (is_unicast_ether_addr(addr)) @@ -277,11 +278,8 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = 0; - if (!adapter->fdb_mac_learn) { - pr_info("%s: Driver mac learn is enabled, FDB operation not allowed\n", - __func__); - return -EOPNOTSUPP; - } + if (!adapter->fdb_mac_learn) + return ndo_dflt_fdb_add(ndm, tb, netdev, addr, flags); if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED)) { pr_info("%s: FDB e-switch is not enabled\n", __func__); @@ -292,7 +290,7 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; if (is_unicast_ether_addr(addr)) - err = qlcnic_nic_add_mac(adapter, addr); + err = qlcnic_nic_add_mac(adapter, addr, 0); else if (is_multicast_ether_addr(addr)) err = 
dev_mc_add_excl(netdev, addr); else @@ -306,11 +304,8 @@ static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, { struct qlcnic_adapter *adapter = netdev_priv(netdev); - if (!adapter->fdb_mac_learn) { - pr_info("%s: Driver mac learn is enabled, FDB operation not allowed\n", - __func__); - return -EOPNOTSUPP; - } + if (!adapter->fdb_mac_learn) + return ndo_dflt_fdb_dump(skb, ncb, netdev, idx); if (adapter->flags & QLCNIC_ESWITCH_ENABLED) idx = ndo_dflt_fdb_dump(skb, ncb, netdev, idx); @@ -346,6 +341,12 @@ static const struct net_device_ops qlcnic_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = qlcnic_poll_controller, #endif +#ifdef CONFIG_QLCNIC_SRIOV + .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, + .ndo_set_vf_tx_rate = qlcnic_sriov_set_vf_tx_rate, + .ndo_get_vf_config = qlcnic_sriov_get_vf_config, + .ndo_set_vf_vlan = qlcnic_sriov_set_vf_vlan, +#endif }; static const struct net_device_ops qlcnic_netdev_failed_ops = { @@ -387,6 +388,8 @@ static struct qlcnic_hardware_ops qlcnic_hw_ops = { .process_lb_rcv_ring_diag = qlcnic_82xx_process_rcv_ring_diag, .create_rx_ctx = qlcnic_82xx_fw_cmd_create_rx_ctx, .create_tx_ctx = qlcnic_82xx_fw_cmd_create_tx_ctx, + .del_rx_ctx = qlcnic_82xx_fw_cmd_del_rx_ctx, + .del_tx_ctx = qlcnic_82xx_fw_cmd_del_tx_ctx, .setup_link_event = qlcnic_82xx_linkevent_request, .get_nic_info = qlcnic_82xx_get_nic_info, .get_pci_info = qlcnic_82xx_get_pci_info, @@ -402,13 +405,22 @@ static struct qlcnic_hardware_ops qlcnic_hw_ops = { .config_promisc_mode = qlcnic_82xx_nic_set_promisc, .change_l2_filter = qlcnic_82xx_change_filter, .get_board_info = qlcnic_82xx_get_board_info, + .free_mac_list = qlcnic_82xx_free_mac_list, }; int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) { struct pci_dev *pdev = adapter->pdev; int err = -1, i; - int max_tx_rings; + int max_tx_rings, tx_vector; + + if (adapter->flags & QLCNIC_TX_INTR_SHARED) { + max_tx_rings = 0; + tx_vector = 0; + } else { + 
max_tx_rings = adapter->max_drv_tx_rings; + tx_vector = 1; + } if (!adapter->msix_entries) { adapter->msix_entries = kcalloc(num_msix, @@ -431,7 +443,6 @@ int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) if (qlcnic_83xx_check(adapter)) { adapter->ahw->num_msix = num_msix; /* subtract mail box and tx ring vectors */ - max_tx_rings = adapter->max_drv_tx_rings; adapter->max_sds_rings = num_msix - max_tx_rings - 1; } else { @@ -444,11 +455,11 @@ int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix) "Unable to allocate %d MSI-X interrupt vectors\n", num_msix); if (qlcnic_83xx_check(adapter)) { - if (err < QLC_83XX_MINIMUM_VECTOR) + if (err < (QLC_83XX_MINIMUM_VECTOR - tx_vector)) return err; - err -= (adapter->max_drv_tx_rings + 1); + err -= (max_tx_rings + 1); num_msix = rounddown_pow_of_two(err); - num_msix += (adapter->max_drv_tx_rings + 1); + num_msix += (max_tx_rings + 1); } else { num_msix = rounddown_pow_of_two(err); } @@ -542,11 +553,10 @@ void qlcnic_teardown_intr(struct qlcnic_adapter *adapter) } } -static void -qlcnic_cleanup_pci_map(struct qlcnic_adapter *adapter) +static void qlcnic_cleanup_pci_map(struct qlcnic_hardware_context *ahw) { - if (adapter->ahw->pci_base0 != NULL) - iounmap(adapter->ahw->pci_base0); + if (ahw->pci_base0 != NULL) + iounmap(ahw->pci_base0); } static int qlcnic_get_act_pci_func(struct qlcnic_adapter *adapter) @@ -721,6 +731,7 @@ static void qlcnic_get_bar_length(u32 dev_id, ulong *bar) *bar = QLCNIC_82XX_BAR0_LENGTH; break; case PCI_DEVICE_ID_QLOGIC_QLE834X: + case PCI_DEVICE_ID_QLOGIC_VF_QLE834X: *bar = QLCNIC_83XX_BAR0_LENGTH; break; default: @@ -751,7 +762,7 @@ static int qlcnic_setup_pci_map(struct pci_dev *pdev, return -EIO; } - dev_info(&pdev->dev, "%dMB memory map\n", (int)(mem_len>>20)); + dev_info(&pdev->dev, "%dKB memory map\n", (int)(mem_len >> 10)); ahw->pci_base0 = mem_ptr0; ahw->pci_len0 = pci_len0; @@ -891,24 +902,50 @@ void qlcnic_set_vlan_config(struct qlcnic_adapter *adapter, else 
adapter->flags |= QLCNIC_TAGGING_ENABLED; - if (esw_cfg->vlan_id) - adapter->pvid = esw_cfg->vlan_id; - else - adapter->pvid = 0; + if (esw_cfg->vlan_id) { + adapter->rx_pvid = esw_cfg->vlan_id; + adapter->tx_pvid = esw_cfg->vlan_id; + } else { + adapter->rx_pvid = 0; + adapter->tx_pvid = 0; + } } static int -qlcnic_vlan_rx_add(struct net_device *netdev, u16 vid) +qlcnic_vlan_rx_add(struct net_device *netdev, __be16 proto, u16 vid) { struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err; + + if (qlcnic_sriov_vf_check(adapter)) { + err = qlcnic_sriov_cfg_vf_guest_vlan(adapter, vid, 1); + if (err) { + netdev_err(netdev, + "Cannot add VLAN filter for VLAN id %d, err=%d", + vid, err); + return err; + } + } + set_bit(vid, adapter->vlans); return 0; } static int -qlcnic_vlan_rx_del(struct net_device *netdev, u16 vid) +qlcnic_vlan_rx_del(struct net_device *netdev, __be16 proto, u16 vid) { struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err; + + if (qlcnic_sriov_vf_check(adapter)) { + err = qlcnic_sriov_cfg_vf_guest_vlan(adapter, vid, 0); + if (err) { + netdev_err(netdev, + "Cannot delete VLAN filter for VLAN id %d, err=%d", + vid, err); + return err; + } + } qlcnic_restore_indev_addr(netdev, NETDEV_DOWN); clear_bit(vid, adapter->vlans); @@ -1292,7 +1329,8 @@ qlcnic_request_irq(struct qlcnic_adapter *adapter) } } if (qlcnic_83xx_check(adapter) && - (adapter->flags & QLCNIC_MSIX_ENABLED)) { + (adapter->flags & QLCNIC_MSIX_ENABLED) && + !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { handler = qlcnic_msix_tx_intr; for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { @@ -1328,7 +1366,8 @@ qlcnic_free_irq(struct qlcnic_adapter *adapter) free_irq(sds_ring->irq, sds_ring); } } - if (qlcnic_83xx_check(adapter)) { + if (qlcnic_83xx_check(adapter) && + !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->max_drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; @@ -1418,9 +1457,12 @@ void __qlcnic_down(struct qlcnic_adapter 
*adapter, struct net_device *netdev) if (!test_and_clear_bit(__QLCNIC_DEV_UP, &adapter->state)) return; + if (qlcnic_sriov_vf_check(adapter)) + qlcnic_sriov_cleanup_async_list(&adapter->ahw->sriov->bc); smp_mb(); spin_lock(&adapter->tx_clean_lock); netif_carrier_off(netdev); + adapter->ahw->linkup = 0; netif_tx_disable(netdev); qlcnic_free_mac_list(adapter); @@ -1685,7 +1727,7 @@ qlcnic_reset_context(struct qlcnic_adapter *adapter) return err; } -static int +int qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, int pci_using_dac) { @@ -1701,11 +1743,14 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, qlcnic_change_mtu(netdev, netdev->mtu); - SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_ops); + if (qlcnic_sriov_vf_check(adapter)) + SET_ETHTOOL_OPS(netdev, &qlcnic_sriov_vf_ethtool_ops); + else + SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_ops); netdev->features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO | - NETIF_F_HW_VLAN_RX); + NETIF_F_HW_VLAN_CTAG_RX); netdev->vlan_features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); @@ -1720,7 +1765,10 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, } if (qlcnic_vlan_tx_check(adapter)) - netdev->features |= (NETIF_F_HW_VLAN_TX); + netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX); + + if (qlcnic_sriov_vf_check(adapter)) + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_HW_LRO) netdev->features |= NETIF_F_LRO; @@ -1820,6 +1868,9 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u32 capab2; char board_name[QLCNIC_MAX_BOARD_NAME_LEN + 19]; /* MAC + ": " + name */ + if (pdev->is_virtfn) + return -ENODEV; + err = pci_enable_device(pdev); if (err) return err; @@ -1844,12 +1895,18 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!ahw) goto err_out_free_res; - if (ent->device == 
PCI_DEVICE_ID_QLOGIC_QLE824X) { + switch (ent->device) { + case PCI_DEVICE_ID_QLOGIC_QLE824X: ahw->hw_ops = &qlcnic_hw_ops; - ahw->reg_tbl = (u32 *)qlcnic_reg_tbl; - } else if (ent->device == PCI_DEVICE_ID_QLOGIC_QLE834X) { + ahw->reg_tbl = (u32 *) qlcnic_reg_tbl; + break; + case PCI_DEVICE_ID_QLOGIC_QLE834X: qlcnic_83xx_register_map(ahw); - } else { + break; + case PCI_DEVICE_ID_QLOGIC_VF_QLE834X: + qlcnic_sriov_vf_register_map(ahw); + break; + default: goto err_out_free_hw_res; } @@ -1911,11 +1968,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } else if (qlcnic_83xx_check(adapter)) { qlcnic_83xx_check_vf(adapter, ent); adapter->portnum = adapter->ahw->pci_func; - err = qlcnic_83xx_init(adapter); + err = qlcnic_83xx_init(adapter, pci_using_dac); if (err) { dev_err(&pdev->dev, "%s: failed\n", __func__); goto err_out_free_hw; } + if (qlcnic_sriov_vf_check(adapter)) + return 0; } else { dev_err(&pdev->dev, "%s: failed. Please Reboot\n", __func__); @@ -1932,6 +1991,12 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) module_name(THIS_MODULE), board_name, adapter->ahw->revision_id); } + + if (qlcnic_83xx_check(adapter) && !qlcnic_use_msi_x && + !!qlcnic_use_msi) + dev_warn(&pdev->dev, + "83xx adapter do not support MSI interrupts\n"); + err = qlcnic_setup_intr(adapter, 0); if (err) { dev_err(&pdev->dev, "Failed to setup interrupt\n"); @@ -1999,7 +2064,7 @@ err_out_free_netdev: free_netdev(netdev); err_out_iounmap: - qlcnic_cleanup_pci_map(adapter); + qlcnic_cleanup_pci_map(ahw); err_out_free_hw_res: kfree(ahw); @@ -2024,11 +2089,13 @@ static void qlcnic_remove(struct pci_dev *pdev) return; netdev = adapter->netdev; + qlcnic_sriov_pf_disable(adapter); qlcnic_cancel_idc_work(adapter); ahw = adapter->ahw; unregister_netdev(netdev); + qlcnic_sriov_cleanup(adapter); if (qlcnic_83xx_check(adapter)) { qlcnic_83xx_free_mbx_intr(adapter); @@ -2054,7 +2121,7 @@ static void qlcnic_remove(struct pci_dev *pdev) 
qlcnic_remove_sysfs(adapter); - qlcnic_cleanup_pci_map(adapter); + qlcnic_cleanup_pci_map(adapter->ahw); qlcnic_release_firmware(adapter); @@ -2084,6 +2151,7 @@ static int __qlcnic_shutdown(struct pci_dev *pdev) if (netif_running(netdev)) qlcnic_down(adapter, netdev); + qlcnic_sriov_cleanup(adapter); if (qlcnic_82xx_check(adapter)) qlcnic_clr_all_drv_state(adapter, 0); @@ -3238,8 +3306,10 @@ int qlcnic_set_max_rss(struct qlcnic_adapter *adapter, u8 data, size_t len) qlcnic_detach(adapter); - if (qlcnic_83xx_check(adapter)) + if (qlcnic_83xx_check(adapter)) { qlcnic_83xx_free_mbx_intr(adapter); + qlcnic_83xx_enable_mbx_poll(adapter); + } qlcnic_teardown_intr(adapter); err = qlcnic_setup_intr(adapter, data); @@ -3253,6 +3323,7 @@ int qlcnic_set_max_rss(struct qlcnic_adapter *adapter, u8 data, size_t len) /* register for NIC IDC AEN Events */ qlcnic_83xx_register_nic_idc_func(adapter, 1); err = qlcnic_83xx_setup_mbx_intr(adapter); + qlcnic_83xx_disable_mbx_poll(adapter); if (err) { dev_err(&adapter->pdev->dev, "failed to setup mbx interrupt\n"); @@ -3318,7 +3389,7 @@ void qlcnic_restore_indev_addr(struct net_device *netdev, unsigned long event) rcu_read_lock(); for_each_set_bit(vid, adapter->vlans, VLAN_N_VID) { - dev = __vlan_find_dev_deep(netdev, vid); + dev = __vlan_find_dev_deep(netdev, htons(ETH_P_8021Q), vid); if (!dev) continue; qlcnic_config_indev_addr(adapter, dev, event); @@ -3432,7 +3503,10 @@ static struct pci_driver qlcnic_driver = { .resume = qlcnic_resume, #endif .shutdown = qlcnic_shutdown, - .err_handler = &qlcnic_err_handler + .err_handler = &qlcnic_err_handler, +#ifdef CONFIG_QLCNIC_SRIOV + .sriov_configure = qlcnic_pci_sriov_configure, +#endif }; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index abbd22c814a6..4b9bab18ebd9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -810,11 +810,8 @@ static 
int __qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter, tmp_addr = dma_alloc_coherent(&adapter->pdev->dev, temp_size, &tmp_addr_t, GFP_KERNEL); - if (!tmp_addr) { - dev_err(&adapter->pdev->dev, - "Can't get memory for FW dump template\n"); + if (!tmp_addr) return -ENOMEM; - } if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_TEMP_HDR)) { err = -ENOMEM; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h new file mode 100644 index 000000000000..d85fbb57c25b --- /dev/null +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -0,0 +1,263 @@ +/* + * QLogic qlcnic NIC Driver + * Copyright (c) 2009-2013 QLogic Corporation + * + * See LICENSE.qlcnic for copyright and licensing details. + */ + +#ifndef _QLCNIC_83XX_SRIOV_H_ +#define _QLCNIC_83XX_SRIOV_H_ + +#include "qlcnic.h" +#include <linux/types.h> +#include <linux/pci.h> + +extern const u32 qlcnic_83xx_reg_tbl[]; +extern const u32 qlcnic_83xx_ext_reg_tbl[]; + +struct qlcnic_bc_payload { + u64 payload[126]; +}; + +struct qlcnic_bc_hdr { +#if defined(__LITTLE_ENDIAN) + u8 version; + u8 msg_type:4; + u8 rsvd1:3; + u8 op_type:1; + u8 num_cmds; + u8 num_frags; + u8 frag_num; + u8 cmd_op; + u16 seq_id; + u64 rsvd3; +#elif defined(__BIG_ENDIAN) + u8 num_frags; + u8 num_cmds; + u8 op_type:1; + u8 rsvd1:3; + u8 msg_type:4; + u8 version; + u16 seq_id; + u8 cmd_op; + u8 frag_num; + u64 rsvd3; +#endif +}; + +enum qlcnic_bc_commands { + QLCNIC_BC_CMD_CHANNEL_INIT = 0x0, + QLCNIC_BC_CMD_CHANNEL_TERM = 0x1, + QLCNIC_BC_CMD_GET_ACL = 0x2, + QLCNIC_BC_CMD_CFG_GUEST_VLAN = 0x3, +}; + +#define QLC_BC_CMD 1 + +struct qlcnic_trans_list { + /* Lock for manipulating list */ + spinlock_t lock; + struct list_head wait_list; + int count; +}; + +enum qlcnic_trans_state { + QLC_INIT = 0, + QLC_WAIT_FOR_CHANNEL_FREE, + QLC_WAIT_FOR_RESP, + QLC_ABORT, + QLC_END, +}; + +struct qlcnic_bc_trans { + u8 func_id; + u8 active; + u8 curr_rsp_frag; + u8 curr_req_frag; + 
u16 cmd_id; + u16 req_pay_size; + u16 rsp_pay_size; + u32 trans_id; + enum qlcnic_trans_state trans_state; + struct list_head list; + struct qlcnic_bc_hdr *req_hdr; + struct qlcnic_bc_hdr *rsp_hdr; + struct qlcnic_bc_payload *req_pay; + struct qlcnic_bc_payload *rsp_pay; + struct completion resp_cmpl; + struct qlcnic_vf_info *vf; +}; + +enum qlcnic_vf_state { + QLC_BC_VF_SEND = 0, + QLC_BC_VF_RECV, + QLC_BC_VF_CHANNEL, + QLC_BC_VF_STATE, + QLC_BC_VF_FLR, + QLC_BC_VF_SOFT_FLR, +}; + +enum qlcnic_vlan_mode { + QLC_NO_VLAN_MODE = 0, + QLC_PVID_MODE, + QLC_GUEST_VLAN_MODE, +}; + +struct qlcnic_resources { + u16 num_tx_mac_filters; + u16 num_rx_ucast_mac_filters; + u16 num_rx_mcast_mac_filters; + + u16 num_txvlan_keys; + + u16 num_rx_queues; + u16 num_tx_queues; + + u16 num_rx_buf_rings; + u16 num_rx_status_rings; + + u16 num_destip; + u32 num_lro_flows_supported; + u16 max_local_ipv6_addrs; + u16 max_remote_ipv6_addrs; +}; + +struct qlcnic_vport { + u16 handle; + u16 max_tx_bw; + u16 min_tx_bw; + u8 vlan_mode; + u16 vlan; + u8 qos; + u8 mac[6]; +}; + +struct qlcnic_vf_info { + u8 pci_func; + u16 rx_ctx_id; + u16 tx_ctx_id; + unsigned long state; + struct completion ch_free_cmpl; + struct work_struct trans_work; + struct work_struct flr_work; + /* It synchronizes commands sent from VF */ + struct mutex send_cmd_lock; + struct qlcnic_bc_trans *send_cmd; + struct qlcnic_bc_trans *flr_trans; + struct qlcnic_trans_list rcv_act; + struct qlcnic_trans_list rcv_pend; + struct qlcnic_adapter *adapter; + struct qlcnic_vport *vp; +}; + +struct qlcnic_async_work_list { + struct list_head list; + struct work_struct work; + void *ptr; +}; + +struct qlcnic_back_channel { + u16 trans_counter; + struct workqueue_struct *bc_trans_wq; + struct workqueue_struct *bc_async_wq; + struct workqueue_struct *bc_flr_wq; + struct list_head async_list; +}; + +struct qlcnic_sriov { + u16 vp_handle; + u8 num_vfs; + u8 any_vlan; + u8 vlan_mode; + u16 num_allowed_vlans; + u16 *allowed_vlans; + u16 
vlan; + struct qlcnic_resources ff_max; + struct qlcnic_back_channel bc; + struct qlcnic_vf_info *vf_info; +}; + +int qlcnic_sriov_init(struct qlcnic_adapter *, int); +void qlcnic_sriov_cleanup(struct qlcnic_adapter *); +void __qlcnic_sriov_cleanup(struct qlcnic_adapter *); +void qlcnic_sriov_vf_register_map(struct qlcnic_hardware_context *); +int qlcnic_sriov_vf_init(struct qlcnic_adapter *, int); +void qlcnic_sriov_vf_set_ops(struct qlcnic_adapter *); +int qlcnic_sriov_func_to_index(struct qlcnic_adapter *, u8); +int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *, u8); +void qlcnic_sriov_handle_bc_event(struct qlcnic_adapter *, u32); +int qlcnic_sriov_cfg_bc_intr(struct qlcnic_adapter *, u8); +void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *); +void qlcnic_sriov_cleanup_list(struct qlcnic_trans_list *); +int __qlcnic_sriov_add_act_list(struct qlcnic_sriov *, struct qlcnic_vf_info *, + struct qlcnic_bc_trans *); +int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *, + struct qlcnic_info *, u16); +int qlcnic_sriov_cfg_vf_guest_vlan(struct qlcnic_adapter *, u16, u8); + +static inline bool qlcnic_sriov_enable_check(struct qlcnic_adapter *adapter) +{ + return test_bit(__QLCNIC_SRIOV_ENABLE, &adapter->state) ? 
true : false; +} + +#ifdef CONFIG_QLCNIC_SRIOV +void qlcnic_sriov_pf_process_bc_cmd(struct qlcnic_adapter *, + struct qlcnic_bc_trans *, + struct qlcnic_cmd_args *); +void qlcnic_sriov_pf_disable(struct qlcnic_adapter *); +void qlcnic_sriov_pf_cleanup(struct qlcnic_adapter *); +int qlcnic_pci_sriov_configure(struct pci_dev *, int); +void qlcnic_pf_set_interface_id_create_rx_ctx(struct qlcnic_adapter *, u32 *); +void qlcnic_pf_set_interface_id_create_tx_ctx(struct qlcnic_adapter *, u32 *); +void qlcnic_pf_set_interface_id_del_rx_ctx(struct qlcnic_adapter *, u32 *); +void qlcnic_pf_set_interface_id_del_tx_ctx(struct qlcnic_adapter *, u32 *); +void qlcnic_pf_set_interface_id_promisc(struct qlcnic_adapter *, u32 *); +void qlcnic_pf_set_interface_id_ipaddr(struct qlcnic_adapter *, u32 *); +void qlcnic_pf_set_interface_id_macaddr(struct qlcnic_adapter *, u32 *); +void qlcnic_sriov_pf_handle_flr(struct qlcnic_sriov *, struct qlcnic_vf_info *); +bool qlcnic_sriov_soft_flr_check(struct qlcnic_adapter *, + struct qlcnic_bc_trans *, + struct qlcnic_vf_info *); +void qlcnic_sriov_pf_reset(struct qlcnic_adapter *); +int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *); +int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *); +int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int); +int qlcnic_sriov_get_vf_config(struct net_device *, int , + struct ifla_vf_info *); +int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8); +#else +static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {} +static inline void qlcnic_sriov_pf_cleanup(struct qlcnic_adapter *adapter) {} +static inline void +qlcnic_pf_set_interface_id_create_rx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) {} +static inline void +qlcnic_pf_set_interface_id_create_tx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) {} +static inline void +qlcnic_pf_set_interface_id_del_rx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) {} +static inline void 
+qlcnic_pf_set_interface_id_del_tx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) {} +static inline void +qlcnic_pf_set_interface_id_ipaddr(struct qlcnic_adapter *adapter, u32 *int_id) +{} +static inline void +qlcnic_pf_set_interface_id_macaddr(struct qlcnic_adapter *adapter, u32 *int_id) +{} +static inline void +qlcnic_pf_set_interface_id_promisc(struct qlcnic_adapter *adapter, u32 *int_id) +{} +static inline void qlcnic_sriov_pf_handle_flr(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf) {} +static inline bool qlcnic_sriov_soft_flr_check(struct qlcnic_adapter *adapter, + struct qlcnic_bc_trans *trans, + struct qlcnic_vf_info *vf) +{ return false; } +static inline void qlcnic_sriov_pf_reset(struct qlcnic_adapter *adapter) {} +static inline int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *adapter) +{ return 0; } +#endif + +#endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c new file mode 100644 index 000000000000..44d547d78b84 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -0,0 +1,1954 @@ +/* + * QLogic qlcnic NIC Driver + * Copyright (c) 2009-2013 QLogic Corporation + * + * See LICENSE.qlcnic for copyright and licensing details. 
+ */ + +#include "qlcnic_sriov.h" +#include "qlcnic.h" +#include "qlcnic_83xx_hw.h" +#include <linux/types.h> + +#define QLC_BC_COMMAND 0 +#define QLC_BC_RESPONSE 1 + +#define QLC_MBOX_RESP_TIMEOUT (10 * HZ) +#define QLC_MBOX_CH_FREE_TIMEOUT (10 * HZ) + +#define QLC_BC_MSG 0 +#define QLC_BC_CFREE 1 +#define QLC_BC_FLR 2 +#define QLC_BC_HDR_SZ 16 +#define QLC_BC_PAYLOAD_SZ (1024 - QLC_BC_HDR_SZ) + +#define QLC_DEFAULT_RCV_DESCRIPTORS_SRIOV_VF 2048 +#define QLC_DEFAULT_JUMBO_RCV_DESCRIPTORS_SRIOV_VF 512 + +#define QLC_83XX_VF_RESET_FAIL_THRESH 8 +#define QLC_BC_CMD_MAX_RETRY_CNT 5 + +static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *); +static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32); +static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *); +static void qlcnic_sriov_vf_cancel_fw_work(struct qlcnic_adapter *); +static void qlcnic_sriov_cleanup_transaction(struct qlcnic_bc_trans *); +static int qlcnic_sriov_vf_mbx_op(struct qlcnic_adapter *, + struct qlcnic_cmd_args *); + +static struct qlcnic_hardware_ops qlcnic_sriov_vf_hw_ops = { + .read_crb = qlcnic_83xx_read_crb, + .write_crb = qlcnic_83xx_write_crb, + .read_reg = qlcnic_83xx_rd_reg_indirect, + .write_reg = qlcnic_83xx_wrt_reg_indirect, + .get_mac_address = qlcnic_83xx_get_mac_address, + .setup_intr = qlcnic_83xx_setup_intr, + .alloc_mbx_args = qlcnic_83xx_alloc_mbx_args, + .mbx_cmd = qlcnic_sriov_vf_mbx_op, + .get_func_no = qlcnic_83xx_get_func_no, + .api_lock = qlcnic_83xx_cam_lock, + .api_unlock = qlcnic_83xx_cam_unlock, + .process_lb_rcv_ring_diag = qlcnic_83xx_process_rcv_ring_diag, + .create_rx_ctx = qlcnic_83xx_create_rx_ctx, + .create_tx_ctx = qlcnic_83xx_create_tx_ctx, + .del_rx_ctx = qlcnic_83xx_del_rx_ctx, + .del_tx_ctx = qlcnic_83xx_del_tx_ctx, + .setup_link_event = qlcnic_83xx_setup_link_event, + .get_nic_info = qlcnic_83xx_get_nic_info, + .get_pci_info = qlcnic_83xx_get_pci_info, + .set_nic_info = qlcnic_83xx_set_nic_info, + .change_macvlan = 
qlcnic_83xx_sre_macaddr_change, + .napi_enable = qlcnic_83xx_napi_enable, + .napi_disable = qlcnic_83xx_napi_disable, + .config_intr_coal = qlcnic_83xx_config_intr_coal, + .config_rss = qlcnic_83xx_config_rss, + .config_hw_lro = qlcnic_83xx_config_hw_lro, + .config_promisc_mode = qlcnic_83xx_nic_set_promisc, + .change_l2_filter = qlcnic_83xx_change_l2_filter, + .get_board_info = qlcnic_83xx_get_port_info, + .free_mac_list = qlcnic_sriov_vf_free_mac_list, +}; + +static struct qlcnic_nic_template qlcnic_sriov_vf_ops = { + .config_bridged_mode = qlcnic_config_bridged_mode, + .config_led = qlcnic_config_led, + .cancel_idc_work = qlcnic_sriov_vf_cancel_fw_work, + .napi_add = qlcnic_83xx_napi_add, + .napi_del = qlcnic_83xx_napi_del, + .config_ipaddr = qlcnic_83xx_config_ipaddr, + .clear_legacy_intr = qlcnic_83xx_clear_legacy_intr, +}; + +static const struct qlcnic_mailbox_metadata qlcnic_sriov_bc_mbx_tbl[] = { + {QLCNIC_BC_CMD_CHANNEL_INIT, 2, 2}, + {QLCNIC_BC_CMD_CHANNEL_TERM, 2, 2}, + {QLCNIC_BC_CMD_GET_ACL, 3, 14}, + {QLCNIC_BC_CMD_CFG_GUEST_VLAN, 2, 2}, +}; + +static inline bool qlcnic_sriov_bc_msg_check(u32 val) +{ + return (val & (1 << QLC_BC_MSG)) ? true : false; +} + +static inline bool qlcnic_sriov_channel_free_check(u32 val) +{ + return (val & (1 << QLC_BC_CFREE)) ? true : false; +} + +static inline bool qlcnic_sriov_flr_check(u32 val) +{ + return (val & (1 << QLC_BC_FLR)) ? 
true : false; +} + +static inline u8 qlcnic_sriov_target_func_id(u32 val) +{ + return (val >> 4) & 0xff; +} + +static int qlcnic_sriov_virtid_fn(struct qlcnic_adapter *adapter, int vf_id) +{ + struct pci_dev *dev = adapter->pdev; + int pos; + u16 stride, offset; + + if (qlcnic_sriov_vf_check(adapter)) + return 0; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); + + return (dev->devfn + offset + stride * vf_id) & 0xff; +} + +int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs) +{ + struct qlcnic_sriov *sriov; + struct qlcnic_back_channel *bc; + struct workqueue_struct *wq; + struct qlcnic_vport *vp; + struct qlcnic_vf_info *vf; + int err, i; + + if (!qlcnic_sriov_enable_check(adapter)) + return -EIO; + + sriov = kzalloc(sizeof(struct qlcnic_sriov), GFP_KERNEL); + if (!sriov) + return -ENOMEM; + + adapter->ahw->sriov = sriov; + sriov->num_vfs = num_vfs; + bc = &sriov->bc; + sriov->vf_info = kzalloc(sizeof(struct qlcnic_vf_info) * + num_vfs, GFP_KERNEL); + if (!sriov->vf_info) { + err = -ENOMEM; + goto qlcnic_free_sriov; + } + + wq = create_singlethread_workqueue("bc-trans"); + if (wq == NULL) { + err = -ENOMEM; + dev_err(&adapter->pdev->dev, + "Cannot create bc-trans workqueue\n"); + goto qlcnic_free_vf_info; + } + + bc->bc_trans_wq = wq; + + wq = create_singlethread_workqueue("async"); + if (wq == NULL) { + err = -ENOMEM; + dev_err(&adapter->pdev->dev, "Cannot create async workqueue\n"); + goto qlcnic_destroy_trans_wq; + } + + bc->bc_async_wq = wq; + INIT_LIST_HEAD(&bc->async_list); + + for (i = 0; i < num_vfs; i++) { + vf = &sriov->vf_info[i]; + vf->adapter = adapter; + vf->pci_func = qlcnic_sriov_virtid_fn(adapter, i); + mutex_init(&vf->send_cmd_lock); + INIT_LIST_HEAD(&vf->rcv_act.wait_list); + INIT_LIST_HEAD(&vf->rcv_pend.wait_list); + spin_lock_init(&vf->rcv_act.lock); + spin_lock_init(&vf->rcv_pend.lock); 
+ init_completion(&vf->ch_free_cmpl); + + if (qlcnic_sriov_pf_check(adapter)) { + vp = kzalloc(sizeof(struct qlcnic_vport), GFP_KERNEL); + if (!vp) { + err = -ENOMEM; + goto qlcnic_destroy_async_wq; + } + sriov->vf_info[i].vp = vp; + vp->max_tx_bw = MAX_BW; + random_ether_addr(vp->mac); + dev_info(&adapter->pdev->dev, + "MAC Address %pM is configured for VF %d\n", + vp->mac, i); + } + } + + return 0; + +qlcnic_destroy_async_wq: + destroy_workqueue(bc->bc_async_wq); + +qlcnic_destroy_trans_wq: + destroy_workqueue(bc->bc_trans_wq); + +qlcnic_free_vf_info: + kfree(sriov->vf_info); + +qlcnic_free_sriov: + kfree(adapter->ahw->sriov); + return err; +} + +void qlcnic_sriov_cleanup_list(struct qlcnic_trans_list *t_list) +{ + struct qlcnic_bc_trans *trans; + struct qlcnic_cmd_args cmd; + unsigned long flags; + + spin_lock_irqsave(&t_list->lock, flags); + + while (!list_empty(&t_list->wait_list)) { + trans = list_first_entry(&t_list->wait_list, + struct qlcnic_bc_trans, list); + list_del(&trans->list); + t_list->count--; + cmd.req.arg = (u32 *)trans->req_pay; + cmd.rsp.arg = (u32 *)trans->rsp_pay; + qlcnic_free_mbx_args(&cmd); + qlcnic_sriov_cleanup_transaction(trans); + } + + spin_unlock_irqrestore(&t_list->lock, flags); +} + +void __qlcnic_sriov_cleanup(struct qlcnic_adapter *adapter) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_back_channel *bc = &sriov->bc; + struct qlcnic_vf_info *vf; + int i; + + if (!qlcnic_sriov_enable_check(adapter)) + return; + + qlcnic_sriov_cleanup_async_list(bc); + destroy_workqueue(bc->bc_async_wq); + + for (i = 0; i < sriov->num_vfs; i++) { + vf = &sriov->vf_info[i]; + qlcnic_sriov_cleanup_list(&vf->rcv_pend); + cancel_work_sync(&vf->trans_work); + qlcnic_sriov_cleanup_list(&vf->rcv_act); + } + + destroy_workqueue(bc->bc_trans_wq); + + for (i = 0; i < sriov->num_vfs; i++) + kfree(sriov->vf_info[i].vp); + + kfree(sriov->vf_info); + kfree(adapter->ahw->sriov); +} + +static void qlcnic_sriov_vf_cleanup(struct 
qlcnic_adapter *adapter) +{ + qlcnic_sriov_channel_cfg_cmd(adapter, QLCNIC_BC_CMD_CHANNEL_TERM); + qlcnic_sriov_cfg_bc_intr(adapter, 0); + __qlcnic_sriov_cleanup(adapter); +} + +void qlcnic_sriov_cleanup(struct qlcnic_adapter *adapter) +{ + if (qlcnic_sriov_pf_check(adapter)) + qlcnic_sriov_pf_cleanup(adapter); + + if (qlcnic_sriov_vf_check(adapter)) + qlcnic_sriov_vf_cleanup(adapter); +} + +static int qlcnic_sriov_post_bc_msg(struct qlcnic_adapter *adapter, u32 *hdr, + u32 *pay, u8 pci_func, u8 size) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + unsigned long flags; + u32 rsp, mbx_val, fw_data, rsp_num, mbx_cmd, val; + u16 opcode; + u8 mbx_err_code; + int i, j; + + opcode = ((struct qlcnic_bc_hdr *)hdr)->cmd_op; + + if (!test_bit(QLC_83XX_MBX_READY, &adapter->ahw->idc.status)) { + dev_info(&adapter->pdev->dev, + "Mailbox cmd attempted, 0x%x\n", opcode); + dev_info(&adapter->pdev->dev, "Mailbox detached\n"); + return 0; + } + + spin_lock_irqsave(&ahw->mbx_lock, flags); + + mbx_val = QLCRDX(ahw, QLCNIC_HOST_MBX_CTRL); + if (mbx_val) { + QLCDB(adapter, DRV, "Mailbox cmd attempted, 0x%x\n", opcode); + spin_unlock_irqrestore(&ahw->mbx_lock, flags); + return QLCNIC_RCODE_TIMEOUT; + } + /* Fill in mailbox registers */ + val = size + (sizeof(struct qlcnic_bc_hdr) / sizeof(u32)); + mbx_cmd = 0x31 | (val << 16) | (adapter->ahw->fw_hal_version << 29); + + writel(mbx_cmd, QLCNIC_MBX_HOST(ahw, 0)); + mbx_cmd = 0x1 | (1 << 4); + + if (qlcnic_sriov_pf_check(adapter)) + mbx_cmd |= (pci_func << 5); + + writel(mbx_cmd, QLCNIC_MBX_HOST(ahw, 1)); + for (i = 2, j = 0; j < (sizeof(struct qlcnic_bc_hdr) / sizeof(u32)); + i++, j++) { + writel(*(hdr++), QLCNIC_MBX_HOST(ahw, i)); + } + for (j = 0; j < size; j++, i++) + writel(*(pay++), QLCNIC_MBX_HOST(ahw, i)); + + /* Signal FW about the impending command */ + QLCWRX(ahw, QLCNIC_HOST_MBX_CTRL, QLCNIC_SET_OWNER); + + /* Waiting for the mailbox cmd to complete and while waiting here + * some AEN might arrive. 
If more than 5 seconds expire we can + * assume something is wrong. + */ +poll: + rsp = qlcnic_83xx_mbx_poll(adapter); + if (rsp != QLCNIC_RCODE_TIMEOUT) { + /* Get the FW response data */ + fw_data = readl(QLCNIC_MBX_FW(ahw, 0)); + if (fw_data & QLCNIC_MBX_ASYNC_EVENT) { + __qlcnic_83xx_process_aen(adapter); + mbx_val = QLCRDX(ahw, QLCNIC_HOST_MBX_CTRL); + if (mbx_val) + goto poll; + } + mbx_err_code = QLCNIC_MBX_STATUS(fw_data); + rsp_num = QLCNIC_MBX_NUM_REGS(fw_data); + opcode = QLCNIC_MBX_RSP(fw_data); + + switch (mbx_err_code) { + case QLCNIC_MBX_RSP_OK: + case QLCNIC_MBX_PORT_RSP_OK: + rsp = QLCNIC_RCODE_SUCCESS; + break; + default: + if (opcode == QLCNIC_CMD_CONFIG_MAC_VLAN) { + rsp = qlcnic_83xx_mac_rcode(adapter); + if (!rsp) + goto out; + } + dev_err(&adapter->pdev->dev, + "MBX command 0x%x failed with err:0x%x\n", + opcode, mbx_err_code); + rsp = mbx_err_code; + break; + } + goto out; + } + + dev_err(&adapter->pdev->dev, "MBX command 0x%x timed out\n", + QLCNIC_MBX_RSP(mbx_cmd)); + rsp = QLCNIC_RCODE_TIMEOUT; +out: + /* clear fw mbx control register */ + QLCWRX(ahw, QLCNIC_FW_MBX_CTRL, QLCNIC_CLR_OWNER); + spin_unlock_irqrestore(&adapter->ahw->mbx_lock, flags); + return rsp; +} + +static void qlcnic_sriov_vf_cfg_buff_desc(struct qlcnic_adapter *adapter) +{ + adapter->num_rxd = QLC_DEFAULT_RCV_DESCRIPTORS_SRIOV_VF; + adapter->max_rxd = MAX_RCV_DESCRIPTORS_10G; + adapter->num_jumbo_rxd = QLC_DEFAULT_JUMBO_RCV_DESCRIPTORS_SRIOV_VF; + adapter->max_jumbo_rxd = MAX_JUMBO_RCV_DESCRIPTORS_10G; + adapter->num_txd = MAX_CMD_DESCRIPTORS; + adapter->max_rds_rings = MAX_RDS_RINGS; +} + +int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *adapter, + struct qlcnic_info *npar_info, u16 vport_id) +{ + struct device *dev = &adapter->pdev->dev; + struct qlcnic_cmd_args cmd; + int err; + u32 status; + + err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_NIC_INFO); + if (err) + return err; + + cmd.req.arg[1] = vport_id << 16 | 0x1; + err = 
qlcnic_issue_cmd(adapter, &cmd); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to get vport info, err=%d\n", err); + qlcnic_free_mbx_args(&cmd); + return err; + } + + status = cmd.rsp.arg[2] & 0xffff; + if (status & BIT_0) + npar_info->min_tx_bw = MSW(cmd.rsp.arg[2]); + if (status & BIT_1) + npar_info->max_tx_bw = LSW(cmd.rsp.arg[3]); + if (status & BIT_2) + npar_info->max_tx_ques = MSW(cmd.rsp.arg[3]); + if (status & BIT_3) + npar_info->max_tx_mac_filters = LSW(cmd.rsp.arg[4]); + if (status & BIT_4) + npar_info->max_rx_mcast_mac_filters = MSW(cmd.rsp.arg[4]); + if (status & BIT_5) + npar_info->max_rx_ucast_mac_filters = LSW(cmd.rsp.arg[5]); + if (status & BIT_6) + npar_info->max_rx_ip_addr = MSW(cmd.rsp.arg[5]); + if (status & BIT_7) + npar_info->max_rx_lro_flow = LSW(cmd.rsp.arg[6]); + if (status & BIT_8) + npar_info->max_rx_status_rings = MSW(cmd.rsp.arg[6]); + if (status & BIT_9) + npar_info->max_rx_buf_rings = LSW(cmd.rsp.arg[7]); + + npar_info->max_rx_ques = MSW(cmd.rsp.arg[7]); + npar_info->max_tx_vlan_keys = LSW(cmd.rsp.arg[8]); + npar_info->max_local_ipv6_addrs = MSW(cmd.rsp.arg[8]); + npar_info->max_remote_ipv6_addrs = LSW(cmd.rsp.arg[9]); + + dev_info(dev, "\n\tmin_tx_bw: %d, max_tx_bw: %d max_tx_ques: %d,\n" + "\tmax_tx_mac_filters: %d max_rx_mcast_mac_filters: %d,\n" + "\tmax_rx_ucast_mac_filters: 0x%x, max_rx_ip_addr: %d,\n" + "\tmax_rx_lro_flow: %d max_rx_status_rings: %d,\n" + "\tmax_rx_buf_rings: %d, max_rx_ques: %d, max_tx_vlan_keys %d\n" + "\tlocal_ipv6_addr: %d, remote_ipv6_addr: %d\n", + npar_info->min_tx_bw, npar_info->max_tx_bw, + npar_info->max_tx_ques, npar_info->max_tx_mac_filters, + npar_info->max_rx_mcast_mac_filters, + npar_info->max_rx_ucast_mac_filters, npar_info->max_rx_ip_addr, + npar_info->max_rx_lro_flow, npar_info->max_rx_status_rings, + npar_info->max_rx_buf_rings, npar_info->max_rx_ques, + npar_info->max_tx_vlan_keys, npar_info->max_local_ipv6_addrs, + npar_info->max_remote_ipv6_addrs); + + qlcnic_free_mbx_args(&cmd); + 
return err; +} + +static int qlcnic_sriov_set_pvid_mode(struct qlcnic_adapter *adapter, + struct qlcnic_cmd_args *cmd) +{ + adapter->rx_pvid = (cmd->rsp.arg[1] >> 16) & 0xffff; + adapter->flags &= ~QLCNIC_TAGGING_ENABLED; + return 0; +} + +static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + int i, num_vlans; + u16 *vlans; + + if (sriov->allowed_vlans) + return 0; + + sriov->any_vlan = cmd->rsp.arg[2] & 0xf; + if (!sriov->any_vlan) + return 0; + + sriov->num_allowed_vlans = cmd->rsp.arg[2] >> 16; + num_vlans = sriov->num_allowed_vlans; + sriov->allowed_vlans = kzalloc(sizeof(u16) * num_vlans, GFP_KERNEL); + if (!sriov->allowed_vlans) + return -ENOMEM; + + vlans = (u16 *)&cmd->rsp.arg[3]; + for (i = 0; i < num_vlans; i++) + sriov->allowed_vlans[i] = vlans[i]; + + return 0; +} + +static int qlcnic_sriov_get_vf_acl(struct qlcnic_adapter *adapter) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_cmd_args cmd; + int ret; + + ret = qlcnic_sriov_alloc_bc_mbx_args(&cmd, QLCNIC_BC_CMD_GET_ACL); + if (ret) + return ret; + + ret = qlcnic_issue_cmd(adapter, &cmd); + if (ret) { + dev_err(&adapter->pdev->dev, "Failed to get ACL, err=%d\n", + ret); + } else { + sriov->vlan_mode = cmd.rsp.arg[1] & 0x3; + switch (sriov->vlan_mode) { + case QLC_GUEST_VLAN_MODE: + ret = qlcnic_sriov_set_guest_vlan_mode(adapter, &cmd); + break; + case QLC_PVID_MODE: + ret = qlcnic_sriov_set_pvid_mode(adapter, &cmd); + break; + } + } + + qlcnic_free_mbx_args(&cmd); + return ret; +} + +static int qlcnic_sriov_vf_init_driver(struct qlcnic_adapter *adapter) +{ + struct qlcnic_info nic_info; + struct qlcnic_hardware_context *ahw = adapter->ahw; + int err; + + err = qlcnic_sriov_get_vf_vport_info(adapter, &nic_info, 0); + if (err) + return err; + + err = qlcnic_get_nic_info(adapter, &nic_info, ahw->pci_func); + if (err) + return -EIO; + + err = 
qlcnic_sriov_get_vf_acl(adapter); + if (err) + return err; + + if (qlcnic_83xx_get_port_info(adapter)) + return -EIO; + + qlcnic_sriov_vf_cfg_buff_desc(adapter); + adapter->flags |= QLCNIC_ADAPTER_INITIALIZED; + dev_info(&adapter->pdev->dev, "HAL Version: %d\n", + adapter->ahw->fw_hal_version); + + ahw->physical_port = (u8) nic_info.phys_port; + ahw->switch_mode = nic_info.switch_mode; + ahw->max_mtu = nic_info.max_mtu; + ahw->op_mode = nic_info.op_mode; + ahw->capabilities = nic_info.capabilities; + return 0; +} + +static int qlcnic_sriov_setup_vf(struct qlcnic_adapter *adapter, + int pci_using_dac) +{ + int err; + + INIT_LIST_HEAD(&adapter->vf_mc_list); + if (!qlcnic_use_msi_x && !!qlcnic_use_msi) + dev_warn(&adapter->pdev->dev, + "83xx adapter do not support MSI interrupts\n"); + + err = qlcnic_setup_intr(adapter, 1); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to setup interrupt\n"); + goto err_out_disable_msi; + } + + err = qlcnic_83xx_setup_mbx_intr(adapter); + if (err) + goto err_out_disable_msi; + + err = qlcnic_sriov_init(adapter, 1); + if (err) + goto err_out_disable_mbx_intr; + + err = qlcnic_sriov_cfg_bc_intr(adapter, 1); + if (err) + goto err_out_cleanup_sriov; + + err = qlcnic_sriov_channel_cfg_cmd(adapter, QLCNIC_BC_CMD_CHANNEL_INIT); + if (err) + goto err_out_disable_bc_intr; + + err = qlcnic_sriov_vf_init_driver(adapter); + if (err) + goto err_out_send_channel_term; + + err = qlcnic_setup_netdev(adapter, adapter->netdev, pci_using_dac); + if (err) + goto err_out_send_channel_term; + + pci_set_drvdata(adapter->pdev, adapter); + dev_info(&adapter->pdev->dev, "%s: XGbE port initialized\n", + adapter->netdev->name); + qlcnic_schedule_work(adapter, qlcnic_sriov_vf_poll_dev_state, + adapter->ahw->idc.delay); + return 0; + +err_out_send_channel_term: + qlcnic_sriov_channel_cfg_cmd(adapter, QLCNIC_BC_CMD_CHANNEL_TERM); + +err_out_disable_bc_intr: + qlcnic_sriov_cfg_bc_intr(adapter, 0); + +err_out_cleanup_sriov: + __qlcnic_sriov_cleanup(adapter); + 
+err_out_disable_mbx_intr: + qlcnic_83xx_free_mbx_intr(adapter); + +err_out_disable_msi: + qlcnic_teardown_intr(adapter); + return err; +} + +static int qlcnic_sriov_check_dev_ready(struct qlcnic_adapter *adapter) +{ + u32 state; + + do { + msleep(20); + if (++adapter->fw_fail_cnt > QLC_BC_CMD_MAX_RETRY_CNT) + return -EIO; + state = QLCRDX(adapter->ahw, QLC_83XX_IDC_DEV_STATE); + } while (state != QLC_83XX_IDC_DEV_READY); + + return 0; +} + +int qlcnic_sriov_vf_init(struct qlcnic_adapter *adapter, int pci_using_dac) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + int err; + + spin_lock_init(&ahw->mbx_lock); + set_bit(QLC_83XX_MBX_READY, &ahw->idc.status); + set_bit(QLC_83XX_MODULE_LOADED, &ahw->idc.status); + ahw->idc.delay = QLC_83XX_IDC_FW_POLL_DELAY; + ahw->reset_context = 0; + adapter->fw_fail_cnt = 0; + ahw->msix_supported = 1; + adapter->need_fw_reset = 0; + adapter->flags |= QLCNIC_TX_INTR_SHARED; + + err = qlcnic_sriov_check_dev_ready(adapter); + if (err) + return err; + + err = qlcnic_sriov_setup_vf(adapter, pci_using_dac); + if (err) + return err; + + if (qlcnic_read_mac_addr(adapter)) + dev_warn(&adapter->pdev->dev, "failed to read mac addr\n"); + + clear_bit(__QLCNIC_RESETTING, &adapter->state); + return 0; +} + +void qlcnic_sriov_vf_set_ops(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + + ahw->op_mode = QLCNIC_SRIOV_VF_FUNC; + dev_info(&adapter->pdev->dev, + "HAL Version: %d Non Privileged SRIOV function\n", + ahw->fw_hal_version); + adapter->nic_ops = &qlcnic_sriov_vf_ops; + set_bit(__QLCNIC_SRIOV_ENABLE, &adapter->state); + return; +} + +void qlcnic_sriov_vf_register_map(struct qlcnic_hardware_context *ahw) +{ + ahw->hw_ops = &qlcnic_sriov_vf_hw_ops; + ahw->reg_tbl = (u32 *)qlcnic_83xx_reg_tbl; + ahw->ext_reg_tbl = (u32 *)qlcnic_83xx_ext_reg_tbl; +} + +static u32 qlcnic_sriov_get_bc_paysize(u32 real_pay_size, u8 curr_frag) +{ + u32 pay_size; + + pay_size = real_pay_size / ((curr_frag + 1) * 
QLC_BC_PAYLOAD_SZ); + + if (pay_size) + pay_size = QLC_BC_PAYLOAD_SZ; + else + pay_size = real_pay_size % QLC_BC_PAYLOAD_SZ; + + return pay_size; +} + +int qlcnic_sriov_func_to_index(struct qlcnic_adapter *adapter, u8 pci_func) +{ + struct qlcnic_vf_info *vf_info = adapter->ahw->sriov->vf_info; + u8 i; + + if (qlcnic_sriov_vf_check(adapter)) + return 0; + + for (i = 0; i < adapter->ahw->sriov->num_vfs; i++) { + if (vf_info[i].pci_func == pci_func) + return i; + } + + return -EINVAL; +} + +static inline int qlcnic_sriov_alloc_bc_trans(struct qlcnic_bc_trans **trans) +{ + *trans = kzalloc(sizeof(struct qlcnic_bc_trans), GFP_ATOMIC); + if (!*trans) + return -ENOMEM; + + init_completion(&(*trans)->resp_cmpl); + return 0; +} + +static inline int qlcnic_sriov_alloc_bc_msg(struct qlcnic_bc_hdr **hdr, + u32 size) +{ + *hdr = kzalloc(sizeof(struct qlcnic_bc_hdr) * size, GFP_ATOMIC); + if (!*hdr) + return -ENOMEM; + + return 0; +} + +static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *mbx, u32 type) +{ + const struct qlcnic_mailbox_metadata *mbx_tbl; + int i, size; + + mbx_tbl = qlcnic_sriov_bc_mbx_tbl; + size = ARRAY_SIZE(qlcnic_sriov_bc_mbx_tbl); + + for (i = 0; i < size; i++) { + if (type == mbx_tbl[i].cmd) { + mbx->op_type = QLC_BC_CMD; + mbx->req.num = mbx_tbl[i].in_args; + mbx->rsp.num = mbx_tbl[i].out_args; + mbx->req.arg = kcalloc(mbx->req.num, sizeof(u32), + GFP_ATOMIC); + if (!mbx->req.arg) + return -ENOMEM; + mbx->rsp.arg = kcalloc(mbx->rsp.num, sizeof(u32), + GFP_ATOMIC); + if (!mbx->rsp.arg) { + kfree(mbx->req.arg); + mbx->req.arg = NULL; + return -ENOMEM; + } + memset(mbx->req.arg, 0, sizeof(u32) * mbx->req.num); + memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num); + mbx->req.arg[0] = (type | (mbx->req.num << 16) | + (3 << 29)); + return 0; + } + } + return -EINVAL; +} + +static int qlcnic_sriov_prepare_bc_hdr(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd, + u16 seq, u8 msg_type) +{ + struct qlcnic_bc_hdr *hdr; + int i; + u32 
num_regs, bc_pay_sz; + u16 remainder; + u8 cmd_op, num_frags, t_num_frags; + + bc_pay_sz = QLC_BC_PAYLOAD_SZ; + if (msg_type == QLC_BC_COMMAND) { + trans->req_pay = (struct qlcnic_bc_payload *)cmd->req.arg; + trans->rsp_pay = (struct qlcnic_bc_payload *)cmd->rsp.arg; + num_regs = cmd->req.num; + trans->req_pay_size = (num_regs * 4); + num_regs = cmd->rsp.num; + trans->rsp_pay_size = (num_regs * 4); + cmd_op = cmd->req.arg[0] & 0xff; + remainder = (trans->req_pay_size) % (bc_pay_sz); + num_frags = (trans->req_pay_size) / (bc_pay_sz); + if (remainder) + num_frags++; + t_num_frags = num_frags; + if (qlcnic_sriov_alloc_bc_msg(&trans->req_hdr, num_frags)) + return -ENOMEM; + remainder = (trans->rsp_pay_size) % (bc_pay_sz); + num_frags = (trans->rsp_pay_size) / (bc_pay_sz); + if (remainder) + num_frags++; + if (qlcnic_sriov_alloc_bc_msg(&trans->rsp_hdr, num_frags)) + return -ENOMEM; + num_frags = t_num_frags; + hdr = trans->req_hdr; + } else { + cmd->req.arg = (u32 *)trans->req_pay; + cmd->rsp.arg = (u32 *)trans->rsp_pay; + cmd_op = cmd->req.arg[0] & 0xff; + remainder = (trans->rsp_pay_size) % (bc_pay_sz); + num_frags = (trans->rsp_pay_size) / (bc_pay_sz); + if (remainder) + num_frags++; + cmd->req.num = trans->req_pay_size / 4; + cmd->rsp.num = trans->rsp_pay_size / 4; + hdr = trans->rsp_hdr; + } + + trans->trans_id = seq; + trans->cmd_id = cmd_op; + for (i = 0; i < num_frags; i++) { + hdr[i].version = 2; + hdr[i].msg_type = msg_type; + hdr[i].op_type = cmd->op_type; + hdr[i].num_cmds = 1; + hdr[i].num_frags = num_frags; + hdr[i].frag_num = i + 1; + hdr[i].cmd_op = cmd_op; + hdr[i].seq_id = seq; + } + return 0; +} + +static void qlcnic_sriov_cleanup_transaction(struct qlcnic_bc_trans *trans) +{ + if (!trans) + return; + kfree(trans->req_hdr); + kfree(trans->rsp_hdr); + kfree(trans); +} + +static int qlcnic_sriov_clear_trans(struct qlcnic_vf_info *vf, + struct qlcnic_bc_trans *trans, u8 type) +{ + struct qlcnic_trans_list *t_list; + unsigned long flags; + int ret = 0; + 
+ if (type == QLC_BC_RESPONSE) { + t_list = &vf->rcv_act; + spin_lock_irqsave(&t_list->lock, flags); + t_list->count--; + list_del(&trans->list); + if (t_list->count > 0) + ret = 1; + spin_unlock_irqrestore(&t_list->lock, flags); + } + if (type == QLC_BC_COMMAND) { + while (test_and_set_bit(QLC_BC_VF_SEND, &vf->state)) + msleep(100); + vf->send_cmd = NULL; + clear_bit(QLC_BC_VF_SEND, &vf->state); + } + return ret; +} + +static void qlcnic_sriov_schedule_bc_cmd(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf, + work_func_t func) +{ + if (test_bit(QLC_BC_VF_FLR, &vf->state) || + vf->adapter->need_fw_reset) + return; + + INIT_WORK(&vf->trans_work, func); + queue_work(sriov->bc.bc_trans_wq, &vf->trans_work); +} + +static inline void qlcnic_sriov_wait_for_resp(struct qlcnic_bc_trans *trans) +{ + struct completion *cmpl = &trans->resp_cmpl; + + if (wait_for_completion_timeout(cmpl, QLC_MBOX_RESP_TIMEOUT)) + trans->trans_state = QLC_END; + else + trans->trans_state = QLC_ABORT; + + return; +} + +static void qlcnic_sriov_handle_multi_frags(struct qlcnic_bc_trans *trans, + u8 type) +{ + if (type == QLC_BC_RESPONSE) { + trans->curr_rsp_frag++; + if (trans->curr_rsp_frag < trans->rsp_hdr->num_frags) + trans->trans_state = QLC_INIT; + else + trans->trans_state = QLC_END; + } else { + trans->curr_req_frag++; + if (trans->curr_req_frag < trans->req_hdr->num_frags) + trans->trans_state = QLC_INIT; + else + trans->trans_state = QLC_WAIT_FOR_RESP; + } +} + +static void qlcnic_sriov_wait_for_channel_free(struct qlcnic_bc_trans *trans, + u8 type) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct completion *cmpl = &vf->ch_free_cmpl; + + if (!wait_for_completion_timeout(cmpl, QLC_MBOX_CH_FREE_TIMEOUT)) { + trans->trans_state = QLC_ABORT; + return; + } + + clear_bit(QLC_BC_VF_CHANNEL, &vf->state); + qlcnic_sriov_handle_multi_frags(trans, type); +} + +static void qlcnic_sriov_pull_bc_msg(struct qlcnic_adapter *adapter, + u32 *hdr, u32 *pay, u32 size) +{ + struct 
qlcnic_hardware_context *ahw = adapter->ahw; + u32 fw_mbx; + u8 i, max = 2, hdr_size, j; + + hdr_size = (sizeof(struct qlcnic_bc_hdr) / sizeof(u32)); + max = (size / sizeof(u32)) + hdr_size; + + fw_mbx = readl(QLCNIC_MBX_FW(ahw, 0)); + for (i = 2, j = 0; j < hdr_size; i++, j++) + *(hdr++) = readl(QLCNIC_MBX_FW(ahw, i)); + for (; j < max; i++, j++) + *(pay++) = readl(QLCNIC_MBX_FW(ahw, i)); +} + +static int __qlcnic_sriov_issue_bc_post(struct qlcnic_vf_info *vf) +{ + int ret = -EBUSY; + u32 timeout = 10000; + + do { + if (!test_and_set_bit(QLC_BC_VF_CHANNEL, &vf->state)) { + ret = 0; + break; + } + mdelay(1); + } while (--timeout); + + return ret; +} + +static int qlcnic_sriov_issue_bc_post(struct qlcnic_bc_trans *trans, u8 type) +{ + struct qlcnic_vf_info *vf = trans->vf; + u32 pay_size, hdr_size; + u32 *hdr, *pay; + int ret; + u8 pci_func = trans->func_id; + + if (__qlcnic_sriov_issue_bc_post(vf)) + return -EBUSY; + + if (type == QLC_BC_COMMAND) { + hdr = (u32 *)(trans->req_hdr + trans->curr_req_frag); + pay = (u32 *)(trans->req_pay + trans->curr_req_frag); + hdr_size = (sizeof(struct qlcnic_bc_hdr) / sizeof(u32)); + pay_size = qlcnic_sriov_get_bc_paysize(trans->req_pay_size, + trans->curr_req_frag); + pay_size = (pay_size / sizeof(u32)); + } else { + hdr = (u32 *)(trans->rsp_hdr + trans->curr_rsp_frag); + pay = (u32 *)(trans->rsp_pay + trans->curr_rsp_frag); + hdr_size = (sizeof(struct qlcnic_bc_hdr) / sizeof(u32)); + pay_size = qlcnic_sriov_get_bc_paysize(trans->rsp_pay_size, + trans->curr_rsp_frag); + pay_size = (pay_size / sizeof(u32)); + } + + ret = qlcnic_sriov_post_bc_msg(vf->adapter, hdr, pay, + pci_func, pay_size); + return ret; +} + +static int __qlcnic_sriov_send_bc_msg(struct qlcnic_bc_trans *trans, + struct qlcnic_vf_info *vf, u8 type) +{ + bool flag = true; + int err = -EIO; + + while (flag) { + if (test_bit(QLC_BC_VF_FLR, &vf->state) || + vf->adapter->need_fw_reset) + trans->trans_state = QLC_ABORT; + + switch (trans->trans_state) { + case QLC_INIT: 
+ trans->trans_state = QLC_WAIT_FOR_CHANNEL_FREE; + if (qlcnic_sriov_issue_bc_post(trans, type)) + trans->trans_state = QLC_ABORT; + break; + case QLC_WAIT_FOR_CHANNEL_FREE: + qlcnic_sriov_wait_for_channel_free(trans, type); + break; + case QLC_WAIT_FOR_RESP: + qlcnic_sriov_wait_for_resp(trans); + break; + case QLC_END: + err = 0; + flag = false; + break; + case QLC_ABORT: + err = -EIO; + flag = false; + clear_bit(QLC_BC_VF_CHANNEL, &vf->state); + break; + default: + err = -EIO; + flag = false; + } + } + return err; +} + +static int qlcnic_sriov_send_bc_cmd(struct qlcnic_adapter *adapter, + struct qlcnic_bc_trans *trans, int pci_func) +{ + struct qlcnic_vf_info *vf; + int err, index = qlcnic_sriov_func_to_index(adapter, pci_func); + + if (index < 0) + return -EIO; + + vf = &adapter->ahw->sriov->vf_info[index]; + trans->vf = vf; + trans->func_id = pci_func; + + if (!test_bit(QLC_BC_VF_STATE, &vf->state)) { + if (qlcnic_sriov_pf_check(adapter)) + return -EIO; + if (qlcnic_sriov_vf_check(adapter) && + trans->cmd_id != QLCNIC_BC_CMD_CHANNEL_INIT) + return -EIO; + } + + mutex_lock(&vf->send_cmd_lock); + vf->send_cmd = trans; + err = __qlcnic_sriov_send_bc_msg(trans, vf, QLC_BC_COMMAND); + qlcnic_sriov_clear_trans(vf, trans, QLC_BC_COMMAND); + mutex_unlock(&vf->send_cmd_lock); + return err; +} + +static void __qlcnic_sriov_process_bc_cmd(struct qlcnic_adapter *adapter, + struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ +#ifdef CONFIG_QLCNIC_SRIOV + if (qlcnic_sriov_pf_check(adapter)) { + qlcnic_sriov_pf_process_bc_cmd(adapter, trans, cmd); + return; + } +#endif + cmd->rsp.arg[0] |= (0x9 << 25); + return; +} + +static void qlcnic_sriov_process_bc_cmd(struct work_struct *work) +{ + struct qlcnic_vf_info *vf = container_of(work, struct qlcnic_vf_info, + trans_work); + struct qlcnic_bc_trans *trans = NULL; + struct qlcnic_adapter *adapter = vf->adapter; + struct qlcnic_cmd_args cmd; + u8 req; + + if (adapter->need_fw_reset) + return; + + if 
(test_bit(QLC_BC_VF_FLR, &vf->state)) + return; + + trans = list_first_entry(&vf->rcv_act.wait_list, + struct qlcnic_bc_trans, list); + adapter = vf->adapter; + + if (qlcnic_sriov_prepare_bc_hdr(trans, &cmd, trans->req_hdr->seq_id, + QLC_BC_RESPONSE)) + goto cleanup_trans; + + __qlcnic_sriov_process_bc_cmd(adapter, trans, &cmd); + trans->trans_state = QLC_INIT; + __qlcnic_sriov_send_bc_msg(trans, vf, QLC_BC_RESPONSE); + +cleanup_trans: + qlcnic_free_mbx_args(&cmd); + req = qlcnic_sriov_clear_trans(vf, trans, QLC_BC_RESPONSE); + qlcnic_sriov_cleanup_transaction(trans); + if (req) + qlcnic_sriov_schedule_bc_cmd(adapter->ahw->sriov, vf, + qlcnic_sriov_process_bc_cmd); +} + +static void qlcnic_sriov_handle_bc_resp(struct qlcnic_bc_hdr *hdr, + struct qlcnic_vf_info *vf) +{ + struct qlcnic_bc_trans *trans; + u32 pay_size; + + if (test_and_set_bit(QLC_BC_VF_SEND, &vf->state)) + return; + + trans = vf->send_cmd; + + if (trans == NULL) + goto clear_send; + + if (trans->trans_id != hdr->seq_id) + goto clear_send; + + pay_size = qlcnic_sriov_get_bc_paysize(trans->rsp_pay_size, + trans->curr_rsp_frag); + qlcnic_sriov_pull_bc_msg(vf->adapter, + (u32 *)(trans->rsp_hdr + trans->curr_rsp_frag), + (u32 *)(trans->rsp_pay + trans->curr_rsp_frag), + pay_size); + if (++trans->curr_rsp_frag < trans->rsp_hdr->num_frags) + goto clear_send; + + complete(&trans->resp_cmpl); + +clear_send: + clear_bit(QLC_BC_VF_SEND, &vf->state); +} + +int __qlcnic_sriov_add_act_list(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf, + struct qlcnic_bc_trans *trans) +{ + struct qlcnic_trans_list *t_list = &vf->rcv_act; + + t_list->count++; + list_add_tail(&trans->list, &t_list->wait_list); + if (t_list->count == 1) + qlcnic_sriov_schedule_bc_cmd(sriov, vf, + qlcnic_sriov_process_bc_cmd); + return 0; +} + +static int qlcnic_sriov_add_act_list(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf, + struct qlcnic_bc_trans *trans) +{ + struct qlcnic_trans_list *t_list = &vf->rcv_act; + + 
spin_lock(&t_list->lock); + + __qlcnic_sriov_add_act_list(sriov, vf, trans); + + spin_unlock(&t_list->lock); + return 0; +} + +static void qlcnic_sriov_handle_pending_trans(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf, + struct qlcnic_bc_hdr *hdr) +{ + struct qlcnic_bc_trans *trans = NULL; + struct list_head *node; + u32 pay_size, curr_frag; + u8 found = 0, active = 0; + + spin_lock(&vf->rcv_pend.lock); + if (vf->rcv_pend.count > 0) { + list_for_each(node, &vf->rcv_pend.wait_list) { + trans = list_entry(node, struct qlcnic_bc_trans, list); + if (trans->trans_id == hdr->seq_id) { + found = 1; + break; + } + } + } + + if (found) { + curr_frag = trans->curr_req_frag; + pay_size = qlcnic_sriov_get_bc_paysize(trans->req_pay_size, + curr_frag); + qlcnic_sriov_pull_bc_msg(vf->adapter, + (u32 *)(trans->req_hdr + curr_frag), + (u32 *)(trans->req_pay + curr_frag), + pay_size); + trans->curr_req_frag++; + if (trans->curr_req_frag >= hdr->num_frags) { + vf->rcv_pend.count--; + list_del(&trans->list); + active = 1; + } + } + spin_unlock(&vf->rcv_pend.lock); + + if (active) + if (qlcnic_sriov_add_act_list(sriov, vf, trans)) + qlcnic_sriov_cleanup_transaction(trans); + + return; +} + +static void qlcnic_sriov_handle_bc_cmd(struct qlcnic_sriov *sriov, + struct qlcnic_bc_hdr *hdr, + struct qlcnic_vf_info *vf) +{ + struct qlcnic_bc_trans *trans; + struct qlcnic_adapter *adapter = vf->adapter; + struct qlcnic_cmd_args cmd; + u32 pay_size; + int err; + u8 cmd_op; + + if (adapter->need_fw_reset) + return; + + if (!test_bit(QLC_BC_VF_STATE, &vf->state) && + hdr->op_type != QLC_BC_CMD && + hdr->cmd_op != QLCNIC_BC_CMD_CHANNEL_INIT) + return; + + if (hdr->frag_num > 1) { + qlcnic_sriov_handle_pending_trans(sriov, vf, hdr); + return; + } + + cmd_op = hdr->cmd_op; + if (qlcnic_sriov_alloc_bc_trans(&trans)) + return; + + if (hdr->op_type == QLC_BC_CMD) + err = qlcnic_sriov_alloc_bc_mbx_args(&cmd, cmd_op); + else + err = qlcnic_alloc_mbx_args(&cmd, adapter, cmd_op); + + if (err) { 
+ qlcnic_sriov_cleanup_transaction(trans); + return; + } + + cmd.op_type = hdr->op_type; + if (qlcnic_sriov_prepare_bc_hdr(trans, &cmd, hdr->seq_id, + QLC_BC_COMMAND)) { + qlcnic_free_mbx_args(&cmd); + qlcnic_sriov_cleanup_transaction(trans); + return; + } + + pay_size = qlcnic_sriov_get_bc_paysize(trans->req_pay_size, + trans->curr_req_frag); + qlcnic_sriov_pull_bc_msg(vf->adapter, + (u32 *)(trans->req_hdr + trans->curr_req_frag), + (u32 *)(trans->req_pay + trans->curr_req_frag), + pay_size); + trans->func_id = vf->pci_func; + trans->vf = vf; + trans->trans_id = hdr->seq_id; + trans->curr_req_frag++; + + if (qlcnic_sriov_soft_flr_check(adapter, trans, vf)) + return; + + if (trans->curr_req_frag == trans->req_hdr->num_frags) { + if (qlcnic_sriov_add_act_list(sriov, vf, trans)) { + qlcnic_free_mbx_args(&cmd); + qlcnic_sriov_cleanup_transaction(trans); + } + } else { + spin_lock(&vf->rcv_pend.lock); + list_add_tail(&trans->list, &vf->rcv_pend.wait_list); + vf->rcv_pend.count++; + spin_unlock(&vf->rcv_pend.lock); + } +} + +static void qlcnic_sriov_handle_msg_event(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf) +{ + struct qlcnic_bc_hdr hdr; + u32 *ptr = (u32 *)&hdr; + u8 msg_type, i; + + for (i = 2; i < 6; i++) + ptr[i - 2] = readl(QLCNIC_MBX_FW(vf->adapter->ahw, i)); + msg_type = hdr.msg_type; + + switch (msg_type) { + case QLC_BC_COMMAND: + qlcnic_sriov_handle_bc_cmd(sriov, &hdr, vf); + break; + case QLC_BC_RESPONSE: + qlcnic_sriov_handle_bc_resp(&hdr, vf); + break; + } +} + +static void qlcnic_sriov_handle_flr_event(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf) +{ + struct qlcnic_adapter *adapter = vf->adapter; + + if (qlcnic_sriov_pf_check(adapter)) + qlcnic_sriov_pf_handle_flr(sriov, vf); + else + dev_err(&adapter->pdev->dev, + "Invalid event to VF. 
VF should not get FLR event\n"); +} + +void qlcnic_sriov_handle_bc_event(struct qlcnic_adapter *adapter, u32 event) +{ + struct qlcnic_vf_info *vf; + struct qlcnic_sriov *sriov; + int index; + u8 pci_func; + + sriov = adapter->ahw->sriov; + pci_func = qlcnic_sriov_target_func_id(event); + index = qlcnic_sriov_func_to_index(adapter, pci_func); + + if (index < 0) + return; + + vf = &sriov->vf_info[index]; + vf->pci_func = pci_func; + + if (qlcnic_sriov_channel_free_check(event)) + complete(&vf->ch_free_cmpl); + + if (qlcnic_sriov_flr_check(event)) { + qlcnic_sriov_handle_flr_event(sriov, vf); + return; + } + + if (qlcnic_sriov_bc_msg_check(event)) + qlcnic_sriov_handle_msg_event(sriov, vf); +} + +int qlcnic_sriov_cfg_bc_intr(struct qlcnic_adapter *adapter, u8 enable) +{ + struct qlcnic_cmd_args cmd; + int err; + + if (!test_bit(__QLCNIC_SRIOV_ENABLE, &adapter->state)) + return 0; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_BC_EVENT_SETUP)) + return -ENOMEM; + + if (enable) + cmd.req.arg[1] = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7); + + err = qlcnic_83xx_mbx_op(adapter, &cmd); + + if (err != QLCNIC_RCODE_SUCCESS) { + dev_err(&adapter->pdev->dev, + "Failed to %s bc events, err=%d\n", + (enable ? 
"enable" : "disable"), err); + } + + qlcnic_free_mbx_args(&cmd); + return err; +} + +static int qlcnic_sriov_retry_bc_cmd(struct qlcnic_adapter *adapter, + struct qlcnic_bc_trans *trans) +{ + u8 max = QLC_BC_CMD_MAX_RETRY_CNT; + u32 state; + + state = QLCRDX(adapter->ahw, QLC_83XX_IDC_DEV_STATE); + if (state == QLC_83XX_IDC_DEV_READY) { + msleep(20); + clear_bit(QLC_BC_VF_CHANNEL, &trans->vf->state); + trans->trans_state = QLC_INIT; + if (++adapter->fw_fail_cnt > max) + return -EIO; + else + return 0; + } + + return -EIO; +} + +static int qlcnic_sriov_vf_mbx_op(struct qlcnic_adapter *adapter, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct device *dev = &adapter->pdev->dev; + struct qlcnic_bc_trans *trans; + int err; + u32 rsp_data, opcode, mbx_err_code, rsp; + u16 seq = ++adapter->ahw->sriov->bc.trans_counter; + u8 func = ahw->pci_func; + + rsp = qlcnic_sriov_alloc_bc_trans(&trans); + if (rsp) + return rsp; + + rsp = qlcnic_sriov_prepare_bc_hdr(trans, cmd, seq, QLC_BC_COMMAND); + if (rsp) + goto cleanup_transaction; + +retry: + if (!test_bit(QLC_83XX_MBX_READY, &adapter->ahw->idc.status)) { + rsp = -EIO; + QLCDB(adapter, DRV, "MBX not Ready!(cmd 0x%x) for VF 0x%x\n", + QLCNIC_MBX_RSP(cmd->req.arg[0]), func); + goto err_out; + } + + err = qlcnic_sriov_send_bc_cmd(adapter, trans, func); + if (err) { + dev_err(dev, "MBX command 0x%x timed out for VF %d\n", + (cmd->req.arg[0] & 0xffff), func); + rsp = QLCNIC_RCODE_TIMEOUT; + + /* After adapter reset PF driver may take some time to + * respond to VF's request. Retry request till maximum retries. 
+ */ + if ((trans->req_hdr->cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) && + !qlcnic_sriov_retry_bc_cmd(adapter, trans)) + goto retry; + + goto err_out; + } + + rsp_data = cmd->rsp.arg[0]; + mbx_err_code = QLCNIC_MBX_STATUS(rsp_data); + opcode = QLCNIC_MBX_RSP(cmd->req.arg[0]); + + if ((mbx_err_code == QLCNIC_MBX_RSP_OK) || + (mbx_err_code == QLCNIC_MBX_PORT_RSP_OK)) { + rsp = QLCNIC_RCODE_SUCCESS; + } else { + rsp = mbx_err_code; + if (!rsp) + rsp = 1; + dev_err(dev, + "MBX command 0x%x failed with err:0x%x for VF %d\n", + opcode, mbx_err_code, func); + } + +err_out: + if (rsp == QLCNIC_RCODE_TIMEOUT) { + ahw->reset_context = 1; + adapter->need_fw_reset = 1; + clear_bit(QLC_83XX_MBX_READY, &ahw->idc.status); + } + +cleanup_transaction: + qlcnic_sriov_cleanup_transaction(trans); + return rsp; +} + +int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *adapter, u8 cmd_op) +{ + struct qlcnic_cmd_args cmd; + struct qlcnic_vf_info *vf = &adapter->ahw->sriov->vf_info[0]; + int ret; + + if (qlcnic_sriov_alloc_bc_mbx_args(&cmd, cmd_op)) + return -ENOMEM; + + ret = qlcnic_issue_cmd(adapter, &cmd); + if (ret) { + dev_err(&adapter->pdev->dev, + "Failed bc channel %s %d\n", cmd_op ? 
"term" : "init", + ret); + goto out; + } + + cmd_op = (cmd.rsp.arg[0] & 0xff); + if (cmd.rsp.arg[0] >> 25 == 2) + return 2; + if (cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) + set_bit(QLC_BC_VF_STATE, &vf->state); + else + clear_bit(QLC_BC_VF_STATE, &vf->state); + +out: + qlcnic_free_mbx_args(&cmd); + return ret; +} + +void qlcnic_vf_add_mc_list(struct net_device *netdev, u16 vlan) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct qlcnic_mac_list_s *cur; + struct list_head *head, tmp_list; + + INIT_LIST_HEAD(&tmp_list); + head = &adapter->vf_mc_list; + netif_addr_lock_bh(netdev); + + while (!list_empty(head)) { + cur = list_entry(head->next, struct qlcnic_mac_list_s, list); + list_move(&cur->list, &tmp_list); + } + + netif_addr_unlock_bh(netdev); + + while (!list_empty(&tmp_list)) { + cur = list_entry((&tmp_list)->next, + struct qlcnic_mac_list_s, list); + qlcnic_nic_add_mac(adapter, cur->mac_addr, vlan); + list_del(&cur->list); + kfree(cur); + } +} + +void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc) +{ + struct list_head *head = &bc->async_list; + struct qlcnic_async_work_list *entry; + + while (!list_empty(head)) { + entry = list_entry(head->next, struct qlcnic_async_work_list, + list); + cancel_work_sync(&entry->work); + list_del(&entry->list); + kfree(entry); + } +} + +static void qlcnic_sriov_vf_set_multi(struct net_device *netdev) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + u16 vlan; + + if (!test_bit(__QLCNIC_FW_ATTACHED, &adapter->state)) + return; + + vlan = adapter->ahw->sriov->vlan; + __qlcnic_set_multi(netdev, vlan); +} + +static void qlcnic_sriov_handle_async_multi(struct work_struct *work) +{ + struct qlcnic_async_work_list *entry; + struct net_device *netdev; + + entry = container_of(work, struct qlcnic_async_work_list, work); + netdev = (struct net_device *)entry->ptr; + + qlcnic_sriov_vf_set_multi(netdev); + return; +} + +static struct qlcnic_async_work_list * 
+qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc) +{ + struct list_head *node; + struct qlcnic_async_work_list *entry = NULL; + u8 empty = 0; + + list_for_each(node, &bc->async_list) { + entry = list_entry(node, struct qlcnic_async_work_list, list); + if (!work_pending(&entry->work)) { + empty = 1; + break; + } + } + + if (!empty) { + entry = kzalloc(sizeof(struct qlcnic_async_work_list), + GFP_ATOMIC); + if (entry == NULL) + return NULL; + list_add_tail(&entry->list, &bc->async_list); + } + + return entry; +} + +static void qlcnic_sriov_schedule_bc_async_work(struct qlcnic_back_channel *bc, + work_func_t func, void *data) +{ + struct qlcnic_async_work_list *entry = NULL; + + entry = qlcnic_sriov_get_free_node_async_work(bc); + if (!entry) + return; + + entry->ptr = data; + INIT_WORK(&entry->work, func); + queue_work(bc->bc_async_wq, &entry->work); +} + +void qlcnic_sriov_vf_schedule_multi(struct net_device *netdev) +{ + + struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct qlcnic_back_channel *bc = &adapter->ahw->sriov->bc; + + if (adapter->need_fw_reset) + return; + + qlcnic_sriov_schedule_bc_async_work(bc, qlcnic_sriov_handle_async_multi, + netdev); +} + +static int qlcnic_sriov_vf_reinit_driver(struct qlcnic_adapter *adapter) +{ + int err; + + set_bit(QLC_83XX_MBX_READY, &adapter->ahw->idc.status); + qlcnic_83xx_enable_mbx_intrpt(adapter); + + err = qlcnic_sriov_cfg_bc_intr(adapter, 1); + if (err) + return err; + + err = qlcnic_sriov_channel_cfg_cmd(adapter, QLCNIC_BC_CMD_CHANNEL_INIT); + if (err) + goto err_out_cleanup_bc_intr; + + err = qlcnic_sriov_vf_init_driver(adapter); + if (err) + goto err_out_term_channel; + + return 0; + +err_out_term_channel: + qlcnic_sriov_channel_cfg_cmd(adapter, QLCNIC_BC_CMD_CHANNEL_TERM); + +err_out_cleanup_bc_intr: + qlcnic_sriov_cfg_bc_intr(adapter, 0); + return err; +} + +static void qlcnic_sriov_vf_attach(struct qlcnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + if 
(netif_running(netdev)) { + if (!qlcnic_up(adapter, netdev)) + qlcnic_restore_indev_addr(netdev, NETDEV_UP); + } + + netif_device_attach(netdev); +} + +static void qlcnic_sriov_vf_detach(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct qlcnic_intrpt_config *intr_tbl = ahw->intr_tbl; + struct net_device *netdev = adapter->netdev; + u8 i, max_ints = ahw->num_msix - 1; + + qlcnic_83xx_disable_mbx_intr(adapter); + netif_device_detach(netdev); + if (netif_running(netdev)) + qlcnic_down(adapter, netdev); + + for (i = 0; i < max_ints; i++) { + intr_tbl[i].id = i; + intr_tbl[i].enabled = 0; + intr_tbl[i].src = 0; + } + ahw->reset_context = 0; +} + +static int qlcnic_sriov_vf_handle_dev_ready(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct device *dev = &adapter->pdev->dev; + struct qlc_83xx_idc *idc = &ahw->idc; + u8 func = ahw->pci_func; + u32 state; + + if ((idc->prev_state == QLC_83XX_IDC_DEV_NEED_RESET) || + (idc->prev_state == QLC_83XX_IDC_DEV_INIT)) { + if (!qlcnic_sriov_vf_reinit_driver(adapter)) { + qlcnic_sriov_vf_attach(adapter); + adapter->fw_fail_cnt = 0; + dev_info(dev, + "%s: Reinitalization of VF 0x%x done after FW reset\n", + __func__, func); + } else { + dev_err(dev, + "%s: Reinitialization of VF 0x%x failed after FW reset\n", + __func__, func); + state = QLCRDX(ahw, QLC_83XX_IDC_DEV_STATE); + dev_info(dev, "Current state 0x%x after FW reset\n", + state); + } + } + + return 0; +} + +static int qlcnic_sriov_vf_handle_context_reset(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct device *dev = &adapter->pdev->dev; + struct qlc_83xx_idc *idc = &ahw->idc; + u8 func = ahw->pci_func; + u32 state; + + adapter->reset_ctx_cnt++; + + /* Skip the context reset and check if FW is hung */ + if (adapter->reset_ctx_cnt < 3) { + adapter->need_fw_reset = 1; + clear_bit(QLC_83XX_MBX_READY, &idc->status); + dev_info(dev, + 
"Resetting context, wait here to check if FW is in failed state\n"); + return 0; + } + + /* Check if number of resets exceed the threshold. + * If it exceeds the threshold just fail the VF. + */ + if (adapter->reset_ctx_cnt > QLC_83XX_VF_RESET_FAIL_THRESH) { + clear_bit(QLC_83XX_MODULE_LOADED, &idc->status); + adapter->tx_timeo_cnt = 0; + adapter->fw_fail_cnt = 0; + adapter->reset_ctx_cnt = 0; + qlcnic_sriov_vf_detach(adapter); + dev_err(dev, + "Device context resets have exceeded the threshold, device interface will be shutdown\n"); + return -EIO; + } + + dev_info(dev, "Resetting context of VF 0x%x\n", func); + dev_info(dev, "%s: Context reset count %d for VF 0x%x\n", + __func__, adapter->reset_ctx_cnt, func); + set_bit(__QLCNIC_RESETTING, &adapter->state); + adapter->need_fw_reset = 1; + clear_bit(QLC_83XX_MBX_READY, &idc->status); + qlcnic_sriov_vf_detach(adapter); + adapter->need_fw_reset = 0; + + if (!qlcnic_sriov_vf_reinit_driver(adapter)) { + qlcnic_sriov_vf_attach(adapter); + adapter->netdev->trans_start = jiffies; + adapter->tx_timeo_cnt = 0; + adapter->reset_ctx_cnt = 0; + adapter->fw_fail_cnt = 0; + dev_info(dev, "Done resetting context for VF 0x%x\n", func); + } else { + dev_err(dev, "%s: Reinitialization of VF 0x%x failed\n", + __func__, func); + state = QLCRDX(ahw, QLC_83XX_IDC_DEV_STATE); + dev_info(dev, "%s: Current state 0x%x\n", __func__, state); + } + + return 0; +} + +static int qlcnic_sriov_vf_idc_ready_state(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + int ret = 0; + + if (ahw->idc.prev_state != QLC_83XX_IDC_DEV_READY) + ret = qlcnic_sriov_vf_handle_dev_ready(adapter); + else if (ahw->reset_context) + ret = qlcnic_sriov_vf_handle_context_reset(adapter); + + clear_bit(__QLCNIC_RESETTING, &adapter->state); + return ret; +} + +static int qlcnic_sriov_vf_idc_failed_state(struct qlcnic_adapter *adapter) +{ + struct qlc_83xx_idc *idc = &adapter->ahw->idc; + + dev_err(&adapter->pdev->dev, "Device is in 
failed state\n"); + if (idc->prev_state == QLC_83XX_IDC_DEV_READY) + qlcnic_sriov_vf_detach(adapter); + + clear_bit(QLC_83XX_MODULE_LOADED, &idc->status); + clear_bit(__QLCNIC_RESETTING, &adapter->state); + return -EIO; +} + +static int +qlcnic_sriov_vf_idc_need_quiescent_state(struct qlcnic_adapter *adapter) +{ + struct qlc_83xx_idc *idc = &adapter->ahw->idc; + + dev_info(&adapter->pdev->dev, "Device is in quiescent state\n"); + if (idc->prev_state == QLC_83XX_IDC_DEV_READY) { + set_bit(__QLCNIC_RESETTING, &adapter->state); + adapter->tx_timeo_cnt = 0; + adapter->reset_ctx_cnt = 0; + clear_bit(QLC_83XX_MBX_READY, &idc->status); + qlcnic_sriov_vf_detach(adapter); + } + + return 0; +} + +static int qlcnic_sriov_vf_idc_init_reset_state(struct qlcnic_adapter *adapter) +{ + struct qlc_83xx_idc *idc = &adapter->ahw->idc; + u8 func = adapter->ahw->pci_func; + + if (idc->prev_state == QLC_83XX_IDC_DEV_READY) { + dev_err(&adapter->pdev->dev, + "Firmware hang detected by VF 0x%x\n", func); + set_bit(__QLCNIC_RESETTING, &adapter->state); + adapter->tx_timeo_cnt = 0; + adapter->reset_ctx_cnt = 0; + clear_bit(QLC_83XX_MBX_READY, &idc->status); + qlcnic_sriov_vf_detach(adapter); + } + return 0; +} + +static int qlcnic_sriov_vf_idc_unknown_state(struct qlcnic_adapter *adapter) +{ + dev_err(&adapter->pdev->dev, "%s: Device in unknown state\n", __func__); + return 0; +} + +static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *work) +{ + struct qlcnic_adapter *adapter; + struct qlc_83xx_idc *idc; + int ret = 0; + + adapter = container_of(work, struct qlcnic_adapter, fw_work.work); + idc = &adapter->ahw->idc; + idc->curr_state = QLCRDX(adapter->ahw, QLC_83XX_IDC_DEV_STATE); + + switch (idc->curr_state) { + case QLC_83XX_IDC_DEV_READY: + ret = qlcnic_sriov_vf_idc_ready_state(adapter); + break; + case QLC_83XX_IDC_DEV_NEED_RESET: + case QLC_83XX_IDC_DEV_INIT: + ret = qlcnic_sriov_vf_idc_init_reset_state(adapter); + break; + case QLC_83XX_IDC_DEV_NEED_QUISCENT: + ret = 
qlcnic_sriov_vf_idc_need_quiescent_state(adapter); + break; + case QLC_83XX_IDC_DEV_FAILED: + ret = qlcnic_sriov_vf_idc_failed_state(adapter); + break; + case QLC_83XX_IDC_DEV_QUISCENT: + break; + default: + ret = qlcnic_sriov_vf_idc_unknown_state(adapter); + } + + idc->prev_state = idc->curr_state; + if (!ret && test_bit(QLC_83XX_MODULE_LOADED, &idc->status)) + qlcnic_schedule_work(adapter, qlcnic_sriov_vf_poll_dev_state, + idc->delay); +} + +static void qlcnic_sriov_vf_cancel_fw_work(struct qlcnic_adapter *adapter) +{ + while (test_and_set_bit(__QLCNIC_RESETTING, &adapter->state)) + msleep(20); + + clear_bit(QLC_83XX_MODULE_LOADED, &adapter->ahw->idc.status); + clear_bit(__QLCNIC_RESETTING, &adapter->state); + cancel_delayed_work_sync(&adapter->fw_work); +} + +static int qlcnic_sriov_validate_vlan_cfg(struct qlcnic_sriov *sriov, + u16 vid, u8 enable) +{ + u16 vlan = sriov->vlan; + u8 allowed = 0; + int i; + + if (sriov->vlan_mode != QLC_GUEST_VLAN_MODE) + return -EINVAL; + + if (enable) { + if (vlan) + return -EINVAL; + + if (sriov->any_vlan) { + for (i = 0; i < sriov->num_allowed_vlans; i++) { + if (sriov->allowed_vlans[i] == vid) + allowed = 1; + } + + if (!allowed) + return -EINVAL; + } + } else { + if (!vlan || vlan != vid) + return -EINVAL; + } + + return 0; +} + +int qlcnic_sriov_cfg_vf_guest_vlan(struct qlcnic_adapter *adapter, + u16 vid, u8 enable) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_cmd_args cmd; + int ret; + + if (vid == 0) + return 0; + + ret = qlcnic_sriov_validate_vlan_cfg(sriov, vid, enable); + if (ret) + return ret; + + ret = qlcnic_sriov_alloc_bc_mbx_args(&cmd, + QLCNIC_BC_CMD_CFG_GUEST_VLAN); + if (ret) + return ret; + + cmd.req.arg[1] = (enable & 1) | vid << 16; + + qlcnic_sriov_cleanup_async_list(&sriov->bc); + ret = qlcnic_issue_cmd(adapter, &cmd); + if (ret) { + dev_err(&adapter->pdev->dev, + "Failed to configure guest VLAN, err=%d\n", ret); + } else { + qlcnic_free_mac_list(adapter); + + if (enable) + 
sriov->vlan = vid; + else + sriov->vlan = 0; + + qlcnic_sriov_vf_set_multi(adapter->netdev); + } + + qlcnic_free_mbx_args(&cmd); + return ret; +} + +static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *adapter) +{ + struct list_head *head = &adapter->mac_list; + struct qlcnic_mac_list_s *cur; + u16 vlan; + + vlan = adapter->ahw->sriov->vlan; + + while (!list_empty(head)) { + cur = list_entry(head->next, struct qlcnic_mac_list_s, list); + qlcnic_sre_macaddr_change(adapter, cur->mac_addr, + vlan, QLCNIC_MAC_DEL); + list_del(&cur->list); + kfree(cur); + } +} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c new file mode 100644 index 000000000000..c81be2da119b --- /dev/null +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -0,0 +1,1780 @@ +/* + * QLogic qlcnic NIC Driver + * Copyright (c) 2009-2013 QLogic Corporation + * + * See LICENSE.qlcnic for copyright and licensing details. + */ + +#include "qlcnic_sriov.h" +#include "qlcnic.h" +#include <linux/types.h> + +#define QLCNIC_SRIOV_VF_MAX_MAC 1 +#define QLC_VF_MIN_TX_RATE 100 +#define QLC_VF_MAX_TX_RATE 9999 + +static int qlcnic_sriov_pf_get_vport_handle(struct qlcnic_adapter *, u8); + +struct qlcnic_sriov_cmd_handler { + int (*fn) (struct qlcnic_bc_trans *, struct qlcnic_cmd_args *); +}; + +struct qlcnic_sriov_fw_cmd_handler { + u32 cmd; + int (*fn) (struct qlcnic_bc_trans *, struct qlcnic_cmd_args *); +}; + +static int qlcnic_sriov_pf_set_vport_info(struct qlcnic_adapter *adapter, + struct qlcnic_info *npar_info, + u16 vport_id) +{ + struct qlcnic_cmd_args cmd; + int err; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_SET_NIC_INFO)) + return -ENOMEM; + + cmd.req.arg[1] = (vport_id << 16) | 0x1; + cmd.req.arg[2] = npar_info->bit_offsets; + cmd.req.arg[2] |= npar_info->min_tx_bw << 16; + cmd.req.arg[3] = npar_info->max_tx_bw | (npar_info->max_tx_ques << 16); + cmd.req.arg[4] = npar_info->max_tx_mac_filters; + 
cmd.req.arg[4] |= npar_info->max_rx_mcast_mac_filters << 16; + cmd.req.arg[5] = npar_info->max_rx_ucast_mac_filters | + (npar_info->max_rx_ip_addr << 16); + cmd.req.arg[6] = npar_info->max_rx_lro_flow | + (npar_info->max_rx_status_rings << 16); + cmd.req.arg[7] = npar_info->max_rx_buf_rings | + (npar_info->max_rx_ques << 16); + cmd.req.arg[8] = npar_info->max_tx_vlan_keys; + cmd.req.arg[8] |= npar_info->max_local_ipv6_addrs << 16; + cmd.req.arg[9] = npar_info->max_remote_ipv6_addrs; + + err = qlcnic_issue_cmd(adapter, &cmd); + if (err) + dev_err(&adapter->pdev->dev, + "Failed to set vport info, err=%d\n", err); + + qlcnic_free_mbx_args(&cmd); + return err; +} + +static int qlcnic_sriov_pf_cal_res_limit(struct qlcnic_adapter *adapter, + struct qlcnic_info *info, u16 func) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_resources *res = &sriov->ff_max; + u32 temp, num_vf_macs, num_vfs, max; + int ret = -EIO, vpid, id; + struct qlcnic_vport *vp; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, func); + if (vpid < 0) + return -EINVAL; + + num_vfs = sriov->num_vfs; + max = num_vfs + 1; + info->bit_offsets = 0xffff; + info->max_tx_ques = res->num_tx_queues / max; + info->max_rx_mcast_mac_filters = res->num_rx_mcast_mac_filters; + num_vf_macs = QLCNIC_SRIOV_VF_MAX_MAC; + + if (adapter->ahw->pci_func == func) { + temp = res->num_rx_mcast_mac_filters - (num_vfs * num_vf_macs); + info->max_rx_ucast_mac_filters = temp; + temp = res->num_tx_mac_filters - (num_vfs * num_vf_macs); + info->max_tx_mac_filters = temp; + info->min_tx_bw = 0; + info->max_tx_bw = MAX_BW; + } else { + id = qlcnic_sriov_func_to_index(adapter, func); + if (id < 0) + return id; + vp = sriov->vf_info[id].vp; + info->min_tx_bw = vp->min_tx_bw; + info->max_tx_bw = vp->max_tx_bw; + info->max_rx_ucast_mac_filters = num_vf_macs; + info->max_tx_mac_filters = num_vf_macs; + } + + info->max_rx_ip_addr = res->num_destip / max; + info->max_rx_status_rings = res->num_rx_status_rings / max; 
+ info->max_rx_buf_rings = res->num_rx_buf_rings / max; + info->max_rx_ques = res->num_rx_queues / max; + info->max_rx_lro_flow = res->num_lro_flows_supported / max; + info->max_tx_vlan_keys = res->num_txvlan_keys; + info->max_local_ipv6_addrs = res->max_local_ipv6_addrs; + info->max_remote_ipv6_addrs = res->max_remote_ipv6_addrs; + + ret = qlcnic_sriov_pf_set_vport_info(adapter, info, vpid); + if (ret) + return ret; + + return 0; +} + +static void qlcnic_sriov_pf_set_ff_max_res(struct qlcnic_adapter *adapter, + struct qlcnic_info *info) +{ + struct qlcnic_resources *ff_max = &adapter->ahw->sriov->ff_max; + + ff_max->num_tx_mac_filters = info->max_tx_mac_filters; + ff_max->num_rx_ucast_mac_filters = info->max_rx_ucast_mac_filters; + ff_max->num_rx_mcast_mac_filters = info->max_rx_mcast_mac_filters; + ff_max->num_txvlan_keys = info->max_tx_vlan_keys; + ff_max->num_rx_queues = info->max_rx_ques; + ff_max->num_tx_queues = info->max_tx_ques; + ff_max->num_lro_flows_supported = info->max_rx_lro_flow; + ff_max->num_destip = info->max_rx_ip_addr; + ff_max->num_rx_buf_rings = info->max_rx_buf_rings; + ff_max->num_rx_status_rings = info->max_rx_status_rings; + ff_max->max_remote_ipv6_addrs = info->max_remote_ipv6_addrs; + ff_max->max_local_ipv6_addrs = info->max_local_ipv6_addrs; +} + +static int qlcnic_sriov_get_pf_info(struct qlcnic_adapter *adapter, + struct qlcnic_info *npar_info) +{ + int err; + struct qlcnic_cmd_args cmd; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_NIC_INFO)) + return -ENOMEM; + + cmd.req.arg[1] = 0x2; + err = qlcnic_issue_cmd(adapter, &cmd); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to get PF info, err=%d\n", err); + goto out; + } + + npar_info->total_pf = cmd.rsp.arg[2] & 0xff; + npar_info->total_rss_engines = (cmd.rsp.arg[2] >> 8) & 0xff; + npar_info->max_vports = MSW(cmd.rsp.arg[2]); + npar_info->max_tx_ques = LSW(cmd.rsp.arg[3]); + npar_info->max_tx_mac_filters = MSW(cmd.rsp.arg[3]); + 
npar_info->max_rx_mcast_mac_filters = LSW(cmd.rsp.arg[4]); + npar_info->max_rx_ucast_mac_filters = MSW(cmd.rsp.arg[4]); + npar_info->max_rx_ip_addr = LSW(cmd.rsp.arg[5]); + npar_info->max_rx_lro_flow = MSW(cmd.rsp.arg[5]); + npar_info->max_rx_status_rings = LSW(cmd.rsp.arg[6]); + npar_info->max_rx_buf_rings = MSW(cmd.rsp.arg[6]); + npar_info->max_rx_ques = LSW(cmd.rsp.arg[7]); + npar_info->max_tx_vlan_keys = MSW(cmd.rsp.arg[7]); + npar_info->max_local_ipv6_addrs = LSW(cmd.rsp.arg[8]); + npar_info->max_remote_ipv6_addrs = MSW(cmd.rsp.arg[8]); + + qlcnic_sriov_pf_set_ff_max_res(adapter, npar_info); + dev_info(&adapter->pdev->dev, + "\n\ttotal_pf: %d,\n" + "\n\ttotal_rss_engines: %d max_vports: %d max_tx_ques %d,\n" + "\tmax_tx_mac_filters: %d max_rx_mcast_mac_filters: %d,\n" + "\tmax_rx_ucast_mac_filters: 0x%x, max_rx_ip_addr: %d,\n" + "\tmax_rx_lro_flow: %d max_rx_status_rings: %d,\n" + "\tmax_rx_buf_rings: %d, max_rx_ques: %d, max_tx_vlan_keys %d\n" + "\tmax_local_ipv6_addrs: %d, max_remote_ipv6_addrs: %d\n", + npar_info->total_pf, npar_info->total_rss_engines, + npar_info->max_vports, npar_info->max_tx_ques, + npar_info->max_tx_mac_filters, + npar_info->max_rx_mcast_mac_filters, + npar_info->max_rx_ucast_mac_filters, npar_info->max_rx_ip_addr, + npar_info->max_rx_lro_flow, npar_info->max_rx_status_rings, + npar_info->max_rx_buf_rings, npar_info->max_rx_ques, + npar_info->max_tx_vlan_keys, npar_info->max_local_ipv6_addrs, + npar_info->max_remote_ipv6_addrs); + +out: + qlcnic_free_mbx_args(&cmd); + return err; +} + +static void qlcnic_sriov_pf_reset_vport_handle(struct qlcnic_adapter *adapter, + u8 func) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_vport *vp; + int index; + + if (adapter->ahw->pci_func == func) { + sriov->vp_handle = 0; + } else { + index = qlcnic_sriov_func_to_index(adapter, func); + if (index < 0) + return; + vp = sriov->vf_info[index].vp; + vp->handle = 0; + } +} + +static void qlcnic_sriov_pf_set_vport_handle(struct 
qlcnic_adapter *adapter, + u16 vport_handle, u8 func) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_vport *vp; + int index; + + if (adapter->ahw->pci_func == func) { + sriov->vp_handle = vport_handle; + } else { + index = qlcnic_sriov_func_to_index(adapter, func); + if (index < 0) + return; + vp = sriov->vf_info[index].vp; + vp->handle = vport_handle; + } +} + +static int qlcnic_sriov_pf_get_vport_handle(struct qlcnic_adapter *adapter, + u8 func) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_vf_info *vf_info; + int index; + + if (adapter->ahw->pci_func == func) { + return sriov->vp_handle; + } else { + index = qlcnic_sriov_func_to_index(adapter, func); + if (index >= 0) { + vf_info = &sriov->vf_info[index]; + return vf_info->vp->handle; + } + } + + return -EINVAL; +} + +static int qlcnic_sriov_pf_config_vport(struct qlcnic_adapter *adapter, + u8 flag, u16 func) +{ + struct qlcnic_cmd_args cmd; + int ret; + int vpid; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIG_VPORT)) + return -ENOMEM; + + if (flag) { + cmd.req.arg[3] = func << 8; + } else { + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, func); + if (vpid < 0) { + ret = -EINVAL; + goto out; + } + cmd.req.arg[3] = ((vpid & 0xffff) << 8) | 1; + } + + ret = qlcnic_issue_cmd(adapter, &cmd); + if (ret) { + dev_err(&adapter->pdev->dev, + "Failed %s vport, err %d for func 0x%x\n", + (flag ? 
"enable" : "disable"), ret, func); + goto out; + } + + if (flag) { + vpid = cmd.rsp.arg[2] & 0xffff; + qlcnic_sriov_pf_set_vport_handle(adapter, vpid, func); + } else { + qlcnic_sriov_pf_reset_vport_handle(adapter, func); + } + +out: + qlcnic_free_mbx_args(&cmd); + return ret; +} + +static int qlcnic_sriov_pf_cfg_vlan_filtering(struct qlcnic_adapter *adapter, + u8 enable) +{ + struct qlcnic_cmd_args cmd; + int err; + + err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_SET_NIC_INFO); + if (err) + return err; + + cmd.req.arg[1] = 0x4; + if (enable) + cmd.req.arg[1] |= BIT_16; + + err = qlcnic_issue_cmd(adapter, &cmd); + if (err) + dev_err(&adapter->pdev->dev, + "Failed to configure VLAN filtering, err=%d\n", err); + + qlcnic_free_mbx_args(&cmd); + return err; +} + +static int qlcnic_sriov_pf_cfg_eswitch(struct qlcnic_adapter *adapter, + u8 func, u8 enable) +{ + struct qlcnic_cmd_args cmd; + int err = -EIO; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_TOGGLE_ESWITCH)) + return -ENOMEM; + + cmd.req.arg[0] |= (3 << 29); + cmd.req.arg[1] = ((func & 0xf) << 2) | BIT_6 | BIT_1; + if (enable) + cmd.req.arg[1] |= BIT_0; + + err = qlcnic_issue_cmd(adapter, &cmd); + + if (err != QLCNIC_RCODE_SUCCESS) { + dev_err(&adapter->pdev->dev, + "Failed to enable sriov eswitch%d\n", err); + err = -EIO; + } + + qlcnic_free_mbx_args(&cmd); + return err; +} + +static void qlcnic_sriov_pf_del_flr_queue(struct qlcnic_adapter *adapter) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_back_channel *bc = &sriov->bc; + int i; + + for (i = 0; i < sriov->num_vfs; i++) + cancel_work_sync(&sriov->vf_info[i].flr_work); + + destroy_workqueue(bc->bc_flr_wq); +} + +static int qlcnic_sriov_pf_create_flr_queue(struct qlcnic_adapter *adapter) +{ + struct qlcnic_back_channel *bc = &adapter->ahw->sriov->bc; + struct workqueue_struct *wq; + + wq = create_singlethread_workqueue("qlcnic-flr"); + if (wq == NULL) { + dev_err(&adapter->pdev->dev, "Cannot create FLR 
workqueue\n"); + return -ENOMEM; + } + + bc->bc_flr_wq = wq; + return 0; +} + +void qlcnic_sriov_pf_cleanup(struct qlcnic_adapter *adapter) +{ + u8 func = adapter->ahw->pci_func; + + if (!qlcnic_sriov_enable_check(adapter)) + return; + + qlcnic_sriov_pf_del_flr_queue(adapter); + qlcnic_sriov_cfg_bc_intr(adapter, 0); + qlcnic_sriov_pf_config_vport(adapter, 0, func); + qlcnic_sriov_pf_cfg_eswitch(adapter, func, 0); + qlcnic_sriov_pf_cfg_vlan_filtering(adapter, 0); + __qlcnic_sriov_cleanup(adapter); + adapter->ahw->op_mode = QLCNIC_MGMT_FUNC; + clear_bit(__QLCNIC_SRIOV_ENABLE, &adapter->state); +} + +void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) +{ + if (!qlcnic_sriov_pf_check(adapter)) + return; + + if (!qlcnic_sriov_enable_check(adapter)) + return; + + pci_disable_sriov(adapter->pdev); + netdev_info(adapter->netdev, + "SR-IOV is disabled successfully on port %d\n", + adapter->portnum); +} + +static int qlcnic_pci_sriov_disable(struct qlcnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + if (netif_running(netdev)) + __qlcnic_down(adapter, netdev); + + qlcnic_sriov_pf_disable(adapter); + + qlcnic_sriov_pf_cleanup(adapter); + + /* After disabling SRIOV re-init the driver in default mode + configure opmode based on op_mode of function + */ + if (qlcnic_83xx_configure_opmode(adapter)) + return -EIO; + + if (netif_running(netdev)) + __qlcnic_up(adapter, netdev); + + return 0; +} + +static int qlcnic_sriov_pf_init(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct qlcnic_info nic_info, pf_info, vp_info; + int err; + u8 func = ahw->pci_func; + + if (!qlcnic_sriov_enable_check(adapter)) + return 0; + + err = qlcnic_sriov_pf_cfg_vlan_filtering(adapter, 1); + if (err) + return err; + + err = qlcnic_sriov_pf_cfg_eswitch(adapter, func, 1); + if (err) + goto disable_vlan_filtering; + + err = qlcnic_sriov_pf_config_vport(adapter, 1, func); + if (err) + goto disable_eswitch; + + err = 
qlcnic_sriov_get_pf_info(adapter, &pf_info); + if (err) + goto delete_vport; + + err = qlcnic_get_nic_info(adapter, &nic_info, func); + if (err) + goto delete_vport; + + err = qlcnic_sriov_pf_cal_res_limit(adapter, &vp_info, func); + if (err) + goto delete_vport; + + err = qlcnic_sriov_cfg_bc_intr(adapter, 1); + if (err) + goto delete_vport; + + ahw->physical_port = (u8) nic_info.phys_port; + ahw->switch_mode = nic_info.switch_mode; + ahw->max_mtu = nic_info.max_mtu; + ahw->capabilities = nic_info.capabilities; + ahw->nic_mode = QLC_83XX_SRIOV_MODE; + return err; + +delete_vport: + qlcnic_sriov_pf_config_vport(adapter, 0, func); + +disable_eswitch: + qlcnic_sriov_pf_cfg_eswitch(adapter, func, 0); + +disable_vlan_filtering: + qlcnic_sriov_pf_cfg_vlan_filtering(adapter, 0); + + return err; +} + +static int qlcnic_sriov_pf_enable(struct qlcnic_adapter *adapter, int num_vfs) +{ + int err; + + if (!qlcnic_sriov_enable_check(adapter)) + return 0; + + err = pci_enable_sriov(adapter->pdev, num_vfs); + if (err) + qlcnic_sriov_pf_cleanup(adapter); + + return err; +} + +static int __qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, + int num_vfs) +{ + int err = 0; + + set_bit(__QLCNIC_SRIOV_ENABLE, &adapter->state); + adapter->ahw->op_mode = QLCNIC_SRIOV_PF_FUNC; + + err = qlcnic_sriov_init(adapter, num_vfs); + if (err) + goto clear_op_mode; + + err = qlcnic_sriov_pf_create_flr_queue(adapter); + if (err) + goto sriov_cleanup; + + err = qlcnic_sriov_pf_init(adapter); + if (err) + goto del_flr_queue; + + err = qlcnic_sriov_pf_enable(adapter, num_vfs); + return err; + +del_flr_queue: + qlcnic_sriov_pf_del_flr_queue(adapter); + +sriov_cleanup: + __qlcnic_sriov_cleanup(adapter); + +clear_op_mode: + clear_bit(__QLCNIC_SRIOV_ENABLE, &adapter->state); + adapter->ahw->op_mode = QLCNIC_MGMT_FUNC; + return err; +} + +static int qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, int num_vfs) +{ + struct net_device *netdev = adapter->netdev; + int err; + + if (!(adapter->flags 
& QLCNIC_MSIX_ENABLED)) { + netdev_err(netdev, + "SR-IOV cannot be enabled, when legacy interrupts are enabled\n"); + return -EIO; + } + + if (netif_running(netdev)) + __qlcnic_down(adapter, netdev); + + err = __qlcnic_pci_sriov_enable(adapter, num_vfs); + if (err) { + netdev_info(netdev, "Failed to enable SR-IOV on port %d\n", + adapter->portnum); + + err = -EIO; + if (qlcnic_83xx_configure_opmode(adapter)) + goto error; + } else { + netdev_info(netdev, + "SR-IOV is enabled successfully on port %d\n", + adapter->portnum); + /* Return number of vfs enabled */ + err = num_vfs; + } + if (netif_running(netdev)) + __qlcnic_up(adapter, netdev); + +error: + return err; +} + +int qlcnic_pci_sriov_configure(struct pci_dev *dev, int num_vfs) +{ + struct qlcnic_adapter *adapter = pci_get_drvdata(dev); + int err; + + if (test_and_set_bit(__QLCNIC_RESETTING, &adapter->state)) + return -EBUSY; + + if (num_vfs == 0) + err = qlcnic_pci_sriov_disable(adapter); + else + err = qlcnic_pci_sriov_enable(adapter, num_vfs); + + clear_bit(__QLCNIC_RESETTING, &adapter->state); + return err; +} + +static int qlcnic_sriov_set_vf_acl(struct qlcnic_adapter *adapter, u8 func) +{ + struct qlcnic_cmd_args cmd; + struct qlcnic_vport *vp; + int err, id; + + id = qlcnic_sriov_func_to_index(adapter, func); + if (id < 0) + return id; + + vp = adapter->ahw->sriov->vf_info[id].vp; + err = qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_SET_NIC_INFO); + if (err) + return err; + + cmd.req.arg[1] = 0x3 | func << 16; + if (vp->vlan_mode == QLC_PVID_MODE) { + cmd.req.arg[2] |= BIT_6; + cmd.req.arg[3] |= vp->vlan << 8; + } + + err = qlcnic_issue_cmd(adapter, &cmd); + if (err) + dev_err(&adapter->pdev->dev, "Failed to set ACL, err=%d\n", + err); + + qlcnic_free_mbx_args(&cmd); + return err; +} + +static int qlcnic_sriov_set_vf_vport_info(struct qlcnic_adapter *adapter, + u16 func) +{ + struct qlcnic_info defvp_info; + int err; + + err = qlcnic_sriov_pf_cal_res_limit(adapter, &defvp_info, func); + if (err) + 
return -EIO; + + err = qlcnic_sriov_set_vf_acl(adapter, func); + if (err) + return err; + + return 0; +} + +static int qlcnic_sriov_pf_channel_cfg_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + u16 func = vf->pci_func; + + cmd->rsp.arg[0] = trans->req_hdr->cmd_op; + cmd->rsp.arg[0] |= (1 << 16); + + if (trans->req_hdr->cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) { + err = qlcnic_sriov_pf_config_vport(adapter, 1, func); + if (!err) { + err = qlcnic_sriov_set_vf_vport_info(adapter, func); + if (err) + qlcnic_sriov_pf_config_vport(adapter, 0, func); + } + } else { + err = qlcnic_sriov_pf_config_vport(adapter, 0, func); + } + + if (err) + goto err_out; + + cmd->rsp.arg[0] |= (1 << 25); + + if (trans->req_hdr->cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) + set_bit(QLC_BC_VF_STATE, &vf->state); + else + clear_bit(QLC_BC_VF_STATE, &vf->state); + + return err; + +err_out: + cmd->rsp.arg[0] |= (2 << 25); + return err; +} + +static int qlcnic_sriov_cfg_vf_def_mac(struct qlcnic_adapter *adapter, + struct qlcnic_vport *vp, + u16 func, u16 vlan, u8 op) +{ + struct qlcnic_cmd_args cmd; + struct qlcnic_macvlan_mbx mv; + u8 *addr; + int err; + u32 *buf; + int vpid; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_CONFIG_MAC_VLAN)) + return -ENOMEM; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, func); + if (vpid < 0) { + err = -EINVAL; + goto out; + } + + if (vlan) + op = ((op == QLCNIC_MAC_ADD || op == QLCNIC_MAC_VLAN_ADD) ? 
+ QLCNIC_MAC_VLAN_ADD : QLCNIC_MAC_VLAN_DEL); + + cmd.req.arg[1] = op | (1 << 8) | (3 << 6); + cmd.req.arg[1] |= ((vpid & 0xffff) << 16) | BIT_31; + + addr = vp->mac; + mv.vlan = vlan; + mv.mac_addr0 = addr[0]; + mv.mac_addr1 = addr[1]; + mv.mac_addr2 = addr[2]; + mv.mac_addr3 = addr[3]; + mv.mac_addr4 = addr[4]; + mv.mac_addr5 = addr[5]; + buf = &cmd.req.arg[2]; + memcpy(buf, &mv, sizeof(struct qlcnic_macvlan_mbx)); + + err = qlcnic_issue_cmd(adapter, &cmd); + + if (err) + dev_err(&adapter->pdev->dev, + "MAC-VLAN %s to CAM failed, err=%d.\n", + ((op == 1) ? "add " : "delete "), err); + +out: + qlcnic_free_mbx_args(&cmd); + return err; +} + +static int qlcnic_sriov_validate_create_rx_ctx(struct qlcnic_cmd_args *cmd) +{ + if ((cmd->req.arg[0] >> 29) != 0x3) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_create_rx_ctx_cmd(struct qlcnic_bc_trans *tran, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = tran->vf; + struct qlcnic_adapter *adapter = vf->adapter; + struct qlcnic_rcv_mbx_out *mbx_out; + int err; + u16 vlan; + + err = qlcnic_sriov_validate_create_rx_ctx(cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + cmd->req.arg[6] = vf->vp->handle; + err = qlcnic_issue_cmd(adapter, cmd); + + vlan = vf->vp->vlan; + if (!err) { + mbx_out = (struct qlcnic_rcv_mbx_out *)&cmd->rsp.arg[1]; + vf->rx_ctx_id = mbx_out->ctx_id; + qlcnic_sriov_cfg_vf_def_mac(adapter, vf->vp, vf->pci_func, + vlan, QLCNIC_MAC_ADD); + } else { + vf->rx_ctx_id = 0; + } + + return err; +} + +static int qlcnic_sriov_pf_mac_address_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + u8 type, *mac; + + type = cmd->req.arg[1]; + switch (type) { + case QLCNIC_SET_STATION_MAC: + case QLCNIC_SET_FAC_DEF_MAC: + cmd->rsp.arg[0] = (2 << 25); + break; + case QLCNIC_GET_CURRENT_MAC: + cmd->rsp.arg[0] = (1 << 25); + mac = vf->vp->mac; + cmd->rsp.arg[2] = mac[1] | ((mac[0] << 8) & 0xff00); + 
cmd->rsp.arg[1] = mac[5] | ((mac[4] << 8) & 0xff00) | + ((mac[3]) << 16 & 0xff0000) | + ((mac[2]) << 24 & 0xff000000); + } + + return 0; +} + +static int qlcnic_sriov_validate_create_tx_ctx(struct qlcnic_cmd_args *cmd) +{ + if ((cmd->req.arg[0] >> 29) != 0x3) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_create_tx_ctx_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + struct qlcnic_tx_mbx_out *mbx_out; + int err; + + err = qlcnic_sriov_validate_create_tx_ctx(cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + cmd->req.arg[5] |= vf->vp->handle << 16; + err = qlcnic_issue_cmd(adapter, cmd); + if (!err) { + mbx_out = (struct qlcnic_tx_mbx_out *)&cmd->rsp.arg[2]; + vf->tx_ctx_id = mbx_out->ctx_id; + } else { + vf->tx_ctx_id = 0; + } + + return err; +} + +static int qlcnic_sriov_validate_del_rx_ctx(struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if ((cmd->req.arg[0] >> 29) != 0x3) + return -EINVAL; + + if ((cmd->req.arg[1] & 0xffff) != vf->rx_ctx_id) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_del_rx_ctx_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + u16 vlan; + + err = qlcnic_sriov_validate_del_rx_ctx(vf, cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + vlan = vf->vp->vlan; + qlcnic_sriov_cfg_vf_def_mac(adapter, vf->vp, vf->pci_func, + vlan, QLCNIC_MAC_DEL); + cmd->req.arg[1] |= vf->vp->handle << 16; + err = qlcnic_issue_cmd(adapter, cmd); + + if (!err) + vf->rx_ctx_id = 0; + + return err; +} + +static int qlcnic_sriov_validate_del_tx_ctx(struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if ((cmd->req.arg[0] >> 29) != 0x3) + return -EINVAL; + + if ((cmd->req.arg[1] & 0xffff) != vf->tx_ctx_id) + return -EINVAL; + + return 0; 
+} + +static int qlcnic_sriov_pf_del_tx_ctx_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_del_tx_ctx(vf, cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + cmd->req.arg[1] |= vf->vp->handle << 16; + err = qlcnic_issue_cmd(adapter, cmd); + + if (!err) + vf->tx_ctx_id = 0; + + return err; +} + +static int qlcnic_sriov_validate_cfg_lro(struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if ((cmd->req.arg[1] >> 16) != vf->rx_ctx_id) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_cfg_lro_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_cfg_lro(vf, cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + err = qlcnic_issue_cmd(adapter, cmd); + return err; +} + +static int qlcnic_sriov_pf_cfg_ip_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err = -EIO; + u8 op; + + op = cmd->req.arg[1] & 0xff; + + cmd->req.arg[1] |= vf->vp->handle << 16; + cmd->req.arg[1] |= BIT_31; + + err = qlcnic_issue_cmd(adapter, cmd); + return err; +} + +static int qlcnic_sriov_validate_cfg_intrpt(struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if (((cmd->req.arg[1] >> 8) & 0xff) != vf->pci_func) + return -EINVAL; + + if (!(cmd->req.arg[1] & BIT_16)) + return -EINVAL; + + if ((cmd->req.arg[1] & 0xff) != 0x1) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_cfg_intrpt_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_cfg_intrpt(vf, 
cmd); + if (err) + cmd->rsp.arg[0] |= (0x6 << 25); + else + err = qlcnic_issue_cmd(adapter, cmd); + + return err; +} + +static int qlcnic_sriov_validate_mtu(struct qlcnic_adapter *adapter, + struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if (cmd->req.arg[1] != vf->rx_ctx_id) + return -EINVAL; + + if (cmd->req.arg[2] > adapter->ahw->max_mtu) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_set_mtu_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_mtu(adapter, vf, cmd); + if (err) + cmd->rsp.arg[0] |= (0x6 << 25); + else + err = qlcnic_issue_cmd(adapter, cmd); + + return err; +} + +static int qlcnic_sriov_validate_get_nic_info(struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if (cmd->req.arg[1] & BIT_31) { + if (((cmd->req.arg[1] >> 16) & 0x7fff) != vf->pci_func) + return -EINVAL; + } else { + cmd->req.arg[1] |= vf->vp->handle << 16; + } + + return 0; +} + +static int qlcnic_sriov_pf_get_nic_info_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_get_nic_info(vf, cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + err = qlcnic_issue_cmd(adapter, cmd); + return err; +} + +static int qlcnic_sriov_validate_cfg_rss(struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if (cmd->req.arg[1] != vf->rx_ctx_id) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_cfg_rss_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_cfg_rss(vf, cmd); + if (err) + cmd->rsp.arg[0] |= (0x6 << 25); + else + err = qlcnic_issue_cmd(adapter, cmd); + + return err; 
+} + +static int qlcnic_sriov_validate_cfg_intrcoal(struct qlcnic_adapter *adapter, + struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_nic_intr_coalesce *coal = &adapter->ahw->coal; + u16 ctx_id, pkts, time; + + ctx_id = cmd->req.arg[1] >> 16; + pkts = cmd->req.arg[2] & 0xffff; + time = cmd->req.arg[2] >> 16; + + if (ctx_id != vf->rx_ctx_id) + return -EINVAL; + if (pkts > coal->rx_packets) + return -EINVAL; + if (time < coal->rx_time_us) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_cfg_intrcoal_cmd(struct qlcnic_bc_trans *tran, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = tran->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_cfg_intrcoal(adapter, vf, cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + err = qlcnic_issue_cmd(adapter, cmd); + return err; +} + +static int qlcnic_sriov_validate_cfg_macvlan(struct qlcnic_adapter *adapter, + struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_macvlan_mbx *macvlan; + struct qlcnic_vport *vp = vf->vp; + u8 op, new_op; + + if (!(cmd->req.arg[1] & BIT_8)) + return -EINVAL; + + cmd->req.arg[1] |= (vf->vp->handle << 16); + cmd->req.arg[1] |= BIT_31; + + macvlan = (struct qlcnic_macvlan_mbx *)&cmd->req.arg[2]; + if (!(macvlan->mac_addr0 & BIT_0)) { + dev_err(&adapter->pdev->dev, + "MAC address change is not allowed from VF %d", + vf->pci_func); + return -EINVAL; + } + + if (vp->vlan_mode == QLC_PVID_MODE) { + op = cmd->req.arg[1] & 0x7; + cmd->req.arg[1] &= ~0x7; + new_op = (op == QLCNIC_MAC_ADD || op == QLCNIC_MAC_VLAN_ADD) ? 
+ QLCNIC_MAC_VLAN_ADD : QLCNIC_MAC_VLAN_DEL; + cmd->req.arg[3] |= vp->vlan << 16; + cmd->req.arg[1] |= new_op; + } + + return 0; +} + +static int qlcnic_sriov_pf_cfg_macvlan_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_cfg_macvlan(adapter, vf, cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + err = qlcnic_issue_cmd(adapter, cmd); + return err; +} + +static int qlcnic_sriov_validate_linkevent(struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + if ((cmd->req.arg[1] >> 16) != vf->rx_ctx_id) + return -EINVAL; + + if (!(cmd->req.arg[1] & BIT_8)) + return -EINVAL; + + return 0; +} + +static int qlcnic_sriov_pf_linkevent_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + err = qlcnic_sriov_validate_linkevent(vf, cmd); + if (err) { + cmd->rsp.arg[0] |= (0x6 << 25); + return err; + } + + err = qlcnic_issue_cmd(adapter, cmd); + return err; +} + +static int qlcnic_sriov_pf_cfg_promisc_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_adapter *adapter = vf->adapter; + int err; + + cmd->req.arg[1] |= vf->vp->handle << 16; + cmd->req.arg[1] |= BIT_31; + err = qlcnic_issue_cmd(adapter, cmd); + return err; +} + +static int qlcnic_sriov_pf_get_acl_cmd(struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = trans->vf; + struct qlcnic_vport *vp = vf->vp; + u8 cmd_op, mode = vp->vlan_mode; + + cmd_op = trans->req_hdr->cmd_op; + cmd->rsp.arg[0] = (cmd_op & 0xffff) | 14 << 16 | 1 << 25; + + switch (mode) { + case QLC_GUEST_VLAN_MODE: + cmd->rsp.arg[1] = mode | 1 << 8; + cmd->rsp.arg[2] = 1 << 16; + break; + case QLC_PVID_MODE: + cmd->rsp.arg[1] = mode | 1 << 8 | 
vp->vlan << 16; + break; + } + + return 0; +} + +static int qlcnic_sriov_pf_del_guest_vlan(struct qlcnic_adapter *adapter, + struct qlcnic_vf_info *vf) + +{ + struct qlcnic_vport *vp = vf->vp; + + if (!vp->vlan) + return -EINVAL; + + if (!vf->rx_ctx_id) { + vp->vlan = 0; + return 0; + } + + qlcnic_sriov_cfg_vf_def_mac(adapter, vp, vf->pci_func, + vp->vlan, QLCNIC_MAC_DEL); + vp->vlan = 0; + qlcnic_sriov_cfg_vf_def_mac(adapter, vp, vf->pci_func, + 0, QLCNIC_MAC_ADD); + return 0; +} + +static int qlcnic_sriov_pf_add_guest_vlan(struct qlcnic_adapter *adapter, + struct qlcnic_vf_info *vf, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vport *vp = vf->vp; + int err = -EIO; + + if (vp->vlan) + return err; + + if (!vf->rx_ctx_id) { + vp->vlan = cmd->req.arg[1] >> 16; + return 0; + } + + err = qlcnic_sriov_cfg_vf_def_mac(adapter, vp, vf->pci_func, + 0, QLCNIC_MAC_DEL); + if (err) + return err; + + vp->vlan = cmd->req.arg[1] >> 16; + err = qlcnic_sriov_cfg_vf_def_mac(adapter, vp, vf->pci_func, + vp->vlan, QLCNIC_MAC_ADD); + + if (err) { + qlcnic_sriov_cfg_vf_def_mac(adapter, vp, vf->pci_func, + 0, QLCNIC_MAC_ADD); + vp->vlan = 0; + } + + return err; +} + +static int qlcnic_sriov_pf_cfg_guest_vlan_cmd(struct qlcnic_bc_trans *tran, + struct qlcnic_cmd_args *cmd) +{ + struct qlcnic_vf_info *vf = tran->vf; + struct qlcnic_adapter *adapter = vf->adapter; + struct qlcnic_vport *vp = vf->vp; + int err = -EIO; + u8 op; + + if (vp->vlan_mode != QLC_GUEST_VLAN_MODE) { + cmd->rsp.arg[0] |= 2 << 25; + return err; + } + + op = cmd->req.arg[1] & 0xf; + + if (op) + err = qlcnic_sriov_pf_add_guest_vlan(adapter, vf, cmd); + else + err = qlcnic_sriov_pf_del_guest_vlan(adapter, vf); + + cmd->rsp.arg[0] |= err ? 
2 << 25 : 1 << 25; + return err; +} + +static const int qlcnic_pf_passthru_supp_cmds[] = { + QLCNIC_CMD_GET_STATISTICS, + QLCNIC_CMD_GET_PORT_CONFIG, + QLCNIC_CMD_GET_LINK_STATUS, +}; + +static const struct qlcnic_sriov_cmd_handler qlcnic_pf_bc_cmd_hdlr[] = { + [QLCNIC_BC_CMD_CHANNEL_INIT] = {&qlcnic_sriov_pf_channel_cfg_cmd}, + [QLCNIC_BC_CMD_CHANNEL_TERM] = {&qlcnic_sriov_pf_channel_cfg_cmd}, + [QLCNIC_BC_CMD_GET_ACL] = {&qlcnic_sriov_pf_get_acl_cmd}, + [QLCNIC_BC_CMD_CFG_GUEST_VLAN] = {&qlcnic_sriov_pf_cfg_guest_vlan_cmd}, +}; + +static const struct qlcnic_sriov_fw_cmd_handler qlcnic_pf_fw_cmd_hdlr[] = { + {QLCNIC_CMD_CREATE_RX_CTX, qlcnic_sriov_pf_create_rx_ctx_cmd}, + {QLCNIC_CMD_CREATE_TX_CTX, qlcnic_sriov_pf_create_tx_ctx_cmd}, + {QLCNIC_CMD_MAC_ADDRESS, qlcnic_sriov_pf_mac_address_cmd}, + {QLCNIC_CMD_DESTROY_RX_CTX, qlcnic_sriov_pf_del_rx_ctx_cmd}, + {QLCNIC_CMD_DESTROY_TX_CTX, qlcnic_sriov_pf_del_tx_ctx_cmd}, + {QLCNIC_CMD_CONFIGURE_HW_LRO, qlcnic_sriov_pf_cfg_lro_cmd}, + {QLCNIC_CMD_CONFIGURE_IP_ADDR, qlcnic_sriov_pf_cfg_ip_cmd}, + {QLCNIC_CMD_CONFIG_INTRPT, qlcnic_sriov_pf_cfg_intrpt_cmd}, + {QLCNIC_CMD_SET_MTU, qlcnic_sriov_pf_set_mtu_cmd}, + {QLCNIC_CMD_GET_NIC_INFO, qlcnic_sriov_pf_get_nic_info_cmd}, + {QLCNIC_CMD_CONFIGURE_RSS, qlcnic_sriov_pf_cfg_rss_cmd}, + {QLCNIC_CMD_CONFIG_INTR_COAL, qlcnic_sriov_pf_cfg_intrcoal_cmd}, + {QLCNIC_CMD_CONFIG_MAC_VLAN, qlcnic_sriov_pf_cfg_macvlan_cmd}, + {QLCNIC_CMD_GET_LINK_EVENT, qlcnic_sriov_pf_linkevent_cmd}, + {QLCNIC_CMD_CONFIGURE_MAC_RX_MODE, qlcnic_sriov_pf_cfg_promisc_cmd}, +}; + +void qlcnic_sriov_pf_process_bc_cmd(struct qlcnic_adapter *adapter, + struct qlcnic_bc_trans *trans, + struct qlcnic_cmd_args *cmd) +{ + u8 size, cmd_op; + + cmd_op = trans->req_hdr->cmd_op; + + if (trans->req_hdr->op_type == QLC_BC_CMD) { + size = ARRAY_SIZE(qlcnic_pf_bc_cmd_hdlr); + if (cmd_op < size) { + qlcnic_pf_bc_cmd_hdlr[cmd_op].fn(trans, cmd); + return; + } + } else { + int i; + size = ARRAY_SIZE(qlcnic_pf_fw_cmd_hdlr); + 
for (i = 0; i < size; i++) { + if (cmd_op == qlcnic_pf_fw_cmd_hdlr[i].cmd) { + qlcnic_pf_fw_cmd_hdlr[i].fn(trans, cmd); + return; + } + } + + size = ARRAY_SIZE(qlcnic_pf_passthru_supp_cmds); + for (i = 0; i < size; i++) { + if (cmd_op == qlcnic_pf_passthru_supp_cmds[i]) { + qlcnic_issue_cmd(adapter, cmd); + return; + } + } + } + + cmd->rsp.arg[0] |= (0x9 << 25); +} + +void qlcnic_pf_set_interface_id_create_rx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) +{ + u16 vpid; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, + adapter->ahw->pci_func); + *int_id |= vpid; +} + +void qlcnic_pf_set_interface_id_del_rx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) +{ + u16 vpid; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, + adapter->ahw->pci_func); + *int_id |= vpid << 16; +} + +void qlcnic_pf_set_interface_id_create_tx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) +{ + int vpid; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, + adapter->ahw->pci_func); + *int_id |= vpid << 16; +} + +void qlcnic_pf_set_interface_id_del_tx_ctx(struct qlcnic_adapter *adapter, + u32 *int_id) +{ + u16 vpid; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, + adapter->ahw->pci_func); + *int_id |= vpid << 16; +} + +void qlcnic_pf_set_interface_id_promisc(struct qlcnic_adapter *adapter, + u32 *int_id) +{ + u16 vpid; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, + adapter->ahw->pci_func); + *int_id |= (vpid << 16) | BIT_31; +} + +void qlcnic_pf_set_interface_id_ipaddr(struct qlcnic_adapter *adapter, + u32 *int_id) +{ + u16 vpid; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, + adapter->ahw->pci_func); + *int_id |= (vpid << 16) | BIT_31; +} + +void qlcnic_pf_set_interface_id_macaddr(struct qlcnic_adapter *adapter, + u32 *int_id) +{ + u16 vpid; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, + adapter->ahw->pci_func); + *int_id |= (vpid << 16) | BIT_31; +} + +static void qlcnic_sriov_del_rx_ctx(struct qlcnic_adapter *adapter, + struct qlcnic_vf_info 
*vf) +{ + struct qlcnic_cmd_args cmd; + int vpid; + + if (!vf->rx_ctx_id) + return; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DESTROY_RX_CTX)) + return; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, vf->pci_func); + if (vpid >= 0) { + cmd.req.arg[1] = vf->rx_ctx_id | (vpid & 0xffff) << 16; + if (qlcnic_issue_cmd(adapter, &cmd)) + dev_err(&adapter->pdev->dev, + "Failed to delete Tx ctx in firmware for func 0x%x\n", + vf->pci_func); + else + vf->rx_ctx_id = 0; + } + + qlcnic_free_mbx_args(&cmd); +} + +static void qlcnic_sriov_del_tx_ctx(struct qlcnic_adapter *adapter, + struct qlcnic_vf_info *vf) +{ + struct qlcnic_cmd_args cmd; + int vpid; + + if (!vf->tx_ctx_id) + return; + + if (qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_DESTROY_TX_CTX)) + return; + + vpid = qlcnic_sriov_pf_get_vport_handle(adapter, vf->pci_func); + if (vpid >= 0) { + cmd.req.arg[1] |= vf->tx_ctx_id | (vpid & 0xffff) << 16; + if (qlcnic_issue_cmd(adapter, &cmd)) + dev_err(&adapter->pdev->dev, + "Failed to delete Tx ctx in firmware for func 0x%x\n", + vf->pci_func); + else + vf->tx_ctx_id = 0; + } + + qlcnic_free_mbx_args(&cmd); +} + +static int qlcnic_sriov_add_act_list_irqsave(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf, + struct qlcnic_bc_trans *trans) +{ + struct qlcnic_trans_list *t_list = &vf->rcv_act; + unsigned long flag; + + spin_lock_irqsave(&t_list->lock, flag); + + __qlcnic_sriov_add_act_list(sriov, vf, trans); + + spin_unlock_irqrestore(&t_list->lock, flag); + return 0; +} + +static void __qlcnic_sriov_process_flr(struct qlcnic_vf_info *vf) +{ + struct qlcnic_adapter *adapter = vf->adapter; + + qlcnic_sriov_cleanup_list(&vf->rcv_pend); + cancel_work_sync(&vf->trans_work); + qlcnic_sriov_cleanup_list(&vf->rcv_act); + + if (test_bit(QLC_BC_VF_SOFT_FLR, &vf->state)) { + qlcnic_sriov_del_tx_ctx(adapter, vf); + qlcnic_sriov_del_rx_ctx(adapter, vf); + } + + qlcnic_sriov_pf_config_vport(adapter, 0, vf->pci_func); + + clear_bit(QLC_BC_VF_FLR, &vf->state); 
+ if (test_bit(QLC_BC_VF_SOFT_FLR, &vf->state)) { + qlcnic_sriov_add_act_list_irqsave(adapter->ahw->sriov, vf, + vf->flr_trans); + clear_bit(QLC_BC_VF_SOFT_FLR, &vf->state); + vf->flr_trans = NULL; + } +} + +static void qlcnic_sriov_pf_process_flr(struct work_struct *work) +{ + struct qlcnic_vf_info *vf; + + vf = container_of(work, struct qlcnic_vf_info, flr_work); + __qlcnic_sriov_process_flr(vf); + return; +} + +static void qlcnic_sriov_schedule_flr(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf, + work_func_t func) +{ + if (test_bit(__QLCNIC_RESETTING, &vf->adapter->state)) + return; + + INIT_WORK(&vf->flr_work, func); + queue_work(sriov->bc.bc_flr_wq, &vf->flr_work); +} + +static void qlcnic_sriov_handle_soft_flr(struct qlcnic_adapter *adapter, + struct qlcnic_bc_trans *trans, + struct qlcnic_vf_info *vf) +{ + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + + set_bit(QLC_BC_VF_FLR, &vf->state); + clear_bit(QLC_BC_VF_STATE, &vf->state); + set_bit(QLC_BC_VF_SOFT_FLR, &vf->state); + vf->flr_trans = trans; + qlcnic_sriov_schedule_flr(sriov, vf, qlcnic_sriov_pf_process_flr); + netdev_info(adapter->netdev, "Software FLR for PCI func %d\n", + vf->pci_func); +} + +bool qlcnic_sriov_soft_flr_check(struct qlcnic_adapter *adapter, + struct qlcnic_bc_trans *trans, + struct qlcnic_vf_info *vf) +{ + struct qlcnic_bc_hdr *hdr = trans->req_hdr; + + if ((hdr->cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) && + (hdr->op_type == QLC_BC_CMD) && + test_bit(QLC_BC_VF_STATE, &vf->state)) { + qlcnic_sriov_handle_soft_flr(adapter, trans, vf); + return true; + } + + return false; +} + +void qlcnic_sriov_pf_handle_flr(struct qlcnic_sriov *sriov, + struct qlcnic_vf_info *vf) +{ + struct net_device *dev = vf->adapter->netdev; + + if (!test_and_clear_bit(QLC_BC_VF_STATE, &vf->state)) { + clear_bit(QLC_BC_VF_FLR, &vf->state); + return; + } + + if (test_and_set_bit(QLC_BC_VF_FLR, &vf->state)) { + netdev_info(dev, "FLR for PCI func %d in progress\n", + vf->pci_func); + return; + } + + 
qlcnic_sriov_schedule_flr(sriov, vf, qlcnic_sriov_pf_process_flr); + netdev_info(dev, "FLR received for PCI func %d\n", vf->pci_func); +} + +void qlcnic_sriov_pf_reset(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + struct qlcnic_sriov *sriov = ahw->sriov; + struct qlcnic_vf_info *vf; + u16 num_vfs = sriov->num_vfs; + int i; + + for (i = 0; i < num_vfs; i++) { + vf = &sriov->vf_info[i]; + vf->rx_ctx_id = 0; + vf->tx_ctx_id = 0; + cancel_work_sync(&vf->flr_work); + __qlcnic_sriov_process_flr(vf); + clear_bit(QLC_BC_VF_STATE, &vf->state); + } + + qlcnic_sriov_pf_reset_vport_handle(adapter, ahw->pci_func); + QLCWRX(ahw, QLCNIC_MBX_INTR_ENBL, (ahw->num_msix - 1) << 8); +} + +int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *adapter) +{ + struct qlcnic_hardware_context *ahw = adapter->ahw; + int err; + + if (!qlcnic_sriov_enable_check(adapter)) + return 0; + + ahw->op_mode = QLCNIC_SRIOV_PF_FUNC; + + err = qlcnic_sriov_pf_init(adapter); + if (err) + return err; + + dev_info(&adapter->pdev->dev, "%s: op_mode %d\n", + __func__, ahw->op_mode); + return err; +} + +int qlcnic_sriov_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + int i, num_vfs = sriov->num_vfs; + struct qlcnic_vf_info *vf_info; + u8 *curr_mac; + + if (!qlcnic_sriov_pf_check(adapter)) + return -EOPNOTSUPP; + + if (!is_valid_ether_addr(mac) || vf >= num_vfs) + return -EINVAL; + + if (!compare_ether_addr(adapter->mac_addr, mac)) { + netdev_err(netdev, "MAC address is already in use by the PF\n"); + return -EINVAL; + } + + for (i = 0; i < num_vfs; i++) { + vf_info = &sriov->vf_info[i]; + if (!compare_ether_addr(vf_info->vp->mac, mac)) { + netdev_err(netdev, + "MAC address is already in use by VF %d\n", + i); + return -EINVAL; + } + } + + vf_info = &sriov->vf_info[vf]; + curr_mac = vf_info->vp->mac; + + if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) { 
+ netdev_err(netdev, + "MAC address change failed for VF %d, as VF driver is loaded. Please unload VF driver and retry the operation\n", + vf); + return -EOPNOTSUPP; + } + + memcpy(curr_mac, mac, netdev->addr_len); + netdev_info(netdev, "MAC Address %pM is configured for VF %d\n", + mac, vf); + return 0; +} + +int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_vf_info *vf_info; + struct qlcnic_info nic_info; + struct qlcnic_vport *vp; + u16 vpid; + + if (!qlcnic_sriov_pf_check(adapter)) + return -EOPNOTSUPP; + + if (vf >= sriov->num_vfs) + return -EINVAL; + + if (tx_rate >= 10000 || tx_rate < 100) { + netdev_err(netdev, + "Invalid Tx rate, allowed range is [%d - %d]", + QLC_VF_MIN_TX_RATE, QLC_VF_MAX_TX_RATE); + return -EINVAL; + } + + if (tx_rate == 0) + tx_rate = 10000; + + vf_info = &sriov->vf_info[vf]; + vp = vf_info->vp; + vpid = vp->handle; + + if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) { + if (qlcnic_sriov_get_vf_vport_info(adapter, &nic_info, vpid)) + return -EIO; + + nic_info.max_tx_bw = tx_rate / 100; + nic_info.bit_offsets = BIT_0; + + if (qlcnic_sriov_pf_set_vport_info(adapter, &nic_info, vpid)) + return -EIO; + } + + vp->max_tx_bw = tx_rate / 100; + netdev_info(netdev, + "Setting Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n", + tx_rate, vp->max_tx_bw, vf); + return 0; +} + +int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, + u16 vlan, u8 qos) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_vf_info *vf_info; + struct qlcnic_vport *vp; + + if (!qlcnic_sriov_pf_check(adapter)) + return -EOPNOTSUPP; + + if (vf >= sriov->num_vfs || qos > 7) + return -EINVAL; + + if (vlan > MAX_VLAN_ID) { + netdev_err(netdev, + "Invalid VLAN ID, allowed range is [0 - %d]\n", + MAX_VLAN_ID); + return -EINVAL; + } + + 
vf_info = &sriov->vf_info[vf]; + vp = vf_info->vp; + if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) { + netdev_err(netdev, + "VLAN change failed for VF %d, as VF driver is loaded. Please unload VF driver and retry the operation\n", + vf); + return -EOPNOTSUPP; + } + + switch (vlan) { + case 4095: + vp->vlan_mode = QLC_GUEST_VLAN_MODE; + break; + case 0: + vp->vlan_mode = QLC_NO_VLAN_MODE; + vp->vlan = 0; + vp->qos = 0; + break; + default: + vp->vlan_mode = QLC_PVID_MODE; + vp->vlan = vlan; + vp->qos = qos; + } + + netdev_info(netdev, "Setting VLAN %d, QoS %d, for VF %d\n", + vlan, qos, vf); + return 0; +} + +int qlcnic_sriov_get_vf_config(struct net_device *netdev, + int vf, struct ifla_vf_info *ivi) +{ + struct qlcnic_adapter *adapter = netdev_priv(netdev); + struct qlcnic_sriov *sriov = adapter->ahw->sriov; + struct qlcnic_vport *vp; + + if (!qlcnic_sriov_pf_check(adapter)) + return -EOPNOTSUPP; + + if (vf >= sriov->num_vfs) + return -EINVAL; + + vp = sriov->vf_info[vf].vp; + memcpy(&ivi->mac, vp->mac, ETH_ALEN); + ivi->vlan = vp->vlan; + ivi->qos = vp->qos; + if (vp->max_tx_bw == MAX_BW) + ivi->tx_rate = 0; + else + ivi->tx_rate = vp->max_tx_bw * 100; + + ivi->vf = vf; + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 987fb6f8adc3..4e22e794a186 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -21,8 +21,6 @@ #include <linux/aer.h> #include <linux/log2.h> -#include <linux/sysfs.h> - #define QLC_STATUS_UNSUPPORTED_CMD -2 int qlcnicvf_config_bridged_mode(struct qlcnic_adapter *adapter, u32 enable) @@ -200,10 +198,10 @@ beacon_err: } err = qlcnic_config_led(adapter, b_state, b_rate); - if (!err) + if (!err) { err = len; - else ahw->beacon_state = b_state; + } if (test_and_clear_bit(__QLCNIC_DIAG_RES_ALLOC, &adapter->state)) qlcnic_diag_free_res(adapter->netdev, max_sds_rings); @@ -886,6 +884,244 @@ static 
ssize_t qlcnic_sysfs_read_pci_config(struct file *file, return size; } +static ssize_t qlcnic_83xx_sysfs_flash_read_handler(struct file *filp, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t offset, + size_t size) +{ + unsigned char *p_read_buf; + int ret, count; + struct device *dev = container_of(kobj, struct device, kobj); + struct qlcnic_adapter *adapter = dev_get_drvdata(dev); + + if (!size) + return QL_STATUS_INVALID_PARAM; + if (!buf) + return QL_STATUS_INVALID_PARAM; + + count = size / sizeof(u32); + + if (size % sizeof(u32)) + count++; + + p_read_buf = kcalloc(size, sizeof(unsigned char), GFP_KERNEL); + if (!p_read_buf) + return -ENOMEM; + if (qlcnic_83xx_lock_flash(adapter) != 0) { + kfree(p_read_buf); + return -EIO; + } + + ret = qlcnic_83xx_lockless_flash_read32(adapter, offset, p_read_buf, + count); + + if (ret) { + qlcnic_83xx_unlock_flash(adapter); + kfree(p_read_buf); + return ret; + } + + qlcnic_83xx_unlock_flash(adapter); + memcpy(buf, p_read_buf, size); + kfree(p_read_buf); + + return size; +} + +static int qlcnic_83xx_sysfs_flash_bulk_write(struct qlcnic_adapter *adapter, + char *buf, loff_t offset, + size_t size) +{ + int i, ret, count; + unsigned char *p_cache, *p_src; + + p_cache = kcalloc(size, sizeof(unsigned char), GFP_KERNEL); + if (!p_cache) + return -ENOMEM; + + memcpy(p_cache, buf, size); + p_src = p_cache; + count = size / sizeof(u32); + + if (qlcnic_83xx_lock_flash(adapter) != 0) { + kfree(p_cache); + return -EIO; + } + + if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { + ret = qlcnic_83xx_enable_flash_write(adapter); + if (ret) { + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + } + + for (i = 0; i < count / QLC_83XX_FLASH_WRITE_MAX; i++) { + ret = qlcnic_83xx_flash_bulk_write(adapter, offset, + (u32 *)p_src, + QLC_83XX_FLASH_WRITE_MAX); + + if (ret) { + if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { + ret = qlcnic_83xx_disable_flash_write(adapter); + if (ret) { + 
kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + } + + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + + p_src = p_src + sizeof(u32)*QLC_83XX_FLASH_WRITE_MAX; + offset = offset + sizeof(u32)*QLC_83XX_FLASH_WRITE_MAX; + } + + if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { + ret = qlcnic_83xx_disable_flash_write(adapter); + if (ret) { + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + } + + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + + return 0; +} + +static int qlcnic_83xx_sysfs_flash_write(struct qlcnic_adapter *adapter, + char *buf, loff_t offset, size_t size) +{ + int i, ret, count; + unsigned char *p_cache, *p_src; + + p_cache = kcalloc(size, sizeof(unsigned char), GFP_KERNEL); + if (!p_cache) + return -ENOMEM; + + memcpy(p_cache, buf, size); + p_src = p_cache; + count = size / sizeof(u32); + + if (qlcnic_83xx_lock_flash(adapter) != 0) { + kfree(p_cache); + return -EIO; + } + + if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { + ret = qlcnic_83xx_enable_flash_write(adapter); + if (ret) { + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + } + + for (i = 0; i < count; i++) { + ret = qlcnic_83xx_flash_write32(adapter, offset, (u32 *)p_src); + if (ret) { + if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { + ret = qlcnic_83xx_disable_flash_write(adapter); + if (ret) { + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + } + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + + p_src = p_src + sizeof(u32); + offset = offset + sizeof(u32); + } + + if (adapter->ahw->fdt.mfg_id == adapter->flash_mfg_id) { + ret = qlcnic_83xx_disable_flash_write(adapter); + if (ret) { + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + return -EIO; + } + } + + kfree(p_cache); + qlcnic_83xx_unlock_flash(adapter); + + return 0; +} + +static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp, + 
struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t offset, + size_t size) +{ + int ret; + static int flash_mode; + unsigned long data; + struct device *dev = container_of(kobj, struct device, kobj); + struct qlcnic_adapter *adapter = dev_get_drvdata(dev); + + if (!buf) + return QL_STATUS_INVALID_PARAM; + + ret = kstrtoul(buf, 16, &data); + + switch (data) { + case QLC_83XX_FLASH_SECTOR_ERASE_CMD: + flash_mode = QLC_83XX_ERASE_MODE; + ret = qlcnic_83xx_erase_flash_sector(adapter, offset); + if (ret) { + dev_err(&adapter->pdev->dev, + "%s failed at %d\n", __func__, __LINE__); + return -EIO; + } + break; + + case QLC_83XX_FLASH_BULK_WRITE_CMD: + flash_mode = QLC_83XX_BULK_WRITE_MODE; + break; + + case QLC_83XX_FLASH_WRITE_CMD: + flash_mode = QLC_83XX_WRITE_MODE; + break; + default: + if (flash_mode == QLC_83XX_BULK_WRITE_MODE) { + ret = qlcnic_83xx_sysfs_flash_bulk_write(adapter, buf, + offset, size); + if (ret) { + dev_err(&adapter->pdev->dev, + "%s failed at %d\n", + __func__, __LINE__); + return -EIO; + } + } + + if (flash_mode == QLC_83XX_WRITE_MODE) { + ret = qlcnic_83xx_sysfs_flash_write(adapter, buf, + offset, size); + if (ret) { + dev_err(&adapter->pdev->dev, + "%s failed at %d\n", __func__, + __LINE__); + return -EIO; + } + } + } + + return size; +} + static struct device_attribute dev_attr_bridged_mode = { .attr = {.name = "bridged_mode", .mode = (S_IRUGO | S_IWUSR)}, .show = qlcnic_show_bridged_mode, @@ -960,6 +1196,13 @@ static struct bin_attribute bin_attr_pm_config = { .write = qlcnic_sysfs_write_pm_config, }; +static struct bin_attribute bin_attr_flash = { + .attr = {.name = "flash", .mode = (S_IRUGO | S_IWUSR)}, + .size = 0, + .read = qlcnic_83xx_sysfs_flash_read_handler, + .write = qlcnic_83xx_sysfs_flash_write_handler, +}; + void qlcnic_create_sysfs_entries(struct qlcnic_adapter *adapter) { struct device *dev = &adapter->pdev->dev; @@ -1048,10 +1291,18 @@ void qlcnic_82xx_remove_sysfs(struct qlcnic_adapter *adapter) void 
qlcnic_83xx_add_sysfs(struct qlcnic_adapter *adapter) { + struct device *dev = &adapter->pdev->dev; + qlcnic_create_diag_entries(adapter); + + if (sysfs_create_bin_file(&dev->kobj, &bin_attr_flash)) + dev_info(dev, "failed to create flash sysfs entry\n"); } void qlcnic_83xx_remove_sysfs(struct qlcnic_adapter *adapter) { + struct device *dev = &adapter->pdev->dev; + qlcnic_remove_diag_entries(adapter); + sysfs_remove_bin_file(&dev->kobj, &bin_attr_flash); } diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index a131d7b5d2fe..7e8d68263963 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -18,7 +18,7 @@ */ #define DRV_NAME "qlge" #define DRV_STRING "QLogic 10 Gigabit PCI-E Ethernet Driver " -#define DRV_VERSION "v1.00.00.31" +#define DRV_VERSION "v1.00.00.32" #define WQ_ADDR_ALIGN 0x3 /* 4 byte alignment */ diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c index 6f316ab23257..0780e039b271 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c @@ -379,13 +379,13 @@ static int ql_get_settings(struct net_device *ndev, ecmd->supported = SUPPORTED_10000baseT_Full; ecmd->advertising = ADVERTISED_10000baseT_Full; - ecmd->autoneg = AUTONEG_ENABLE; ecmd->transceiver = XCVR_EXTERNAL; if ((qdev->link_status & STS_LINK_TYPE_MASK) == STS_LINK_TYPE_10GBASET) { ecmd->supported |= (SUPPORTED_TP | SUPPORTED_Autoneg); ecmd->advertising |= (ADVERTISED_TP | ADVERTISED_Autoneg); ecmd->port = PORT_TP; + ecmd->autoneg = AUTONEG_ENABLE; } else { ecmd->supported |= SUPPORTED_FIBRE; ecmd->advertising |= ADVERTISED_FIBRE; diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index b13ab544a7eb..87463bc701a6 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -409,7 
+409,7 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type, (qdev-> func << CAM_OUT_FUNC_SHIFT) | (0 << CAM_OUT_CQ_ID_SHIFT)); - if (qdev->ndev->features & NETIF_F_HW_VLAN_RX) + if (qdev->ndev->features & NETIF_F_HW_VLAN_CTAG_RX) cam_output |= CAM_OUT_RV; /* route to NIC core */ ql_write32(qdev, MAC_ADDR_DATA, cam_output); @@ -1211,8 +1211,6 @@ static void ql_update_sbq(struct ql_adapter *qdev, struct rx_ring *rx_ring) netdev_alloc_skb(qdev->ndev, SMALL_BUFFER_SIZE); if (sbq_desc->p.skb == NULL) { - netif_err(qdev, probe, qdev->ndev, - "Couldn't get an skb.\n"); rx_ring->sbq_clean_idx = clean_idx; return; } @@ -1434,11 +1432,13 @@ map_error: } /* Categorizing receive firmware frame errors */ -static void ql_categorize_rx_err(struct ql_adapter *qdev, u8 rx_err) +static void ql_categorize_rx_err(struct ql_adapter *qdev, u8 rx_err, + struct rx_ring *rx_ring) { struct nic_stats *stats = &qdev->nic_stats; stats->rx_err_count++; + rx_ring->rx_errors++; switch (rx_err & IB_MAC_IOCB_RSP_ERR_MASK) { case IB_MAC_IOCB_RSP_ERR_CODE_ERR: @@ -1474,6 +1474,12 @@ static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev, struct bq_desc *lbq_desc = ql_get_curr_lchunk(qdev, rx_ring); struct napi_struct *napi = &rx_ring->napi; + /* Frame error, so drop the packet. 
*/ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + put_page(lbq_desc->p.pg_chunk.page); + return; + } napi->dev = qdev->ndev; skb = napi_get_frags(napi); @@ -1500,7 +1506,7 @@ static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev, skb->ip_summed = CHECKSUM_UNNECESSARY; skb_record_rx_queue(skb, rx_ring->cq_id); if (vlan_id != 0xffff) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); napi_gro_frags(napi); } @@ -1519,8 +1525,6 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, skb = netdev_alloc_skb(ndev, length); if (!skb) { - netif_err(qdev, drv, qdev->ndev, - "Couldn't get an skb, need to unwind!.\n"); rx_ring->rx_dropped++; put_page(lbq_desc->p.pg_chunk.page); return; @@ -1529,6 +1533,12 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, addr = lbq_desc->p.pg_chunk.va; prefetch(addr); + /* Frame error, so drop the packet. */ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + goto err_out; + } + /* The max framesize filter on this chip is set higher than * MTU since FCoE uses 2k frames. 
*/ @@ -1578,7 +1588,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, skb_record_rx_queue(skb, rx_ring->cq_id); if (vlan_id != 0xffff) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (skb->ip_summed == CHECKSUM_UNNECESSARY) napi_gro_receive(napi, skb); else @@ -1605,8 +1615,6 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, /* Allocate new_skb and copy */ new_skb = netdev_alloc_skb(qdev->ndev, length + NET_IP_ALIGN); if (new_skb == NULL) { - netif_err(qdev, probe, qdev->ndev, - "No skb available, drop the packet.\n"); rx_ring->rx_dropped++; return; } @@ -1614,6 +1622,13 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, memcpy(skb_put(new_skb, length), skb->data, length); skb = new_skb; + /* Frame error, so drop the packet. */ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + dev_kfree_skb_any(skb); + return; + } + /* loopback self test for ethtool */ if (test_bit(QL_SELFTEST, &qdev->flags)) { ql_check_lb_frame(qdev, skb); @@ -1676,7 +1691,7 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, skb_record_rx_queue(skb, rx_ring->cq_id); if (vlan_id != 0xffff) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (skb->ip_summed == CHECKSUM_UNNECESSARY) napi_gro_receive(&rx_ring->napi, skb); else @@ -1919,6 +1934,13 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev, return; } + /* Frame error, so drop the packet. */ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + dev_kfree_skb_any(skb); + return; + } + /* The max framesize filter on this chip is set higher than * MTU since FCoE uses 2k frames. 
*/ @@ -1981,7 +2003,7 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev, rx_ring->rx_bytes += skb->len; skb_record_rx_queue(skb, rx_ring->cq_id); if ((ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_V) && (vlan_id != 0)) - __vlan_hwaccel_put_tag(skb, vlan_id); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_id); if (skb->ip_summed == CHECKSUM_UNNECESSARY) napi_gro_receive(&rx_ring->napi, skb); else @@ -2000,12 +2022,6 @@ static unsigned long ql_process_mac_rx_intr(struct ql_adapter *qdev, QL_DUMP_IB_MAC_RSP(ib_mac_rsp); - /* Frame error, so drop the packet. */ - if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { - ql_categorize_rx_err(qdev, ib_mac_rsp->flags2); - return (unsigned long)length; - } - if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV) { /* The data and headers are split into * separate buffers. @@ -2285,7 +2301,7 @@ static void qlge_vlan_mode(struct net_device *ndev, netdev_features_t features) { struct ql_adapter *qdev = netdev_priv(ndev); - if (features & NETIF_F_HW_VLAN_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) { ql_write32(qdev, NIC_RCV_CFG, NIC_RCV_CFG_VLAN_MASK | NIC_RCV_CFG_VLAN_MATCH_AND_NON); } else { @@ -2300,10 +2316,10 @@ static netdev_features_t qlge_fix_features(struct net_device *ndev, * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx. 
*/ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -2313,7 +2329,7 @@ static int qlge_set_features(struct net_device *ndev, { netdev_features_t changed = ndev->features ^ features; - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) qlge_vlan_mode(ndev, features); return 0; @@ -2332,7 +2348,7 @@ static int __qlge_vlan_rx_add_vid(struct ql_adapter *qdev, u16 vid) return err; } -static int qlge_vlan_rx_add_vid(struct net_device *ndev, u16 vid) +static int qlge_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct ql_adapter *qdev = netdev_priv(ndev); int status; @@ -2363,7 +2379,7 @@ static int __qlge_vlan_rx_kill_vid(struct ql_adapter *qdev, u16 vid) return err; } -static int qlge_vlan_rx_kill_vid(struct net_device *ndev, u16 vid) +static int qlge_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct ql_adapter *qdev = netdev_priv(ndev); int status; @@ -4671,9 +4687,9 @@ static int qlge_probe(struct pci_dev *pdev, SET_NETDEV_DEV(ndev, &pdev->dev); ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | - NETIF_F_HW_VLAN_TX | NETIF_F_RXCSUM; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_RXCSUM; ndev->features = ndev->hw_features | - NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->vlan_features = ndev->hw_features; if (test_bit(QL_DMA64, &qdev->flags)) diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 5b4103db70f5..e9dc84943cfc 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -224,11 +224,14 @@ static int r6040_phy_read(void __iomem *ioaddr, int phy_addr, int reg) break; } + if (limit < 0) + return -ETIMEDOUT; + return ioread16(ioaddr + MMRD); } /* 
Write a word data from PHY Chip */ -static void r6040_phy_write(void __iomem *ioaddr, +static int r6040_phy_write(void __iomem *ioaddr, int phy_addr, int reg, u16 val) { int limit = MAC_DEF_TIMEOUT; @@ -243,6 +246,8 @@ static void r6040_phy_write(void __iomem *ioaddr, if (!(cmd & MDIO_WRITE)) break; } + + return (limit < 0) ? -ETIMEDOUT : 0; } static int r6040_mdiobus_read(struct mii_bus *bus, int phy_addr, int reg) @@ -261,9 +266,7 @@ static int r6040_mdiobus_write(struct mii_bus *bus, int phy_addr, struct r6040_private *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - r6040_phy_write(ioaddr, phy_addr, reg, value); - - return 0; + return r6040_phy_write(ioaddr, phy_addr, reg, value); } static int r6040_mdiobus_reset(struct mii_bus *bus) @@ -347,7 +350,6 @@ static int r6040_alloc_rxbufs(struct net_device *dev) do { skb = netdev_alloc_skb(dev, MAX_BUF_SIZE); if (!skb) { - netdev_err(dev, "failed to alloc skb for rx\n"); rc = -ENOMEM; goto err_exit; } diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index b62a32484f6a..7d1fb9ad1296 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -431,7 +431,7 @@ static inline void cp_rx_skb (struct cp_private *cp, struct sk_buff *skb, cp->dev->stats.rx_bytes += skb->len; if (opts2 & RxVlanTagged) - __vlan_hwaccel_put_tag(skb, swab16(opts2 & 0xffff)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff)); napi_gro_receive(&cp->napi, skb); } @@ -1438,7 +1438,7 @@ static int cp_set_features(struct net_device *dev, netdev_features_t features) else cp->cpcmd &= ~RxChkSum; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) cp->cpcmd |= RxVlanOn; else cp->cpcmd &= ~RxVlanOn; @@ -1955,14 +1955,14 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &cp_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - dev->features |= NETIF_F_HW_VLAN_TX | 
NETIF_F_HW_VLAN_RX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; /* disabled by default until verified */ dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 1276ac71353a..3ccedeb8aba0 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2041,8 +2041,6 @@ keep_pkt: netif_receive_skb (skb); } else { - if (net_ratelimit()) - netdev_warn(dev, "Memory squeeze, dropping packet\n"); dev->stats.rx_dropped++; } received++; diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 9f2d416de750..d77d60ea8202 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -782,8 +782,6 @@ static void net_rx(struct net_device *dev) skb = netdev_alloc_skb(dev, pkt_len + 2); if (skb == NULL) { - printk(KERN_ERR "%s: Memory squeeze, dropping packet.\n", - dev->name); dev->stats.rx_dropped++; goto done; } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 28fb50a1e9c3..c6dac38fd9cc 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -47,7 +47,9 @@ #define FIRMWARE_8402_1 "rtl_nic/rtl8402-1.fw" #define FIRMWARE_8411_1 "rtl_nic/rtl8411-1.fw" #define FIRMWARE_8106E_1 "rtl_nic/rtl8106e-1.fw" -#define FIRMWARE_8168G_1 "rtl_nic/rtl8168g-1.fw" +#define FIRMWARE_8106E_2 "rtl_nic/rtl8106e-2.fw" +#define FIRMWARE_8168G_2 "rtl_nic/rtl8168g-2.fw" +#define FIRMWARE_8168G_3 "rtl_nic/rtl8168g-3.fw" #ifdef RTL8169_DEBUG #define assert(expr) \ @@ -140,6 +142,8 @@ enum mac_version { RTL_GIGA_MAC_VER_39, RTL_GIGA_MAC_VER_40, 
RTL_GIGA_MAC_VER_41, + RTL_GIGA_MAC_VER_42, + RTL_GIGA_MAC_VER_43, RTL_GIGA_MAC_NONE = 0xff, }; @@ -262,10 +266,16 @@ static const struct { _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_1, JUMBO_1K, true), [RTL_GIGA_MAC_VER_40] = - _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_1, + _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_2, JUMBO_9K, false), [RTL_GIGA_MAC_VER_41] = _R("RTL8168g/8111g", RTL_TD_1, NULL, JUMBO_9K, false), + [RTL_GIGA_MAC_VER_42] = + _R("RTL8168g/8111g", RTL_TD_1, FIRMWARE_8168G_3, + JUMBO_9K, false), + [RTL_GIGA_MAC_VER_43] = + _R("RTL8106e", RTL_TD_1, FIRMWARE_8106E_2, + JUMBO_1K, true), }; #undef _R @@ -329,6 +339,7 @@ enum rtl_registers { #define RXCFG_FIFO_SHIFT 13 /* No threshold before first PCI xfer */ #define RX_FIFO_THRESH (7 << RXCFG_FIFO_SHIFT) +#define RX_EARLY_OFF (1 << 11) #define RXCFG_DMA_SHIFT 8 /* Unlimited maximum PCI burst. */ #define RX_DMA_BURST (7 << RXCFG_DMA_SHIFT) @@ -513,6 +524,7 @@ enum rtl_register_content { PMEnable = (1 << 0), /* Power Management Enable */ /* Config2 register p. 25 */ + ClkReqEn = (1 << 7), /* Clock Request Enable */ MSIEnable = (1 << 5), /* 8169 only. Reserved in the 8168. 
*/ PCI_Clock_66MHz = 0x01, PCI_Clock_33MHz = 0x00, @@ -533,6 +545,7 @@ enum rtl_register_content { Spi_en = (1 << 3), LanWake = (1 << 1), /* LanWake enable/disable */ PMEStatus = (1 << 0), /* PME status can be reset by PCI RST# */ + ASPM_en = (1 << 0), /* ASPM enable */ /* TBICSR p.28 */ TBIReset = 0x80000000, @@ -814,7 +827,9 @@ MODULE_FIRMWARE(FIRMWARE_8168F_2); MODULE_FIRMWARE(FIRMWARE_8402_1); MODULE_FIRMWARE(FIRMWARE_8411_1); MODULE_FIRMWARE(FIRMWARE_8106E_1); -MODULE_FIRMWARE(FIRMWARE_8168G_1); +MODULE_FIRMWARE(FIRMWARE_8106E_2); +MODULE_FIRMWARE(FIRMWARE_8168G_2); +MODULE_FIRMWARE(FIRMWARE_8168G_3); static void rtl_lock_work(struct rtl8169_private *tp) { @@ -1024,14 +1039,6 @@ static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) (RTL_R32(GPHY_OCP) & 0xffff) : ~0; } -static void rtl_w1w0_phy_ocp(struct rtl8169_private *tp, int reg, int p, int m) -{ - int val; - - val = r8168_phy_ocp_read(tp, reg); - r8168_phy_ocp_write(tp, reg, (val | p) & ~m); -} - static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) { void __iomem *ioaddr = tp->mmio_addr; @@ -1077,6 +1084,21 @@ static int r8168g_mdio_read(struct rtl8169_private *tp, int reg) return r8168_phy_ocp_read(tp, tp->ocp_base + reg * 2); } +static void mac_mcu_write(struct rtl8169_private *tp, int reg, int value) +{ + if (reg == 0x1f) { + tp->ocp_base = value << 4; + return; + } + + r8168_mac_ocp_write(tp, tp->ocp_base + reg, value); +} + +static int mac_mcu_read(struct rtl8169_private *tp, int reg) +{ + return r8168_mac_ocp_read(tp, tp->ocp_base + reg); +} + DECLARE_RTL_COND(rtl_phyar_cond) { void __iomem *ioaddr = tp->mmio_addr; @@ -1771,16 +1793,17 @@ static void __rtl8169_set_features(struct net_device *dev, netdev_features_t changed = features ^ dev->features; void __iomem *ioaddr = tp->mmio_addr; - if (!(changed & (NETIF_F_RXALL | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX))) + if (!(changed & (NETIF_F_RXALL | NETIF_F_RXCSUM | + NETIF_F_HW_VLAN_CTAG_RX))) return; - if (changed 
& (NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX)) { + if (changed & (NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX)) { if (features & NETIF_F_RXCSUM) tp->cp_cmd |= RxChkSum; else tp->cp_cmd &= ~RxChkSum; - if (dev->features & NETIF_F_HW_VLAN_RX) + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) tp->cp_cmd |= RxVlan; else tp->cp_cmd &= ~RxVlan; @@ -1820,7 +1843,7 @@ static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb) u32 opts2 = le32_to_cpu(desc->opts2); if (opts2 & RxVlanTag) - __vlan_hwaccel_put_tag(skb, swab16(opts2 & 0xffff)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff)); } static int rtl8169_gset_tbi(struct net_device *dev, struct ethtool_cmd *cmd) @@ -2028,6 +2051,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, int mac_version; } mac_info[] = { /* 8168G family. */ + { 0x7cf00000, 0x50900000, RTL_GIGA_MAC_VER_42 }, { 0x7cf00000, 0x4c100000, RTL_GIGA_MAC_VER_41 }, { 0x7cf00000, 0x4c000000, RTL_GIGA_MAC_VER_40 }, @@ -2116,6 +2140,10 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, netif_notice(tp, probe, dev, "unknown MAC, using family default\n"); tp->mac_version = default_version; + } else if (tp->mac_version == RTL_GIGA_MAC_VER_42) { + tp->mac_version = tp->mii.supports_gmii ? 
+ RTL_GIGA_MAC_VER_42 : + RTL_GIGA_MAC_VER_43; } } @@ -2142,9 +2170,7 @@ static void rtl_writephy_batch(struct rtl8169_private *tp, #define PHY_DATA_OR 0x10000000 #define PHY_DATA_AND 0x20000000 #define PHY_BJMPN 0x30000000 -#define PHY_READ_EFUSE 0x40000000 -#define PHY_READ_MAC_BYTE 0x50000000 -#define PHY_WRITE_MAC_BYTE 0x60000000 +#define PHY_MDIO_CHG 0x40000000 #define PHY_CLEAR_READCOUNT 0x70000000 #define PHY_WRITE 0x80000000 #define PHY_READCOUNT_EQ_SKIP 0x90000000 @@ -2153,7 +2179,6 @@ static void rtl_writephy_batch(struct rtl8169_private *tp, #define PHY_WRITE_PREVIOUS 0xc0000000 #define PHY_SKIPN 0xd0000000 #define PHY_DELAY_MS 0xe0000000 -#define PHY_WRITE_ERI_WORD 0xf0000000 struct fw_info { u32 magic; @@ -2230,7 +2255,7 @@ static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev, case PHY_READ: case PHY_DATA_OR: case PHY_DATA_AND: - case PHY_READ_EFUSE: + case PHY_MDIO_CHG: case PHY_CLEAR_READCOUNT: case PHY_WRITE: case PHY_WRITE_PREVIOUS: @@ -2261,9 +2286,6 @@ static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev, } break; - case PHY_READ_MAC_BYTE: - case PHY_WRITE_MAC_BYTE: - case PHY_WRITE_ERI_WORD: default: netif_err(tp, ifup, tp->dev, "Invalid action 0x%08x\n", action); @@ -2294,10 +2316,13 @@ out: static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) { struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; + struct mdio_ops org, *ops = &tp->mdio_ops; u32 predata, count; size_t index; predata = count = 0; + org.write = ops->write; + org.read = ops->read; for (index = 0; index < pa->size; ) { u32 action = le32_to_cpu(pa->code[index]); @@ -2324,8 +2349,15 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) case PHY_BJMPN: index -= regno; break; - case PHY_READ_EFUSE: - predata = rtl8168d_efuse_read(tp, regno); + case PHY_MDIO_CHG: + if (data == 0) { + ops->write = org.write; + ops->read = org.read; + } else if (data == 1) { + ops->write = 
mac_mcu_write; + ops->read = mac_mcu_read; + } + index++; break; case PHY_CLEAR_READCOUNT: @@ -2361,13 +2393,13 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) index++; break; - case PHY_READ_MAC_BYTE: - case PHY_WRITE_MAC_BYTE: - case PHY_WRITE_ERI_WORD: default: BUG(); } } + + ops->write = org.write; + ops->read = org.read; } static void rtl_release_firmware(struct rtl8169_private *tp) @@ -3368,51 +3400,68 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp) static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) { - static const u16 mac_ocp_patch[] = { - 0xe008, 0xe01b, 0xe01d, 0xe01f, - 0xe021, 0xe023, 0xe025, 0xe027, - 0x49d2, 0xf10d, 0x766c, 0x49e2, - 0xf00a, 0x1ec0, 0x8ee1, 0xc60a, - - 0x77c0, 0x4870, 0x9fc0, 0x1ea0, - 0xc707, 0x8ee1, 0x9d6c, 0xc603, - 0xbe00, 0xb416, 0x0076, 0xe86c, - 0xc602, 0xbe00, 0x0000, 0xc602, - - 0xbe00, 0x0000, 0xc602, 0xbe00, - 0x0000, 0xc602, 0xbe00, 0x0000, - 0xc602, 0xbe00, 0x0000, 0xc602, - 0xbe00, 0x0000, 0xc602, 0xbe00, - - 0x0000, 0x0000, 0x0000, 0x0000 - }; - u32 i; + rtl_apply_firmware(tp); - /* Patch code for GPHY reset */ - for (i = 0; i < ARRAY_SIZE(mac_ocp_patch); i++) - r8168_mac_ocp_write(tp, 0xf800 + 2*i, mac_ocp_patch[i]); - r8168_mac_ocp_write(tp, 0xfc26, 0x8000); - r8168_mac_ocp_write(tp, 0xfc28, 0x0075); + rtl_writephy(tp, 0x1f, 0x0a46); + if (rtl_readphy(tp, 0x10) & 0x0100) { + rtl_writephy(tp, 0x1f, 0x0bcc); + rtl_w1w0_phy(tp, 0x12, 0x0000, 0x8000); + } else { + rtl_writephy(tp, 0x1f, 0x0bcc); + rtl_w1w0_phy(tp, 0x12, 0x8000, 0x0000); + } - rtl_apply_firmware(tp); + rtl_writephy(tp, 0x1f, 0x0a46); + if (rtl_readphy(tp, 0x13) & 0x0100) { + rtl_writephy(tp, 0x1f, 0x0c41); + rtl_w1w0_phy(tp, 0x15, 0x0002, 0x0000); + } else { + rtl_writephy(tp, 0x1f, 0x0c41); + rtl_w1w0_phy(tp, 0x15, 0x0000, 0x0002); + } - if (r8168_phy_ocp_read(tp, 0xa460) & 0x0100) - rtl_w1w0_phy_ocp(tp, 0xbcc4, 0x0000, 0x8000); - else - rtl_w1w0_phy_ocp(tp, 0xbcc4, 0x8000, 0x0000); + /* 
Enable PHY auto speed down */ + rtl_writephy(tp, 0x1f, 0x0a44); + rtl_w1w0_phy(tp, 0x11, 0x000c, 0x0000); + + rtl_writephy(tp, 0x1f, 0x0bcc); + rtl_w1w0_phy(tp, 0x14, 0x0100, 0x0000); + rtl_writephy(tp, 0x1f, 0x0a44); + rtl_w1w0_phy(tp, 0x11, 0x00c0, 0x0000); + rtl_writephy(tp, 0x1f, 0x0a43); + rtl_writephy(tp, 0x13, 0x8084); + rtl_w1w0_phy(tp, 0x14, 0x0000, 0x6000); + rtl_w1w0_phy(tp, 0x10, 0x1003, 0x0000); + + /* EEE auto-fallback function */ + rtl_writephy(tp, 0x1f, 0x0a4b); + rtl_w1w0_phy(tp, 0x11, 0x0004, 0x0000); + + /* Enable UC LPF tune function */ + rtl_writephy(tp, 0x1f, 0x0a43); + rtl_writephy(tp, 0x13, 0x8012); + rtl_w1w0_phy(tp, 0x14, 0x8000, 0x0000); - if (r8168_phy_ocp_read(tp, 0xa466) & 0x0100) - rtl_w1w0_phy_ocp(tp, 0xc41a, 0x0002, 0x0000); - else - rtl_w1w0_phy_ocp(tp, 0xbcc4, 0x0000, 0x0002); + rtl_writephy(tp, 0x1f, 0x0c42); + rtl_w1w0_phy(tp, 0x11, 0x4000, 0x2000); - rtl_w1w0_phy_ocp(tp, 0xa442, 0x000c, 0x0000); - rtl_w1w0_phy_ocp(tp, 0xa4b2, 0x0004, 0x0000); + /* Improve SWR Efficiency */ + rtl_writephy(tp, 0x1f, 0x0bcd); + rtl_writephy(tp, 0x14, 0x5065); + rtl_writephy(tp, 0x14, 0xd065); + rtl_writephy(tp, 0x1f, 0x0bc8); + rtl_writephy(tp, 0x11, 0x5655); + rtl_writephy(tp, 0x1f, 0x0bcd); + rtl_writephy(tp, 0x14, 0x1065); + rtl_writephy(tp, 0x14, 0x9065); + rtl_writephy(tp, 0x14, 0x1065); - r8168_phy_ocp_write(tp, 0xa436, 0x8012); - rtl_w1w0_phy_ocp(tp, 0xa438, 0x8000, 0x0000); + rtl_writephy(tp, 0x1f, 0x0000); +} - rtl_w1w0_phy_ocp(tp, 0xc422, 0x4000, 0x2000); +static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp) +{ + rtl_apply_firmware(tp); } static void rtl8102e_hw_phy_config(struct rtl8169_private *tp) @@ -3600,6 +3649,10 @@ static void rtl_hw_phy_config(struct net_device *dev) case RTL_GIGA_MAC_VER_40: rtl8168g_1_hw_phy_config(tp); break; + case RTL_GIGA_MAC_VER_42: + case RTL_GIGA_MAC_VER_43: + rtl8168g_2_hw_phy_config(tp); + break; case RTL_GIGA_MAC_VER_41: default: @@ -3808,6 +3861,8 @@ static void rtl_init_mdio_ops(struct 
rtl8169_private *tp) break; case RTL_GIGA_MAC_VER_40: case RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_42: + case RTL_GIGA_MAC_VER_43: ops->write = r8168g_mdio_write; ops->read = r8168g_mdio_read; break; @@ -3818,6 +3873,30 @@ static void rtl_init_mdio_ops(struct rtl8169_private *tp) } } +static void rtl_speed_down(struct rtl8169_private *tp) +{ + u32 adv; + int lpa; + + rtl_writephy(tp, 0x1f, 0x0000); + lpa = rtl_readphy(tp, MII_LPA); + + if (lpa & (LPA_10HALF | LPA_10FULL)) + adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full; + else if (lpa & (LPA_100HALF | LPA_100FULL)) + adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; + else + adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | + (tp->mii.supports_gmii ? + ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full : 0); + + rtl8169_set_speed(tp->dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL, + adv); +} + static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) { void __iomem *ioaddr = tp->mmio_addr; @@ -3835,6 +3914,8 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_40: case RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_42: + case RTL_GIGA_MAC_VER_43: RTL_W32(RxConfig, RTL_R32(RxConfig) | AcceptBroadcast | AcceptMulticast | AcceptMyPhys); break; @@ -3848,9 +3929,7 @@ static bool rtl_wol_pll_power_down(struct rtl8169_private *tp) if (!(__rtl8169_get_wol(tp) & WAKE_ANY)) return false; - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, MII_BMCR, 0x0000); - + rtl_speed_down(tp); rtl_wol_suspend_quirk(tp); return true; @@ -3944,6 +4023,8 @@ static void r8168_phy_power_down(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_32: case RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_40: + case RTL_GIGA_MAC_VER_41: rtl_writephy(tp, MII_BMCR, BMCR_ANENABLE | BMCR_PDOWN); break; @@ -4005,6 +4086,11 @@ 
static void r8168_pll_power_down(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_33: RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80); break; + case RTL_GIGA_MAC_VER_40: + case RTL_GIGA_MAC_VER_41: + rtl_w1w0_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000, + 0xfc000000, ERIAR_EXGMAC); + break; } } @@ -4022,6 +4108,11 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_33: RTL_W8(PMCH, RTL_R8(PMCH) | 0x80); break; + case RTL_GIGA_MAC_VER_40: + case RTL_GIGA_MAC_VER_41: + rtl_w1w0_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000, + 0x00000000, ERIAR_EXGMAC); + break; } r8168_phy_power_up(tp); @@ -4058,6 +4149,7 @@ static void rtl_init_pll_power_ops(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_30: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: + case RTL_GIGA_MAC_VER_43: ops->down = r810x_pll_power_down; ops->up = r810x_pll_power_up; break; @@ -4085,6 +4177,7 @@ static void rtl_init_pll_power_ops(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_38: case RTL_GIGA_MAC_VER_40: case RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_42: ops->down = r8168_pll_power_down; ops->up = r8168_pll_power_up; break; @@ -4127,6 +4220,12 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_34: RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; + case RTL_GIGA_MAC_VER_40: + case RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_42: + case RTL_GIGA_MAC_VER_43: + RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST | RX_EARLY_OFF); + break; default: RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST); break; @@ -4283,6 +4382,8 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp) */ case RTL_GIGA_MAC_VER_40: case RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_42: + case RTL_GIGA_MAC_VER_43: default: ops->disable = NULL; ops->enable = NULL; @@ -4390,6 +4491,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp) tp->mac_version == RTL_GIGA_MAC_VER_37 || tp->mac_version == RTL_GIGA_MAC_VER_40 || tp->mac_version == RTL_GIGA_MAC_VER_41 
|| + tp->mac_version == RTL_GIGA_MAC_VER_42 || + tp->mac_version == RTL_GIGA_MAC_VER_43 || tp->mac_version == RTL_GIGA_MAC_VER_38) { RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq); rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666); @@ -5105,6 +5208,8 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; + RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); + rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); @@ -5116,6 +5221,7 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) rtl_w1w0_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC); rtl_w1w0_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); + rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC); RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); @@ -5127,7 +5233,26 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) /* Adjust EEE LED frequency */ RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07); - rtl_w1w0_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x02, ERIAR_EXGMAC); + rtl_w1w0_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC); + rtl_w1w0_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); +} + +static void rtl_hw_start_8168g_2(struct rtl8169_private *tp) +{ + void __iomem *ioaddr = tp->mmio_addr; + static const struct ephy_info e_info_8168g_2[] = { + { 0x00, 0x0000, 0x0008 }, + { 0x0c, 0x3df0, 0x0200 }, + { 0x19, 0xffff, 0xfc00 }, + { 0x1e, 0xffff, 0x20eb } + }; + + rtl_hw_start_8168g_1(tp); + + /* disable aspm and clock request before access ephy */ + RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn); + RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en); + rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2)); } static void rtl_hw_start_8168(struct net_device *dev) @@ -5155,10 +5280,7 @@ 
static void rtl_hw_start_8168(struct net_device *dev) rtl_set_rx_tx_desc_registers(tp, ioaddr); - rtl_set_rx_mode(dev); - - RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) | - (InterFrameGap << TxInterFrameGapShift)); + rtl_set_rx_tx_config_registers(tp); RTL_R8(IntrMask); @@ -5235,6 +5357,9 @@ static void rtl_hw_start_8168(struct net_device *dev) case RTL_GIGA_MAC_VER_41: rtl_hw_start_8168g_1(tp); break; + case RTL_GIGA_MAC_VER_42: + rtl_hw_start_8168g_2(tp); + break; default: printk(KERN_ERR PFX "%s: unknown chipset (mac_version = %d).\n", @@ -5242,9 +5367,11 @@ static void rtl_hw_start_8168(struct net_device *dev) break; } + RTL_W8(Cfg9346, Cfg9346_Lock); + RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); - RTL_W8(Cfg9346, Cfg9346_Lock); + rtl_set_rx_mode(dev); RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xF000); } @@ -5402,6 +5529,17 @@ static void rtl_hw_start_8101(struct net_device *dev) RTL_W8(Cfg9346, Cfg9346_Unlock); + RTL_W8(MaxTxPacketSize, TxPacketMax); + + rtl_set_rx_max_size(ioaddr, rx_buf_sz); + + tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK; + RTL_W16(CPlusCmd, tp->cp_cmd); + + rtl_set_rx_tx_desc_registers(tp, ioaddr); + + rtl_set_rx_tx_config_registers(tp); + switch (tp->mac_version) { case RTL_GIGA_MAC_VER_07: rtl_hw_start_8102e_1(tp); @@ -5429,28 +5567,21 @@ static void rtl_hw_start_8101(struct net_device *dev) case RTL_GIGA_MAC_VER_39: rtl_hw_start_8106(tp); break; + case RTL_GIGA_MAC_VER_43: + rtl_hw_start_8168g_2(tp); + break; } RTL_W8(Cfg9346, Cfg9346_Lock); - RTL_W8(MaxTxPacketSize, TxPacketMax); - - rtl_set_rx_max_size(ioaddr, rx_buf_sz); - - tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK; - RTL_W16(CPlusCmd, tp->cp_cmd); - RTL_W16(IntrMitigate, 0x0000); - rtl_set_rx_tx_desc_registers(tp, ioaddr); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); - rtl_set_rx_tx_config_registers(tp); - - RTL_R8(IntrMask); rtl_set_rx_mode(dev); + RTL_R8(IntrMask); + RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000); } @@ -6722,6 +6853,8 @@ static void rtl_hw_initialize(struct rtl8169_private *tp) 
switch (tp->mac_version) { case RTL_GIGA_MAC_VER_40: case RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_42: + case RTL_GIGA_MAC_VER_43: rtl_hw_init_8168g(tp); break; @@ -6904,16 +7037,17 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* don't enable SG, IP_CSUM and TSO by default - it might not work * properly for all devices */ dev->features |= NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_RXCSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_HIGHDMA; if (tp->mac_version == RTL_GIGA_MAC_VER_05) /* 8110SCd requires hardware Rx VLAN - disallow toggling */ - dev->hw_features &= ~NETIF_F_HW_VLAN_RX; + dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 33e96176e4d8..b8e52cd1a698 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2,7 +2,8 @@ * SuperH Ethernet device driver * * Copyright (C) 2006-2012 Nobuhiro Iwamatsu - * Copyright (C) 2008-2012 Renesas Solutions Corp. + * Copyright (C) 2008-2013 Renesas Solutions Corp. + * Copyright (C) 2013 Cogent Embedded, Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -49,6 +50,269 @@ NETIF_MSG_RX_ERR| \ NETIF_MSG_TX_ERR) +static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { + [EDSR] = 0x0000, + [EDMR] = 0x0400, + [EDTRR] = 0x0408, + [EDRRR] = 0x0410, + [EESR] = 0x0428, + [EESIPR] = 0x0430, + [TDLAR] = 0x0010, + [TDFAR] = 0x0014, + [TDFXR] = 0x0018, + [TDFFR] = 0x001c, + [RDLAR] = 0x0030, + [RDFAR] = 0x0034, + [RDFXR] = 0x0038, + [RDFFR] = 0x003c, + [TRSCER] = 0x0438, + [RMFCR] = 0x0440, + [TFTR] = 0x0448, + [FDR] = 0x0450, + [RMCR] = 0x0458, + [RPADIR] = 0x0460, + [FCFTR] = 0x0468, + [CSMR] = 0x04E4, + + [ECMR] = 0x0500, + [ECSR] = 0x0510, + [ECSIPR] = 0x0518, + [PIR] = 0x0520, + [PSR] = 0x0528, + [PIPR] = 0x052c, + [RFLR] = 0x0508, + [APR] = 0x0554, + [MPR] = 0x0558, + [PFTCR] = 0x055c, + [PFRCR] = 0x0560, + [TPAUSER] = 0x0564, + [GECMR] = 0x05b0, + [BCULR] = 0x05b4, + [MAHR] = 0x05c0, + [MALR] = 0x05c8, + [TROCR] = 0x0700, + [CDCR] = 0x0708, + [LCCR] = 0x0710, + [CEFCR] = 0x0740, + [FRECR] = 0x0748, + [TSFRCR] = 0x0750, + [TLFRCR] = 0x0758, + [RFCR] = 0x0760, + [CERCR] = 0x0768, + [CEECR] = 0x0770, + [MAFCR] = 0x0778, + [RMII_MII] = 0x0790, + + [ARSTR] = 0x0000, + [TSU_CTRST] = 0x0004, + [TSU_FWEN0] = 0x0010, + [TSU_FWEN1] = 0x0014, + [TSU_FCM] = 0x0018, + [TSU_BSYSL0] = 0x0020, + [TSU_BSYSL1] = 0x0024, + [TSU_PRISL0] = 0x0028, + [TSU_PRISL1] = 0x002c, + [TSU_FWSL0] = 0x0030, + [TSU_FWSL1] = 0x0034, + [TSU_FWSLC] = 0x0038, + [TSU_QTAG0] = 0x0040, + [TSU_QTAG1] = 0x0044, + [TSU_FWSR] = 0x0050, + [TSU_FWINMK] = 0x0054, + [TSU_ADQT0] = 0x0048, + [TSU_ADQT1] = 0x004c, + [TSU_VTAG0] = 0x0058, + [TSU_VTAG1] = 0x005c, + [TSU_ADSBSY] = 0x0060, + [TSU_TEN] = 0x0064, + [TSU_POST1] = 0x0070, + [TSU_POST2] = 0x0074, + [TSU_POST3] = 0x0078, + [TSU_POST4] = 0x007c, + [TSU_ADRH0] = 0x0100, + [TSU_ADRL0] = 0x0104, + [TSU_ADRH31] = 0x01f8, + [TSU_ADRL31] = 0x01fc, + + [TXNLCR0] = 
0x0080, + [TXALCR0] = 0x0084, + [RXNLCR0] = 0x0088, + [RXALCR0] = 0x008c, + [FWNLCR0] = 0x0090, + [FWALCR0] = 0x0094, + [TXNLCR1] = 0x00a0, + [TXALCR1] = 0x00a0, + [RXNLCR1] = 0x00a8, + [RXALCR1] = 0x00ac, + [FWNLCR1] = 0x00b0, + [FWALCR1] = 0x00b4, +}; + +static const u16 sh_eth_offset_fast_rcar[SH_ETH_MAX_REGISTER_OFFSET] = { + [ECMR] = 0x0300, + [RFLR] = 0x0308, + [ECSR] = 0x0310, + [ECSIPR] = 0x0318, + [PIR] = 0x0320, + [PSR] = 0x0328, + [RDMLR] = 0x0340, + [IPGR] = 0x0350, + [APR] = 0x0354, + [MPR] = 0x0358, + [RFCF] = 0x0360, + [TPAUSER] = 0x0364, + [TPAUSECR] = 0x0368, + [MAHR] = 0x03c0, + [MALR] = 0x03c8, + [TROCR] = 0x03d0, + [CDCR] = 0x03d4, + [LCCR] = 0x03d8, + [CNDCR] = 0x03dc, + [CEFCR] = 0x03e4, + [FRECR] = 0x03e8, + [TSFRCR] = 0x03ec, + [TLFRCR] = 0x03f0, + [RFCR] = 0x03f4, + [MAFCR] = 0x03f8, + + [EDMR] = 0x0200, + [EDTRR] = 0x0208, + [EDRRR] = 0x0210, + [TDLAR] = 0x0218, + [RDLAR] = 0x0220, + [EESR] = 0x0228, + [EESIPR] = 0x0230, + [TRSCER] = 0x0238, + [RMFCR] = 0x0240, + [TFTR] = 0x0248, + [FDR] = 0x0250, + [RMCR] = 0x0258, + [TFUCR] = 0x0264, + [RFOCR] = 0x0268, + [FCFTR] = 0x0270, + [TRIMD] = 0x027c, +}; + +static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = { + [ECMR] = 0x0100, + [RFLR] = 0x0108, + [ECSR] = 0x0110, + [ECSIPR] = 0x0118, + [PIR] = 0x0120, + [PSR] = 0x0128, + [RDMLR] = 0x0140, + [IPGR] = 0x0150, + [APR] = 0x0154, + [MPR] = 0x0158, + [TPAUSER] = 0x0164, + [RFCF] = 0x0160, + [TPAUSECR] = 0x0168, + [BCFRR] = 0x016c, + [MAHR] = 0x01c0, + [MALR] = 0x01c8, + [TROCR] = 0x01d0, + [CDCR] = 0x01d4, + [LCCR] = 0x01d8, + [CNDCR] = 0x01dc, + [CEFCR] = 0x01e4, + [FRECR] = 0x01e8, + [TSFRCR] = 0x01ec, + [TLFRCR] = 0x01f0, + [RFCR] = 0x01f4, + [MAFCR] = 0x01f8, + [RTRATE] = 0x01fc, + + [EDMR] = 0x0000, + [EDTRR] = 0x0008, + [EDRRR] = 0x0010, + [TDLAR] = 0x0018, + [RDLAR] = 0x0020, + [EESR] = 0x0028, + [EESIPR] = 0x0030, + [TRSCER] = 0x0038, + [RMFCR] = 0x0040, + [TFTR] = 0x0048, + [FDR] = 0x0050, + [RMCR] = 0x0058, + [TFUCR] = 
0x0064, + [RFOCR] = 0x0068, + [FCFTR] = 0x0070, + [RPADIR] = 0x0078, + [TRIMD] = 0x007c, + [RBWAR] = 0x00c8, + [RDFAR] = 0x00cc, + [TBRAR] = 0x00d4, + [TDFAR] = 0x00d8, +}; + +static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { + [ECMR] = 0x0160, + [ECSR] = 0x0164, + [ECSIPR] = 0x0168, + [PIR] = 0x016c, + [MAHR] = 0x0170, + [MALR] = 0x0174, + [RFLR] = 0x0178, + [PSR] = 0x017c, + [TROCR] = 0x0180, + [CDCR] = 0x0184, + [LCCR] = 0x0188, + [CNDCR] = 0x018c, + [CEFCR] = 0x0194, + [FRECR] = 0x0198, + [TSFRCR] = 0x019c, + [TLFRCR] = 0x01a0, + [RFCR] = 0x01a4, + [MAFCR] = 0x01a8, + [IPGR] = 0x01b4, + [APR] = 0x01b8, + [MPR] = 0x01bc, + [TPAUSER] = 0x01c4, + [BCFR] = 0x01cc, + + [ARSTR] = 0x0000, + [TSU_CTRST] = 0x0004, + [TSU_FWEN0] = 0x0010, + [TSU_FWEN1] = 0x0014, + [TSU_FCM] = 0x0018, + [TSU_BSYSL0] = 0x0020, + [TSU_BSYSL1] = 0x0024, + [TSU_PRISL0] = 0x0028, + [TSU_PRISL1] = 0x002c, + [TSU_FWSL0] = 0x0030, + [TSU_FWSL1] = 0x0034, + [TSU_FWSLC] = 0x0038, + [TSU_QTAGM0] = 0x0040, + [TSU_QTAGM1] = 0x0044, + [TSU_ADQT0] = 0x0048, + [TSU_ADQT1] = 0x004c, + [TSU_FWSR] = 0x0050, + [TSU_FWINMK] = 0x0054, + [TSU_ADSBSY] = 0x0060, + [TSU_TEN] = 0x0064, + [TSU_POST1] = 0x0070, + [TSU_POST2] = 0x0074, + [TSU_POST3] = 0x0078, + [TSU_POST4] = 0x007c, + + [TXNLCR0] = 0x0080, + [TXALCR0] = 0x0084, + [RXNLCR0] = 0x0088, + [RXALCR0] = 0x008c, + [FWNLCR0] = 0x0090, + [FWALCR0] = 0x0094, + [TXNLCR1] = 0x00a0, + [TXALCR1] = 0x00a0, + [RXNLCR1] = 0x00a8, + [RXALCR1] = 0x00ac, + [FWNLCR1] = 0x00b0, + [FWALCR1] = 0x00b4, + + [TSU_ADRH0] = 0x0100, + [TSU_ADRL0] = 0x0104, + [TSU_ADRL31] = 0x01fc, +}; + #if defined(CONFIG_CPU_SUBTYPE_SH7734) || \ defined(CONFIG_CPU_SUBTYPE_SH7763) || \ defined(CONFIG_ARCH_R8A7740) @@ -78,7 +342,7 @@ static void sh_eth_select_mii(struct net_device *ndev) #endif /* There is CPU dependent code */ -#if defined(CONFIG_CPU_SUBTYPE_SH7724) || defined(CONFIG_ARCH_R8A7779) +#if defined(CONFIG_ARCH_R8A7779) #define SH_ETH_RESET_DEFAULT 1 static void 
sh_eth_set_duplex(struct net_device *ndev) { @@ -93,18 +357,60 @@ static void sh_eth_set_duplex(struct net_device *ndev) static void sh_eth_set_rate(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); - unsigned int bits = ECMR_RTM; -#if defined(CONFIG_ARCH_R8A7779) - bits |= ECMR_ELB; -#endif + switch (mdp->speed) { + case 10: /* 10BASE */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_ELB, ECMR); + break; + case 100:/* 100BASE */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_ELB, ECMR); + break; + default: + break; + } +} + +/* R8A7779 */ +static struct sh_eth_cpu_data sh_eth_my_cpu_data = { + .set_duplex = sh_eth_set_duplex, + .set_rate = sh_eth_set_rate, + + .ecsr_value = ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD, + .ecsipr_value = ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP, + .eesipr_value = 0x01ff009f, + + .tx_check = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO, + .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RDE | + EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI, + .tx_error_check = EESR_TWB | EESR_TABT | EESR_TDE | EESR_TFE, + + .apr = 1, + .mpr = 1, + .tpauser = 1, + .hw_swap = 1, +}; +#elif defined(CONFIG_CPU_SUBTYPE_SH7724) +#define SH_ETH_RESET_DEFAULT 1 +static void sh_eth_set_duplex(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + if (mdp->duplex) /* Full */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_DM, ECMR); + else /* Half */ + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR); +} + +static void sh_eth_set_rate(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); switch (mdp->speed) { case 10: /* 10BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~bits, ECMR); + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_RTM, ECMR); break; case 100:/* 100BASE */ - sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | bits, ECMR); + sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_RTM, ECMR); break; default: break; @@ -592,7 +898,7 @@ 
static int sh_eth_check_reset(struct net_device *ndev) cnt--; } if (cnt < 0) { - printk(KERN_ERR "Device reset fail\n"); + pr_err("Device reset fail\n"); ret = -ETIMEDOUT; } return ret; @@ -908,11 +1214,8 @@ static int sh_eth_ring_init(struct net_device *ndev) /* Allocate all Rx descriptors. */ rx_ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; mdp->rx_ring = dma_alloc_coherent(NULL, rx_ringsize, &mdp->rx_desc_dma, - GFP_KERNEL); - + GFP_KERNEL); if (!mdp->rx_ring) { - dev_err(&ndev->dev, "Cannot allocate Rx Ring (size %d bytes)\n", - rx_ringsize); ret = -ENOMEM; goto desc_ring_free; } @@ -922,10 +1225,8 @@ static int sh_eth_ring_init(struct net_device *ndev) /* Allocate all Tx descriptors. */ tx_ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring; mdp->tx_ring = dma_alloc_coherent(NULL, tx_ringsize, &mdp->tx_desc_dma, - GFP_KERNEL); + GFP_KERNEL); if (!mdp->tx_ring) { - dev_err(&ndev->dev, "Cannot allocate Tx Ring (size %d bytes)\n", - tx_ringsize); ret = -ENOMEM; goto desc_ring_free; } @@ -1216,10 +1517,7 @@ static void sh_eth_error(struct net_device *ndev, int intr_status) if (felic_stat & ECSR_LCHNG) { /* Link Changed */ if (mdp->cd->no_psr || mdp->no_ether_link) { - if (mdp->link == PHY_DOWN) - link_stat = 0; - else - link_stat = PHY_ST_LINK; + goto ignore_link; } else { link_stat = (sh_eth_read(ndev, PSR)); if (mdp->ether_link_active_low) @@ -1242,6 +1540,7 @@ static void sh_eth_error(struct net_device *ndev, int intr_status) } } +ignore_link: if (intr_status & EESR_TWB) { /* Write buck end. 
unused write back interrupt */ if (intr_status & EESR_TABT) /* Transmit Abort int */ @@ -1326,12 +1625,18 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev) struct sh_eth_private *mdp = netdev_priv(ndev); struct sh_eth_cpu_data *cd = mdp->cd; irqreturn_t ret = IRQ_NONE; - u32 intr_status = 0; + unsigned long intr_status; spin_lock(&mdp->lock); - /* Get interrpt stat */ + /* Get interrupt status */ intr_status = sh_eth_read(ndev, EESR); + /* Mask it with the interrupt mask, forcing ECI interrupt to be always + * enabled since it's the one that comes thru regardless of the mask, + * and we need to fully handle it in sh_eth_error() in order to quench + * it as it doesn't get cleared by just writing 1 to the ECI bit... + */ + intr_status &= sh_eth_read(ndev, EESIPR) | DMAC_M_ECI; /* Clear interrupt */ if (intr_status & (EESR_FRC | EESR_RMAF | EESR_RRF | EESR_RTLF | EESR_RTSF | EESR_PRE | EESR_CERF | @@ -1373,7 +1678,7 @@ static void sh_eth_adjust_link(struct net_device *ndev) struct phy_device *phydev = mdp->phydev; int new_state = 0; - if (phydev->link != PHY_DOWN) { + if (phydev->link) { if (phydev->duplex != mdp->duplex) { new_state = 1; mdp->duplex = phydev->duplex; @@ -1387,17 +1692,21 @@ static void sh_eth_adjust_link(struct net_device *ndev) if (mdp->cd->set_rate) mdp->cd->set_rate(ndev); } - if (mdp->link == PHY_DOWN) { + if (!mdp->link) { sh_eth_write(ndev, (sh_eth_read(ndev, ECMR) & ~ECMR_TXF), ECMR); new_state = 1; mdp->link = phydev->link; + if (mdp->cd->no_psr || mdp->no_ether_link) + sh_eth_rcv_snd_enable(ndev); } } else if (mdp->link) { new_state = 1; - mdp->link = PHY_DOWN; + mdp->link = 0; mdp->speed = 0; mdp->duplex = -1; + if (mdp->cd->no_psr || mdp->no_ether_link) + sh_eth_rcv_snd_disable(ndev); } if (new_state && netif_msg_link(mdp)) @@ -1414,7 +1723,7 @@ static int sh_eth_phy_init(struct net_device *ndev) snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, mdp->mii_bus->id , mdp->phy_id); - mdp->link = PHY_DOWN; + mdp->link = 0; mdp->speed = 
0; mdp->duplex = -1; @@ -2139,7 +2448,8 @@ static int sh_eth_get_vtag_index(struct sh_eth_private *mdp) return TSU_VTAG1; } -static int sh_eth_vlan_rx_add_vid(struct net_device *ndev, u16 vid) +static int sh_eth_vlan_rx_add_vid(struct net_device *ndev, + __be16 proto, u16 vid) { struct sh_eth_private *mdp = netdev_priv(ndev); int vtag_reg_index = sh_eth_get_vtag_index(mdp); @@ -2169,7 +2479,8 @@ static int sh_eth_vlan_rx_add_vid(struct net_device *ndev, u16 vid) return 0; } -static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev, u16 vid) +static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev, + __be16 proto, u16 vid) { struct sh_eth_private *mdp = netdev_priv(ndev); int vtag_reg_index = sh_eth_get_vtag_index(mdp); @@ -2228,9 +2539,6 @@ static int sh_mdio_release(struct net_device *ndev) /* remove mdio bus info from net_device */ dev_set_drvdata(&ndev->dev, NULL); - /* free interrupts memory */ - kfree(bus->irq); - /* free bitbang info */ free_mdio_bitbang(bus); @@ -2246,7 +2554,8 @@ static int sh_mdio_init(struct net_device *ndev, int id, struct sh_eth_private *mdp = netdev_priv(ndev); /* create bit control struct for PHY */ - bitbang = kzalloc(sizeof(struct bb_info), GFP_KERNEL); + bitbang = devm_kzalloc(&ndev->dev, sizeof(struct bb_info), + GFP_KERNEL); if (!bitbang) { ret = -ENOMEM; goto out; @@ -2255,17 +2564,17 @@ static int sh_mdio_init(struct net_device *ndev, int id, /* bitbang init */ bitbang->addr = mdp->addr + mdp->reg_offset[PIR]; bitbang->set_gate = pd->set_mdio_gate; - bitbang->mdi_msk = 0x08; - bitbang->mdo_msk = 0x04; - bitbang->mmd_msk = 0x02;/* MMD */ - bitbang->mdc_msk = 0x01; + bitbang->mdi_msk = PIR_MDI; + bitbang->mdo_msk = PIR_MDO; + bitbang->mmd_msk = PIR_MMD; + bitbang->mdc_msk = PIR_MDC; bitbang->ctrl.ops = &bb_ops; /* MII controller setting */ mdp->mii_bus = alloc_mdio_bitbang(&bitbang->ctrl); if (!mdp->mii_bus) { ret = -ENOMEM; - goto out_free_bitbang; + goto out; } /* Hook up MII support for ethtool */ @@ -2275,7 +2584,9 @@ 
static int sh_mdio_init(struct net_device *ndev, int id, mdp->pdev->name, id); /* PHY IRQ */ - mdp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); + mdp->mii_bus->irq = devm_kzalloc(&ndev->dev, + sizeof(int) * PHY_MAX_ADDR, + GFP_KERNEL); if (!mdp->mii_bus->irq) { ret = -ENOMEM; goto out_free_bus; @@ -2287,21 +2598,15 @@ static int sh_mdio_init(struct net_device *ndev, int id, /* register mdio bus */ ret = mdiobus_register(mdp->mii_bus); if (ret) - goto out_free_irq; + goto out_free_bus; dev_set_drvdata(&ndev->dev, mdp->mii_bus); return 0; -out_free_irq: - kfree(mdp->mii_bus->irq); - out_free_bus: free_mdio_bitbang(mdp->mii_bus); -out_free_bitbang: - kfree(bitbang); - out: return ret; } @@ -2314,6 +2619,9 @@ static const u16 *sh_eth_get_register_offset(int register_type) case SH_ETH_REG_GIGABIT: reg_offset = sh_eth_offset_gigabit; break; + case SH_ETH_REG_FAST_RCAR: + reg_offset = sh_eth_offset_fast_rcar; + break; case SH_ETH_REG_FAST_SH4: reg_offset = sh_eth_offset_fast_sh4; break; @@ -2321,7 +2629,7 @@ static const u16 *sh_eth_get_register_offset(int register_type) reg_offset = sh_eth_offset_fast_sh3_sh2; break; default: - printk(KERN_ERR "Unknown register type (%d)\n", register_type); + pr_err("Unknown register type (%d)\n", register_type); break; } @@ -2351,7 +2659,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) struct resource *res; struct net_device *ndev = NULL; struct sh_eth_private *mdp = NULL; - struct sh_eth_plat_data *pd; + struct sh_eth_plat_data *pd = pdev->dev.platform_data; /* get base addr */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -2389,10 +2697,9 @@ static int sh_eth_drv_probe(struct platform_device *pdev) mdp = netdev_priv(ndev); mdp->num_tx_ring = TX_RING_SIZE; mdp->num_rx_ring = RX_RING_SIZE; - mdp->addr = ioremap(res->start, resource_size(res)); - if (mdp->addr == NULL) { - ret = -ENOMEM; - dev_err(&pdev->dev, "ioremap failed.\n"); + mdp->addr = devm_ioremap_resource(&pdev->dev, res); + if 
(IS_ERR(mdp->addr)) { + ret = PTR_ERR(mdp->addr); goto out_release; } @@ -2401,7 +2708,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); pm_runtime_resume(&pdev->dev); - pd = (struct sh_eth_plat_data *)(pdev->dev.platform_data); /* get PHY ID */ mdp->phy_id = pd->phy; mdp->phy_interface = pd->phy_interface; @@ -2439,10 +2745,13 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ret = -ENODEV; goto out_release; } - mdp->tsu_addr = ioremap(rtsu->start, - resource_size(rtsu)); + mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); + if (IS_ERR(mdp->tsu_addr)) { + ret = PTR_ERR(mdp->tsu_addr); + goto out_release; + } mdp->port = devno % 2; - ndev->features = NETIF_F_HW_VLAN_FILTER; + ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } /* initialize first or needed device */ @@ -2479,10 +2788,6 @@ out_unregister: out_release: /* net_dev free */ - if (mdp && mdp->addr) - iounmap(mdp->addr); - if (mdp && mdp->tsu_addr) - iounmap(mdp->tsu_addr); if (ndev) free_netdev(ndev); @@ -2493,14 +2798,10 @@ out: static int sh_eth_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); - struct sh_eth_private *mdp = netdev_priv(ndev); - if (mdp->cd->tsu) - iounmap(mdp->tsu_addr); sh_mdio_release(ndev); unregister_netdev(ndev); pm_runtime_disable(&pdev->dev); - iounmap(mdp->addr); free_netdev(ndev); platform_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index bae84fd2e73a..1ddc9f235bcb 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -156,225 +156,6 @@ enum { SH_ETH_MAX_REGISTER_OFFSET, }; -static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { - [EDSR] = 0x0000, - [EDMR] = 0x0400, - [EDTRR] = 0x0408, - [EDRRR] = 0x0410, - [EESR] = 0x0428, - [EESIPR] = 0x0430, - [TDLAR] = 0x0010, - [TDFAR] = 0x0014, - [TDFXR] = 0x0018, - [TDFFR] = 0x001c, - [RDLAR] = 0x0030, 
- [RDFAR] = 0x0034, - [RDFXR] = 0x0038, - [RDFFR] = 0x003c, - [TRSCER] = 0x0438, - [RMFCR] = 0x0440, - [TFTR] = 0x0448, - [FDR] = 0x0450, - [RMCR] = 0x0458, - [RPADIR] = 0x0460, - [FCFTR] = 0x0468, - [CSMR] = 0x04E4, - - [ECMR] = 0x0500, - [ECSR] = 0x0510, - [ECSIPR] = 0x0518, - [PIR] = 0x0520, - [PSR] = 0x0528, - [PIPR] = 0x052c, - [RFLR] = 0x0508, - [APR] = 0x0554, - [MPR] = 0x0558, - [PFTCR] = 0x055c, - [PFRCR] = 0x0560, - [TPAUSER] = 0x0564, - [GECMR] = 0x05b0, - [BCULR] = 0x05b4, - [MAHR] = 0x05c0, - [MALR] = 0x05c8, - [TROCR] = 0x0700, - [CDCR] = 0x0708, - [LCCR] = 0x0710, - [CEFCR] = 0x0740, - [FRECR] = 0x0748, - [TSFRCR] = 0x0750, - [TLFRCR] = 0x0758, - [RFCR] = 0x0760, - [CERCR] = 0x0768, - [CEECR] = 0x0770, - [MAFCR] = 0x0778, - [RMII_MII] = 0x0790, - - [ARSTR] = 0x0000, - [TSU_CTRST] = 0x0004, - [TSU_FWEN0] = 0x0010, - [TSU_FWEN1] = 0x0014, - [TSU_FCM] = 0x0018, - [TSU_BSYSL0] = 0x0020, - [TSU_BSYSL1] = 0x0024, - [TSU_PRISL0] = 0x0028, - [TSU_PRISL1] = 0x002c, - [TSU_FWSL0] = 0x0030, - [TSU_FWSL1] = 0x0034, - [TSU_FWSLC] = 0x0038, - [TSU_QTAG0] = 0x0040, - [TSU_QTAG1] = 0x0044, - [TSU_FWSR] = 0x0050, - [TSU_FWINMK] = 0x0054, - [TSU_ADQT0] = 0x0048, - [TSU_ADQT1] = 0x004c, - [TSU_VTAG0] = 0x0058, - [TSU_VTAG1] = 0x005c, - [TSU_ADSBSY] = 0x0060, - [TSU_TEN] = 0x0064, - [TSU_POST1] = 0x0070, - [TSU_POST2] = 0x0074, - [TSU_POST3] = 0x0078, - [TSU_POST4] = 0x007c, - [TSU_ADRH0] = 0x0100, - [TSU_ADRL0] = 0x0104, - [TSU_ADRH31] = 0x01f8, - [TSU_ADRL31] = 0x01fc, - - [TXNLCR0] = 0x0080, - [TXALCR0] = 0x0084, - [RXNLCR0] = 0x0088, - [RXALCR0] = 0x008c, - [FWNLCR0] = 0x0090, - [FWALCR0] = 0x0094, - [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, - [RXNLCR1] = 0x00a8, - [RXALCR1] = 0x00ac, - [FWNLCR1] = 0x00b0, - [FWALCR1] = 0x00b4, -}; - -static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = { - [ECMR] = 0x0100, - [RFLR] = 0x0108, - [ECSR] = 0x0110, - [ECSIPR] = 0x0118, - [PIR] = 0x0120, - [PSR] = 0x0128, - [RDMLR] = 0x0140, - [IPGR] = 0x0150, - [APR] = 
0x0154, - [MPR] = 0x0158, - [TPAUSER] = 0x0164, - [RFCF] = 0x0160, - [TPAUSECR] = 0x0168, - [BCFRR] = 0x016c, - [MAHR] = 0x01c0, - [MALR] = 0x01c8, - [TROCR] = 0x01d0, - [CDCR] = 0x01d4, - [LCCR] = 0x01d8, - [CNDCR] = 0x01dc, - [CEFCR] = 0x01e4, - [FRECR] = 0x01e8, - [TSFRCR] = 0x01ec, - [TLFRCR] = 0x01f0, - [RFCR] = 0x01f4, - [MAFCR] = 0x01f8, - [RTRATE] = 0x01fc, - - [EDMR] = 0x0000, - [EDTRR] = 0x0008, - [EDRRR] = 0x0010, - [TDLAR] = 0x0018, - [RDLAR] = 0x0020, - [EESR] = 0x0028, - [EESIPR] = 0x0030, - [TRSCER] = 0x0038, - [RMFCR] = 0x0040, - [TFTR] = 0x0048, - [FDR] = 0x0050, - [RMCR] = 0x0058, - [TFUCR] = 0x0064, - [RFOCR] = 0x0068, - [FCFTR] = 0x0070, - [RPADIR] = 0x0078, - [TRIMD] = 0x007c, - [RBWAR] = 0x00c8, - [RDFAR] = 0x00cc, - [TBRAR] = 0x00d4, - [TDFAR] = 0x00d8, -}; - -static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { - [ECMR] = 0x0160, - [ECSR] = 0x0164, - [ECSIPR] = 0x0168, - [PIR] = 0x016c, - [MAHR] = 0x0170, - [MALR] = 0x0174, - [RFLR] = 0x0178, - [PSR] = 0x017c, - [TROCR] = 0x0180, - [CDCR] = 0x0184, - [LCCR] = 0x0188, - [CNDCR] = 0x018c, - [CEFCR] = 0x0194, - [FRECR] = 0x0198, - [TSFRCR] = 0x019c, - [TLFRCR] = 0x01a0, - [RFCR] = 0x01a4, - [MAFCR] = 0x01a8, - [IPGR] = 0x01b4, - [APR] = 0x01b8, - [MPR] = 0x01bc, - [TPAUSER] = 0x01c4, - [BCFR] = 0x01cc, - - [ARSTR] = 0x0000, - [TSU_CTRST] = 0x0004, - [TSU_FWEN0] = 0x0010, - [TSU_FWEN1] = 0x0014, - [TSU_FCM] = 0x0018, - [TSU_BSYSL0] = 0x0020, - [TSU_BSYSL1] = 0x0024, - [TSU_PRISL0] = 0x0028, - [TSU_PRISL1] = 0x002c, - [TSU_FWSL0] = 0x0030, - [TSU_FWSL1] = 0x0034, - [TSU_FWSLC] = 0x0038, - [TSU_QTAGM0] = 0x0040, - [TSU_QTAGM1] = 0x0044, - [TSU_ADQT0] = 0x0048, - [TSU_ADQT1] = 0x004c, - [TSU_FWSR] = 0x0050, - [TSU_FWINMK] = 0x0054, - [TSU_ADSBSY] = 0x0060, - [TSU_TEN] = 0x0064, - [TSU_POST1] = 0x0070, - [TSU_POST2] = 0x0074, - [TSU_POST3] = 0x0078, - [TSU_POST4] = 0x007c, - - [TXNLCR0] = 0x0080, - [TXALCR0] = 0x0084, - [RXNLCR0] = 0x0088, - [RXALCR0] = 0x008c, - [FWNLCR0] = 
0x0090, - [FWALCR0] = 0x0094, - [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, - [RXNLCR1] = 0x00a8, - [RXALCR1] = 0x00ac, - [FWNLCR1] = 0x00b0, - [FWALCR1] = 0x00b4, - - [TSU_ADRH0] = 0x0100, - [TSU_ADRL0] = 0x0104, - [TSU_ADRL31] = 0x01fc, - -}; - /* Driver's parameters */ #if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE) #define SH4_SKB_RX_ALIGN 32 @@ -722,7 +503,7 @@ struct sh_eth_private { u32 phy_id; /* PHY ID */ struct mii_bus *mii_bus; /* MDIO bus control */ struct phy_device *phydev; /* PHY device control */ - enum phy_state link; + int link; phy_interface_t phy_interface; int msg_enable; int speed; diff --git a/drivers/net/ethernet/s6gmac.c b/drivers/net/ethernet/s6gmac.c index 21683e2b1ff4..b6739afeaca1 100644 --- a/drivers/net/ethernet/s6gmac.c +++ b/drivers/net/ethernet/s6gmac.c @@ -998,6 +998,7 @@ static int s6gmac_probe(struct platform_device *pdev) mb = mdiobus_alloc(); if (!mb) { printk(KERN_ERR DRV_PRMT "error allocating mii bus\n"); + res = -ENOMEM; goto errmii; } mb->name = "s6gmac_mii"; @@ -1053,20 +1054,7 @@ static struct platform_driver s6gmac_driver = { }, }; -static int __init s6gmac_init(void) -{ - printk(KERN_INFO DRV_PRMT "S6 GMAC ethernet driver\n"); - return platform_driver_register(&s6gmac_driver); -} - - -static void __exit s6gmac_exit(void) -{ - platform_driver_unregister(&s6gmac_driver); -} - -module_init(s6gmac_init); -module_exit(s6gmac_exit); +module_platform_driver(s6gmac_driver); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("S6105 on chip Ethernet driver"); diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 3aca57853ed4..bdac936a68bc 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -651,8 +651,11 @@ if (next_ptr < RX_START || next_ptr >= RX_END) { skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); received ++; - } else - goto dropping; + } else { + ether3_outw(next_ptr >> 8, REG_RECVEND); + dev->stats.rx_dropped++; + goto done; + } } 
else { struct net_device_stats *stats = &dev->stats; ether3_outw(next_ptr >> 8, REG_RECVEND); @@ -679,21 +682,6 @@ done: } return maxcnt; - -dropping:{ - static unsigned long last_warned; - - ether3_outw(next_ptr >> 8, REG_RECVEND); - /* - * Don't print this message too many times... - */ - if (time_after(jiffies, last_warned + 10 * HZ)) { - last_warned = jiffies; - printk("%s: memory squeeze, dropping packet.\n", dev->name); - } - dev->stats.rx_dropped++; - goto done; - } } /* diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 0fde9ca28269..0ad5694b41f8 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -381,8 +381,6 @@ memory_squeeze: dev->stats.rx_packets++; dev->stats.rx_bytes += len; } else { - printk(KERN_NOTICE "%s: Memory squeeze, deferring packet.\n", - dev->name); dev->stats.rx_dropped++; } } else { diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 0bc00991d310..01b99206139a 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -22,6 +22,7 @@ #include <linux/topology.h> #include <linux/gfp.h> #include <linux/cpu_rmap.h> +#include <linux/aer.h> #include "net_driver.h" #include "efx.h" #include "nic.h" @@ -71,21 +72,21 @@ const char *const efx_loopback_mode_names[] = { const unsigned int efx_reset_type_max = RESET_TYPE_MAX; const char *const efx_reset_type_names[] = { - [RESET_TYPE_INVISIBLE] = "INVISIBLE", - [RESET_TYPE_ALL] = "ALL", - [RESET_TYPE_WORLD] = "WORLD", - [RESET_TYPE_DISABLE] = "DISABLE", - [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", - [RESET_TYPE_INT_ERROR] = "INT_ERROR", - [RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY", - [RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH", - [RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH", - [RESET_TYPE_TX_SKIP] = "TX_SKIP", - [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", + [RESET_TYPE_INVISIBLE] = "INVISIBLE", + [RESET_TYPE_ALL] = "ALL", + [RESET_TYPE_RECOVER_OR_ALL] = 
"RECOVER_OR_ALL", + [RESET_TYPE_WORLD] = "WORLD", + [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", + [RESET_TYPE_DISABLE] = "DISABLE", + [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", + [RESET_TYPE_INT_ERROR] = "INT_ERROR", + [RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY", + [RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH", + [RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH", + [RESET_TYPE_TX_SKIP] = "TX_SKIP", + [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", }; -#define EFX_MAX_MTU (9 * 1024) - /* Reset workqueue. If any NIC has a hardware failure then a reset will be * queued onto this work queue. This is not a per-nic work queue, because * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. @@ -117,9 +118,12 @@ MODULE_PARM_DESC(separate_tx_channels, static int napi_weight = 64; /* This is the time (in jiffies) between invocations of the hardware - * monitor. On Falcon-based NICs, this will: + * monitor. + * On Falcon-based NICs, this will: * - Check the on-board hardware monitor; * - Poll the link state and reconfigure the hardware as necessary. + * On Siena-based NICs for power systems with EEH support, this will give EEH a + * chance to start. */ static unsigned int efx_monitor_interval = 1 * HZ; @@ -203,13 +207,14 @@ static void efx_stop_all(struct efx_nic *efx); #define EFX_ASSERT_RESET_SERIALISED(efx) \ do { \ if ((efx->state == STATE_READY) || \ + (efx->state == STATE_RECOVERY) || \ (efx->state == STATE_DISABLED)) \ ASSERT_RTNL(); \ } while (0) static int efx_check_disabled(struct efx_nic *efx) { - if (efx->state == STATE_DISABLED) { + if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { netif_err(efx, drv, efx->net_dev, "device is disabled due to earlier errors\n"); return -EIO; @@ -242,15 +247,9 @@ static int efx_process_channel(struct efx_channel *channel, int budget) struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - /* Deliver last RX packet. 
*/ - if (channel->rx_pkt) { - __efx_rx_packet(channel, channel->rx_pkt); - channel->rx_pkt = NULL; - } - if (rx_queue->enabled) { - efx_rx_strategy(channel); + efx_rx_flush_packet(channel); + if (rx_queue->enabled) efx_fast_push_rx_descriptors(rx_queue); - } } return spent; @@ -625,20 +624,51 @@ fail: */ static void efx_start_datapath(struct efx_nic *efx) { + bool old_rx_scatter = efx->rx_scatter; struct efx_tx_queue *tx_queue; struct efx_rx_queue *rx_queue; struct efx_channel *channel; + size_t rx_buf_len; /* Calculate the rx buffer allocation parameters required to * support the current MTU, including padding for header * alignment and overruns. */ - efx->rx_buffer_len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) + - EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + - efx->type->rx_buffer_hash_size + - efx->type->rx_buffer_padding); - efx->rx_buffer_order = get_order(efx->rx_buffer_len + - sizeof(struct efx_rx_page_state)); + efx->rx_dma_len = (efx->type->rx_buffer_hash_size + + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + + efx->type->rx_buffer_padding); + rx_buf_len = (sizeof(struct efx_rx_page_state) + + EFX_PAGE_IP_ALIGN + efx->rx_dma_len); + if (rx_buf_len <= PAGE_SIZE) { + efx->rx_scatter = false; + efx->rx_buffer_order = 0; + } else if (efx->type->can_rx_scatter) { + BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + + EFX_PAGE_IP_ALIGN + EFX_RX_USR_BUF_SIZE > + PAGE_SIZE / 2); + efx->rx_scatter = true; + efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; + efx->rx_buffer_order = 0; + } else { + efx->rx_scatter = false; + efx->rx_buffer_order = get_order(rx_buf_len); + } + + efx_rx_config_page_split(efx); + if (efx->rx_buffer_order) + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u; page order=%u batch=%u\n", + efx->rx_dma_len, efx->rx_buffer_order, + efx->rx_pages_per_batch); + else + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u step=%u bpp=%u; page batch=%u\n", + efx->rx_dma_len, efx->rx_page_buf_step, + efx->rx_bufs_per_page, efx->rx_pages_per_batch); + + /* RX filters also have 
scatter-enabled flags */ + if (efx->rx_scatter != old_rx_scatter) + efx_filter_update_rx_scatter(efx); /* We must keep at least one descriptor in a TX ring empty. * We could avoid this when the queue size does not exactly @@ -655,16 +685,12 @@ static void efx_start_datapath(struct efx_nic *efx) efx_for_each_channel_tx_queue(tx_queue, channel) efx_init_tx_queue(tx_queue); - /* The rx buffer allocation strategy is MTU dependent */ - efx_rx_strategy(channel); - efx_for_each_channel_rx_queue(rx_queue, channel) { efx_init_rx_queue(rx_queue); efx_nic_generate_fill_event(rx_queue); } - WARN_ON(channel->rx_pkt != NULL); - efx_rx_strategy(channel); + WARN_ON(channel->rx_pkt_n_frags); } if (netif_device_present(efx->net_dev)) @@ -683,7 +709,7 @@ static void efx_stop_datapath(struct efx_nic *efx) BUG_ON(efx->port_enabled); /* Only perform flush if dma is enabled */ - if (dev->is_busmaster) { + if (dev->is_busmaster && efx->state != STATE_RECOVERY) { rc = efx_nic_flush_queues(efx); if (rc && EFX_WORKAROUND_7803(efx)) { @@ -1596,13 +1622,15 @@ static void efx_start_all(struct efx_nic *efx) efx_start_port(efx); efx_start_datapath(efx); - /* Start the hardware monitor if there is one. 
Otherwise (we're link - * event driven), we have to poll the PHY because after an event queue - * flush, we could have a missed a link state change */ - if (efx->type->monitor != NULL) { + /* Start the hardware monitor if there is one */ + if (efx->type->monitor != NULL) queue_delayed_work(efx->workqueue, &efx->monitor_work, efx_monitor_interval); - } else { + + /* If link state detection is normally event-driven, we have + * to poll now because we could have missed a change + */ + if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) { mutex_lock(&efx->mac_lock); if (efx->phy_op->poll(efx)) efx_link_status_changed(efx); @@ -2309,7 +2337,9 @@ int efx_reset(struct efx_nic *efx, enum reset_type method) out: /* Leave device stopped if necessary */ - disabled = rc || method == RESET_TYPE_DISABLE; + disabled = rc || + method == RESET_TYPE_DISABLE || + method == RESET_TYPE_RECOVER_OR_DISABLE; rc2 = efx_reset_up(efx, method, !disabled); if (rc2) { disabled = true; @@ -2328,13 +2358,48 @@ out: return rc; } +/* Try recovery mechanisms. + * For now only EEH is supported. + * Returns 0 if the recovery mechanisms are unsuccessful. + * Returns a non-zero value otherwise. + */ +static int efx_try_recovery(struct efx_nic *efx) +{ +#ifdef CONFIG_EEH + /* A PCI error can occur and not be seen by EEH because nothing + * happens on the PCI bus. In this case the driver may fail and + * schedule a 'recover or reset', leading to this recovery handler. + * Manually call the eeh failure check function. + */ + struct eeh_dev *eehdev = + of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev)); + + if (eeh_dev_check_failure(eehdev)) { + /* The EEH mechanisms will handle the error and reset the + * device if necessary. + */ + return 1; + } +#endif + return 0; +} + /* The worker thread exists so that code that cannot sleep can * schedule a reset for later. 
*/ static void efx_reset_work(struct work_struct *data) { struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); - unsigned long pending = ACCESS_ONCE(efx->reset_pending); + unsigned long pending; + enum reset_type method; + + pending = ACCESS_ONCE(efx->reset_pending); + method = fls(pending) - 1; + + if ((method == RESET_TYPE_RECOVER_OR_DISABLE || + method == RESET_TYPE_RECOVER_OR_ALL) && + efx_try_recovery(efx)) + return; if (!pending) return; @@ -2346,7 +2411,7 @@ static void efx_reset_work(struct work_struct *data) * it cannot change again. */ if (efx->state == STATE_READY) - (void)efx_reset(efx, fls(pending) - 1); + (void)efx_reset(efx, method); rtnl_unlock(); } @@ -2355,11 +2420,20 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) { enum reset_type method; + if (efx->state == STATE_RECOVERY) { + netif_dbg(efx, drv, efx->net_dev, + "recovering: skip scheduling %s reset\n", + RESET_TYPE(type)); + return; + } + switch (type) { case RESET_TYPE_INVISIBLE: case RESET_TYPE_ALL: + case RESET_TYPE_RECOVER_OR_ALL: case RESET_TYPE_WORLD: case RESET_TYPE_DISABLE: + case RESET_TYPE_RECOVER_OR_DISABLE: method = type; netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", RESET_TYPE(method)); @@ -2569,6 +2643,8 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_fini_struct(efx); pci_set_drvdata(pci_dev, NULL); free_netdev(efx->net_dev); + + pci_disable_pcie_error_reporting(pci_dev); }; /* NIC VPD information @@ -2741,6 +2817,11 @@ static int efx_pci_probe(struct pci_dev *pci_dev, netif_warn(efx, probe, efx->net_dev, "failed to create MTDs (%d)\n", rc); + rc = pci_enable_pcie_error_reporting(pci_dev); + if (rc && rc != -EINVAL) + netif_warn(efx, probe, efx->net_dev, + "pci_enable_pcie_error_reporting failed (%d)\n", rc); + return 0; fail4: @@ -2865,12 +2946,112 @@ static const struct dev_pm_ops efx_pm_ops = { .restore = efx_pm_resume, }; +/* A PCI error affecting this device was detected. 
+ * At this point MMIO and DMA may be disabled. + * Stop the software path and request a slot reset. + */ +static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, + enum pci_channel_state state) +{ + pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; + struct efx_nic *efx = pci_get_drvdata(pdev); + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + rtnl_lock(); + + if (efx->state != STATE_DISABLED) { + efx->state = STATE_RECOVERY; + efx->reset_pending = 0; + + efx_device_detach_sync(efx); + + efx_stop_all(efx); + efx_stop_interrupts(efx, false); + + status = PCI_ERS_RESULT_NEED_RESET; + } else { + /* If the interface is disabled we don't want to do anything + * with it. + */ + status = PCI_ERS_RESULT_RECOVERED; + } + + rtnl_unlock(); + + pci_disable_device(pdev); + + return status; +} + +/* Fake a successfull reset, which will be performed later in efx_io_resume. */ +static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev) +{ + struct efx_nic *efx = pci_get_drvdata(pdev); + pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; + int rc; + + if (pci_enable_device(pdev)) { + netif_err(efx, hw, efx->net_dev, + "Cannot re-enable PCI device after reset.\n"); + status = PCI_ERS_RESULT_DISCONNECT; + } + + rc = pci_cleanup_aer_uncorrect_error_status(pdev); + if (rc) { + netif_err(efx, hw, efx->net_dev, + "pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc); + /* Non-fatal error. Continue. */ + } + + return status; +} + +/* Perform the actual reset and resume I/O operations. 
*/ +static void efx_io_resume(struct pci_dev *pdev) +{ + struct efx_nic *efx = pci_get_drvdata(pdev); + int rc; + + rtnl_lock(); + + if (efx->state == STATE_DISABLED) + goto out; + + rc = efx_reset(efx, RESET_TYPE_ALL); + if (rc) { + netif_err(efx, hw, efx->net_dev, + "efx_reset failed after PCI error (%d)\n", rc); + } else { + efx->state = STATE_READY; + netif_dbg(efx, hw, efx->net_dev, + "Done resetting and resuming IO after PCI error.\n"); + } + +out: + rtnl_unlock(); +} + +/* For simplicity and reliability, we always require a slot reset and try to + * reset the hardware when a pci error affecting the device is detected. + * We leave both the link_reset and mmio_enabled callback unimplemented: + * with our request for slot reset the mmio_enabled callback will never be + * called, and the link_reset callback is not used by AER or EEH mechanisms. + */ +static struct pci_error_handlers efx_err_handlers = { + .error_detected = efx_io_error_detected, + .slot_reset = efx_io_slot_reset, + .resume = efx_io_resume, +}; + static struct pci_driver efx_pci_driver = { .name = KBUILD_MODNAME, .id_table = efx_pci_table, .probe = efx_pci_probe, .remove = efx_pci_remove, .driver.pm = &efx_pm_ops, + .err_handler = &efx_err_handlers, }; /************************************************************************** diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index d2f790df6dcb..8372da239b43 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -33,17 +33,22 @@ extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc); extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); /* RX */ +extern void efx_rx_config_page_split(struct efx_nic *efx); extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); extern void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); extern void efx_init_rx_queue(struct efx_rx_queue *rx_queue); extern void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); -extern void 
efx_rx_strategy(struct efx_channel *channel); extern void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue); extern void efx_rx_slow_fill(unsigned long context); -extern void __efx_rx_packet(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf); -extern void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, +extern void __efx_rx_packet(struct efx_channel *channel); +extern void efx_rx_packet(struct efx_rx_queue *rx_queue, + unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); +static inline void efx_rx_flush_packet(struct efx_channel *channel) +{ + if (channel->rx_pkt_n_frags) + __efx_rx_packet(channel); +} extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); #define EFX_MAX_DMAQ_SIZE 4096UL @@ -67,6 +72,7 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); extern int efx_probe_filters(struct efx_nic *efx); extern void efx_restore_filters(struct efx_nic *efx); extern void efx_remove_filters(struct efx_nic *efx); +extern void efx_filter_update_rx_scatter(struct efx_nic *efx); extern s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, bool replace); diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h index 182dbe2cc6e4..ab8fb5889e55 100644 --- a/drivers/net/ethernet/sfc/enum.h +++ b/drivers/net/ethernet/sfc/enum.h @@ -137,8 +137,12 @@ enum efx_loopback_mode { * Reset methods are numbered in order of increasing scope. * * @RESET_TYPE_INVISIBLE: Reset datapath and MAC (Falcon only) + * @RESET_TYPE_RECOVER_OR_ALL: Try to recover. Apply RESET_TYPE_ALL + * if unsuccessful. * @RESET_TYPE_ALL: Reset datapath, MAC and PHY * @RESET_TYPE_WORLD: Reset as much as possible + * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if + * unsuccessful. 
* @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog * @RESET_TYPE_INT_ERROR: reset due to internal error @@ -150,9 +154,11 @@ enum efx_loopback_mode { */ enum reset_type { RESET_TYPE_INVISIBLE = 0, - RESET_TYPE_ALL = 1, - RESET_TYPE_WORLD = 2, - RESET_TYPE_DISABLE = 3, + RESET_TYPE_RECOVER_OR_ALL = 1, + RESET_TYPE_ALL = 2, + RESET_TYPE_WORLD = 3, + RESET_TYPE_RECOVER_OR_DISABLE = 4, + RESET_TYPE_DISABLE = 5, RESET_TYPE_MAX_METHOD, RESET_TYPE_TX_WATCHDOG, RESET_TYPE_INT_ERROR, diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 8e61cd06f66a..6e768175e7e0 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -154,6 +154,7 @@ static const struct efx_ethtool_stat efx_ethtool_stats[] = { EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_nodesc_trunc), }; /* Number of ethtool statistics */ @@ -978,7 +979,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, rule->m_ext.data[1])) return -EINVAL; - efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, + efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, + efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, (rule->ring_cookie == RX_CLS_FLOW_DISC) ? 0xfff : rule->ring_cookie); diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index 49bcd196e10d..4486102fa9b3 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -1546,10 +1546,6 @@ static int falcon_probe_nic(struct efx_nic *efx) static void falcon_init_rx_cfg(struct efx_nic *efx) { - /* Prior to Siena the RX DMA engine will split each frame at - * intervals of RX_USR_BUF_SIZE (32-byte units). We set it to - * be so large that that never happens. 
*/ - const unsigned huge_buf_size = (3 * 4096) >> 5; /* RX control FIFO thresholds (32 entries) */ const unsigned ctrl_xon_thr = 20; const unsigned ctrl_xoff_thr = 25; @@ -1557,10 +1553,15 @@ static void falcon_init_rx_cfg(struct efx_nic *efx) efx_reado(efx, ®, FR_AZ_RX_CFG); if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) { - /* Data FIFO size is 5.5K */ + /* Data FIFO size is 5.5K. The RX DMA engine only + * supports scattering for user-mode queues, but will + * split DMA writes at intervals of RX_USR_BUF_SIZE + * (32-byte units) even for kernel-mode queues. We + * set it to be so large that that never happens. + */ EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0); EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE, - huge_buf_size); + (3 * 4096) >> 5); EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8); EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8); EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr); @@ -1569,7 +1570,7 @@ static void falcon_init_rx_cfg(struct efx_nic *efx) /* Data FIFO size is 80K; register fields moved */ EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0); EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE, - huge_buf_size); + EFX_RX_USR_BUF_SIZE >> 5); /* Send XON and XOFF at ~3 * max MTU away from empty/full */ EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8); EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8); @@ -1815,6 +1816,7 @@ const struct efx_nic_type falcon_a1_nic_type = { .evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER, .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), .rx_buffer_padding = 0x24, + .can_rx_scatter = false, .max_interrupt_mode = EFX_INT_MODE_MSI, .phys_addr_channels = 4, .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH, @@ -1865,6 +1867,7 @@ const struct efx_nic_type falcon_b0_nic_type = { .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), .rx_buffer_hash_size = 0x10, .rx_buffer_padding = 0, + .can_rx_scatter = true, .max_interrupt_mode = EFX_INT_MODE_MSIX, 
.phys_addr_channels = 32, /* Hardware limit is 64, but the legacy * interrupt handler only supports 32 diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c index 8af42cd1feda..2397f0e8d3eb 100644 --- a/drivers/net/ethernet/sfc/filter.c +++ b/drivers/net/ethernet/sfc/filter.c @@ -66,6 +66,10 @@ struct efx_filter_state { #endif }; +static void efx_filter_table_clear_entry(struct efx_nic *efx, + struct efx_filter_table *table, + unsigned int filter_idx); + /* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit * key derived from the n-tuple. The initial LFSR state is 0xffff. */ static u16 efx_filter_hash(u32 key) @@ -168,6 +172,25 @@ static void efx_filter_push_rx_config(struct efx_nic *efx) filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED, !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & EFX_FILTER_FLAG_RX_RSS)); + + /* There is a single bit to enable RX scatter for all + * unmatched packets. Only set it if scatter is + * enabled in both filter specs. + */ + EFX_SET_OWORD_FIELD( + filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q, + !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & + table->spec[EFX_FILTER_INDEX_MC_DEF].flags & + EFX_FILTER_FLAG_RX_SCATTER)); + } else if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) { + /* We don't expose 'default' filters because unmatched + * packets always go to the queue number found in the + * RSS table. But we still need to set the RX scatter + * bit here. 
+ */ + EFX_SET_OWORD_FIELD( + filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q, + efx->rx_scatter); } efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); @@ -409,9 +432,18 @@ static void efx_filter_reset_rx_def(struct efx_nic *efx, unsigned filter_idx) struct efx_filter_state *state = efx->filter_state; struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF]; struct efx_filter_spec *spec = &table->spec[filter_idx]; + enum efx_filter_flags flags = 0; + + /* If there's only one channel then disable RSS for non VF + * traffic, thereby allowing VFs to use RSS when the PF can't. + */ + if (efx->n_rx_channels > 1) + flags |= EFX_FILTER_FLAG_RX_RSS; - efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, - EFX_FILTER_FLAG_RX_RSS, 0); + if (efx->rx_scatter) + flags |= EFX_FILTER_FLAG_RX_SCATTER; + + efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, flags, 0); spec->type = EFX_FILTER_UC_DEF + filter_idx; table->used_bitmap[0] |= 1 << filter_idx; } @@ -463,13 +495,6 @@ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec) break; } - case EFX_FILTER_TABLE_RX_DEF: - /* One filter spec per type */ - BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0); - BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF != - EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF); - return spec->type - EFX_FILTER_UC_DEF; - case EFX_FILTER_TABLE_RX_MAC: { bool is_wild = spec->type == EFX_FILTER_MAC_WILD; EFX_POPULATE_OWORD_7( @@ -520,42 +545,6 @@ static bool efx_filter_equal(const struct efx_filter_spec *left, return true; } -static int efx_filter_search(struct efx_filter_table *table, - struct efx_filter_spec *spec, u32 key, - bool for_insert, unsigned int *depth_required) -{ - unsigned hash, incr, filter_idx, depth, depth_max; - - hash = efx_filter_hash(key); - incr = efx_filter_increment(key); - - filter_idx = hash & (table->size - 1); - depth = 1; - depth_max = (for_insert ? - (spec->priority <= EFX_FILTER_PRI_HINT ? 
- FILTER_CTL_SRCH_HINT_MAX : FILTER_CTL_SRCH_MAX) : - table->search_depth[spec->type]); - - for (;;) { - /* Return success if entry is used and matches this spec - * or entry is unused and we are trying to insert. - */ - if (test_bit(filter_idx, table->used_bitmap) ? - efx_filter_equal(spec, &table->spec[filter_idx]) : - for_insert) { - *depth_required = depth; - return filter_idx; - } - - /* Return failure if we reached the maximum search depth */ - if (depth == depth_max) - return for_insert ? -EBUSY : -ENOENT; - - filter_idx = (filter_idx + incr) & (table->size - 1); - ++depth; - } -} - /* * Construct/deconstruct external filter IDs. At least the RX filter * IDs must be ordered by matching priority, for RX NFC semantics. @@ -650,44 +639,111 @@ u32 efx_filter_get_rx_id_limit(struct efx_nic *efx) * efx_filter_insert_filter - add or replace a filter * @efx: NIC in which to insert the filter * @spec: Specification for the filter - * @replace: Flag for whether the specified filter may replace a filter - * with an identical match expression and equal or lower priority + * @replace_equal: Flag for whether the specified filter may replace an + * existing filter with equal priority * * On success, return the filter ID. * On failure, return a negative error code. + * + * If an existing filter has equal match values to the new filter + * spec, then the new filter might replace it, depending on the + * relative priorities. If the existing filter has lower priority, or + * if @replace_equal is set and it has equal priority, then it is + * replaced. Otherwise the function fails, returning -%EPERM if + * the existing filter has higher priority or -%EEXIST if it has + * equal priority. 
*/ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, - bool replace) + bool replace_equal) { struct efx_filter_state *state = efx->filter_state; struct efx_filter_table *table = efx_filter_spec_table(state, spec); - struct efx_filter_spec *saved_spec; efx_oword_t filter; - unsigned int filter_idx, depth = 0; - u32 key; + int rep_index, ins_index; + unsigned int depth = 0; int rc; if (!table || table->size == 0) return -EINVAL; - key = efx_filter_build(&filter, spec); - netif_vdbg(efx, hw, efx->net_dev, "%s: type %d search_depth=%d", __func__, spec->type, table->search_depth[spec->type]); - spin_lock_bh(&state->lock); + if (table->id == EFX_FILTER_TABLE_RX_DEF) { + /* One filter spec per type */ + BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0); + BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF != + EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF); + rep_index = spec->type - EFX_FILTER_INDEX_UC_DEF; + ins_index = rep_index; - rc = efx_filter_search(table, spec, key, true, &depth); - if (rc < 0) - goto out; - filter_idx = rc; - BUG_ON(filter_idx >= table->size); - saved_spec = &table->spec[filter_idx]; - - if (test_bit(filter_idx, table->used_bitmap)) { - /* Should we replace the existing filter? */ - if (!replace) { + spin_lock_bh(&state->lock); + } else { + /* Search concurrently for + * (1) a filter to be replaced (rep_index): any filter + * with the same match values, up to the current + * search depth for this type, and + * (2) the insertion point (ins_index): (1) or any + * free slot before it or up to the maximum search + * depth for this priority + * We fail if we cannot find (2). 
+ * + * We can stop once either + * (a) we find (1), in which case we have definitely + * found (2) as well; or + * (b) we have searched exhaustively for (1), and have + * either found (2) or searched exhaustively for it + */ + u32 key = efx_filter_build(&filter, spec); + unsigned int hash = efx_filter_hash(key); + unsigned int incr = efx_filter_increment(key); + unsigned int max_rep_depth = table->search_depth[spec->type]; + unsigned int max_ins_depth = + spec->priority <= EFX_FILTER_PRI_HINT ? + FILTER_CTL_SRCH_HINT_MAX : FILTER_CTL_SRCH_MAX; + unsigned int i = hash & (table->size - 1); + + ins_index = -1; + depth = 1; + + spin_lock_bh(&state->lock); + + for (;;) { + if (!test_bit(i, table->used_bitmap)) { + if (ins_index < 0) + ins_index = i; + } else if (efx_filter_equal(spec, &table->spec[i])) { + /* Case (a) */ + if (ins_index < 0) + ins_index = i; + rep_index = i; + break; + } + + if (depth >= max_rep_depth && + (ins_index >= 0 || depth >= max_ins_depth)) { + /* Case (b) */ + if (ins_index < 0) { + rc = -EBUSY; + goto out; + } + rep_index = -1; + break; + } + + i = (i + incr) & (table->size - 1); + ++depth; + } + } + + /* If we found a filter to be replaced, check whether we + * should do so + */ + if (rep_index >= 0) { + struct efx_filter_spec *saved_spec = &table->spec[rep_index]; + + if (spec->priority == saved_spec->priority && !replace_equal) { rc = -EEXIST; goto out; } @@ -695,11 +751,14 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, rc = -EPERM; goto out; } - } else { - __set_bit(filter_idx, table->used_bitmap); + } + + /* Insert the filter */ + if (ins_index != rep_index) { + __set_bit(ins_index, table->used_bitmap); ++table->used; } - *saved_spec = *spec; + table->spec[ins_index] = *spec; if (table->id == EFX_FILTER_TABLE_RX_DEF) { efx_filter_push_rx_config(efx); @@ -713,13 +772,19 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, } efx_writeo(efx, &filter, - table->offset + 
table->step * filter_idx); + table->offset + table->step * ins_index); + + /* If we were able to replace a filter by inserting + * at a lower depth, clear the replaced filter + */ + if (ins_index != rep_index && rep_index >= 0) + efx_filter_table_clear_entry(efx, table, rep_index); } netif_vdbg(efx, hw, efx->net_dev, "%s: filter type %d index %d rxq %u set", - __func__, spec->type, filter_idx, spec->dmaq_id); - rc = efx_filter_make_id(spec, filter_idx); + __func__, spec->type, ins_index, spec->dmaq_id); + rc = efx_filter_make_id(spec, ins_index); out: spin_unlock_bh(&state->lock); @@ -1060,6 +1125,50 @@ void efx_remove_filters(struct efx_nic *efx) kfree(state); } +/* Update scatter enable flags for filters pointing to our own RX queues */ +void efx_filter_update_rx_scatter(struct efx_nic *efx) +{ + struct efx_filter_state *state = efx->filter_state; + enum efx_filter_table_id table_id; + struct efx_filter_table *table; + efx_oword_t filter; + unsigned int filter_idx; + + spin_lock_bh(&state->lock); + + for (table_id = EFX_FILTER_TABLE_RX_IP; + table_id <= EFX_FILTER_TABLE_RX_DEF; + table_id++) { + table = &state->table[table_id]; + + for (filter_idx = 0; filter_idx < table->size; filter_idx++) { + if (!test_bit(filter_idx, table->used_bitmap) || + table->spec[filter_idx].dmaq_id >= + efx->n_rx_channels) + continue; + + if (efx->rx_scatter) + table->spec[filter_idx].flags |= + EFX_FILTER_FLAG_RX_SCATTER; + else + table->spec[filter_idx].flags &= + ~EFX_FILTER_FLAG_RX_SCATTER; + + if (table_id == EFX_FILTER_TABLE_RX_DEF) + /* Pushed by efx_filter_push_rx_config() */ + continue; + + efx_filter_build(&filter, &table->spec[filter_idx]); + efx_writeo(efx, &filter, + table->offset + table->step * filter_idx); + } + } + + efx_filter_push_rx_config(efx); + + spin_unlock_bh(&state->lock); +} + #ifdef CONFIG_RFS_ACCEL int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h 
b/drivers/net/ethernet/sfc/mcdi_pcol.h index 9d426d0457bd..c5c9747861ba 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -553,6 +553,7 @@ #define MC_CMD_PTP_MODE_V1_VLAN 0x1 /* enum */ #define MC_CMD_PTP_MODE_V2 0x2 /* enum */ #define MC_CMD_PTP_MODE_V2_VLAN 0x3 /* enum */ +#define MC_CMD_PTP_MODE_V2_ENHANCED 0x4 /* enum */ /* MC_CMD_PTP_IN_DISABLE msgrequest */ #define MC_CMD_PTP_IN_DISABLE_LEN 8 diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 0a90abd2421b..9bd433a095c5 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -69,6 +69,12 @@ #define EFX_TXQ_TYPES 4 #define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS) +/* Maximum possible MTU the driver supports */ +#define EFX_MAX_MTU (9 * 1024) + +/* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page. */ +#define EFX_RX_USR_BUF_SIZE 1824 + /* Forward declare Precision Time Protocol (PTP) support structure. */ struct efx_ptp_data; @@ -206,25 +212,23 @@ struct efx_tx_queue { /** * struct efx_rx_buffer - An Efx RX data buffer * @dma_addr: DMA base address of the buffer - * @skb: The associated socket buffer. Valid iff !(@flags & %EFX_RX_BUF_PAGE). - * Will be %NULL if the buffer slot is currently free. - * @page: The associated page buffer. Valif iff @flags & %EFX_RX_BUF_PAGE. + * @page: The associated page buffer. * Will be %NULL if the buffer slot is currently free. - * @page_offset: Offset within page. Valid iff @flags & %EFX_RX_BUF_PAGE. - * @len: Buffer length, in bytes. - * @flags: Flags for buffer and packet state. + * @page_offset: If pending: offset in @page of DMA base address. + * If completed: offset in @page of Ethernet header. + * @len: If pending: length for DMA descriptor. + * If completed: received length, excluding hash prefix. + * @flags: Flags for buffer and packet state. These are only set on the + * first buffer of a scattered packet. 
*/ struct efx_rx_buffer { dma_addr_t dma_addr; - union { - struct sk_buff *skb; - struct page *page; - } u; + struct page *page; u16 page_offset; u16 len; u16 flags; }; -#define EFX_RX_BUF_PAGE 0x0001 +#define EFX_RX_BUF_LAST_IN_PAGE 0x0001 #define EFX_RX_PKT_CSUMMED 0x0002 #define EFX_RX_PKT_DISCARD 0x0004 @@ -260,14 +264,23 @@ struct efx_rx_page_state { * @added_count: Number of buffers added to the receive queue. * @notified_count: Number of buffers given to NIC (<= @added_count). * @removed_count: Number of buffers removed from the receive queue. + * @scatter_n: Number of buffers used by current packet + * @page_ring: The ring to store DMA mapped pages for reuse. + * @page_add: Counter to calculate the write pointer for the recycle ring. + * @page_remove: Counter to calculate the read pointer for the recycle ring. + * @page_recycle_count: The number of pages that have been recycled. + * @page_recycle_failed: The number of pages that couldn't be recycled because + * the kernel still held a reference to them. + * @page_recycle_full: The number of pages that were released because the + * recycle ring was full. + * @page_ptr_mask: The number of pages in the RX recycle ring minus 1. * @max_fill: RX descriptor maximum fill level (<= ring size) * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill * (<= @max_fill) * @min_fill: RX descriptor minimum non-zero fill level. * This records the minimum fill level observed when a ring * refill was triggered. - * @alloc_page_count: RX allocation strategy counter. - * @alloc_skb_count: RX allocation strategy counter. + * @recycle_count: RX buffer recycle counter. * @slow_fill: Timer used to defer efx_nic_generate_fill_event(). 
*/ struct efx_rx_queue { @@ -279,15 +292,22 @@ struct efx_rx_queue { bool enabled; bool flush_pending; - int added_count; - int notified_count; - int removed_count; + unsigned int added_count; + unsigned int notified_count; + unsigned int removed_count; + unsigned int scatter_n; + struct page **page_ring; + unsigned int page_add; + unsigned int page_remove; + unsigned int page_recycle_count; + unsigned int page_recycle_failed; + unsigned int page_recycle_full; + unsigned int page_ptr_mask; unsigned int max_fill; unsigned int fast_fill_trigger; unsigned int min_fill; unsigned int min_overfill; - unsigned int alloc_page_count; - unsigned int alloc_skb_count; + unsigned int recycle_count; struct timer_list slow_fill; unsigned int slow_fill_count; }; @@ -336,10 +356,6 @@ enum efx_rx_alloc_method { * @event_test_cpu: Last CPU to handle interrupt or test event for this channel * @irq_count: Number of IRQs since last adaptive moderation decision * @irq_mod_score: IRQ moderation score - * @rx_alloc_level: Watermark based heuristic counter for pushing descriptors - * and diagnostic counters - * @rx_alloc_push_pages: RX allocation method currently in use for pushing - * descriptors * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors @@ -347,6 +363,12 @@ enum efx_rx_alloc_method { * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors * @n_rx_overlength: Count of RX_OVERLENGTH errors * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun + * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to + * lack of descriptors + * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by + * __efx_rx_packet(), or zero if there is none + * @rx_pkt_index: Ring index of first buffer for next packet to be delivered + * by __efx_rx_packet(), if @rx_pkt_n_frags != 0 * @rx_queue: RX queue for this channel * @tx_queue: TX queues for 
this channel */ @@ -371,9 +393,6 @@ struct efx_channel { unsigned int rfs_filters_added; #endif - int rx_alloc_level; - int rx_alloc_push_pages; - unsigned n_rx_tobe_disc; unsigned n_rx_ip_hdr_chksum_err; unsigned n_rx_tcp_udp_chksum_err; @@ -381,11 +400,10 @@ struct efx_channel { unsigned n_rx_frm_trunc; unsigned n_rx_overlength; unsigned n_skbuff_leaks; + unsigned int n_rx_nodesc_trunc; - /* Used to pipeline received packets in order to optimise memory - * access with prefetches. - */ - struct efx_rx_buffer *rx_pkt; + unsigned int rx_pkt_n_frags; + unsigned int rx_pkt_index; struct efx_rx_queue rx_queue; struct efx_tx_queue tx_queue[EFX_TXQ_TYPES]; @@ -410,7 +428,7 @@ struct efx_channel_type { void (*post_remove)(struct efx_channel *); void (*get_name)(struct efx_channel *, char *buf, size_t len); struct efx_channel *(*copy)(const struct efx_channel *); - void (*receive_skb)(struct efx_channel *, struct sk_buff *); + bool (*receive_skb)(struct efx_channel *, struct sk_buff *); bool keep_eventq; }; @@ -446,6 +464,7 @@ enum nic_state { STATE_UNINIT = 0, /* device being probed/removed or is frozen */ STATE_READY = 1, /* hardware ready and netdev registered */ STATE_DISABLED = 2, /* device disabled due to hardware errors */ + STATE_RECOVERY = 3, /* device recovering from PCI error */ }; /* @@ -684,10 +703,13 @@ struct vfdi_status; * @n_channels: Number of channels in use * @n_rx_channels: Number of channels used for RX (= number of RX queues) * @n_tx_channels: Number of channels used for TX - * @rx_buffer_len: RX buffer length + * @rx_dma_len: Current maximum RX DMA length * @rx_buffer_order: Order (log2) of number of pages for each RX buffer + * @rx_buffer_truesize: Amortised allocation size of an RX buffer, + * for use in sk_buff::truesize * @rx_hash_key: Toeplitz hash key for RSS * @rx_indir_table: Indirection table for RSS + * @rx_scatter: Scatter mode enabled for receives * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at 
which error count will be expired * @irq_status: Interrupt status buffer @@ -800,10 +822,15 @@ struct efx_nic { unsigned rss_spread; unsigned tx_channel_offset; unsigned n_tx_channels; - unsigned int rx_buffer_len; + unsigned int rx_dma_len; unsigned int rx_buffer_order; + unsigned int rx_buffer_truesize; + unsigned int rx_page_buf_step; + unsigned int rx_bufs_per_page; + unsigned int rx_pages_per_batch; u8 rx_hash_key[40]; u32 rx_indir_table[128]; + bool rx_scatter; unsigned int_error_count; unsigned long int_error_expire; @@ -934,8 +961,9 @@ static inline unsigned int efx_port_num(struct efx_nic *efx) * @evq_ptr_tbl_base: Event queue pointer table base address * @evq_rptr_tbl_base: Event queue read-pointer table base address * @max_dma_mask: Maximum possible DMA mask - * @rx_buffer_hash_size: Size of hash at start of RX buffer - * @rx_buffer_padding: Size of padding at end of RX buffer + * @rx_buffer_hash_size: Size of hash at start of RX packet + * @rx_buffer_padding: Size of padding at end of RX packet + * @can_rx_scatter: NIC is able to scatter packet to multiple buffers * @max_interrupt_mode: Highest capability interrupt mode supported * from &enum efx_init_mode. 
* @phys_addr_channels: Number of channels with physically addressed @@ -983,6 +1011,7 @@ struct efx_nic_type { u64 max_dma_mask; unsigned int rx_buffer_hash_size; unsigned int rx_buffer_padding; + bool can_rx_scatter; unsigned int max_interrupt_mode; unsigned int phys_addr_channels; unsigned int timer_period_max; diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 0ad790cc473c..b0503cd8c2a0 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -305,11 +305,11 @@ int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer, unsigned int len) { buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len, - &buffer->dma_addr, GFP_ATOMIC); + &buffer->dma_addr, + GFP_ATOMIC | __GFP_ZERO); if (!buffer->addr) return -ENOMEM; buffer->len = len; - memset(buffer->addr, 0, len); return 0; } @@ -376,7 +376,8 @@ efx_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count) return false; tx_queue->empty_read_count = 0; - return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; + return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0 + && tx_queue->write_count - write_count == 1; } /* For each entry inserted into the software descriptor ring, create a @@ -591,12 +592,22 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue) struct efx_nic *efx = rx_queue->efx; bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0; bool iscsi_digest_en = is_b0; + bool jumbo_en; + + /* For kernel-mode queues in Falcon A1, the JUMBO flag enables + * DMA to continue after a PCIe page boundary (and scattering + * is not possible). In Falcon B0 and Siena, it enables + * scatter. 
+ */ + jumbo_en = !is_b0 || efx->rx_scatter; netif_dbg(efx, hw, efx->net_dev, "RX queue %d ring in special buffers %d-%d\n", efx_rx_queue_index(rx_queue), rx_queue->rxd.index, rx_queue->rxd.index + rx_queue->rxd.entries - 1); + rx_queue->scatter_n = 0; + /* Pin RX descriptor ring */ efx_init_special_buffer(efx, &rx_queue->rxd); @@ -613,8 +624,7 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue) FRF_AZ_RX_DESCQ_SIZE, __ffs(rx_queue->rxd.entries), FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ , - /* For >=B0 this is scatter so disable */ - FRF_AZ_RX_DESCQ_JUMBO, !is_b0, + FRF_AZ_RX_DESCQ_JUMBO, jumbo_en, FRF_AZ_RX_DESCQ_EN, 1); efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, efx_rx_queue_index(rx_queue)); @@ -968,13 +978,24 @@ static u16 efx_handle_rx_not_ok(struct efx_rx_queue *rx_queue, EFX_RX_PKT_DISCARD : 0; } -/* Handle receive events that are not in-order. */ -static void +/* Handle receive events that are not in-order. Return true if this + * can be handled as a partial packet discard, false if it's more + * serious. + */ +static bool efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index) { + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); struct efx_nic *efx = rx_queue->efx; unsigned expected, dropped; + if (rx_queue->scatter_n && + index == ((rx_queue->removed_count + rx_queue->scatter_n - 1) & + rx_queue->ptr_mask)) { + ++channel->n_rx_nodesc_trunc; + return true; + } + expected = rx_queue->removed_count & rx_queue->ptr_mask; dropped = (index - expected) & rx_queue->ptr_mask; netif_info(efx, rx_err, efx->net_dev, @@ -983,6 +1004,7 @@ efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index) efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ? 
RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE); + return false; } /* Handle a packet received event @@ -998,7 +1020,7 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt; unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt; unsigned expected_ptr; - bool rx_ev_pkt_ok; + bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont; u16 flags; struct efx_rx_queue *rx_queue; struct efx_nic *efx = channel->efx; @@ -1006,21 +1028,56 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) if (unlikely(ACCESS_ONCE(efx->reset_pending))) return; - /* Basic packet information */ - rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT); - rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK); - rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); - WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT)); - WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP) != 1); + rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT); + rx_ev_sop = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP); WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) != channel->channel); rx_queue = efx_channel_get_rx_queue(channel); rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR); - expected_ptr = rx_queue->removed_count & rx_queue->ptr_mask; - if (unlikely(rx_ev_desc_ptr != expected_ptr)) - efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr); + expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) & + rx_queue->ptr_mask); + + /* Check for partial drops and other errors */ + if (unlikely(rx_ev_desc_ptr != expected_ptr) || + unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) { + if (rx_ev_desc_ptr != expected_ptr && + !efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr)) + return; + + /* Discard all pending fragments */ + if (rx_queue->scatter_n) { + efx_rx_packet( + rx_queue, + rx_queue->removed_count & rx_queue->ptr_mask, + rx_queue->scatter_n, 0, EFX_RX_PKT_DISCARD); + rx_queue->removed_count 
+= rx_queue->scatter_n; + rx_queue->scatter_n = 0; + } + + /* Return if there is no new fragment */ + if (rx_ev_desc_ptr != expected_ptr) + return; + + /* Discard new fragment if not SOP */ + if (!rx_ev_sop) { + efx_rx_packet( + rx_queue, + rx_queue->removed_count & rx_queue->ptr_mask, + 1, 0, EFX_RX_PKT_DISCARD); + ++rx_queue->removed_count; + return; + } + } + + ++rx_queue->scatter_n; + if (rx_ev_cont) + return; + + rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT); + rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK); + rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); if (likely(rx_ev_pkt_ok)) { /* If packet is marked as OK and packet type is TCP/IP or @@ -1048,7 +1105,11 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) channel->irq_mod_score += 2; /* Handle received packet */ - efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, flags); + efx_rx_packet(rx_queue, + rx_queue->removed_count & rx_queue->ptr_mask, + rx_queue->scatter_n, rx_ev_byte_cnt, flags); + rx_queue->removed_count += rx_queue->scatter_n; + rx_queue->scatter_n = 0; } /* If this flush done event corresponds to a &struct efx_tx_queue, then diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 3f93624fc273..07f6baa15c0c 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -99,6 +99,9 @@ #define PTP_V2_VERSION_LENGTH 1 #define PTP_V2_VERSION_OFFSET 29 +#define PTP_V2_UUID_LENGTH 8 +#define PTP_V2_UUID_OFFSET 48 + /* Although PTP V2 UUIDs are comprised a ClockIdentity (8) and PortNumber (2), * the MC only captures the last six bytes of the clock identity. These values * reflect those, not the ones used in the standard. 
The standard permits @@ -429,13 +432,10 @@ static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, unsigned number_readings = (response_length / MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN); unsigned i; - unsigned min; - unsigned min_set = 0; unsigned total; unsigned ngood = 0; unsigned last_good = 0; struct efx_ptp_data *ptp = efx->ptp_data; - bool min_valid = false; u32 last_sec; u32 start_sec; struct timespec delta; @@ -443,35 +443,17 @@ static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, if (number_readings == 0) return -EAGAIN; - /* Find minimum value in this set of results, discarding clearly - * erroneous results. + /* Read the set of results and increment stats for any results that + * appera to be erroneous. */ for (i = 0; i < number_readings; i++) { efx_ptp_read_timeset(synch_buf, &ptp->timeset[i]); synch_buf += MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN; - if (ptp->timeset[i].window > SYNCHRONISATION_GRANULARITY_NS) { - if (min_valid) { - if (ptp->timeset[i].window < min_set) - min_set = ptp->timeset[i].window; - } else { - min_valid = true; - min_set = ptp->timeset[i].window; - } - } - } - - if (min_valid) { - if (ptp->base_sync_valid && (min_set > ptp->base_sync_ns)) - min = ptp->base_sync_ns; - else - min = min_set; - } else { - min = SYNCHRONISATION_GRANULARITY_NS; } - /* Discard excessively long synchronise durations. The MC times - * when it finishes reading the host time so the corrected window - * time should be fairly constant for a given platform. + /* Find the last good host-MC synchronization result. The MC times + * when it finishes reading the host time so the corrected window time + * should be fairly constant for a given platform. 
*/ total = 0; for (i = 0; i < number_readings; i++) @@ -489,8 +471,8 @@ static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, if (ngood == 0) { netif_warn(efx, drv, efx->net_dev, - "PTP no suitable synchronisations %dns %dns\n", - ptp->base_sync_ns, min_set); + "PTP no suitable synchronisations %dns\n", + ptp->base_sync_ns); return -EAGAIN; } @@ -1006,43 +988,53 @@ bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) * the receive timestamp from the MC - this will probably occur after the * packet arrival because of the processing in the MC. */ -static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) +static bool efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) { struct efx_nic *efx = channel->efx; struct efx_ptp_data *ptp = efx->ptp_data; struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb; - u8 *data; + u8 *match_data_012, *match_data_345; unsigned int version; match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); /* Correct version? 
*/ if (ptp->mode == MC_CMD_PTP_MODE_V1) { - if (skb->len < PTP_V1_MIN_LENGTH) { - netif_receive_skb(skb); - return; + if (!pskb_may_pull(skb, PTP_V1_MIN_LENGTH)) { + return false; } version = ntohs(*(__be16 *)&skb->data[PTP_V1_VERSION_OFFSET]); if (version != PTP_VERSION_V1) { - netif_receive_skb(skb); - return; + return false; } + + /* PTP V1 uses all six bytes of the UUID to match the packet + * to the timestamp + */ + match_data_012 = skb->data + PTP_V1_UUID_OFFSET; + match_data_345 = skb->data + PTP_V1_UUID_OFFSET + 3; } else { - if (skb->len < PTP_V2_MIN_LENGTH) { - netif_receive_skb(skb); - return; + if (!pskb_may_pull(skb, PTP_V2_MIN_LENGTH)) { + return false; } version = skb->data[PTP_V2_VERSION_OFFSET]; - - BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2); - BUILD_BUG_ON(PTP_V1_UUID_OFFSET != PTP_V2_MC_UUID_OFFSET); - BUILD_BUG_ON(PTP_V1_UUID_LENGTH != PTP_V2_MC_UUID_LENGTH); - BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET); - BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH); - if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) { - netif_receive_skb(skb); - return; + return false; + } + + /* The original V2 implementation uses bytes 2-7 of + * the UUID to match the packet to the timestamp. This + * discards two of the bytes of the MAC address used + * to create the UUID (SF bug 33070). The PTP V2 + * enhanced mode fixes this issue and uses bytes 0-2 + * and byte 5-7 of the UUID. 
+ */ + match_data_345 = skb->data + PTP_V2_UUID_OFFSET + 5; + if (ptp->mode == MC_CMD_PTP_MODE_V2) { + match_data_012 = skb->data + PTP_V2_UUID_OFFSET + 2; + } else { + match_data_012 = skb->data + PTP_V2_UUID_OFFSET + 0; + BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2_ENHANCED); } } @@ -1056,14 +1048,19 @@ static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) timestamps = skb_hwtstamps(skb); memset(timestamps, 0, sizeof(*timestamps)); + /* We expect the sequence number to be in the same position in + * the packet for PTP V1 and V2 + */ + BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET); + BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH); + /* Extract UUID/Sequence information */ - data = skb->data + PTP_V1_UUID_OFFSET; - match->words[0] = (data[0] | - (data[1] << 8) | - (data[2] << 16) | - (data[3] << 24)); - match->words[1] = (data[4] | - (data[5] << 8) | + match->words[0] = (match_data_012[0] | + (match_data_012[1] << 8) | + (match_data_012[2] << 16) | + (match_data_345[0] << 24)); + match->words[1] = (match_data_345[1] | + (match_data_345[2] << 8) | (skb->data[PTP_V1_SEQUENCE_OFFSET + PTP_V1_SEQUENCE_LENGTH - 1] << 16)); @@ -1073,6 +1070,8 @@ static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) skb_queue_tail(&ptp->rxq, skb); queue_work(ptp->workwq, &ptp->work); + + return true; } /* Transmit a PTP packet. 
This has to be transmitted by the MC @@ -1167,7 +1166,7 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) * timestamped */ init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; - new_mode = MC_CMD_PTP_MODE_V2; + new_mode = MC_CMD_PTP_MODE_V2_ENHANCED; enable_wanted = true; break; case HWTSTAMP_FILTER_PTP_V2_EVENT: @@ -1186,7 +1185,14 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) if (init->tx_type != HWTSTAMP_TX_OFF) enable_wanted = true; + /* Old versions of the firmware do not support the improved + * UUID filtering option (SF bug 33070). If the firmware does + * not accept the enhanced mode, fall back to the standard PTP + * v2 UUID filtering. + */ rc = efx_ptp_change_mode(efx, enable_wanted, new_mode); + if ((rc != 0) && (new_mode == MC_CMD_PTP_MODE_V2_ENHANCED)) + rc = efx_ptp_change_mode(efx, enable_wanted, MC_CMD_PTP_MODE_V2); if (rc != 0) return rc; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index bb579a6128c8..e73e30bac10e 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -16,6 +16,7 @@ #include <linux/udp.h> #include <linux/prefetch.h> #include <linux/moduleparam.h> +#include <linux/iommu.h> #include <net/ip.h> #include <net/checksum.h> #include "net_driver.h" @@ -24,85 +25,39 @@ #include "selftest.h" #include "workarounds.h" -/* Number of RX descriptors pushed at once. */ -#define EFX_RX_BATCH 8 +/* Preferred number of descriptors to fill at once */ +#define EFX_RX_PREFERRED_BATCH 8U -/* Maximum size of a buffer sharing a page */ -#define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state)) +/* Number of RX buffers to recycle pages for. When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. 
+ */ +#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 +#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) /* Size of buffer allocated for skb header area. */ #define EFX_SKB_HEADERS 64u -/* - * rx_alloc_method - RX buffer allocation method - * - * This driver supports two methods for allocating and using RX buffers: - * each RX buffer may be backed by an skb or by an order-n page. - * - * When GRO is in use then the second method has a lower overhead, - * since we don't have to allocate then free skbs on reassembled frames. - * - * Values: - * - RX_ALLOC_METHOD_AUTO = 0 - * - RX_ALLOC_METHOD_SKB = 1 - * - RX_ALLOC_METHOD_PAGE = 2 - * - * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count - * controlled by the parameters below. - * - * - Since pushing and popping descriptors are separated by the rx_queue - * size, so the watermarks should be ~rxd_size. - * - The performance win by using page-based allocation for GRO is less - * than the performance hit of using page-based allocation of non-GRO, - * so the watermarks should reflect this. - * - * Per channel we maintain a single variable, updated by each channel: - * - * rx_alloc_level += (gro_performed ? RX_ALLOC_FACTOR_GRO : - * RX_ALLOC_FACTOR_SKB) - * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which - * limits the hysteresis), and update the allocation strategy: - * - * rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_GRO ? - * RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB) - */ -static int rx_alloc_method = RX_ALLOC_METHOD_AUTO; - -#define RX_ALLOC_LEVEL_GRO 0x2000 -#define RX_ALLOC_LEVEL_MAX 0x3000 -#define RX_ALLOC_FACTOR_GRO 1 -#define RX_ALLOC_FACTOR_SKB (-2) - /* This is the percentage fill level below which new RX descriptors * will be added to the RX descriptor ring. 
*/ static unsigned int rx_refill_threshold; +/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ +#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ + EFX_RX_USR_BUF_SIZE) + /* * RX maximum head room required. * - * This must be at least 1 to prevent overflow and at least 2 to allow - * pipelined receives. + * This must be at least 1 to prevent overflow, plus one packet-worth + * to allow pipelined receives. */ -#define EFX_RXD_HEAD_ROOM 2 +#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) -/* Offset of ethernet header within page */ -static inline unsigned int efx_rx_buf_offset(struct efx_nic *efx, - struct efx_rx_buffer *buf) +static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) { - return buf->page_offset + efx->type->rx_buffer_hash_size; -} -static inline unsigned int efx_rx_buf_size(struct efx_nic *efx) -{ - return PAGE_SIZE << efx->rx_buffer_order; -} - -static u8 *efx_rx_buf_eh(struct efx_nic *efx, struct efx_rx_buffer *buf) -{ - if (buf->flags & EFX_RX_BUF_PAGE) - return page_address(buf->u.page) + efx_rx_buf_offset(efx, buf); - else - return (u8 *)buf->u.skb->data + efx->type->rx_buffer_hash_size; + return page_address(buf->page) + buf->page_offset; } static inline u32 efx_rx_buf_hash(const u8 *eh) @@ -119,66 +74,81 @@ static inline u32 efx_rx_buf_hash(const u8 *eh) #endif } -/** - * efx_init_rx_buffers_skb - create EFX_RX_BATCH skb-based RX buffers - * - * @rx_queue: Efx RX queue - * - * This allocates EFX_RX_BATCH skbs, maps them for DMA, and populates a - * struct efx_rx_buffer for each one. Return a negative error code or 0 - * on success. May fail having only inserted fewer than EFX_RX_BATCH - * buffers. 
- */ -static int efx_init_rx_buffers_skb(struct efx_rx_queue *rx_queue) +static inline struct efx_rx_buffer * +efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) +{ + if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) + return efx_rx_buffer(rx_queue, 0); + else + return rx_buf + 1; +} + +static inline void efx_sync_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf, + unsigned int len) +{ + dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len, + DMA_FROM_DEVICE); +} + +void efx_rx_config_page_split(struct efx_nic *efx) +{ + efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + EFX_PAGE_IP_ALIGN, + L1_CACHE_BYTES); + efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : + ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / + efx->rx_page_buf_step); + efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / + efx->rx_bufs_per_page; + efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, + efx->rx_bufs_per_page); +} + +/* Check the RX page recycle ring for a page that can be reused. 
*/ +static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; - struct net_device *net_dev = efx->net_dev; - struct efx_rx_buffer *rx_buf; - struct sk_buff *skb; - int skb_len = efx->rx_buffer_len; - unsigned index, count; + struct page *page; + struct efx_rx_page_state *state; + unsigned index; - for (count = 0; count < EFX_RX_BATCH; ++count) { - index = rx_queue->added_count & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - - rx_buf->u.skb = skb = netdev_alloc_skb(net_dev, skb_len); - if (unlikely(!skb)) - return -ENOMEM; - - /* Adjust the SKB for padding */ - skb_reserve(skb, NET_IP_ALIGN); - rx_buf->len = skb_len - NET_IP_ALIGN; - rx_buf->flags = 0; - - rx_buf->dma_addr = dma_map_single(&efx->pci_dev->dev, - skb->data, rx_buf->len, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, - rx_buf->dma_addr))) { - dev_kfree_skb_any(skb); - rx_buf->u.skb = NULL; - return -EIO; - } + index = rx_queue->page_remove & rx_queue->page_ptr_mask; + page = rx_queue->page_ring[index]; + if (page == NULL) + return NULL; + + rx_queue->page_ring[index] = NULL; + /* page_remove cannot exceed page_add. */ + if (rx_queue->page_remove != rx_queue->page_add) + ++rx_queue->page_remove; - ++rx_queue->added_count; - ++rx_queue->alloc_skb_count; + /* If page_count is 1 then we hold the only reference to this page. 
*/ + if (page_count(page) == 1) { + ++rx_queue->page_recycle_count; + return page; + } else { + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + ++rx_queue->page_recycle_failed; } - return 0; + return NULL; } /** - * efx_init_rx_buffers_page - create EFX_RX_BATCH page-based RX buffers + * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers * * @rx_queue: Efx RX queue * - * This allocates memory for EFX_RX_BATCH receive buffers, maps them for DMA, - * and populates struct efx_rx_buffers for each one. Return a negative error - * code or 0 on success. If a single page can be split between two buffers, - * then the page will either be inserted fully, or not at at all. + * This allocates a batch of pages, maps them for DMA, and populates + * struct efx_rx_buffers for each one. Return a negative error code or + * 0 on success. If a single page can be used for multiple buffers, + * then the page will either be inserted fully, or not at all. 
*/ -static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) +static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; struct efx_rx_buffer *rx_buf; @@ -188,150 +158,140 @@ static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue) dma_addr_t dma_addr; unsigned index, count; - /* We can split a page between two buffers */ - BUILD_BUG_ON(EFX_RX_BATCH & 1); - - for (count = 0; count < EFX_RX_BATCH; ++count) { - page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, - efx->rx_buffer_order); - if (unlikely(page == NULL)) - return -ENOMEM; - dma_addr = dma_map_page(&efx->pci_dev->dev, page, 0, - efx_rx_buf_size(efx), - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, dma_addr))) { - __free_pages(page, efx->rx_buffer_order); - return -EIO; + count = 0; + do { + page = efx_reuse_page(rx_queue); + if (page == NULL) { + page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, + efx->rx_buffer_order); + if (unlikely(page == NULL)) + return -ENOMEM; + dma_addr = + dma_map_page(&efx->pci_dev->dev, page, 0, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&efx->pci_dev->dev, + dma_addr))) { + __free_pages(page, efx->rx_buffer_order); + return -EIO; + } + state = page_address(page); + state->dma_addr = dma_addr; + } else { + state = page_address(page); + dma_addr = state->dma_addr; } - state = page_address(page); - state->refcnt = 0; - state->dma_addr = dma_addr; dma_addr += sizeof(struct efx_rx_page_state); page_offset = sizeof(struct efx_rx_page_state); - split: - index = rx_queue->added_count & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN; - rx_buf->u.page = page; - rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN; - rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; - rx_buf->flags = EFX_RX_BUF_PAGE; - ++rx_queue->added_count; - ++rx_queue->alloc_page_count; - 
++state->refcnt; - - if ((~count & 1) && (efx->rx_buffer_len <= EFX_RX_HALF_PAGE)) { - /* Use the second half of the page */ + do { + index = rx_queue->added_count & rx_queue->ptr_mask; + rx_buf = efx_rx_buffer(rx_queue, index); + rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN; + rx_buf->page = page; + rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN; + rx_buf->len = efx->rx_dma_len; + rx_buf->flags = 0; + ++rx_queue->added_count; get_page(page); - dma_addr += (PAGE_SIZE >> 1); - page_offset += (PAGE_SIZE >> 1); - ++count; - goto split; - } - } + dma_addr += efx->rx_page_buf_step; + page_offset += efx->rx_page_buf_step; + } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); + + rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; + } while (++count < efx->rx_pages_per_batch); return 0; } +/* Unmap a DMA-mapped page. This function is only called for the final RX + * buffer in a page. + */ static void efx_unmap_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf, - unsigned int used_len) + struct efx_rx_buffer *rx_buf) { - if ((rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.page) { - struct efx_rx_page_state *state; - - state = page_address(rx_buf->u.page); - if (--state->refcnt == 0) { - dma_unmap_page(&efx->pci_dev->dev, - state->dma_addr, - efx_rx_buf_size(efx), - DMA_FROM_DEVICE); - } else if (used_len) { - dma_sync_single_for_cpu(&efx->pci_dev->dev, - rx_buf->dma_addr, used_len, - DMA_FROM_DEVICE); - } - } else if (!(rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.skb) { - dma_unmap_single(&efx->pci_dev->dev, rx_buf->dma_addr, - rx_buf->len, DMA_FROM_DEVICE); + struct page *page = rx_buf->page; + + if (page) { + struct efx_rx_page_state *state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, + state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); } } -static void efx_free_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf) +static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf) { - if ((rx_buf->flags & 
EFX_RX_BUF_PAGE) && rx_buf->u.page) { - __free_pages(rx_buf->u.page, efx->rx_buffer_order); - rx_buf->u.page = NULL; - } else if (!(rx_buf->flags & EFX_RX_BUF_PAGE) && rx_buf->u.skb) { - dev_kfree_skb_any(rx_buf->u.skb); - rx_buf->u.skb = NULL; + if (rx_buf->page) { + put_page(rx_buf->page); + rx_buf->page = NULL; } } -static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf) +/* Attempt to recycle the page if there is an RX recycle ring; the page can + * only be added if this is the final RX buffer, to prevent pages being used in + * the descriptor ring and appearing in the recycle ring simultaneously. + */ +static void efx_recycle_rx_page(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) { - efx_unmap_rx_buffer(rx_queue->efx, rx_buf, 0); - efx_free_rx_buffer(rx_queue->efx, rx_buf); -} + struct page *page = rx_buf->page; + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + struct efx_nic *efx = rx_queue->efx; + unsigned index; -/* Attempt to resurrect the other receive buffer that used to share this page, - * which had previously been passed up to the kernel and freed. */ -static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf) -{ - struct efx_rx_page_state *state = page_address(rx_buf->u.page); - struct efx_rx_buffer *new_buf; - unsigned fill_level, index; - - /* +1 because efx_rx_packet() incremented removed_count. +1 because - * we'd like to insert an additional descriptor whilst leaving - * EFX_RXD_HEAD_ROOM for the non-recycle path */ - fill_level = (rx_queue->added_count - rx_queue->removed_count + 2); - if (unlikely(fill_level > rx_queue->max_fill)) { - /* We could place "state" on a list, and drain the list in - * efx_fast_push_rx_descriptors(). For now, this will do. */ + /* Only recycle the page after processing the final buffer. 
*/ + if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) return; - } - ++state->refcnt; - get_page(rx_buf->u.page); + index = rx_queue->page_add & rx_queue->page_ptr_mask; + if (rx_queue->page_ring[index] == NULL) { + unsigned read_index = rx_queue->page_remove & + rx_queue->page_ptr_mask; - index = rx_queue->added_count & rx_queue->ptr_mask; - new_buf = efx_rx_buffer(rx_queue, index); - new_buf->dma_addr = rx_buf->dma_addr ^ (PAGE_SIZE >> 1); - new_buf->u.page = rx_buf->u.page; - new_buf->len = rx_buf->len; - new_buf->flags = EFX_RX_BUF_PAGE; - ++rx_queue->added_count; + /* The next slot in the recycle ring is available, but + * increment page_remove if the read pointer currently + * points here. + */ + if (read_index == index) + ++rx_queue->page_remove; + rx_queue->page_ring[index] = page; + ++rx_queue->page_add; + return; + } + ++rx_queue->page_recycle_full; + efx_unmap_rx_buffer(efx, rx_buf); + put_page(rx_buf->page); } -/* Recycle the given rx buffer directly back into the rx_queue. There is - * always room to add this buffer, because we've just popped a buffer. */ -static void efx_recycle_rx_buffer(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf) +static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) { - struct efx_nic *efx = channel->efx; - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - struct efx_rx_buffer *new_buf; - unsigned index; - - rx_buf->flags &= EFX_RX_BUF_PAGE; - - if ((rx_buf->flags & EFX_RX_BUF_PAGE) && - efx->rx_buffer_len <= EFX_RX_HALF_PAGE && - page_count(rx_buf->u.page) == 1) - efx_resurrect_rx_buffer(rx_queue, rx_buf); + /* Release the page reference we hold for the buffer. */ + if (rx_buf->page) + put_page(rx_buf->page); + + /* If this is the last buffer in a page, unmap and free it. 
*/ + if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { + efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_free_rx_buffer(rx_buf); + } + rx_buf->page = NULL; +} - index = rx_queue->added_count & rx_queue->ptr_mask; - new_buf = efx_rx_buffer(rx_queue, index); +/* Recycle the pages that are used by buffers that have just been received. */ +static void efx_recycle_rx_buffers(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - memcpy(new_buf, rx_buf, sizeof(*new_buf)); - rx_buf->u.page = NULL; - ++rx_queue->added_count; + do { + efx_recycle_rx_page(channel, rx_buf); + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); } /** @@ -348,8 +308,8 @@ static void efx_recycle_rx_buffer(struct efx_channel *channel, */ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) { - struct efx_channel *channel = efx_rx_queue_channel(rx_queue); - unsigned fill_level; + struct efx_nic *efx = rx_queue->efx; + unsigned int fill_level, batch_size; int space, rc = 0; /* Calculate current fill level, and exit if we don't need to fill */ @@ -364,28 +324,26 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) rx_queue->min_fill = fill_level; } + batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; space = rx_queue->max_fill - fill_level; - EFX_BUG_ON_PARANOID(space < EFX_RX_BATCH); + EFX_BUG_ON_PARANOID(space < batch_size); netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, "RX queue %d fast-filling descriptor ring from" - " level %d to level %d using %s allocation\n", + " level %d to level %d\n", efx_rx_queue_index(rx_queue), fill_level, - rx_queue->max_fill, - channel->rx_alloc_push_pages ? 
"page" : "skb"); + rx_queue->max_fill); + do { - if (channel->rx_alloc_push_pages) - rc = efx_init_rx_buffers_page(rx_queue); - else - rc = efx_init_rx_buffers_skb(rx_queue); + rc = efx_init_rx_buffers(rx_queue); if (unlikely(rc)) { /* Ensure that we don't leave the rx queue empty */ if (rx_queue->added_count == rx_queue->removed_count) efx_schedule_slow_fill(rx_queue); goto out; } - } while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH); + } while ((space -= batch_size) >= batch_size); netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, "RX queue %d fast-filled descriptor ring " @@ -408,7 +366,7 @@ void efx_rx_slow_fill(unsigned long context) static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf, - int len, bool *leak_packet) + int len) { struct efx_nic *efx = rx_queue->efx; unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; @@ -428,11 +386,6 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, "RX event (0x%x > 0x%x+0x%x). Leaking\n", efx_rx_queue_index(rx_queue), len, max_len, efx->type->rx_buffer_padding); - /* If this buffer was skb-allocated, then the meta - * data at the end of the skb will be trashed. So - * we have no choice but to leak the fragment. - */ - *leak_packet = !(rx_buf->flags & EFX_RX_BUF_PAGE); efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY); } else { if (net_ratelimit()) @@ -448,212 +401,238 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, /* Pass a received packet up through GRO. GRO can handle pages * regardless of checksum state and skbs with a good checksum. 
*/ -static void efx_rx_packet_gro(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - const u8 *eh) +static void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh) { struct napi_struct *napi = &channel->napi_str; gro_result_t gro_result; + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; - if (rx_buf->flags & EFX_RX_BUF_PAGE) { - struct efx_nic *efx = channel->efx; - struct page *page = rx_buf->u.page; - struct sk_buff *skb; + skb = napi_get_frags(napi); + if (unlikely(!skb)) { + while (n_frags--) { + put_page(rx_buf->page); + rx_buf->page = NULL; + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + return; + } - rx_buf->u.page = NULL; + if (efx->net_dev->features & NETIF_F_RXHASH) + skb->rxhash = efx_rx_buf_hash(eh); + skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; + + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } - skb = napi_get_frags(napi); - if (!skb) { - put_page(page); - return; - } + skb->data_len = skb->len; + skb->truesize += n_frags * efx->rx_buffer_truesize; + + skb_record_rx_queue(skb, channel->rx_queue.core_index); + + gro_result = napi_gro_frags(napi); + if (gro_result != GRO_DROP) + channel->irq_mod_score += 2; +} - if (efx->net_dev->features & NETIF_F_RXHASH) - skb->rxhash = efx_rx_buf_hash(eh); +/* Allocate and construct an SKB around page fragments */ +static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags, + u8 *eh, int hdr_len) +{ + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; - skb_fill_page_desc(skb, 0, page, - efx_rx_buf_offset(efx, rx_buf), rx_buf->len); + /* Allocate an SKB to store the 
headers */ + skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN); + if (unlikely(skb == NULL)) + return NULL; - skb->len = rx_buf->len; - skb->data_len = rx_buf->len; - skb->truesize += rx_buf->len; - skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? - CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); - skb_record_rx_queue(skb, channel->rx_queue.core_index); + skb_reserve(skb, EFX_PAGE_SKB_ALIGN); + memcpy(__skb_put(skb, hdr_len), eh, hdr_len); - gro_result = napi_gro_frags(napi); - } else { - struct sk_buff *skb = rx_buf->u.skb; + /* Append the remaining page(s) onto the frag list */ + if (rx_buf->len > hdr_len) { + rx_buf->page_offset += hdr_len; + rx_buf->len -= hdr_len; - EFX_BUG_ON_PARANOID(!(rx_buf->flags & EFX_RX_PKT_CSUMMED)); - rx_buf->u.skb = NULL; - skb->ip_summed = CHECKSUM_UNNECESSARY; + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + skb->data_len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; - gro_result = napi_gro_receive(napi, skb); + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + } else { + __free_pages(rx_buf->page, efx->rx_buffer_order); + rx_buf->page = NULL; + n_frags = 0; } - if (gro_result == GRO_NORMAL) { - channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; - } else if (gro_result != GRO_DROP) { - channel->rx_alloc_level += RX_ALLOC_FACTOR_GRO; - channel->irq_mod_score += 2; - } + skb->truesize += n_frags * efx->rx_buffer_truesize; + + /* Move past the ethernet header */ + skb->protocol = eth_type_trans(skb, efx->net_dev); + + return skb; } void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, - unsigned int len, u16 flags) + unsigned int n_frags, unsigned int len, u16 flags) { struct efx_nic *efx = rx_queue->efx; struct efx_channel *channel = efx_rx_queue_channel(rx_queue); struct efx_rx_buffer *rx_buf; - bool 
leak_packet = false; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->flags |= flags; - /* This allows the refill path to post another buffer. - * EFX_RXD_HEAD_ROOM ensures that the slot we are using - * isn't overwritten yet. - */ - rx_queue->removed_count++; - - /* Validate the length encoded in the event vs the descriptor pushed */ - efx_rx_packet__check_len(rx_queue, rx_buf, len, &leak_packet); + /* Validate the number of fragments and completed length */ + if (n_frags == 1) { + efx_rx_packet__check_len(rx_queue, rx_buf, len); + } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) || + unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) || + unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) || + unlikely(!efx->rx_scatter)) { + /* If this isn't an explicit discard request, either + * the hardware or the driver is broken. + */ + WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD)); + rx_buf->flags |= EFX_RX_PKT_DISCARD; + } netif_vdbg(efx, rx_status, efx->net_dev, - "RX queue %d received id %x at %llx+%x %s%s\n", + "RX queue %d received ids %x-%x len %d %s%s\n", efx_rx_queue_index(rx_queue), index, - (unsigned long long)rx_buf->dma_addr, len, + (index + n_frags - 1) & rx_queue->ptr_mask, len, (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "", (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : ""); - /* Discard packet, if instructed to do so */ + /* Discard packet, if instructed to do so. Process the + * previous receive first. 
+ */ if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { - if (unlikely(leak_packet)) - channel->n_skbuff_leaks++; - else - efx_recycle_rx_buffer(channel, rx_buf); - - /* Don't hold off the previous receive */ - rx_buf = NULL; - goto out; + efx_rx_flush_packet(channel); + put_page(rx_buf->page); + efx_recycle_rx_buffers(channel, rx_buf, n_frags); + return; } - /* Release and/or sync DMA mapping - assumes all RX buffers - * consumed in-order per RX queue + if (n_frags == 1) + rx_buf->len = len; + + /* Release and/or sync the DMA mapping - assumes all RX buffers + * consumed in-order per RX queue. */ - efx_unmap_rx_buffer(efx, rx_buf, len); + efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); /* Prefetch nice and early so data will (hopefully) be in cache by * the time we look at it. */ - prefetch(efx_rx_buf_eh(efx, rx_buf)); + prefetch(efx_rx_buf_va(rx_buf)); + + rx_buf->page_offset += efx->type->rx_buffer_hash_size; + rx_buf->len -= efx->type->rx_buffer_hash_size; + + if (n_frags > 1) { + /* Release/sync DMA mapping for additional fragments. + * Fix length for last fragment. + */ + unsigned int tail_frags = n_frags - 1; + + for (;;) { + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + if (--tail_frags == 0) + break; + efx_sync_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE); + } + rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE; + efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); + } + + /* All fragments have been DMA-synced, so recycle buffers and pages. */ + rx_buf = efx_rx_buffer(rx_queue, index); + efx_recycle_rx_buffers(channel, rx_buf, n_frags); /* Pipeline receives so that we give time for packet headers to be * prefetched into cache. 
*/ - rx_buf->len = len - efx->type->rx_buffer_hash_size; -out: - if (channel->rx_pkt) - __efx_rx_packet(channel, channel->rx_pkt); - channel->rx_pkt = rx_buf; + efx_rx_flush_packet(channel); + channel->rx_pkt_n_frags = n_frags; + channel->rx_pkt_index = index; } -static void efx_rx_deliver(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf) +static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) { struct sk_buff *skb; + u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); - /* We now own the SKB */ - skb = rx_buf->u.skb; - rx_buf->u.skb = NULL; + skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); + if (unlikely(skb == NULL)) { + efx_free_rx_buffer(rx_buf); + return; + } + skb_record_rx_queue(skb, channel->rx_queue.core_index); /* Set the SKB flags */ skb_checksum_none_assert(skb); - /* Record the rx_queue */ - skb_record_rx_queue(skb, channel->rx_queue.core_index); - - /* Pass the packet up */ if (channel->type->receive_skb) - channel->type->receive_skb(channel, skb); - else - netif_receive_skb(skb); + if (channel->type->receive_skb(channel, skb)) + return; - /* Update allocation strategy method */ - channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; + /* Pass the packet up */ + netif_receive_skb(skb); } /* Handle a received packet. Second half: Touches packet payload. 
*/ -void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) +void __efx_rx_packet(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; - u8 *eh = efx_rx_buf_eh(efx, rx_buf); + struct efx_rx_buffer *rx_buf = + efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index); + u8 *eh = efx_rx_buf_va(rx_buf); /* If we're in loopback test, then pass the packet directly to the * loopback layer, and free the rx_buf here */ if (unlikely(efx->loopback_selftest)) { efx_loopback_rx_packet(efx, eh, rx_buf->len); - efx_free_rx_buffer(efx, rx_buf); - return; - } - - if (!(rx_buf->flags & EFX_RX_BUF_PAGE)) { - struct sk_buff *skb = rx_buf->u.skb; - - prefetch(skb_shinfo(skb)); - - skb_reserve(skb, efx->type->rx_buffer_hash_size); - skb_put(skb, rx_buf->len); - - if (efx->net_dev->features & NETIF_F_RXHASH) - skb->rxhash = efx_rx_buf_hash(eh); - - /* Move past the ethernet header. rx_buf->data still points - * at the ethernet header */ - skb->protocol = eth_type_trans(skb, efx->net_dev); - - skb_record_rx_queue(skb, channel->rx_queue.core_index); + efx_free_rx_buffer(rx_buf); + goto out; } if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; - if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED)) && - !channel->type->receive_skb) - efx_rx_packet_gro(channel, rx_buf, eh); + if (!channel->type->receive_skb) + efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh); else - efx_rx_deliver(channel, rx_buf); -} - -void efx_rx_strategy(struct efx_channel *channel) -{ - enum efx_rx_alloc_method method = rx_alloc_method; - - if (channel->type->receive_skb) { - channel->rx_alloc_push_pages = false; - return; - } - - /* Only makes sense to use page based allocation if GRO is enabled */ - if (!(channel->efx->net_dev->features & NETIF_F_GRO)) { - method = RX_ALLOC_METHOD_SKB; - } else if (method == RX_ALLOC_METHOD_AUTO) { - /* Constrain the rx_alloc_level */ - if (channel->rx_alloc_level < 0) - 
channel->rx_alloc_level = 0; - else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX) - channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX; - - /* Decide on the allocation method */ - method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_GRO) ? - RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB); - } - - /* Push the option */ - channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE); + efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags); +out: + channel->rx_pkt_n_frags = 0; } int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) @@ -683,9 +662,32 @@ int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) kfree(rx_queue->buffer); rx_queue->buffer = NULL; } + return rc; } +static void efx_init_rx_recycle_ring(struct efx_nic *efx, + struct efx_rx_queue *rx_queue) +{ + unsigned int bufs_in_recycle_ring, page_ring_size; + + /* Set the RX recycle ring size */ +#ifdef CONFIG_PPC64 + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; +#else + if (efx->pci_dev->dev.iommu_group) + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; + else + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; +#endif /* CONFIG_PPC64 */ + + page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / + efx->rx_bufs_per_page); + rx_queue->page_ring = kcalloc(page_ring_size, + sizeof(*rx_queue->page_ring), GFP_KERNEL); + rx_queue->page_ptr_mask = page_ring_size - 1; +} + void efx_init_rx_queue(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; @@ -699,10 +701,18 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) rx_queue->notified_count = 0; rx_queue->removed_count = 0; rx_queue->min_fill = -1U; + efx_init_rx_recycle_ring(efx, rx_queue); + + rx_queue->page_remove = 0; + rx_queue->page_add = rx_queue->page_ptr_mask + 1; + rx_queue->page_recycle_count = 0; + rx_queue->page_recycle_failed = 0; + rx_queue->page_recycle_full = 0; /* Initialise limit fields */ max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; - max_trigger = max_fill - EFX_RX_BATCH; + max_trigger = + max_fill 
- efx->rx_pages_per_batch * efx->rx_bufs_per_page; if (rx_refill_threshold != 0) { trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; if (trigger > max_trigger) @@ -722,6 +732,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) { int i; + struct efx_nic *efx = rx_queue->efx; struct efx_rx_buffer *rx_buf; netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, @@ -733,13 +744,32 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) del_timer_sync(&rx_queue->slow_fill); efx_nic_fini_rx(rx_queue); - /* Release RX buffers NB start at index 0 not current HW ptr */ + /* Release RX buffers from the current read ptr to the write ptr */ if (rx_queue->buffer) { - for (i = 0; i <= rx_queue->ptr_mask; i++) { - rx_buf = efx_rx_buffer(rx_queue, i); + for (i = rx_queue->removed_count; i < rx_queue->added_count; + i++) { + unsigned index = i & rx_queue->ptr_mask; + rx_buf = efx_rx_buffer(rx_queue, index); efx_fini_rx_buffer(rx_queue, rx_buf); } } + + /* Unmap and release the pages in the recycle ring. Remove the ring. 
*/ + for (i = 0; i <= rx_queue->page_ptr_mask; i++) { + struct page *page = rx_queue->page_ring[i]; + struct efx_rx_page_state *state; + + if (page == NULL) + continue; + + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + } + kfree(rx_queue->page_ring); + rx_queue->page_ring = NULL; } void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) @@ -754,9 +784,6 @@ void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) } -module_param(rx_alloc_method, int, 0644); -MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers"); - module_param(rx_refill_threshold, uint, 0444); MODULE_PARM_DESC(rx_refill_threshold, "RX descriptor ring refill threshold (%)"); diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index ba40f67e4f05..51669244d154 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -202,7 +202,7 @@ out: static enum reset_type siena_map_reset_reason(enum reset_type reason) { - return RESET_TYPE_ALL; + return RESET_TYPE_RECOVER_OR_ALL; } static int siena_map_reset_flags(u32 *flags) @@ -245,6 +245,22 @@ static int siena_reset_hw(struct efx_nic *efx, enum reset_type method) return efx_mcdi_reset_port(efx); } +#ifdef CONFIG_EEH +/* When a PCI device is isolated from the bus, a subsequent MMIO read is + * required for the kernel EEH mechanisms to notice. As the Solarflare driver + * was written to minimise MMIO read (for latency) then a periodic call to check + * the EEH status of the device is required so that device recovery can happen + * in a timely fashion. 
+ */ +static void siena_monitor(struct efx_nic *efx) +{ + struct eeh_dev *eehdev = + of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev)); + + eeh_dev_check_failure(eehdev); +} +#endif + static int siena_probe_nvconfig(struct efx_nic *efx) { u32 caps = 0; @@ -398,6 +414,8 @@ static int siena_init_nic(struct efx_nic *efx) EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1); EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1); EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1); + EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_USR_BUF_SIZE, + EFX_RX_USR_BUF_SIZE >> 5); efx_writeo(efx, &temp, FR_AZ_RX_CFG); /* Set hash key for IPv4 */ @@ -665,7 +683,11 @@ const struct efx_nic_type siena_a0_nic_type = { .init = siena_init_nic, .dimension_resources = siena_dimension_resources, .fini = efx_port_dummy_op_void, +#ifdef CONFIG_EEH + .monitor = siena_monitor, +#else .monitor = NULL, +#endif .map_reset_reason = siena_map_reset_reason, .map_reset_flags = siena_map_reset_flags, .reset = siena_reset_hw, @@ -698,6 +720,7 @@ const struct efx_nic_type siena_a0_nic_type = { .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH), .rx_buffer_hash_size = 0x10, .rx_buffer_padding = 0, + .can_rx_scatter = true, .max_interrupt_mode = EFX_INT_MODE_MSIX, .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy * interrupt handler only supports 32 diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 79ad9c94a21b..4bdbaad9932d 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -213,10 +213,11 @@ static int meth_init_tx_ring(struct meth_private *priv) { /* Init TX ring */ priv->tx_ring = dma_alloc_coherent(NULL, TX_RING_BUFFER_SIZE, - &priv->tx_ring_dma, GFP_ATOMIC); + &priv->tx_ring_dma, + GFP_ATOMIC | __GFP_ZERO); if (!priv->tx_ring) return -ENOMEM; - memset(priv->tx_ring, 0, TX_RING_BUFFER_SIZE); + priv->tx_count = priv->tx_read = priv->tx_write = 0; mace->eth.tx_ring_base = priv->tx_ring_dma; /* Now init skb save area */ 
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index efca14eaefa9..eb4aea3fe793 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1187,8 +1187,14 @@ sis900_init_rx_ring(struct net_device *net_dev) } sis_priv->rx_skbuff[i] = skb; sis_priv->rx_ring[i].cmdsts = RX_BUF_SIZE; - sis_priv->rx_ring[i].bufptr = pci_map_single(sis_priv->pci_dev, - skb->data, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); + sis_priv->rx_ring[i].bufptr = pci_map_single(sis_priv->pci_dev, + skb->data, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(sis_priv->pci_dev, + sis_priv->rx_ring[i].bufptr))) { + dev_kfree_skb(skb); + sis_priv->rx_skbuff[i] = NULL; + break; + } } sis_priv->dirty_rx = (unsigned int) (i - NUM_RX_DESC); @@ -1621,6 +1627,14 @@ sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev) /* set the transmit buffer descriptor and enable Transmit State Machine */ sis_priv->tx_ring[entry].bufptr = pci_map_single(sis_priv->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE); + if (unlikely(pci_dma_mapping_error(sis_priv->pci_dev, + sis_priv->tx_ring[entry].bufptr))) { + dev_kfree_skb(skb); + sis_priv->tx_skbuff[entry] = NULL; + net_dev->stats.tx_dropped++; + spin_unlock_irqrestore(&sis_priv->lock, flags); + return NETDEV_TX_OK; + } sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len); sw32(cr, TxENA | sr32(cr)); @@ -1824,9 +1838,15 @@ static int sis900_rx(struct net_device *net_dev) refill_rx_ring: sis_priv->rx_skbuff[entry] = skb; sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE; - sis_priv->rx_ring[entry].bufptr = + sis_priv->rx_ring[entry].bufptr = pci_map_single(sis_priv->pci_dev, skb->data, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(sis_priv->pci_dev, + sis_priv->rx_ring[entry].bufptr))) { + dev_kfree_skb_irq(skb); + sis_priv->rx_skbuff[entry] = NULL; + break; + } } sis_priv->cur_rx++; entry = sis_priv->cur_rx % NUM_RX_DESC; @@ -1841,23 +1861,26 @@ refill_rx_ring: 
entry = sis_priv->dirty_rx % NUM_RX_DESC; if (sis_priv->rx_skbuff[entry] == NULL) { - if ((skb = netdev_alloc_skb(net_dev, RX_BUF_SIZE)) == NULL) { + skb = netdev_alloc_skb(net_dev, RX_BUF_SIZE); + if (skb == NULL) { /* not enough memory for skbuff, this makes a * "hole" on the buffer ring, it is not clear * how the hardware will react to this kind * of degenerated buffer */ - if (netif_msg_rx_err(sis_priv)) - printk(KERN_INFO "%s: Memory squeeze, " - "deferring packet.\n", - net_dev->name); net_dev->stats.rx_dropped++; break; } sis_priv->rx_skbuff[entry] = skb; sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE; - sis_priv->rx_ring[entry].bufptr = + sis_priv->rx_ring[entry].bufptr = pci_map_single(sis_priv->pci_dev, skb->data, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(sis_priv->pci_dev, + sis_priv->rx_ring[entry].bufptr))) { + dev_kfree_skb_irq(skb); + sis_priv->rx_skbuff[entry] = NULL; + break; + } } } /* re-enable the potentially idle receive state matchine */ diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index 50823da9dc1e..e85c2e7e8246 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -1223,9 +1223,7 @@ static void smc_rcv(struct net_device *dev) dev->stats.multicast++; skb = netdev_alloc_skb(dev, packet_length + 5); - if ( skb == NULL ) { - printk(KERN_NOTICE CARDNAME ": Low memory, packet dropped.\n"); dev->stats.rx_dropped++; goto done; } diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 591650a8de38..dfbf978315df 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -465,8 +465,6 @@ static inline void smc_rcv(struct net_device *dev) */ skb = netdev_alloc_skb(dev, packet_len); if (unlikely(skb == NULL)) { - printk(KERN_NOTICE "%s: Low memory, packet dropped.\n", - dev->name); SMC_WAIT_MMU_BUSY(lp); SMC_SET_MMU_CMD(lp, MC_RELEASE); dev->stats.rx_dropped++; diff --git 
a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index da5cc9a3b34c..48e2b99bec51 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2115,7 +2115,7 @@ static int smsc911x_init(struct net_device *dev) spin_lock_init(&pdata->dev_lock); spin_lock_init(&pdata->mac_lock); - if (pdata->ioaddr == 0) { + if (pdata->ioaddr == NULL) { SMSC_WARN(pdata, probe, "pdata->ioaddr: 0x00000000"); return -ENODEV; } diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index d457fa2d7509..ffa5c4ad1210 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -848,10 +848,8 @@ static int smsc9420_alloc_rx_buffer(struct smsc9420_pdata *pd, int index) BUG_ON(pd->rx_buffers[index].skb); BUG_ON(pd->rx_buffers[index].mapping); - if (unlikely(!skb)) { - smsc_warn(RX_ERR, "Failed to allocate new skb!"); + if (unlikely(!skb)) return -ENOMEM; - } mapping = pci_map_single(pd->pdev, skb_tail_pointer(skb), PKT_BUF_SZ, PCI_DMA_FROMDEVICE); diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index c0ea838c78d1..f695a50bac47 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -5,6 +5,7 @@ config STMMAC_ETH select MII select PHYLIB select CRC32 + select PTP_1588_CLOCK ---help--- This is the driver for the Ethernet IPs are built around a Synopsys IP Core and only tested on the STMicroelectronics @@ -54,22 +55,4 @@ config STMMAC_DA By default, the DMA arbitration scheme is based on Round-robin (rx:tx priority is 1:1). -choice - prompt "Select the DMA TX/RX descriptor operating modes" - depends on STMMAC_ETH - ---help--- - This driver supports DMA descriptor to operate both in dual buffer - (RING) and linked-list(CHAINED) mode. 
In RING mode each descriptor - points to two data buffer pointers whereas in CHAINED mode they - points to only one data buffer pointer. - -config STMMAC_RING - bool "Enable Descriptor Ring Mode" - -config STMMAC_CHAINED - bool "Enable Descriptor Chained Mode" - -endchoice - - endif diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index c8e8ea60ac19..356a9dd32be7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -1,9 +1,7 @@ obj-$(CONFIG_STMMAC_ETH) += stmmac.o -stmmac-$(CONFIG_STMMAC_RING) += ring_mode.o -stmmac-$(CONFIG_STMMAC_CHAINED) += chain_mode.o stmmac-$(CONFIG_STMMAC_PLATFORM) += stmmac_platform.o stmmac-$(CONFIG_STMMAC_PCI) += stmmac_pci.o -stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o \ - dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \ +stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ + chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \ dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \ - mmc_core.o $(stmmac-y) + mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o $(stmmac-y) diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index 0668659803ed..d234ab540b29 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -28,9 +28,9 @@ #include "stmmac.h" -unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) +static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) { - struct stmmac_priv *priv = (struct stmmac_priv *) p; + struct stmmac_priv *priv = (struct stmmac_priv *)p; unsigned int txsize = priv->dma_tx_size; unsigned int entry = priv->cur_tx % txsize; struct dma_desc *desc = priv->dma_tx + entry; @@ -47,7 +47,8 @@ unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des2 = dma_map_single(priv->device, skb->data, bmax, 
DMA_TO_DEVICE); - priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum); + priv->tx_skbuff_dma[entry] = desc->des2; + priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE); while (len != 0) { entry = (++priv->cur_tx) % txsize; @@ -57,8 +58,9 @@ unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des2 = dma_map_single(priv->device, (skb->data + bmax * i), bmax, DMA_TO_DEVICE); - priv->hw->desc->prepare_tx_desc(desc, 0, bmax, - csum); + priv->tx_skbuff_dma[entry] = desc->des2; + priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum, + STMMAC_CHAIN_MODE); priv->hw->desc->set_tx_owner(desc); priv->tx_skbuff[entry] = NULL; len -= bmax; @@ -67,8 +69,9 @@ unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des2 = dma_map_single(priv->device, (skb->data + bmax * i), len, DMA_TO_DEVICE); - priv->hw->desc->prepare_tx_desc(desc, 0, len, - csum); + priv->tx_skbuff_dma[entry] = desc->des2; + priv->hw->desc->prepare_tx_desc(desc, 0, len, csum, + STMMAC_CHAIN_MODE); priv->hw->desc->set_tx_owner(desc); priv->tx_skbuff[entry] = NULL; len = 0; @@ -89,49 +92,70 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc) return ret; } -static void stmmac_refill_desc3(int bfsize, struct dma_desc *p) -{ -} - -static void stmmac_init_desc3(int des3_as_data_buf, struct dma_desc *p) -{ -} - -static void stmmac_clean_desc3(struct dma_desc *p) -{ -} - -static void stmmac_init_dma_chain(struct dma_desc *des, dma_addr_t phy_addr, - unsigned int size) +static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr, + unsigned int size, unsigned int extend_desc) { /* * In chained mode the des3 points to the next element in the ring. * The latest element has to point to the head. 
*/ int i; - struct dma_desc *p = des; dma_addr_t dma_phy = phy_addr; - for (i = 0; i < (size - 1); i++) { - dma_phy += sizeof(struct dma_desc); - p->des3 = (unsigned int)dma_phy; - p++; + if (extend_desc) { + struct dma_extended_desc *p = (struct dma_extended_desc *)des; + for (i = 0; i < (size - 1); i++) { + dma_phy += sizeof(struct dma_extended_desc); + p->basic.des3 = (unsigned int)dma_phy; + p++; + } + p->basic.des3 = (unsigned int)phy_addr; + + } else { + struct dma_desc *p = (struct dma_desc *)des; + for (i = 0; i < (size - 1); i++) { + dma_phy += sizeof(struct dma_desc); + p->des3 = (unsigned int)dma_phy; + p++; + } + p->des3 = (unsigned int)phy_addr; } - p->des3 = (unsigned int)phy_addr; } -static int stmmac_set_16kib_bfsize(int mtu) +static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) +{ + struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; + + if (priv->hwts_rx_en && !priv->extend_desc) + /* NOTE: Device will overwrite des3 with timestamp value if + * 1588-2002 time stamping is enabled, hence reinitialize it + * to keep explicit chaining in the descriptor. + */ + p->des3 = (unsigned int)(priv->dma_rx_phy + + (((priv->dirty_rx) + 1) % + priv->dma_rx_size) * + sizeof(struct dma_desc)); +} + +static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) { - /* Not supported */ - return 0; + struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; + + if (priv->hw->desc->get_tx_ls(p) && !priv->extend_desc) + /* NOTE: Device will overwrite des3 with timestamp value if + * 1588-2002 time stamping is enabled, hence reinitialize it + * to keep explicit chaining in the descriptor. 
+ */ + p->des3 = (unsigned int)(priv->dma_tx_phy + + (((priv->dirty_tx + 1) % + priv->dma_tx_size) * + sizeof(struct dma_desc))); } -const struct stmmac_ring_mode_ops ring_mode_ops = { +const struct stmmac_chain_mode_ops chain_mode_ops = { + .init = stmmac_init_dma_chain, .is_jumbo_frm = stmmac_is_jumbo_frm, .jumbo_frm = stmmac_jumbo_frm, .refill_desc3 = stmmac_refill_desc3, - .init_desc3 = stmmac_init_desc3, - .init_dma_chain = stmmac_init_dma_chain, .clean_desc3 = stmmac_clean_desc3, - .set_16kib_bfsize = stmmac_set_16kib_bfsize, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 186d14806122..7788fbe44f0a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -117,6 +117,36 @@ struct stmmac_extra_stats { unsigned long irq_rx_path_in_lpi_mode_n; unsigned long irq_rx_path_exit_lpi_mode_n; unsigned long phy_eee_wakeup_error_n; + /* Extended RDES status */ + unsigned long ip_hdr_err; + unsigned long ip_payload_err; + unsigned long ip_csum_bypassed; + unsigned long ipv4_pkt_rcvd; + unsigned long ipv6_pkt_rcvd; + unsigned long rx_msg_type_ext_no_ptp; + unsigned long rx_msg_type_sync; + unsigned long rx_msg_type_follow_up; + unsigned long rx_msg_type_delay_req; + unsigned long rx_msg_type_delay_resp; + unsigned long rx_msg_type_pdelay_req; + unsigned long rx_msg_type_pdelay_resp; + unsigned long rx_msg_type_pdelay_follow_up; + unsigned long ptp_frame_type; + unsigned long ptp_ver; + unsigned long timestamp_dropped; + unsigned long av_pkt_rcvd; + unsigned long av_tagged_pkt_rcvd; + unsigned long vlan_tag_priority_val; + unsigned long l3_filter_match; + unsigned long l4_filter_match; + unsigned long l3_l4_filter_no_match; + /* PCS */ + unsigned long irq_pcs_ane_n; + unsigned long irq_pcs_link_n; + unsigned long irq_rgmii_n; + unsigned long pcs_link; + unsigned long pcs_duplex; + unsigned long pcs_speed; }; /* CSR Frequency Access Defines*/ @@ -138,37 
+168,43 @@ struct stmmac_extra_stats { #define FLOW_TX 2 #define FLOW_AUTO (FLOW_TX | FLOW_RX) -#define SF_DMA_MODE 1 /* DMA STORE-AND-FORWARD Operation Mode */ +/* PCS defines */ +#define STMMAC_PCS_RGMII (1 << 0) +#define STMMAC_PCS_SGMII (1 << 1) +#define STMMAC_PCS_TBI (1 << 2) +#define STMMAC_PCS_RTBI (1 << 3) + +#define SF_DMA_MODE 1 /* DMA STORE-AND-FORWARD Operation Mode */ /* DAM HW feature register fields */ -#define DMA_HW_FEAT_MIISEL 0x00000001 /* 10/100 Mbps Support */ -#define DMA_HW_FEAT_GMIISEL 0x00000002 /* 1000 Mbps Support */ -#define DMA_HW_FEAT_HDSEL 0x00000004 /* Half-Duplex Support */ -#define DMA_HW_FEAT_EXTHASHEN 0x00000008 /* Expanded DA Hash Filter */ -#define DMA_HW_FEAT_HASHSEL 0x00000010 /* HASH Filter */ -#define DMA_HW_FEAT_ADDMACADRSEL 0x00000020 /* Multiple MAC Addr Reg */ -#define DMA_HW_FEAT_PCSSEL 0x00000040 /* PCS registers */ -#define DMA_HW_FEAT_L3L4FLTREN 0x00000080 /* Layer 3 & Layer 4 Feature */ -#define DMA_HW_FEAT_SMASEL 0x00000100 /* SMA(MDIO) Interface */ -#define DMA_HW_FEAT_RWKSEL 0x00000200 /* PMT Remote Wakeup */ -#define DMA_HW_FEAT_MGKSEL 0x00000400 /* PMT Magic Packet */ -#define DMA_HW_FEAT_MMCSEL 0x00000800 /* RMON Module */ -#define DMA_HW_FEAT_TSVER1SEL 0x00001000 /* Only IEEE 1588-2002 Timestamp */ -#define DMA_HW_FEAT_TSVER2SEL 0x00002000 /* IEEE 1588-2008 Adv Timestamp */ -#define DMA_HW_FEAT_EEESEL 0x00004000 /* Energy Efficient Ethernet */ -#define DMA_HW_FEAT_AVSEL 0x00008000 /* AV Feature */ -#define DMA_HW_FEAT_TXCOESEL 0x00010000 /* Checksum Offload in Tx */ -#define DMA_HW_FEAT_RXTYP1COE 0x00020000 /* IP csum Offload(Type 1) in Rx */ -#define DMA_HW_FEAT_RXTYP2COE 0x00040000 /* IP csum Offload(Type 2) in Rx */ -#define DMA_HW_FEAT_RXFIFOSIZE 0x00080000 /* Rx FIFO > 2048 Bytes */ -#define DMA_HW_FEAT_RXCHCNT 0x00300000 /* No. of additional Rx Channels */ -#define DMA_HW_FEAT_TXCHCNT 0x00c00000 /* No. 
of additional Tx Channels */ -#define DMA_HW_FEAT_ENHDESSEL 0x01000000 /* Alternate (Enhanced Descriptor) */ -#define DMA_HW_FEAT_INTTSEN 0x02000000 /* Timestamping with Internal - System Time */ -#define DMA_HW_FEAT_FLEXIPPSEN 0x04000000 /* Flexible PPS Output */ -#define DMA_HW_FEAT_SAVLANINS 0x08000000 /* Source Addr or VLAN Insertion */ -#define DMA_HW_FEAT_ACTPHYIF 0x70000000 /* Active/selected PHY interface */ +#define DMA_HW_FEAT_MIISEL 0x00000001 /* 10/100 Mbps Support */ +#define DMA_HW_FEAT_GMIISEL 0x00000002 /* 1000 Mbps Support */ +#define DMA_HW_FEAT_HDSEL 0x00000004 /* Half-Duplex Support */ +#define DMA_HW_FEAT_EXTHASHEN 0x00000008 /* Expanded DA Hash Filter */ +#define DMA_HW_FEAT_HASHSEL 0x00000010 /* HASH Filter */ +#define DMA_HW_FEAT_ADDMAC 0x00000020 /* Multiple MAC Addr Reg */ +#define DMA_HW_FEAT_PCSSEL 0x00000040 /* PCS registers */ +#define DMA_HW_FEAT_L3L4FLTREN 0x00000080 /* Layer 3 & Layer 4 Feature */ +#define DMA_HW_FEAT_SMASEL 0x00000100 /* SMA(MDIO) Interface */ +#define DMA_HW_FEAT_RWKSEL 0x00000200 /* PMT Remote Wakeup */ +#define DMA_HW_FEAT_MGKSEL 0x00000400 /* PMT Magic Packet */ +#define DMA_HW_FEAT_MMCSEL 0x00000800 /* RMON Module */ +#define DMA_HW_FEAT_TSVER1SEL 0x00001000 /* Only IEEE 1588-2002 */ +#define DMA_HW_FEAT_TSVER2SEL 0x00002000 /* IEEE 1588-2008 PTPv2 */ +#define DMA_HW_FEAT_EEESEL 0x00004000 /* Energy Efficient Ethernet */ +#define DMA_HW_FEAT_AVSEL 0x00008000 /* AV Feature */ +#define DMA_HW_FEAT_TXCOESEL 0x00010000 /* Checksum Offload in Tx */ +#define DMA_HW_FEAT_RXTYP1COE 0x00020000 /* IP COE (Type 1) in Rx */ +#define DMA_HW_FEAT_RXTYP2COE 0x00040000 /* IP COE (Type 2) in Rx */ +#define DMA_HW_FEAT_RXFIFOSIZE 0x00080000 /* Rx FIFO > 2048 Bytes */ +#define DMA_HW_FEAT_RXCHCNT 0x00300000 /* No. additional Rx Channels */ +#define DMA_HW_FEAT_TXCHCNT 0x00c00000 /* No. 
additional Tx Channels */ +#define DMA_HW_FEAT_ENHDESSEL 0x01000000 /* Alternate Descriptor */ +/* Timestamping with Internal System Time */ +#define DMA_HW_FEAT_INTTSEN 0x02000000 +#define DMA_HW_FEAT_FLEXIPPSEN 0x04000000 /* Flexible PPS Output */ +#define DMA_HW_FEAT_SAVLANINS 0x08000000 /* Source Addr or VLAN */ +#define DMA_HW_FEAT_ACTPHYIF 0x70000000 /* Active/selected PHY iface */ #define DEFAULT_DMA_PBL 8 /* Max/Min RI Watchdog Timer count value */ @@ -180,7 +216,8 @@ struct stmmac_extra_stats { #define STMMAC_TX_MAX_FRAMES 256 #define STMMAC_TX_FRAMES 64 -enum rx_frame_status { /* IPC status */ +/* Rx IPC status */ +enum rx_frame_status { good_frame = 0, discard_frame = 1, csum_none = 2, @@ -194,17 +231,25 @@ enum dma_irq_status { handle_tx = 0x8, }; -enum core_specific_irq_mask { - core_mmc_tx_irq = 1, - core_mmc_rx_irq = 2, - core_mmc_rx_csum_offload_irq = 4, - core_irq_receive_pmt_irq = 8, - core_irq_tx_path_in_lpi_mode = 16, - core_irq_tx_path_exit_lpi_mode = 32, - core_irq_rx_path_in_lpi_mode = 64, - core_irq_rx_path_exit_lpi_mode = 128, +#define CORE_IRQ_TX_PATH_IN_LPI_MODE (1 << 1) +#define CORE_IRQ_TX_PATH_EXIT_LPI_MODE (1 << 2) +#define CORE_IRQ_RX_PATH_IN_LPI_MODE (1 << 3) +#define CORE_IRQ_RX_PATH_EXIT_LPI_MODE (1 << 4) + +#define CORE_PCS_ANE_COMPLETE (1 << 5) +#define CORE_PCS_LINK_STATUS (1 << 6) +#define CORE_RGMII_IRQ (1 << 7) + +struct rgmii_adv { + unsigned int pause; + unsigned int duplex; + unsigned int lp_pause; + unsigned int lp_duplex; }; +#define STMMAC_PCS_PAUSE 1 +#define STMMAC_PCS_ASYM_PAUSE 2 + /* DMA HW capabilities */ struct dma_features { unsigned int mbps_10_100; @@ -217,9 +262,9 @@ struct dma_features { unsigned int pmt_remote_wake_up; unsigned int pmt_magic_frame; unsigned int rmon; - /* IEEE 1588-2002*/ + /* IEEE 1588-2002 */ unsigned int time_stamp; - /* IEEE 1588-2008*/ + /* IEEE 1588-2008 */ unsigned int atime_stamp; /* 802.3az - Energy-Efficient Ethernet (EEE) */ unsigned int eee; @@ -232,7 +277,7 @@ struct 
dma_features { /* TX and RX number of channels */ unsigned int number_rx_channel; unsigned int number_tx_channel; - /* Alternate (enhanced) DESC mode*/ + /* Alternate (enhanced) DESC mode */ unsigned int enh_desc; }; @@ -255,23 +300,26 @@ struct dma_features { #define STMMAC_DEFAULT_LIT_LS_TIMER 0x3E8 #define STMMAC_DEFAULT_TWT_LS_TIMER 0x0 +#define STMMAC_CHAIN_MODE 0x1 +#define STMMAC_RING_MODE 0x2 + struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ - void (*init_rx_desc) (struct dma_desc *p, unsigned int ring_size, - int disable_rx_ic); + void (*init_rx_desc) (struct dma_desc *p, int disable_rx_ic, int mode, + int end); /* DMA TX descriptor ring initialization */ - void (*init_tx_desc) (struct dma_desc *p, unsigned int ring_size); + void (*init_tx_desc) (struct dma_desc *p, int mode, int end); /* Invoked by the xmit function to prepare the tx descriptor */ void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len, - int csum_flag); + int csum_flag, int mode); /* Set/get the owner of the descriptor */ void (*set_tx_owner) (struct dma_desc *p); int (*get_tx_owner) (struct dma_desc *p); /* Invoked by the xmit function to close the tx descriptor */ void (*close_tx_desc) (struct dma_desc *p); /* Clean the tx descriptor as soon as the tx irq is received */ - void (*release_tx_desc) (struct dma_desc *p); + void (*release_tx_desc) (struct dma_desc *p, int mode); /* Clear interrupt on tx frame completion. 
When this bit is * set an interrupt happens as soon as the frame is transmitted */ void (*clear_tx_ic) (struct dma_desc *p); @@ -290,12 +338,22 @@ struct stmmac_desc_ops { /* Return the reception status looking at the RDES1 */ int (*rx_status) (void *data, struct stmmac_extra_stats *x, struct dma_desc *p); + void (*rx_extended_status) (void *data, struct stmmac_extra_stats *x, + struct dma_extended_desc *p); + /* Set tx timestamp enable bit */ + void (*enable_tx_timestamp) (struct dma_desc *p); + /* get tx timestamp status */ + int (*get_tx_timestamp_status) (struct dma_desc *p); + /* get timestamp value */ + u64(*get_timestamp) (void *desc, u32 ats); + /* get rx timestamp status */ + int (*get_rx_timestamp_status) (void *desc, u32 ats); }; struct stmmac_dma_ops { /* DMA core initialization */ int (*init) (void __iomem *ioaddr, int pbl, int fb, int mb, - int burst_len, u32 dma_tx, u32 dma_rx); + int burst_len, u32 dma_tx, u32 dma_rx, int atds); /* Dump DMA registers */ void (*dump_regs) (void __iomem *ioaddr); /* Set tx/rx threshold in the csr6 register @@ -321,13 +379,14 @@ struct stmmac_dma_ops { struct stmmac_ops { /* MAC core initialization */ - void (*core_init) (void __iomem *ioaddr) ____cacheline_aligned; + void (*core_init) (void __iomem *ioaddr); /* Enable and verify that the IPC module is supported */ int (*rx_ipc) (void __iomem *ioaddr); /* Dump MAC registers */ void (*dump_regs) (void __iomem *ioaddr); /* Handle extra events on specific interrupts hw dependent */ - int (*host_irq_status) (void __iomem *ioaddr); + int (*host_irq_status) (void __iomem *ioaddr, + struct stmmac_extra_stats *x); /* Multicast filter setting */ void (*set_filter) (struct net_device *dev, int id); /* Flow control setting */ @@ -344,6 +403,18 @@ struct stmmac_ops { void (*reset_eee_mode) (void __iomem *ioaddr); void (*set_eee_timer) (void __iomem *ioaddr, int ls, int tw); void (*set_eee_pls) (void __iomem *ioaddr, int link); + void (*ctrl_ane) (void __iomem *ioaddr, bool 
restart); + void (*get_adv) (void __iomem *ioaddr, struct rgmii_adv *adv); +}; + +struct stmmac_hwtimestamp { + void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data); + void (*config_sub_second_increment) (void __iomem *ioaddr); + int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec); + int (*config_addend) (void __iomem *ioaddr, u32 addend); + int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec, + int add_sub); + u64(*get_systime) (void __iomem *ioaddr); }; struct mac_link { @@ -360,19 +431,28 @@ struct mii_regs { struct stmmac_ring_mode_ops { unsigned int (*is_jumbo_frm) (int len, int ehn_desc); unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); - void (*refill_desc3) (int bfsize, struct dma_desc *p); - void (*init_desc3) (int des3_as_data_buf, struct dma_desc *p); - void (*init_dma_chain) (struct dma_desc *des, dma_addr_t phy_addr, - unsigned int size); - void (*clean_desc3) (struct dma_desc *p); + void (*refill_desc3) (void *priv, struct dma_desc *p); + void (*init_desc3) (struct dma_desc *p); + void (*clean_desc3) (void *priv, struct dma_desc *p); int (*set_16kib_bfsize) (int mtu); }; +struct stmmac_chain_mode_ops { + void (*init) (void *des, dma_addr_t phy_addr, unsigned int size, + unsigned int extend_desc); + unsigned int (*is_jumbo_frm) (int len, int ehn_desc); + unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); + void (*refill_desc3) (void *priv, struct dma_desc *p); + void (*clean_desc3) (void *priv, struct dma_desc *p); +}; + struct mac_device_info { - const struct stmmac_ops *mac; - const struct stmmac_desc_ops *desc; - const struct stmmac_dma_ops *dma; - const struct stmmac_ring_mode_ops *ring; + const struct stmmac_ops *mac; + const struct stmmac_desc_ops *desc; + const struct stmmac_dma_ops *dma; + const struct stmmac_ring_mode_ops *ring; + const struct stmmac_chain_mode_ops *chain; + const struct stmmac_hwtimestamp *ptp; struct mii_regs mii; /* MII register Addresses */ struct 
mac_link link; unsigned int synopsys_uid; @@ -390,5 +470,6 @@ extern void stmmac_set_mac(void __iomem *ioaddr, bool enable); extern void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); extern const struct stmmac_ring_mode_ops ring_mode_ops; +extern const struct stmmac_chain_mode_ops chain_mode_ops; #endif /* __COMMON_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index 223adf95fd03..ad3996038018 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -24,6 +24,7 @@ #ifndef __DESCS_H__ #define __DESCS_H__ +/* Basic descriptor structure for normal and alternate descriptors */ struct dma_desc { /* Receive descriptor */ union { @@ -60,7 +61,7 @@ struct dma_desc { } rx; struct { /* RDES0 */ - u32 payload_csum_error:1; + u32 rx_mac_addr:1; u32 crc_error:1; u32 dribbling:1; u32 error_gmii:1; @@ -162,13 +163,57 @@ struct dma_desc { unsigned int des3; }; +/* Extended descriptor structure (supported by new SYNP GMAC generations) */ +struct dma_extended_desc { + struct dma_desc basic; + union { + struct { + u32 ip_payload_type:3; + u32 ip_hdr_err:1; + u32 ip_payload_err:1; + u32 ip_csum_bypassed:1; + u32 ipv4_pkt_rcvd:1; + u32 ipv6_pkt_rcvd:1; + u32 msg_type:4; + u32 ptp_frame_type:1; + u32 ptp_ver:1; + u32 timestamp_dropped:1; + u32 reserved:1; + u32 av_pkt_rcvd:1; + u32 av_tagged_pkt_rcvd:1; + u32 vlan_tag_priority_val:3; + u32 reserved3:3; + u32 l3_filter_match:1; + u32 l4_filter_match:1; + u32 l3_l4_filter_no_match:2; + u32 reserved4:4; + } erx; + struct { + u32 reserved; + } etx; + } des4; + unsigned int des5; /* Reserved */ + unsigned int des6; /* Tx/Rx Timestamp Low */ + unsigned int des7; /* Tx/Rx Timestamp High */ +}; + /* Transmit checksum insertion control */ enum tdes_csum_insertion { cic_disabled = 0, /* Checksum Insertion Control */ cic_only_ip = 1, /* Only IP header */ - cic_no_pseudoheader = 2, /* IP header but pseudoheader - * is not calculated 
*/ + /* IP header but pseudoheader is not calculated */ + cic_no_pseudoheader = 2, cic_full = 3, /* IP header and pseudoheader */ }; +/* Extended RDES4 definitions */ +#define RDES_EXT_NO_PTP 0 +#define RDES_EXT_SYNC 0x1 +#define RDES_EXT_FOLLOW_UP 0x2 +#define RDES_EXT_DELAY_REQ 0x3 +#define RDES_EXT_DELAY_RESP 0x4 +#define RDES_EXT_PDELAY_REQ 0x5 +#define RDES_EXT_PDELAY_RESP 0x6 +#define RDES_EXT_PDELAY_FOLLOW_UP 0x7 + #endif /* __DESCS_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index 7ee9499a6e38..6f2cc78c5cf5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -30,26 +30,28 @@ #ifndef __DESC_COM_H__ #define __DESC_COM_H__ -#if defined(CONFIG_STMMAC_RING) -static inline void ehn_desc_rx_set_on_ring_chain(struct dma_desc *p, int end) +/* Specific functions used for Ring mode */ + +/* Enhanced descriptors */ +static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) { p->des01.erx.buffer2_size = BUF_SIZE_8KiB - 1; if (end) p->des01.erx.end_ring = 1; } -static inline void ehn_desc_tx_set_on_ring_chain(struct dma_desc *p, int end) +static inline void ehn_desc_tx_set_on_ring(struct dma_desc *p, int end) { if (end) p->des01.etx.end_ring = 1; } -static inline void enh_desc_end_tx_desc(struct dma_desc *p, int ter) +static inline void enh_desc_end_tx_desc_on_ring(struct dma_desc *p, int ter) { p->des01.etx.end_ring = ter; } -static inline void enh_set_tx_desc_len(struct dma_desc *p, int len) +static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len) { if (unlikely(len > BUF_SIZE_4KiB)) { p->des01.etx.buffer1_size = BUF_SIZE_4KiB; @@ -58,25 +60,26 @@ static inline void enh_set_tx_desc_len(struct dma_desc *p, int len) p->des01.etx.buffer1_size = len; } -static inline void ndesc_rx_set_on_ring_chain(struct dma_desc *p, int end) +/* Normal descriptors */ +static inline void ndesc_rx_set_on_ring(struct 
dma_desc *p, int end) { p->des01.rx.buffer2_size = BUF_SIZE_2KiB - 1; if (end) p->des01.rx.end_ring = 1; } -static inline void ndesc_tx_set_on_ring_chain(struct dma_desc *p, int end) +static inline void ndesc_tx_set_on_ring(struct dma_desc *p, int end) { if (end) p->des01.tx.end_ring = 1; } -static inline void ndesc_end_tx_desc(struct dma_desc *p, int ter) +static inline void ndesc_end_tx_desc_on_ring(struct dma_desc *p, int ter) { p->des01.tx.end_ring = ter; } -static inline void norm_set_tx_desc_len(struct dma_desc *p, int len) +static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len) { if (unlikely(len > BUF_SIZE_2KiB)) { p->des01.etx.buffer1_size = BUF_SIZE_2KiB - 1; @@ -85,47 +88,47 @@ static inline void norm_set_tx_desc_len(struct dma_desc *p, int len) p->des01.tx.buffer1_size = len; } -#else +/* Specific functions used for Chain mode */ -static inline void ehn_desc_rx_set_on_ring_chain(struct dma_desc *p, int end) +/* Enhanced descriptors */ +static inline void ehn_desc_rx_set_on_chain(struct dma_desc *p, int end) { p->des01.erx.second_address_chained = 1; } -static inline void ehn_desc_tx_set_on_ring_chain(struct dma_desc *p, int end) +static inline void ehn_desc_tx_set_on_chain(struct dma_desc *p, int end) { p->des01.etx.second_address_chained = 1; } -static inline void enh_desc_end_tx_desc(struct dma_desc *p, int ter) +static inline void enh_desc_end_tx_desc_on_chain(struct dma_desc *p, int ter) { p->des01.etx.second_address_chained = 1; } -static inline void enh_set_tx_desc_len(struct dma_desc *p, int len) +static inline void enh_set_tx_desc_len_on_chain(struct dma_desc *p, int len) { p->des01.etx.buffer1_size = len; } -static inline void ndesc_rx_set_on_ring_chain(struct dma_desc *p, int end) +/* Normal descriptors */ +static inline void ndesc_rx_set_on_chain(struct dma_desc *p, int end) { p->des01.rx.second_address_chained = 1; } -static inline void ndesc_tx_set_on_ring_chain(struct dma_desc *p, int ring_size) +static inline void 
ndesc_tx_set_on_chain(struct dma_desc *p, int ring_size) { p->des01.tx.second_address_chained = 1; } -static inline void ndesc_end_tx_desc(struct dma_desc *p, int ter) +static inline void ndesc_end_tx_desc_on_chain(struct dma_desc *p, int ter) { p->des01.tx.second_address_chained = 1; } -static inline void norm_set_tx_desc_len(struct dma_desc *p, int len) +static inline void norm_set_tx_desc_len_on_chain(struct dma_desc *p, int len) { p->des01.tx.buffer1_size = len; } -#endif - #endif /* __DESC_COM_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 7ad56afd6324..c12aabb8cf93 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -89,13 +89,46 @@ enum power_event { (reg * 8)) #define GMAC_MAX_PERFECT_ADDRESSES 32 +/* PCS registers (AN/TBI/SGMII/RGMII) offset */ #define GMAC_AN_CTRL 0x000000c0 /* AN control */ #define GMAC_AN_STATUS 0x000000c4 /* AN status */ #define GMAC_ANE_ADV 0x000000c8 /* Auto-Neg. Advertisement */ -#define GMAC_ANE_LINK 0x000000cc /* Auto-Neg. link partener ability */ +#define GMAC_ANE_LPA 0x000000cc /* Auto-Neg. 
link partener ability */ #define GMAC_ANE_EXP 0x000000d0 /* ANE expansion */ #define GMAC_TBI 0x000000d4 /* TBI extend status */ -#define GMAC_GMII_STATUS 0x000000d8 /* S/R-GMII status */ +#define GMAC_S_R_GMII 0x000000d8 /* SGMII RGMII status */ + +/* AN Configuration defines */ +#define GMAC_AN_CTRL_RAN 0x00000200 /* Restart Auto-Negotiation */ +#define GMAC_AN_CTRL_ANE 0x00001000 /* Auto-Negotiation Enable */ +#define GMAC_AN_CTRL_ELE 0x00004000 /* External Loopback Enable */ +#define GMAC_AN_CTRL_ECD 0x00010000 /* Enable Comma Detect */ +#define GMAC_AN_CTRL_LR 0x00020000 /* Lock to Reference */ +#define GMAC_AN_CTRL_SGMRAL 0x00040000 /* SGMII RAL Control */ + +/* AN Status defines */ +#define GMAC_AN_STATUS_LS 0x00000004 /* Link Status 0:down 1:up */ +#define GMAC_AN_STATUS_ANA 0x00000008 /* Auto-Negotiation Ability */ +#define GMAC_AN_STATUS_ANC 0x00000020 /* Auto-Negotiation Complete */ +#define GMAC_AN_STATUS_ES 0x00000100 /* Extended Status */ + +/* Register 54 (SGMII/RGMII status register) */ +#define GMAC_S_R_GMII_LINK 0x8 +#define GMAC_S_R_GMII_SPEED 0x5 +#define GMAC_S_R_GMII_SPEED_SHIFT 0x1 +#define GMAC_S_R_GMII_MODE 0x1 +#define GMAC_S_R_GMII_SPEED_125 2 +#define GMAC_S_R_GMII_SPEED_25 1 + +/* Common ADV and LPA defines */ +#define GMAC_ANE_FD (1 << 5) +#define GMAC_ANE_HD (1 << 6) +#define GMAC_ANE_PSE (3 << 7) +#define GMAC_ANE_PSE_SHIFT 7 + + /* GMAC Configuration defines */ +#define GMAC_CONTROL_TC 0x01000000 /* Transmit Conf. in RGMII/SGMII */ +#define GMAC_CONTROL_WD 0x00800000 /* Disable Watchdog on receive */ /* GMAC Configuration defines */ #define GMAC_CONTROL_TC 0x01000000 /* Transmit Conf. 
in RGMII/SGMII */ @@ -108,19 +141,19 @@ enum inter_frame_gap { GMAC_CONTROL_IFG_80 = 0x00020000, GMAC_CONTROL_IFG_40 = 0x000e0000, }; -#define GMAC_CONTROL_DCRS 0x00010000 /* Disable carrier sense during tx */ -#define GMAC_CONTROL_PS 0x00008000 /* Port Select 0:GMI 1:MII */ -#define GMAC_CONTROL_FES 0x00004000 /* Speed 0:10 1:100 */ -#define GMAC_CONTROL_DO 0x00002000 /* Disable Rx Own */ -#define GMAC_CONTROL_LM 0x00001000 /* Loop-back mode */ -#define GMAC_CONTROL_DM 0x00000800 /* Duplex Mode */ -#define GMAC_CONTROL_IPC 0x00000400 /* Checksum Offload */ -#define GMAC_CONTROL_DR 0x00000200 /* Disable Retry */ -#define GMAC_CONTROL_LUD 0x00000100 /* Link up/down */ -#define GMAC_CONTROL_ACS 0x00000080 /* Automatic Pad/FCS Stripping */ -#define GMAC_CONTROL_DC 0x00000010 /* Deferral Check */ -#define GMAC_CONTROL_TE 0x00000008 /* Transmitter Enable */ -#define GMAC_CONTROL_RE 0x00000004 /* Receiver Enable */ +#define GMAC_CONTROL_DCRS 0x00010000 /* Disable carrier sense */ +#define GMAC_CONTROL_PS 0x00008000 /* Port Select 0:GMI 1:MII */ +#define GMAC_CONTROL_FES 0x00004000 /* Speed 0:10 1:100 */ +#define GMAC_CONTROL_DO 0x00002000 /* Disable Rx Own */ +#define GMAC_CONTROL_LM 0x00001000 /* Loop-back mode */ +#define GMAC_CONTROL_DM 0x00000800 /* Duplex Mode */ +#define GMAC_CONTROL_IPC 0x00000400 /* Checksum Offload */ +#define GMAC_CONTROL_DR 0x00000200 /* Disable Retry */ +#define GMAC_CONTROL_LUD 0x00000100 /* Link up/down */ +#define GMAC_CONTROL_ACS 0x00000080 /* Auto Pad/FCS Stripping */ +#define GMAC_CONTROL_DC 0x00000010 /* Deferral Check */ +#define GMAC_CONTROL_TE 0x00000008 /* Transmitter Enable */ +#define GMAC_CONTROL_RE 0x00000004 /* Receiver Enable */ #define GMAC_CORE_INIT (GMAC_CONTROL_JD | GMAC_CONTROL_PS | GMAC_CONTROL_ACS | \ GMAC_CONTROL_JE | GMAC_CONTROL_BE) @@ -151,15 +184,16 @@ enum inter_frame_gap { #define DMA_BUS_MODE_SFT_RESET 0x00000001 /* Software Reset */ #define DMA_BUS_MODE_DA 0x00000002 /* Arbitration scheme */ #define 
DMA_BUS_MODE_DSL_MASK 0x0000007c /* Descriptor Skip Length */ -#define DMA_BUS_MODE_DSL_SHIFT 2 /* (in DWORDS) */ +#define DMA_BUS_MODE_DSL_SHIFT 2 /* (in DWORDS) */ /* Programmable burst length (passed thorugh platform)*/ #define DMA_BUS_MODE_PBL_MASK 0x00003f00 /* Programmable Burst Len */ #define DMA_BUS_MODE_PBL_SHIFT 8 +#define DMA_BUS_MODE_ATDS 0x00000080 /* Alternate Descriptor Size */ enum rx_tx_priority_ratio { - double_ratio = 0x00004000, /*2:1 */ - triple_ratio = 0x00008000, /*3:1 */ - quadruple_ratio = 0x0000c000, /*4:1 */ + double_ratio = 0x00004000, /* 2:1 */ + triple_ratio = 0x00008000, /* 3:1 */ + quadruple_ratio = 0x0000c000, /* 4:1 */ }; #define DMA_BUS_MODE_FB 0x00010000 /* Fixed burst */ @@ -179,9 +213,10 @@ enum rx_tx_priority_ratio { #define DMA_BUS_FB 0x00010000 /* Fixed Burst */ /* DMA operation mode defines (start/stop tx/rx are placed in common header)*/ -#define DMA_CONTROL_DT 0x04000000 /* Disable Drop TCP/IP csum error */ -#define DMA_CONTROL_RSF 0x02000000 /* Receive Store and Forward */ -#define DMA_CONTROL_DFF 0x01000000 /* Disaable flushing */ +/* Disable Drop TCP/IP csum error */ +#define DMA_CONTROL_DT 0x04000000 +#define DMA_CONTROL_RSF 0x02000000 /* Receive Store and Forward */ +#define DMA_CONTROL_DFF 0x01000000 /* Disaable flushing */ /* Threshold for Activating the FC */ enum rfa { act_full_minus_1 = 0x00800000, @@ -196,7 +231,7 @@ enum rfd { deac_full_minus_3 = 0x00401000, deac_full_minus_4 = 0x00401800, }; -#define DMA_CONTROL_TSF 0x00200000 /* Transmit Store and Forward */ +#define DMA_CONTROL_TSF 0x00200000 /* Transmit Store and Forward */ enum ttc_control { DMA_CONTROL_TTC_64 = 0x00000000, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index bfe022605498..7e05e8d0f1c2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -28,6 +28,7 @@ #include <linux/crc32.h> #include 
<linux/slab.h> +#include <linux/ethtool.h> #include <asm/io.h> #include "dwmac1000.h" @@ -71,22 +72,22 @@ static void dwmac1000_dump_regs(void __iomem *ioaddr) } static void dwmac1000_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, - unsigned int reg_n) + unsigned int reg_n) { stmmac_set_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), - GMAC_ADDR_LOW(reg_n)); + GMAC_ADDR_LOW(reg_n)); } static void dwmac1000_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, - unsigned int reg_n) + unsigned int reg_n) { stmmac_get_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), - GMAC_ADDR_LOW(reg_n)); + GMAC_ADDR_LOW(reg_n)); } static void dwmac1000_set_filter(struct net_device *dev, int id) { - void __iomem *ioaddr = (void __iomem *) dev->base_addr; + void __iomem *ioaddr = (void __iomem *)dev->base_addr; unsigned int value = 0; unsigned int perfect_addr_number; @@ -96,7 +97,7 @@ static void dwmac1000_set_filter(struct net_device *dev, int id) if (dev->flags & IFF_PROMISC) value = GMAC_FRAME_FILTER_PR; else if ((netdev_mc_count(dev) > HASH_TABLE_SIZE) - || (dev->flags & IFF_ALLMULTI)) { + || (dev->flags & IFF_ALLMULTI)) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ writel(0xffffffff, ioaddr + GMAC_HASH_HIGH); writel(0xffffffff, ioaddr + GMAC_HASH_LOW); @@ -110,12 +111,13 @@ static void dwmac1000_set_filter(struct net_device *dev, int id) memset(mc_filter, 0, sizeof(mc_filter)); netdev_for_each_mc_addr(ha, dev) { /* The upper 6 bits of the calculated CRC are used to - index the contens of the hash table */ - int bit_nr = - bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26; + * index the contens of the hash table + */ + int bit_nr = bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26; /* The most significant bit determines the register to * use (H/L) while the other 5 bits determine the bit - * within the register. */ + * within the register. 
+ */ mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); } writel(mc_filter[0], ioaddr + GMAC_HASH_LOW); @@ -128,10 +130,11 @@ static void dwmac1000_set_filter(struct net_device *dev, int id) else perfect_addr_number = GMAC_MAX_PERFECT_ADDRESSES / 2; - /* Handle multiple unicast addresses (perfect filtering)*/ + /* Handle multiple unicast addresses (perfect filtering) */ if (netdev_uc_count(dev) > perfect_addr_number) - /* Switch to promiscuous mode is more than 16 addrs - are required */ + /* Switch to promiscuous mode if more than 16 addrs + * are required + */ value |= GMAC_FRAME_FILTER_PR; else { int reg = 1; @@ -149,13 +152,13 @@ static void dwmac1000_set_filter(struct net_device *dev, int id) #endif writel(value, ioaddr + GMAC_FRAME_FILTER); - CHIP_DBG(KERN_INFO "\tFrame Filter reg: 0x%08x\n\tHash regs: " - "HI 0x%08x, LO 0x%08x\n", readl(ioaddr + GMAC_FRAME_FILTER), - readl(ioaddr + GMAC_HASH_HIGH), readl(ioaddr + GMAC_HASH_LOW)); + CHIP_DBG(KERN_INFO "\tFilter: 0x%08x\n\tHash: HI 0x%08x, LO 0x%08x\n", + readl(ioaddr + GMAC_FRAME_FILTER), + readl(ioaddr + GMAC_HASH_HIGH), readl(ioaddr + GMAC_HASH_LOW)); } static void dwmac1000_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, - unsigned int fc, unsigned int pause_time) + unsigned int fc, unsigned int pause_time) { unsigned int flow = 0; @@ -193,74 +196,106 @@ static void dwmac1000_pmt(void __iomem *ioaddr, unsigned long mode) writel(pmt, ioaddr + GMAC_PMT); } - -static int dwmac1000_irq_status(void __iomem *ioaddr) +static int dwmac1000_irq_status(void __iomem *ioaddr, + struct stmmac_extra_stats *x) { u32 intr_status = readl(ioaddr + GMAC_INT_STATUS); - int status = 0; + int ret = 0; /* Not used events (e.g. MMC interrupts) are not handled. 
*/ if ((intr_status & mmc_tx_irq)) { CHIP_DBG(KERN_INFO "GMAC: MMC tx interrupt: 0x%08x\n", - readl(ioaddr + GMAC_MMC_TX_INTR)); - status |= core_mmc_tx_irq; + readl(ioaddr + GMAC_MMC_TX_INTR)); + x->mmc_tx_irq_n++; } if (unlikely(intr_status & mmc_rx_irq)) { CHIP_DBG(KERN_INFO "GMAC: MMC rx interrupt: 0x%08x\n", - readl(ioaddr + GMAC_MMC_RX_INTR)); - status |= core_mmc_rx_irq; + readl(ioaddr + GMAC_MMC_RX_INTR)); + x->mmc_rx_irq_n++; } if (unlikely(intr_status & mmc_rx_csum_offload_irq)) { CHIP_DBG(KERN_INFO "GMAC: MMC rx csum offload: 0x%08x\n", - readl(ioaddr + GMAC_MMC_RX_CSUM_OFFLOAD)); - status |= core_mmc_rx_csum_offload_irq; + readl(ioaddr + GMAC_MMC_RX_CSUM_OFFLOAD)); + x->mmc_rx_csum_offload_irq_n++; } if (unlikely(intr_status & pmt_irq)) { CHIP_DBG(KERN_INFO "GMAC: received Magic frame\n"); - /* clear the PMT bits 5 and 6 by reading the PMT - * status register. */ + /* clear the PMT bits 5 and 6 by reading the PMT status reg */ readl(ioaddr + GMAC_PMT); - status |= core_irq_receive_pmt_irq; + x->irq_receive_pmt_irq_n++; } /* MAC trx/rx EEE LPI entry/exit interrupts */ if (intr_status & lpiis_irq) { /* Clean LPI interrupt by reading the Reg 12 */ - u32 lpi_status = readl(ioaddr + LPI_CTRL_STATUS); + ret = readl(ioaddr + LPI_CTRL_STATUS); - if (lpi_status & LPI_CTRL_STATUS_TLPIEN) { + if (ret & LPI_CTRL_STATUS_TLPIEN) { CHIP_DBG(KERN_INFO "GMAC TX entered in LPI\n"); - status |= core_irq_tx_path_in_lpi_mode; + x->irq_tx_path_in_lpi_mode_n++; } - if (lpi_status & LPI_CTRL_STATUS_TLPIEX) { + if (ret & LPI_CTRL_STATUS_TLPIEX) { CHIP_DBG(KERN_INFO "GMAC TX exit from LPI\n"); - status |= core_irq_tx_path_exit_lpi_mode; + x->irq_tx_path_exit_lpi_mode_n++; } - if (lpi_status & LPI_CTRL_STATUS_RLPIEN) { + if (ret & LPI_CTRL_STATUS_RLPIEN) { CHIP_DBG(KERN_INFO "GMAC RX entered in LPI\n"); - status |= core_irq_rx_path_in_lpi_mode; + x->irq_rx_path_in_lpi_mode_n++; } - if (lpi_status & LPI_CTRL_STATUS_RLPIEX) { + if (ret & LPI_CTRL_STATUS_RLPIEX) { CHIP_DBG(KERN_INFO 
"GMAC RX exit from LPI\n"); - status |= core_irq_rx_path_exit_lpi_mode; + x->irq_rx_path_exit_lpi_mode_n++; + } + } + + if ((intr_status & pcs_ane_irq) || (intr_status & pcs_link_irq)) { + CHIP_DBG(KERN_INFO "GMAC PCS ANE IRQ\n"); + readl(ioaddr + GMAC_AN_STATUS); + x->irq_pcs_ane_n++; + } + if (intr_status & rgmii_irq) { + u32 status = readl(ioaddr + GMAC_S_R_GMII); + CHIP_DBG(KERN_INFO "GMAC RGMII/SGMII interrupt\n"); + x->irq_rgmii_n++; + + /* Save and dump the link status. */ + if (status & GMAC_S_R_GMII_LINK) { + int speed_value = (status & GMAC_S_R_GMII_SPEED) >> + GMAC_S_R_GMII_SPEED_SHIFT; + x->pcs_duplex = (status & GMAC_S_R_GMII_MODE); + + if (speed_value == GMAC_S_R_GMII_SPEED_125) + x->pcs_speed = SPEED_1000; + else if (speed_value == GMAC_S_R_GMII_SPEED_25) + x->pcs_speed = SPEED_100; + else + x->pcs_speed = SPEED_10; + + x->pcs_link = 1; + pr_debug("Link is Up - %d/%s\n", (int)x->pcs_speed, + x->pcs_duplex ? "Full" : "Half"); + } else { + x->pcs_link = 0; + pr_debug("Link is Down\n"); } } - return status; + return ret; } -static void dwmac1000_set_eee_mode(void __iomem *ioaddr) +static void dwmac1000_set_eee_mode(void __iomem *ioaddr) { u32 value; /* Enable the link status receive on RGMII, SGMII ore SMII * receive path and instruct the transmit to enter in LPI - * state. */ + * state. 
+ */ value = readl(ioaddr + LPI_CTRL_STATUS); value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; writel(value, ioaddr + LPI_CTRL_STATUS); } -static void dwmac1000_reset_eee_mode(void __iomem *ioaddr) +static void dwmac1000_reset_eee_mode(void __iomem *ioaddr) { u32 value; @@ -269,7 +304,7 @@ static void dwmac1000_reset_eee_mode(void __iomem *ioaddr) writel(value, ioaddr + LPI_CTRL_STATUS); } -static void dwmac1000_set_eee_pls(void __iomem *ioaddr, int link) +static void dwmac1000_set_eee_pls(void __iomem *ioaddr, int link) { u32 value; @@ -283,7 +318,7 @@ static void dwmac1000_set_eee_pls(void __iomem *ioaddr, int link) writel(value, ioaddr + LPI_CTRL_STATUS); } -static void dwmac1000_set_eee_timer(void __iomem *ioaddr, int ls, int tw) +static void dwmac1000_set_eee_timer(void __iomem *ioaddr, int ls, int tw) { int value = ((tw & 0xffff)) | ((ls & 0x7ff) << 16); @@ -297,6 +332,41 @@ static void dwmac1000_set_eee_timer(void __iomem *ioaddr, int ls, int tw) writel(value, ioaddr + LPI_TIMER_CTRL); } +static void dwmac1000_ctrl_ane(void __iomem *ioaddr, bool restart) +{ + u32 value; + + value = readl(ioaddr + GMAC_AN_CTRL); + /* auto negotiation enable and External Loopback enable */ + value = GMAC_AN_CTRL_ANE | GMAC_AN_CTRL_ELE; + + if (restart) + value |= GMAC_AN_CTRL_RAN; + + writel(value, ioaddr + GMAC_AN_CTRL); +} + +static void dwmac1000_get_adv(void __iomem *ioaddr, struct rgmii_adv *adv) +{ + u32 value = readl(ioaddr + GMAC_ANE_ADV); + + if (value & GMAC_ANE_FD) + adv->duplex = DUPLEX_FULL; + if (value & GMAC_ANE_HD) + adv->duplex |= DUPLEX_HALF; + + adv->pause = (value & GMAC_ANE_PSE) >> GMAC_ANE_PSE_SHIFT; + + value = readl(ioaddr + GMAC_ANE_LPA); + + if (value & GMAC_ANE_FD) + adv->lp_duplex = DUPLEX_FULL; + if (value & GMAC_ANE_HD) + adv->lp_duplex = DUPLEX_HALF; + + adv->lp_pause = (value & GMAC_ANE_PSE) >> GMAC_ANE_PSE_SHIFT; +} + static const struct stmmac_ops dwmac1000_ops = { .core_init = dwmac1000_core_init, .rx_ipc = dwmac1000_rx_ipc_enable, @@ 
-307,10 +377,12 @@ static const struct stmmac_ops dwmac1000_ops = { .pmt = dwmac1000_pmt, .set_umac_addr = dwmac1000_set_umac_addr, .get_umac_addr = dwmac1000_get_umac_addr, - .set_eee_mode = dwmac1000_set_eee_mode, - .reset_eee_mode = dwmac1000_reset_eee_mode, - .set_eee_timer = dwmac1000_set_eee_timer, - .set_eee_pls = dwmac1000_set_eee_pls, + .set_eee_mode = dwmac1000_set_eee_mode, + .reset_eee_mode = dwmac1000_reset_eee_mode, + .set_eee_timer = dwmac1000_set_eee_timer, + .set_eee_pls = dwmac1000_set_eee_pls, + .ctrl_ane = dwmac1000_ctrl_ane, + .get_adv = dwmac1000_get_adv, }; struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index bf83c03bfd06..2c431b616058 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -30,8 +30,8 @@ #include "dwmac1000.h" #include "dwmac_dma.h" -static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, - int mb, int burst_len, u32 dma_tx, u32 dma_rx) +static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb, + int burst_len, u32 dma_tx, u32 dma_rx, int atds) { u32 value = readl(ioaddr + DMA_BUS_MODE); int limit; @@ -60,7 +60,7 @@ static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, * depending on pbl value. 
*/ value = DMA_BUS_MODE_PBL | ((pbl << DMA_BUS_MODE_PBL_SHIFT) | - (pbl << DMA_BUS_MODE_RPBL_SHIFT)); + (pbl << DMA_BUS_MODE_RPBL_SHIFT)); /* Set the Fixed burst mode */ if (fb) @@ -73,6 +73,10 @@ static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, #ifdef CONFIG_STMMAC_DA value |= DMA_BUS_MODE_DA; /* Rx has priority over tx */ #endif + + if (atds) + value |= DMA_BUS_MODE_ATDS; + writel(value, ioaddr + DMA_BUS_MODE); /* In case of GMAC AXI configuration, program the DMA_AXI_BUS_MODE @@ -90,14 +94,16 @@ static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, * * For Non Fixed Burst Mode: provide the maximum value of the * burst length. Any burst equal or below the provided burst - * length would be allowed to perform. */ + * length would be allowed to perform. + */ writel(burst_len, ioaddr + DMA_AXI_BUS_MODE); /* Mask interrupts by writing to CSR7 */ writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); - /* The base address of the RX/TX descriptor lists must be written into - * DMA CSR3 and CSR4, respectively. */ + /* RX/TX descriptor base address lists must be written into + * DMA CSR3 and CSR4, respectively + */ writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR); writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR); @@ -105,7 +111,7 @@ static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, } static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, - int rxmode) + int rxmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -114,11 +120,12 @@ static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, /* Transmit COE type 2 cannot be done in cut-through mode. */ csr6 |= DMA_CONTROL_TSF; /* Operating on second frame increase the performance - * especially when transmit store-and-forward is used.*/ + * especially when transmit store-and-forward is used. 
+ */ csr6 |= DMA_CONTROL_OSF; } else { - CHIP_DBG(KERN_DEBUG "GMAC: disabling TX store and forward mode" - " (threshold = %d)\n", txmode); + CHIP_DBG(KERN_DEBUG "GMAC: disabling TX SF (threshold %d)\n", + txmode); csr6 &= ~DMA_CONTROL_TSF; csr6 &= DMA_CONTROL_TC_TX_MASK; /* Set the transmit threshold */ @@ -138,8 +145,8 @@ static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, CHIP_DBG(KERN_DEBUG "GMAC: enable RX store and forward mode\n"); csr6 |= DMA_CONTROL_RSF; } else { - CHIP_DBG(KERN_DEBUG "GMAC: disabling RX store and forward mode" - " (threshold = %d)\n", rxmode); + CHIP_DBG(KERN_DEBUG "GMAC: disable RX SF mode (threshold %d)\n", + rxmode); csr6 &= ~DMA_CONTROL_RSF; csr6 &= DMA_CONTROL_TC_RX_MASK; if (rxmode <= 32) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index f83210e7c221..007bb2be3f10 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -47,8 +47,7 @@ static void dwmac100_dump_mac_regs(void __iomem *ioaddr) { pr_info("\t----------------------------------------------\n" "\t DWMAC 100 CSR (base addr = 0x%p)\n" - "\t----------------------------------------------\n", - ioaddr); + "\t----------------------------------------------\n", ioaddr); pr_info("\tcontrol reg (offset 0x%x): 0x%08x\n", MAC_CONTROL, readl(ioaddr + MAC_CONTROL)); pr_info("\taddr HI (offset 0x%x): 0x%08x\n ", MAC_ADDR_HIGH, @@ -72,7 +71,8 @@ static int dwmac100_rx_ipc_enable(void __iomem *ioaddr) return 0; } -static int dwmac100_irq_status(void __iomem *ioaddr) +static int dwmac100_irq_status(void __iomem *ioaddr, + struct stmmac_extra_stats *x) { return 0; } @@ -91,7 +91,7 @@ static void dwmac100_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, static void dwmac100_set_filter(struct net_device *dev, int id) { - void __iomem *ioaddr = (void __iomem *) dev->base_addr; + void __iomem *ioaddr = (void __iomem 
*)dev->base_addr; u32 value = readl(ioaddr + MAC_CONTROL); if (dev->flags & IFF_PROMISC) { @@ -112,7 +112,8 @@ static void dwmac100_set_filter(struct net_device *dev, int id) struct netdev_hw_addr *ha; /* Perfect filter mode for physical address and Hash - filter for multicast */ + * filter for multicast + */ value |= MAC_CONTROL_HP; value &= ~(MAC_CONTROL_PM | MAC_CONTROL_PR | MAC_CONTROL_IF | MAC_CONTROL_HO); @@ -120,12 +121,13 @@ static void dwmac100_set_filter(struct net_device *dev, int id) memset(mc_filter, 0, sizeof(mc_filter)); netdev_for_each_mc_addr(ha, dev) { /* The upper 6 bits of the calculated CRC are used to - * index the contens of the hash table */ - int bit_nr = - ether_crc(ETH_ALEN, ha->addr) >> 26; + * index the contens of the hash table + */ + int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26; /* The most significant bit determines the register to * use (H/L) while the other 5 bits determine the bit - * within the register. */ + * within the register. + */ mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); } writel(mc_filter[0], ioaddr + MAC_HASH_LOW); @@ -134,10 +136,9 @@ static void dwmac100_set_filter(struct net_device *dev, int id) writel(value, ioaddr + MAC_CONTROL); - CHIP_DBG(KERN_INFO "%s: CTRL reg: 0x%08x Hash regs: " - "HI 0x%08x, LO 0x%08x\n", - __func__, readl(ioaddr + MAC_CONTROL), - readl(ioaddr + MAC_HASH_HIGH), readl(ioaddr + MAC_HASH_LOW)); + CHIP_DBG(KERN_INFO "%s: Filter: 0x%08x Hash: HI 0x%08x, LO 0x%08x\n", + __func__, readl(ioaddr + MAC_CONTROL), + readl(ioaddr + MAC_HASH_HIGH), readl(ioaddr + MAC_HASH_LOW)); } static void dwmac100_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, @@ -150,9 +151,7 @@ static void dwmac100_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, writel(flow, ioaddr + MAC_FLOW_CTRL); } -/* No PMT module supported for this Ethernet Controller. - * Tested on ST platforms only. - */ +/* No PMT module supported on ST boards with this Eth chip. 
*/ static void dwmac100_pmt(void __iomem *ioaddr, unsigned long mode) { return; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c index c2b4d55a79b6..67551c154138 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c @@ -32,8 +32,8 @@ #include "dwmac100.h" #include "dwmac_dma.h" -static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, - int mb, int burst_len, u32 dma_tx, u32 dma_rx) +static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb, + int burst_len, u32 dma_tx, u32 dma_rx, int atds) { u32 value = readl(ioaddr + DMA_BUS_MODE); int limit; @@ -52,22 +52,25 @@ static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, /* Enable Application Access by writing to DMA CSR0 */ writel(DMA_BUS_MODE_DEFAULT | (pbl << DMA_BUS_MODE_PBL_SHIFT), - ioaddr + DMA_BUS_MODE); + ioaddr + DMA_BUS_MODE); /* Mask interrupts by writing to CSR7 */ writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); - /* The base address of the RX/TX descriptor lists must be written into - * DMA CSR3 and CSR4, respectively. */ + /* RX/TX descriptor base addr lists must be written into + * DMA CSR3 and CSR4, respectively + */ writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR); writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR); return 0; } -/* Store and Forward capability is not used at all.. - * The transmit threshold can be programmed by - * setting the TTC bits in the DMA control register.*/ +/* Store and Forward capability is not used at all. + * + * The transmit threshold can be programmed by setting the TTC bits in the DMA + * control register. 
+ */ static void dwmac100_dma_operation_mode(void __iomem *ioaddr, int txmode, int rxmode) { @@ -90,16 +93,15 @@ static void dwmac100_dump_dma_regs(void __iomem *ioaddr) CHIP_DBG(KERN_DEBUG "DWMAC 100 DMA CSR\n"); for (i = 0; i < 9; i++) pr_debug("\t CSR%d (offset 0x%x): 0x%08x\n", i, - (DMA_BUS_MODE + i * 4), - readl(ioaddr + DMA_BUS_MODE + i * 4)); + (DMA_BUS_MODE + i * 4), + readl(ioaddr + DMA_BUS_MODE + i * 4)); CHIP_DBG(KERN_DEBUG "\t CSR20 (offset 0x%x): 0x%08x\n", - DMA_CUR_TX_BUF_ADDR, readl(ioaddr + DMA_CUR_TX_BUF_ADDR)); + DMA_CUR_TX_BUF_ADDR, readl(ioaddr + DMA_CUR_TX_BUF_ADDR)); CHIP_DBG(KERN_DEBUG "\t CSR21 (offset 0x%x): 0x%08x\n", - DMA_CUR_RX_BUF_ADDR, readl(ioaddr + DMA_CUR_RX_BUF_ADDR)); + DMA_CUR_RX_BUF_ADDR, readl(ioaddr + DMA_CUR_RX_BUF_ADDR)); } -/* DMA controller has two counters to track the number of - * the receive missed frames. */ +/* DMA controller has two counters to track the number of the missed frames. */ static void dwmac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index ab4896ecac1c..8e5662ce488b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -102,7 +102,7 @@ #define DMA_STATUS_TU 0x00000004 /* Transmit Buffer Unavailable */ #define DMA_STATUS_TPS 0x00000002 /* Transmit Process Stopped */ #define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ -#define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ +#define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ extern void dwmac_enable_dma_transmission(void __iomem *ioaddr); extern void dwmac_enable_dma_irq(void __iomem *ioaddr); @@ -112,6 +112,6 @@ extern void dwmac_dma_stop_tx(void __iomem *ioaddr); extern void dwmac_dma_start_rx(void __iomem *ioaddr); extern void dwmac_dma_stop_rx(void __iomem *ioaddr); extern int dwmac_dma_interrupt(void __iomem 
*ioaddr, - struct stmmac_extra_stats *x); + struct stmmac_extra_stats *x); #endif /* __DWMAC_DMA_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 2fc8ef95f97a..0fbc8fafa706 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -150,6 +150,57 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err) return ret; } +static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x, + struct dma_extended_desc *p) +{ + if (unlikely(p->basic.des01.erx.rx_mac_addr)) { + if (p->des4.erx.ip_hdr_err) + x->ip_hdr_err++; + if (p->des4.erx.ip_payload_err) + x->ip_payload_err++; + if (p->des4.erx.ip_csum_bypassed) + x->ip_csum_bypassed++; + if (p->des4.erx.ipv4_pkt_rcvd) + x->ipv4_pkt_rcvd++; + if (p->des4.erx.ipv6_pkt_rcvd) + x->ipv6_pkt_rcvd++; + if (p->des4.erx.msg_type == RDES_EXT_SYNC) + x->rx_msg_type_sync++; + else if (p->des4.erx.msg_type == RDES_EXT_FOLLOW_UP) + x->rx_msg_type_follow_up++; + else if (p->des4.erx.msg_type == RDES_EXT_DELAY_REQ) + x->rx_msg_type_delay_req++; + else if (p->des4.erx.msg_type == RDES_EXT_DELAY_RESP) + x->rx_msg_type_delay_resp++; + else if (p->des4.erx.msg_type == RDES_EXT_DELAY_REQ) + x->rx_msg_type_pdelay_req++; + else if (p->des4.erx.msg_type == RDES_EXT_PDELAY_RESP) + x->rx_msg_type_pdelay_resp++; + else if (p->des4.erx.msg_type == RDES_EXT_PDELAY_FOLLOW_UP) + x->rx_msg_type_pdelay_follow_up++; + else + x->rx_msg_type_ext_no_ptp++; + if (p->des4.erx.ptp_frame_type) + x->ptp_frame_type++; + if (p->des4.erx.ptp_ver) + x->ptp_ver++; + if (p->des4.erx.timestamp_dropped) + x->timestamp_dropped++; + if (p->des4.erx.av_pkt_rcvd) + x->av_pkt_rcvd++; + if (p->des4.erx.av_tagged_pkt_rcvd) + x->av_tagged_pkt_rcvd++; + if (p->des4.erx.vlan_tag_priority_val) + x->vlan_tag_priority_val++; + if (p->des4.erx.l3_filter_match) + x->l3_filter_match++; + if (p->des4.erx.l4_filter_match) + 
x->l4_filter_match++; + if (p->des4.erx.l3_l4_filter_no_match) + x->l3_l4_filter_no_match++; + } +} + static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, struct dma_desc *p) { @@ -198,7 +249,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, * At any rate, we need to understand if the CSUM hw computation is ok * and report this info to the upper layers. */ ret = enh_desc_coe_rdes0(p->des01.erx.ipc_csum_error, - p->des01.erx.frame_type, p->des01.erx.payload_csum_error); + p->des01.erx.frame_type, p->des01.erx.rx_mac_addr); if (unlikely(p->des01.erx.dribbling)) { CHIP_DBG(KERN_ERR "GMAC RX: dribbling error\n"); @@ -225,34 +276,32 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, x->rx_vlan++; } #endif + return ret; } -static void enh_desc_init_rx_desc(struct dma_desc *p, unsigned int ring_size, - int disable_rx_ic) +static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, + int mode, int end) { - int i; - for (i = 0; i < ring_size; i++) { - p->des01.erx.own = 1; - p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1; + p->des01.erx.own = 1; + p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1; - ehn_desc_rx_set_on_ring_chain(p, (i == ring_size - 1)); + if (mode == STMMAC_CHAIN_MODE) + ehn_desc_rx_set_on_chain(p, end); + else + ehn_desc_rx_set_on_ring(p, end); - if (disable_rx_ic) - p->des01.erx.disable_ic = 1; - p++; - } + if (disable_rx_ic) + p->des01.erx.disable_ic = 1; } -static void enh_desc_init_tx_desc(struct dma_desc *p, unsigned int ring_size) +static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end) { - int i; - - for (i = 0; i < ring_size; i++) { - p->des01.etx.own = 0; - ehn_desc_tx_set_on_ring_chain(p, (i == ring_size - 1)); - p++; - } + p->des01.etx.own = 0; + if (mode == STMMAC_CHAIN_MODE) + ehn_desc_tx_set_on_chain(p, end); + else + ehn_desc_tx_set_on_ring(p, end); } static int enh_desc_get_tx_owner(struct dma_desc *p) @@ -280,20 +329,26 @@ static int 
enh_desc_get_tx_ls(struct dma_desc *p) return p->des01.etx.last_segment; } -static void enh_desc_release_tx_desc(struct dma_desc *p) +static void enh_desc_release_tx_desc(struct dma_desc *p, int mode) { int ter = p->des01.etx.end_ring; memset(p, 0, offsetof(struct dma_desc, des2)); - enh_desc_end_tx_desc(p, ter); + if (mode == STMMAC_CHAIN_MODE) + enh_desc_end_tx_desc_on_chain(p, ter); + else + enh_desc_end_tx_desc_on_ring(p, ter); } static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, - int csum_flag) + int csum_flag, int mode) { p->des01.etx.first_segment = is_fs; - enh_set_tx_desc_len(p, len); + if (mode == STMMAC_CHAIN_MODE) + enh_set_tx_desc_len_on_chain(p, len); + else + enh_set_tx_desc_len_on_ring(p, len); if (likely(csum_flag)) p->des01.etx.checksum_insertion = cic_full; @@ -323,6 +378,49 @@ static int enh_desc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type) return p->des01.erx.frame_length; } +static void enh_desc_enable_tx_timestamp(struct dma_desc *p) +{ + p->des01.etx.time_stamp_enable = 1; +} + +static int enh_desc_get_tx_timestamp_status(struct dma_desc *p) +{ + return p->des01.etx.time_stamp_status; +} + +static u64 enh_desc_get_timestamp(void *desc, u32 ats) +{ + u64 ns; + + if (ats) { + struct dma_extended_desc *p = (struct dma_extended_desc *)desc; + ns = p->des6; + /* convert high/sec time stamp value to nanosecond */ + ns += p->des7 * 1000000000ULL; + } else { + struct dma_desc *p = (struct dma_desc *)desc; + ns = p->des2; + ns += p->des3 * 1000000000ULL; + } + + return ns; +} + +static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats) +{ + if (ats) { + struct dma_extended_desc *p = (struct dma_extended_desc *)desc; + return p->basic.des01.erx.ipc_csum_error; + } else { + struct dma_desc *p = (struct dma_desc *)desc; + if ((p->des2 == 0xffffffff) && (p->des3 == 0xffffffff)) + /* timestamp is corrupted, hence don't store it */ + return 0; + else + return 1; + } +} + const struct stmmac_desc_ops enh_desc_ops = 
{ .tx_status = enh_desc_get_tx_status, .rx_status = enh_desc_get_rx_status, @@ -339,4 +437,9 @@ const struct stmmac_desc_ops enh_desc_ops = { .set_tx_owner = enh_desc_set_tx_owner, .set_rx_owner = enh_desc_set_rx_owner, .get_rx_frame_len = enh_desc_get_rx_frame_len, + .rx_extended_status = enh_desc_get_ext_status, + .enable_tx_timestamp = enh_desc_enable_tx_timestamp, + .get_tx_timestamp_status = enh_desc_get_tx_timestamp_status, + .get_timestamp = enh_desc_get_timestamp, + .get_rx_timestamp_status = enh_desc_get_rx_timestamp_status, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h index 67995ef25251..48ec001566b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc.h +++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h @@ -28,8 +28,7 @@ /* MMC control register */ /* When set, all counter are reset */ #define MMC_CNTRL_COUNTER_RESET 0x1 -/* When set, do not roll over zero - * after reaching the max value*/ +/* When set, do not roll over zero after reaching the max value*/ #define MMC_CNTRL_COUNTER_STOP_ROLLOVER 0x2 #define MMC_CNTRL_RESET_ON_READ 0x4 /* Reset after reading */ #define MMC_CNTRL_COUNTER_FREEZER 0x8 /* Freeze counter values to the diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 0c74a702d461..50617c5a0bdb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -149,6 +149,7 @@ void dwmac_mmc_intr_all_mask(void __iomem *ioaddr) { writel(MMC_DEFAULT_MASK, ioaddr + MMC_RX_INTR_MASK); writel(MMC_DEFAULT_MASK, ioaddr + MMC_TX_INTR_MASK); + writel(MMC_DEFAULT_MASK, ioaddr + MMC_RX_IPC_INTR_MASK); } /* This reads the MAC core counters (if actaully supported). 
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 68962c549a2d..11775b99afc5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -79,8 +79,8 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, struct net_device_stats *stats = (struct net_device_stats *)data; if (unlikely(p->des01.rx.last_descriptor == 0)) { - pr_warning("ndesc Error: Oversized Ethernet " - "frame spanned multiple buffers\n"); + pr_warn("%s: Oversized frame spanned multiple buffers\n", + __func__); stats->rx_length_errors++; return discard_frame; } @@ -122,30 +122,28 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, return ret; } -static void ndesc_init_rx_desc(struct dma_desc *p, unsigned int ring_size, - int disable_rx_ic) +static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, + int end) { - int i; - for (i = 0; i < ring_size; i++) { - p->des01.rx.own = 1; - p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1; + p->des01.rx.own = 1; + p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1; - ndesc_rx_set_on_ring_chain(p, (i == ring_size - 1)); + if (mode == STMMAC_CHAIN_MODE) + ndesc_rx_set_on_chain(p, end); + else + ndesc_rx_set_on_ring(p, end); - if (disable_rx_ic) - p->des01.rx.disable_ic = 1; - p++; - } + if (disable_rx_ic) + p->des01.rx.disable_ic = 1; } -static void ndesc_init_tx_desc(struct dma_desc *p, unsigned int ring_size) +static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end) { - int i; - for (i = 0; i < ring_size; i++) { - p->des01.tx.own = 0; - ndesc_tx_set_on_ring_chain(p, (i == (ring_size - 1))); - p++; - } + p->des01.tx.own = 0; + if (mode == STMMAC_CHAIN_MODE) + ndesc_tx_set_on_chain(p, end); + else + ndesc_tx_set_on_ring(p, end); } static int ndesc_get_tx_owner(struct dma_desc *p) @@ -173,19 +171,25 @@ static int ndesc_get_tx_ls(struct dma_desc *p) return 
p->des01.tx.last_segment; } -static void ndesc_release_tx_desc(struct dma_desc *p) +static void ndesc_release_tx_desc(struct dma_desc *p, int mode) { int ter = p->des01.tx.end_ring; memset(p, 0, offsetof(struct dma_desc, des2)); - ndesc_end_tx_desc(p, ter); + if (mode == STMMAC_CHAIN_MODE) + ndesc_end_tx_desc_on_chain(p, ter); + else + ndesc_end_tx_desc_on_ring(p, ter); } static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, - int csum_flag) + int csum_flag, int mode) { p->des01.tx.first_segment = is_fs; - norm_set_tx_desc_len(p, len); + if (mode == STMMAC_CHAIN_MODE) + norm_set_tx_desc_len_on_chain(p, len); + else + norm_set_tx_desc_len_on_ring(p, len); if (likely(csum_flag)) p->des01.tx.checksum_insertion = cic_full; @@ -215,6 +219,39 @@ static int ndesc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type) return p->des01.rx.frame_length; } +static void ndesc_enable_tx_timestamp(struct dma_desc *p) +{ + p->des01.tx.time_stamp_enable = 1; +} + +static int ndesc_get_tx_timestamp_status(struct dma_desc *p) +{ + return p->des01.tx.time_stamp_status; +} + +static u64 ndesc_get_timestamp(void *desc, u32 ats) +{ + struct dma_desc *p = (struct dma_desc *)desc; + u64 ns; + + ns = p->des2; + /* convert high/sec time stamp value to nanosecond */ + ns += p->des3 * 1000000000ULL; + + return ns; +} + +static int ndesc_get_rx_timestamp_status(void *desc, u32 ats) +{ + struct dma_desc *p = (struct dma_desc *)desc; + + if ((p->des2 == 0xffffffff) && (p->des3 == 0xffffffff)) + /* timestamp is corrupted, hence don't store it */ + return 0; + else + return 1; +} + const struct stmmac_desc_ops ndesc_ops = { .tx_status = ndesc_get_tx_status, .rx_status = ndesc_get_rx_status, @@ -231,4 +268,8 @@ const struct stmmac_desc_ops ndesc_ops = { .set_tx_owner = ndesc_set_tx_owner, .set_rx_owner = ndesc_set_rx_owner, .get_rx_frame_len = ndesc_get_rx_frame_len, + .enable_tx_timestamp = ndesc_enable_tx_timestamp, + .get_tx_timestamp_status = ndesc_get_tx_timestamp_status, + 
.get_timestamp = ndesc_get_timestamp, + .get_rx_timestamp_status = ndesc_get_rx_timestamp_status, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index 4b785e10f2ed..c9d942a5c335 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -30,7 +30,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) { - struct stmmac_priv *priv = (struct stmmac_priv *) p; + struct stmmac_priv *priv = (struct stmmac_priv *)p; unsigned int txsize = priv->dma_tx_size; unsigned int entry = priv->cur_tx % txsize; struct dma_desc *desc = priv->dma_tx + entry; @@ -48,25 +48,30 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des2 = dma_map_single(priv->device, skb->data, bmax, DMA_TO_DEVICE); + priv->tx_skbuff_dma[entry] = desc->des2; desc->des3 = desc->des2 + BUF_SIZE_4KiB; - priv->hw->desc->prepare_tx_desc(desc, 1, bmax, - csum); + priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, + STMMAC_RING_MODE); wmb(); entry = (++priv->cur_tx) % txsize; desc = priv->dma_tx + entry; desc->des2 = dma_map_single(priv->device, skb->data + bmax, len, DMA_TO_DEVICE); + priv->tx_skbuff_dma[entry] = desc->des2; desc->des3 = desc->des2 + BUF_SIZE_4KiB; - priv->hw->desc->prepare_tx_desc(desc, 0, len, csum); + priv->hw->desc->prepare_tx_desc(desc, 0, len, csum, + STMMAC_RING_MODE); wmb(); priv->hw->desc->set_tx_owner(desc); priv->tx_skbuff[entry] = NULL; } else { desc->des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); + priv->tx_skbuff_dma[entry] = desc->des2; desc->des3 = desc->des2 + BUF_SIZE_4KiB; - priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum); + priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum, + STMMAC_RING_MODE); } return entry; @@ -82,27 +87,23 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc) return ret; } -static void stmmac_refill_desc3(int 
bfsize, struct dma_desc *p) +static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) { - /* Fill DES3 in case of RING mode */ - if (bfsize >= BUF_SIZE_8KiB) - p->des3 = p->des2 + BUF_SIZE_8KiB; -} + struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; -/* In ring mode we need to fill the desc3 because it is used - * as buffer */ -static void stmmac_init_desc3(int des3_as_data_buf, struct dma_desc *p) -{ - if (unlikely(des3_as_data_buf)) - p->des3 = p->des2 + BUF_SIZE_8KiB; + if (unlikely(priv->plat->has_gmac)) + /* Fill DES3 in case of RING mode */ + if (priv->dma_buf_sz >= BUF_SIZE_8KiB) + p->des3 = p->des2 + BUF_SIZE_8KiB; } -static void stmmac_init_dma_chain(struct dma_desc *des, dma_addr_t phy_addr, - unsigned int size) +/* In ring mode we need to fill the desc3 because it is used as buffer */ +static void stmmac_init_desc3(struct dma_desc *p) { + p->des3 = p->des2 + BUF_SIZE_8KiB; } -static void stmmac_clean_desc3(struct dma_desc *p) +static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) { if (unlikely(p->des3)) p->des3 = 0; @@ -121,7 +122,6 @@ const struct stmmac_ring_mode_ops ring_mode_ops = { .jumbo_frm = stmmac_jumbo_frm, .refill_desc3 = stmmac_refill_desc3, .init_desc3 = stmmac_init_desc3, - .init_dma_chain = stmmac_init_dma_chain, .clean_desc3 = stmmac_clean_desc3, .set_16kib_bfsize = stmmac_set_16kib_bfsize, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index b05df8983be5..c922fde929a1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -24,43 +24,56 @@ #define __STMMAC_H__ #define STMMAC_RESOURCE_NAME "stmmaceth" -#define DRV_MODULE_VERSION "Nov_2012" +#define DRV_MODULE_VERSION "March_2013" #include <linux/clk.h> #include <linux/stmmac.h> #include <linux/phy.h> #include <linux/pci.h> #include "common.h" +#include <linux/ptp_clock_kernel.h> struct stmmac_priv { /* Frequently used values are kept 
adjacent for cache effect */ - struct dma_desc *dma_tx ____cacheline_aligned; - dma_addr_t dma_tx_phy; + struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp; + struct dma_desc *dma_tx; struct sk_buff **tx_skbuff; unsigned int cur_tx; unsigned int dirty_tx; unsigned int dma_tx_size; + u32 tx_count_frames; + u32 tx_coal_frames; + u32 tx_coal_timer; + dma_addr_t *tx_skbuff_dma; + dma_addr_t dma_tx_phy; int tx_coalesce; + int hwts_tx_en; + spinlock_t tx_lock; + bool tx_path_in_lpi_mode; + struct timer_list txtimer; - struct dma_desc *dma_rx ; + struct dma_desc *dma_rx ____cacheline_aligned_in_smp; + struct dma_extended_desc *dma_erx; + struct sk_buff **rx_skbuff; unsigned int cur_rx; unsigned int dirty_rx; - struct sk_buff **rx_skbuff; + unsigned int dma_rx_size; + unsigned int dma_buf_sz; + u32 rx_riwt; + int hwts_rx_en; dma_addr_t *rx_skbuff_dma; + dma_addr_t dma_rx_phy; + struct napi_struct napi ____cacheline_aligned_in_smp; + + void __iomem *ioaddr; struct net_device *dev; - dma_addr_t dma_rx_phy; - unsigned int dma_rx_size; - unsigned int dma_buf_sz; struct device *device; struct mac_device_info *hw; - void __iomem *ioaddr; - - struct stmmac_extra_stats xstats; - struct napi_struct napi; int no_csum_insertion; + spinlock_t lock; - struct phy_device *phydev; + struct phy_device *phydev ____cacheline_aligned_in_smp; int oldlink; int speed; int oldduplex; @@ -69,30 +82,31 @@ struct stmmac_priv { struct mii_bus *mii; int mii_irq[PHY_MAX_ADDR]; - u32 msg_enable; - spinlock_t lock; - spinlock_t tx_lock; - int wolopts; - int wol_irq; + struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp; struct plat_stmmacenet_data *plat; - struct stmmac_counters mmc; struct dma_features dma_cap; + struct stmmac_counters mmc; int hw_cap_support; + int synopsys_id; + u32 msg_enable; + int wolopts; + int wol_irq; struct clk *stmmac_clk; int clk_csr; - int synopsys_id; struct timer_list eee_ctrl_timer; - bool tx_path_in_lpi_mode; int lpi_irq; int eee_enabled; int 
eee_active; int tx_lpi_timer; - struct timer_list txtimer; - u32 tx_count_frames; - u32 tx_coal_frames; - u32 tx_coal_timer; + int pcs; + unsigned int mode; + int extend_desc; + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_ops; + unsigned int default_addend; + u32 adv_ts; int use_riwt; - u32 rx_riwt; + spinlock_t ptp_lock; }; extern int phyaddr; @@ -102,6 +116,9 @@ extern int stmmac_mdio_register(struct net_device *ndev); extern void stmmac_set_ethtool_ops(struct net_device *netdev); extern const struct stmmac_desc_ops enh_desc_ops; extern const struct stmmac_desc_ops ndesc_ops; +extern const struct stmmac_hwtimestamp stmmac_ptp; +extern int stmmac_ptp_register(struct stmmac_priv *priv); +extern void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_freeze(struct net_device *ndev); int stmmac_restore(struct net_device *ndev); int stmmac_resume(struct net_device *ndev); @@ -125,6 +142,7 @@ static inline int stmmac_register_platform(void) return err; } + static inline void stmmac_unregister_platform(void) { platform_driver_unregister(&stmmac_pltfr_driver); @@ -136,6 +154,7 @@ static inline int stmmac_register_platform(void) return 0; } + static inline void stmmac_unregister_platform(void) { } @@ -153,6 +172,7 @@ static inline int stmmac_register_pci(void) return err; } + static inline void stmmac_unregister_pci(void) { pci_unregister_driver(&stmmac_pci_driver); @@ -164,6 +184,7 @@ static inline int stmmac_register_pci(void) return 0; } + static inline void stmmac_unregister_pci(void) { } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d1ac39c1b05d..c5f9cb85c8ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -27,6 +27,7 @@ #include <linux/interrupt.h> #include <linux/mii.h> #include <linux/phy.h> +#include <linux/net_tstamp.h> #include <asm/io.h> #include "stmmac.h" @@ -108,6 +109,33 
@@ static const struct stmmac_stats stmmac_gstrings_stats[] = { STMMAC_STAT(irq_rx_path_in_lpi_mode_n), STMMAC_STAT(irq_rx_path_exit_lpi_mode_n), STMMAC_STAT(phy_eee_wakeup_error_n), + /* Extended RDES status */ + STMMAC_STAT(ip_hdr_err), + STMMAC_STAT(ip_payload_err), + STMMAC_STAT(ip_csum_bypassed), + STMMAC_STAT(ipv4_pkt_rcvd), + STMMAC_STAT(ipv6_pkt_rcvd), + STMMAC_STAT(rx_msg_type_ext_no_ptp), + STMMAC_STAT(rx_msg_type_sync), + STMMAC_STAT(rx_msg_type_follow_up), + STMMAC_STAT(rx_msg_type_delay_req), + STMMAC_STAT(rx_msg_type_delay_resp), + STMMAC_STAT(rx_msg_type_pdelay_req), + STMMAC_STAT(rx_msg_type_pdelay_resp), + STMMAC_STAT(rx_msg_type_pdelay_follow_up), + STMMAC_STAT(ptp_frame_type), + STMMAC_STAT(ptp_ver), + STMMAC_STAT(timestamp_dropped), + STMMAC_STAT(av_pkt_rcvd), + STMMAC_STAT(av_tagged_pkt_rcvd), + STMMAC_STAT(vlan_tag_priority_val), + STMMAC_STAT(l3_filter_match), + STMMAC_STAT(l4_filter_match), + STMMAC_STAT(l3_l4_filter_no_match), + /* PCS */ + STMMAC_STAT(irq_pcs_ane_n), + STMMAC_STAT(irq_pcs_link_n), + STMMAC_STAT(irq_rgmii_n), }; #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats) @@ -219,6 +247,70 @@ static int stmmac_ethtool_getsettings(struct net_device *dev, struct stmmac_priv *priv = netdev_priv(dev); struct phy_device *phy = priv->phydev; int rc; + + if ((priv->pcs & STMMAC_PCS_RGMII) || (priv->pcs & STMMAC_PCS_SGMII)) { + struct rgmii_adv adv; + + if (!priv->xstats.pcs_link) { + ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); + cmd->duplex = DUPLEX_UNKNOWN; + return 0; + } + cmd->duplex = priv->xstats.pcs_duplex; + + ethtool_cmd_speed_set(cmd, priv->xstats.pcs_speed); + + /* Get and convert ADV/LP_ADV from the HW AN registers */ + if (priv->hw->mac->get_adv) + priv->hw->mac->get_adv(priv->ioaddr, &adv); + else + return -EOPNOTSUPP; /* should never happen indeed */ + + /* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */ + + if (adv.pause & STMMAC_PCS_PAUSE) + cmd->advertising |= ADVERTISED_Pause; + if (adv.pause & 
STMMAC_PCS_ASYM_PAUSE) + cmd->advertising |= ADVERTISED_Asym_Pause; + if (adv.lp_pause & STMMAC_PCS_PAUSE) + cmd->lp_advertising |= ADVERTISED_Pause; + if (adv.lp_pause & STMMAC_PCS_ASYM_PAUSE) + cmd->lp_advertising |= ADVERTISED_Asym_Pause; + + /* Reg49[3] always set because ANE is always supported */ + cmd->autoneg = ADVERTISED_Autoneg; + cmd->supported |= SUPPORTED_Autoneg; + cmd->advertising |= ADVERTISED_Autoneg; + cmd->lp_advertising |= ADVERTISED_Autoneg; + + if (adv.duplex) { + cmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Full); + cmd->advertising |= (ADVERTISED_1000baseT_Full | + ADVERTISED_100baseT_Full | + ADVERTISED_10baseT_Full); + } else { + cmd->supported |= (SUPPORTED_1000baseT_Half | + SUPPORTED_100baseT_Half | + SUPPORTED_10baseT_Half); + cmd->advertising |= (ADVERTISED_1000baseT_Half | + ADVERTISED_100baseT_Half | + ADVERTISED_10baseT_Half); + } + if (adv.lp_duplex) + cmd->lp_advertising |= (ADVERTISED_1000baseT_Full | + ADVERTISED_100baseT_Full | + ADVERTISED_10baseT_Full); + else + cmd->lp_advertising |= (ADVERTISED_1000baseT_Half | + ADVERTISED_100baseT_Half | + ADVERTISED_10baseT_Half); + cmd->port = PORT_OTHER; + + return 0; + } + if (phy == NULL) { pr_err("%s: %s: PHY is not registered\n", __func__, dev->name); @@ -243,6 +335,30 @@ static int stmmac_ethtool_setsettings(struct net_device *dev, struct phy_device *phy = priv->phydev; int rc; + if ((priv->pcs & STMMAC_PCS_RGMII) || (priv->pcs & STMMAC_PCS_SGMII)) { + u32 mask = ADVERTISED_Autoneg | ADVERTISED_Pause; + + /* Only support ANE */ + if (cmd->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_ENABLE) { + mask &= (ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full); + + spin_lock(&priv->lock); + if (priv->hw->mac->ctrl_ane) + priv->hw->mac->ctrl_ane(priv->ioaddr, 1); + spin_unlock(&priv->lock); 
+ } + + return 0; + } + spin_lock(&priv->lock); rc = phy_ethtool_sset(phy, cmd); spin_unlock(&priv->lock); @@ -312,6 +428,9 @@ stmmac_get_pauseparam(struct net_device *netdev, { struct stmmac_priv *priv = netdev_priv(netdev); + if (priv->pcs) /* FIXME */ + return; + spin_lock(&priv->lock); pause->rx_pause = 0; @@ -335,6 +454,9 @@ stmmac_set_pauseparam(struct net_device *netdev, int new_pause = FLOW_OFF; int ret = 0; + if (priv->pcs) /* FIXME */ + return -EOPNOTSUPP; + spin_lock(&priv->lock); if (pause->rx_pause) @@ -604,6 +726,38 @@ static int stmmac_set_coalesce(struct net_device *dev, return 0; } +static int stmmac_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + if ((priv->hwts_tx_en) && (priv->hwts_rx_en)) { + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + if (priv->ptp_clock) + info->phc_index = ptp_clock_index(priv->ptp_clock); + + info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); + + info->rx_filters = ((1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_ALL)); + return 0; + } else + return ethtool_op_get_ts_info(dev, info); +} + static const struct ethtool_ops stmmac_ethtool_ops = { .begin = stmmac_check_if_running, .get_drvinfo = stmmac_ethtool_getdrvinfo, @@ -623,7 +777,7 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .get_eee = stmmac_ethtool_op_get_eee, .set_eee = stmmac_ethtool_op_set_eee, .get_sset_count = stmmac_get_sset_count, - .get_ts_info = ethtool_op_get_ts_info, + .get_ts_info = 
stmmac_get_ts_info, .get_coalesce = stmmac_get_coalesce, .set_coalesce = stmmac_set_coalesce, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c new file mode 100644 index 000000000000..def7e75e1d57 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -0,0 +1,148 @@ +/******************************************************************************* + Copyright (C) 2013 Vayavya Labs Pvt Ltd + + This implements all the API for managing HW timestamp & PTP. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Rayagond Kokatanur <rayagond@vayavyalabs.com> + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#include <linux/io.h> +#include <linux/delay.h> +#include "common.h" +#include "stmmac_ptp.h" + +static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data) +{ + writel(data, ioaddr + PTP_TCR); +} + +static void stmmac_config_sub_second_increment(void __iomem *ioaddr) +{ + u32 value = readl(ioaddr + PTP_TCR); + unsigned long data; + + /* Convert the ptp_clock to nano second + * formula = (1/ptp_clock) * 1000000000 + * where, ptp_clock = 50MHz. 
+ */ + data = (1000000000ULL / 50000000); + + /* 0.465ns accuracy */ + if (value & PTP_TCR_TSCTRLSSR) + data = (data * 100) / 465; + + writel(data, ioaddr + PTP_SSIR); +} + +static int stmmac_init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) +{ + int limit; + u32 value; + + writel(sec, ioaddr + PTP_STSUR); + writel(nsec, ioaddr + PTP_STNSUR); + /* issue command to initialize the system time value */ + value = readl(ioaddr + PTP_TCR); + value |= PTP_TCR_TSINIT; + writel(value, ioaddr + PTP_TCR); + + /* wait for present system time initialize to complete */ + limit = 10; + while (limit--) { + if (!(readl(ioaddr + PTP_TCR) & PTP_TCR_TSINIT)) + break; + mdelay(10); + } + if (limit < 0) + return -EBUSY; + + return 0; +} + +static int stmmac_config_addend(void __iomem *ioaddr, u32 addend) +{ + u32 value; + int limit; + + writel(addend, ioaddr + PTP_TAR); + /* issue command to update the addend value */ + value = readl(ioaddr + PTP_TCR); + value |= PTP_TCR_TSADDREG; + writel(value, ioaddr + PTP_TCR); + + /* wait for present addend update to complete */ + limit = 10; + while (limit--) { + if (!(readl(ioaddr + PTP_TCR) & PTP_TCR_TSADDREG)) + break; + mdelay(10); + } + if (limit < 0) + return -EBUSY; + + return 0; +} + +static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, + int add_sub) +{ + u32 value; + int limit; + + writel(sec, ioaddr + PTP_STSUR); + writel(((add_sub << PTP_STNSUR_ADDSUB_SHIFT) | nsec), + ioaddr + PTP_STNSUR); + /* issue command to initialize the system time value */ + value = readl(ioaddr + PTP_TCR); + value |= PTP_TCR_TSUPDT; + writel(value, ioaddr + PTP_TCR); + + /* wait for present system time adjust/update to complete */ + limit = 10; + while (limit--) { + if (!(readl(ioaddr + PTP_TCR) & PTP_TCR_TSUPDT)) + break; + mdelay(10); + } + if (limit < 0) + return -EBUSY; + + return 0; +} + +static u64 stmmac_get_systime(void __iomem *ioaddr) +{ + u64 ns; + + ns = readl(ioaddr + PTP_STNSR); + /* convert sec time value to nanosecond 
*/ + ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; + + return ns; +} + +const struct stmmac_hwtimestamp stmmac_ptp = { + .config_hw_tstamping = stmmac_config_hw_tstamping, + .init_systime = stmmac_init_systime, + .config_sub_second_increment = stmmac_config_sub_second_increment, + .config_addend = stmmac_config_addend, + .adjust_systime = stmmac_adjust_systime, + .get_systime = stmmac_get_systime, +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 39c6c5524633..618446ae1ec1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -46,7 +46,9 @@ #ifdef CONFIG_STMMAC_DEBUG_FS #include <linux/debugfs.h> #include <linux/seq_file.h> -#endif +#endif /* CONFIG_STMMAC_DEBUG_FS */ +#include <linux/net_tstamp.h> +#include "stmmac_ptp.h" #include "stmmac.h" #undef STMMAC_DEBUG @@ -79,14 +81,14 @@ #define JUMBO_LEN 9000 /* Module parameters */ -#define TX_TIMEO 5000 /* default 5 seconds */ +#define TX_TIMEO 5000 static int watchdog = TX_TIMEO; module_param(watchdog, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(watchdog, "Transmit timeout in milliseconds"); +MODULE_PARM_DESC(watchdog, "Transmit timeout in milliseconds (default 5s)"); -static int debug = -1; /* -1: default, 0: no output, 16: all */ +static int debug = -1; module_param(debug, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(debug, "Message Level (0: no output, 16: all)"); +MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)"); int phyaddr = -1; module_param(phyaddr, int, S_IRUGO); @@ -130,6 +132,13 @@ module_param(eee_timer, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec"); #define STMMAC_LPI_TIMER(x) (jiffies + msecs_to_jiffies(x)) +/* By default the driver will use the ring mode to manage tx and rx descriptors + * but passing this value so user can force to use the chain instead of the ring + */ +static unsigned int 
chain_mode; +module_param(chain_mode, int, S_IRUGO); +MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode"); + static irqreturn_t stmmac_interrupt(int irq, void *dev_id); #ifdef CONFIG_STMMAC_DEBUG_FS @@ -164,6 +173,18 @@ static void stmmac_verify_args(void) eee_timer = STMMAC_DEFAULT_LPI_TIMER; } +/** + * stmmac_clk_csr_set - dynamically set the MDC clock + * @priv: driver private structure + * Description: this is to dynamically set the MDC clock according to the csr + * clock input. + * Note: + * If a specific clk_csr value is passed from the platform + * this means that the CSR Clock Range selection cannot be + * changed at run-time and it is fixed (as reported in the driver + * documentation). Viceversa the driver will try to set the MDC + * clock dynamically according to the actual clock input. + */ static void stmmac_clk_csr_set(struct stmmac_priv *priv) { u32 clk_rate; @@ -171,7 +192,12 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) clk_rate = clk_get_rate(priv->stmmac_clk); /* Platform provided default clk_csr would be assumed valid - * for all other cases except for the below mentioned ones. */ + * for all other cases except for the below mentioned ones. + * For values higher than the IEEE 802.3 specified frequency + * we can not estimate the proper divider as it is not known + * the frequency of clk_csr_i. So we do not change the default + * divider. + */ if (!(priv->clk_csr & MAC_CSR_H_FRQ_MASK)) { if (clk_rate < CSR_F_35M) priv->clk_csr = STMMAC_CSR_20_35M; @@ -185,10 +211,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_150_250M; else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; - } /* For values higher than the IEEE 802.3 specified frequency - * we can not estimate the proper divider as it is not known - * the frequency of clk_csr_i. So we do not change the default - * divider. 
*/ + } } #if defined(STMMAC_XMIT_DEBUG) || defined(STMMAC_RX_DEBUG) @@ -213,18 +236,25 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv) return priv->dirty_tx + priv->dma_tx_size - priv->cur_tx - 1; } -/* On some ST platforms, some HW system configuraton registers have to be - * set according to the link speed negotiated. +/** + * stmmac_hw_fix_mac_speed: callback for speed selection + * @priv: driver private structure + * Description: on some platforms (e.g. ST), some HW system configuraton + * registers have to be set according to the link speed negotiated. */ static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv) { struct phy_device *phydev = priv->phydev; if (likely(priv->plat->fix_mac_speed)) - priv->plat->fix_mac_speed(priv->plat->bsp_priv, - phydev->speed); + priv->plat->fix_mac_speed(priv->plat->bsp_priv, phydev->speed); } +/** + * stmmac_enable_eee_mode: Check and enter in LPI mode + * @priv: driver private structure + * Description: this function is to verify and enter in LPI mode for EEE. + */ static void stmmac_enable_eee_mode(struct stmmac_priv *priv) { /* Check and enter in LPI mode */ @@ -233,19 +263,24 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv) priv->hw->mac->set_eee_mode(priv->ioaddr); } +/** + * stmmac_disable_eee_mode: disable/exit from EEE + * @priv: driver private structure + * Description: this function is to exit and disable EEE in case of + * LPI state is true. This is called by the xmit. + */ void stmmac_disable_eee_mode(struct stmmac_priv *priv) { - /* Exit and disable EEE in case of we are are in LPI state. */ priv->hw->mac->reset_eee_mode(priv->ioaddr); del_timer_sync(&priv->eee_ctrl_timer); priv->tx_path_in_lpi_mode = false; } /** - * stmmac_eee_ctrl_timer + * stmmac_eee_ctrl_timer: EEE TX SW timer. 
* @arg : data hook * Description: - * If there is no data transfer and if we are not in LPI state, + * if there is no data transfer and if we are not in LPI state, * then MAC Transmitter can be moved to LPI state. */ static void stmmac_eee_ctrl_timer(unsigned long arg) @@ -257,8 +292,8 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) } /** - * stmmac_eee_init - * @priv: private device pointer + * stmmac_eee_init: init EEE + * @priv: driver private structure * Description: * If the EEE support has been enabled while configuring the driver, * if the GMAC actually supports the EEE (from the HW cap reg) and the @@ -294,16 +329,359 @@ out: return ret; } +/** + * stmmac_eee_adjust: adjust HW EEE according to the speed + * @priv: driver private structure + * Description: + * When the EEE has been already initialised we have to + * modify the PLS bit in the LPI ctrl & status reg according + * to the PHY link status. For this reason. + */ static void stmmac_eee_adjust(struct stmmac_priv *priv) { - /* When the EEE has been already initialised we have to - * modify the PLS bit in the LPI ctrl & status reg according - * to the PHY link status. For this reason. - */ if (priv->eee_enabled) priv->hw->mac->set_eee_pls(priv->ioaddr, priv->phydev->link); } +/* stmmac_get_tx_hwtstamp: get HW TX timestamps + * @priv: driver private structure + * @entry : descriptor index to be used. + * @skb : the socket buffer + * Description : + * This function will read timestamp from the descriptor & pass it to stack. + * and also perform some sanity checks. 
+ */ +static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, + unsigned int entry, struct sk_buff *skb) +{ + struct skb_shared_hwtstamps shhwtstamp; + u64 ns; + void *desc = NULL; + + if (!priv->hwts_tx_en) + return; + + /* exit if skb doesn't support hw tstamp */ + if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))) + return; + + if (priv->adv_ts) + desc = (priv->dma_etx + entry); + else + desc = (priv->dma_tx + entry); + + /* check tx tstamp status */ + if (!priv->hw->desc->get_tx_timestamp_status((struct dma_desc *)desc)) + return; + + /* get the valid tstamp */ + ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); + + memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); + shhwtstamp.hwtstamp = ns_to_ktime(ns); + /* pass tstamp to stack */ + skb_tstamp_tx(skb, &shhwtstamp); + + return; +} + +/* stmmac_get_rx_hwtstamp: get HW RX timestamps + * @priv: driver private structure + * @entry : descriptor index to be used. + * @skb : the socket buffer + * Description : + * This function will read received packet's timestamp from the descriptor + * and pass it to stack. It also perform some sanity checks. + */ +static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, + unsigned int entry, struct sk_buff *skb) +{ + struct skb_shared_hwtstamps *shhwtstamp = NULL; + u64 ns; + void *desc = NULL; + + if (!priv->hwts_rx_en) + return; + + if (priv->adv_ts) + desc = (priv->dma_erx + entry); + else + desc = (priv->dma_rx + entry); + + /* exit if rx tstamp is not valid */ + if (!priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) + return; + + /* get valid tstamp */ + ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); + shhwtstamp = skb_hwtstamps(skb); + memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); + shhwtstamp->hwtstamp = ns_to_ktime(ns); +} + +/** + * stmmac_hwtstamp_ioctl - control hardware timestamping. + * @dev: device pointer. 
+ * @ifr: An IOCTL specefic structure, that can contain a pointer to + * a proprietary structure used to pass information to the driver. + * Description: + * This function configures the MAC to enable/disable both outgoing(TX) + * and incoming(RX) packets time stamping based on user input. + * Return Value: + * 0 on success and an appropriate -ve integer on failure. + */ +static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct hwtstamp_config config; + struct timespec now; + u64 temp = 0; + u32 ptp_v2 = 0; + u32 tstamp_all = 0; + u32 ptp_over_ipv4_udp = 0; + u32 ptp_over_ipv6_udp = 0; + u32 ptp_over_ethernet = 0; + u32 snap_type_sel = 0; + u32 ts_master_en = 0; + u32 ts_event_en = 0; + u32 value = 0; + + if (!(priv->dma_cap.time_stamp || priv->adv_ts)) { + netdev_alert(priv->dev, "No support for HW time stamping\n"); + priv->hwts_tx_en = 0; + priv->hwts_rx_en = 0; + + return -EOPNOTSUPP; + } + + if (copy_from_user(&config, ifr->ifr_data, + sizeof(struct hwtstamp_config))) + return -EFAULT; + + pr_debug("%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n", + __func__, config.flags, config.tx_type, config.rx_filter); + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + priv->hwts_tx_en = 0; + break; + case HWTSTAMP_TX_ON: + priv->hwts_tx_en = 1; + break; + default: + return -ERANGE; + } + + if (priv->adv_ts) { + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + /* time stamp no incoming packet at all */ + config.rx_filter = HWTSTAMP_FILTER_NONE; + break; + + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + /* PTP v1, UDP, any kind of event packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + /* take time stamp for all event messages */ + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + break; + + case 
HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + /* PTP v1, UDP, Sync packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_SYNC; + /* take time stamp for SYNC messages only */ + ts_event_en = PTP_TCR_TSEVNTENA; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + break; + + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + /* PTP v1, UDP, Delay_req packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ; + /* take time stamp for Delay_Req messages only */ + ts_master_en = PTP_TCR_TSMSTRENA; + ts_event_en = PTP_TCR_TSEVNTENA; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + break; + + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + /* PTP v2, UDP, any kind of event packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + ptp_v2 = PTP_TCR_TSVER2ENA; + /* take time stamp for all event messages */ + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + break; + + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + /* PTP v2, UDP, Sync packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_SYNC; + ptp_v2 = PTP_TCR_TSVER2ENA; + /* take time stamp for SYNC messages only */ + ts_event_en = PTP_TCR_TSEVNTENA; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + break; + + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + /* PTP v2, UDP, Delay_req packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ; + ptp_v2 = PTP_TCR_TSVER2ENA; + /* take time stamp for Delay_Req messages only */ + ts_master_en = PTP_TCR_TSMSTRENA; + ts_event_en = PTP_TCR_TSEVNTENA; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + break; + + case HWTSTAMP_FILTER_PTP_V2_EVENT: + /* PTP v2/802.AS1 any layer, any kind of event packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + ptp_v2 = PTP_TCR_TSVER2ENA; + /* take time stamp for all event messages */ + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + + 
ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + ptp_over_ethernet = PTP_TCR_TSIPENA; + break; + + case HWTSTAMP_FILTER_PTP_V2_SYNC: + /* PTP v2/802.AS1, any layer, Sync packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_SYNC; + ptp_v2 = PTP_TCR_TSVER2ENA; + /* take time stamp for SYNC messages only */ + ts_event_en = PTP_TCR_TSEVNTENA; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + ptp_over_ethernet = PTP_TCR_TSIPENA; + break; + + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + /* PTP v2/802.AS1, any layer, Delay_req packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_DELAY_REQ; + ptp_v2 = PTP_TCR_TSVER2ENA; + /* take time stamp for Delay_Req messages only */ + ts_master_en = PTP_TCR_TSMSTRENA; + ts_event_en = PTP_TCR_TSEVNTENA; + + ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; + ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; + ptp_over_ethernet = PTP_TCR_TSIPENA; + break; + + case HWTSTAMP_FILTER_ALL: + /* time stamp any incoming packet */ + config.rx_filter = HWTSTAMP_FILTER_ALL; + tstamp_all = PTP_TCR_TSENALL; + break; + + default: + return -ERANGE; + } + } else { + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + config.rx_filter = HWTSTAMP_FILTER_NONE; + break; + default: + /* PTP v1, UDP, any kind of event packet */ + config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + break; + } + } + priv->hwts_rx_en = ((config.rx_filter == HWTSTAMP_FILTER_NONE) ? 
0 : 1); + + if (!priv->hwts_tx_en && !priv->hwts_rx_en) + priv->hw->ptp->config_hw_tstamping(priv->ioaddr, 0); + else { + value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR | + tstamp_all | ptp_v2 | ptp_over_ethernet | + ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en | + ts_master_en | snap_type_sel); + + priv->hw->ptp->config_hw_tstamping(priv->ioaddr, value); + + /* program Sub Second Increment reg */ + priv->hw->ptp->config_sub_second_increment(priv->ioaddr); + + /* calculate default added value: + * formula is : + * addend = (2^32)/freq_div_ratio; + * where, freq_div_ratio = STMMAC_SYSCLOCK/50MHz + * hence, addend = ((2^32) * 50MHz)/STMMAC_SYSCLOCK; + * NOTE: STMMAC_SYSCLOCK should be >= 50MHz to + * achive 20ns accuracy. + * + * 2^x * y == (y << x), hence + * 2^32 * 50000000 ==> (50000000 << 32) + */ + temp = (u64) (50000000ULL << 32); + priv->default_addend = div_u64(temp, STMMAC_SYSCLOCK); + priv->hw->ptp->config_addend(priv->ioaddr, + priv->default_addend); + + /* initialize system time */ + getnstimeofday(&now); + priv->hw->ptp->init_systime(priv->ioaddr, now.tv_sec, + now.tv_nsec); + } + + return copy_to_user(ifr->ifr_data, &config, + sizeof(struct hwtstamp_config)) ? -EFAULT : 0; +} + +/** + * stmmac_init_ptp: init PTP + * @priv: driver private structure + * Description: this is to verify if the HW supports the PTPv1 or v2. + * This is done by looking at the HW cap. register. + * Also it registers the ptp driver. 
+ */ +static int stmmac_init_ptp(struct stmmac_priv *priv) +{ + if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) + return -EOPNOTSUPP; + + if (netif_msg_hw(priv)) { + if (priv->dma_cap.time_stamp) { + pr_debug("IEEE 1588-2002 Time Stamp supported\n"); + priv->adv_ts = 0; + } + if (priv->dma_cap.atime_stamp && priv->extend_desc) { + pr_debug + ("IEEE 1588-2008 Advanced Time Stamp supported\n"); + priv->adv_ts = 1; + } + } + + priv->hw->ptp = &stmmac_ptp; + priv->hwts_tx_en = 0; + priv->hwts_rx_en = 0; + + return stmmac_ptp_register(priv); +} + +static void stmmac_release_ptp(struct stmmac_priv *priv) +{ + stmmac_ptp_unregister(priv); +} + /** * stmmac_adjust_link * @dev: net device structure @@ -349,7 +727,7 @@ static void stmmac_adjust_link(struct net_device *dev) case 1000: if (likely(priv->plat->has_gmac)) ctrl &= ~priv->hw->link.port; - stmmac_hw_fix_mac_speed(priv); + stmmac_hw_fix_mac_speed(priv); break; case 100: case 10: @@ -367,8 +745,8 @@ static void stmmac_adjust_link(struct net_device *dev) break; default: if (netif_msg_link(priv)) - pr_warning("%s: Speed (%d) is not 10" - " or 100!\n", dev->name, phydev->speed); + pr_warn("%s: Speed (%d) not 10/100\n", + dev->name, phydev->speed); break; } @@ -399,6 +777,31 @@ static void stmmac_adjust_link(struct net_device *dev) } /** + * stmmac_check_pcs_mode: verify if RGMII/SGMII is supported + * @priv: driver private structure + * Description: this is to verify if the HW supports the PCS. + * Physical Coding Sublayer (PCS) interface that can be used when the MAC is + * configured for the TBI, RTBI, or SGMII PHY interface. 
+ */ +static void stmmac_check_pcs_mode(struct stmmac_priv *priv) +{ + int interface = priv->plat->interface; + + if (priv->dma_cap.pcs) { + if ((interface & PHY_INTERFACE_MODE_RGMII) || + (interface & PHY_INTERFACE_MODE_RGMII_ID) || + (interface & PHY_INTERFACE_MODE_RGMII_RXID) || + (interface & PHY_INTERFACE_MODE_RGMII_TXID)) { + pr_debug("STMMAC: PCS RGMII support enable\n"); + priv->pcs = STMMAC_PCS_RGMII; + } else if (interface & PHY_INTERFACE_MODE_SGMII) { + pr_debug("STMMAC: PCS SGMII support enable\n"); + priv->pcs = STMMAC_PCS_SGMII; + } + } +} + +/** * stmmac_init_phy - PHY initialization * @dev: net device structure * Description: it initializes the driver's PHY state, and attaches the PHY @@ -419,10 +822,10 @@ static int stmmac_init_phy(struct net_device *dev) if (priv->plat->phy_bus_name) snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", - priv->plat->phy_bus_name, priv->plat->bus_id); + priv->plat->phy_bus_name, priv->plat->bus_id); else snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", - priv->plat->bus_id); + priv->plat->bus_id); snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, priv->plat->phy_addr); @@ -461,29 +864,57 @@ static int stmmac_init_phy(struct net_device *dev) } /** - * display_ring - * @p: pointer to the ring. + * stmmac_display_ring: display ring + * @head: pointer to the head of the ring passed. * @size: size of the ring. - * Description: display all the descriptors within the ring. + * @extend_desc: to verify if extended descriptors are used. + * Description: display the control/status and buffer descriptors. 
*/ -static void display_ring(struct dma_desc *p, int size) +static void stmmac_display_ring(void *head, int size, int extend_desc) { - struct tmp_s { - u64 a; - unsigned int b; - unsigned int c; - }; int i; + struct dma_extended_desc *ep = (struct dma_extended_desc *)head; + struct dma_desc *p = (struct dma_desc *)head; + for (i = 0; i < size; i++) { - struct tmp_s *x = (struct tmp_s *)(p + i); - pr_info("\t%d [0x%x]: DES0=0x%x DES1=0x%x BUF1=0x%x BUF2=0x%x", - i, (unsigned int)virt_to_phys(&p[i]), - (unsigned int)(x->a), (unsigned int)((x->a) >> 32), - x->b, x->c); + u64 x; + if (extend_desc) { + x = *(u64 *) ep; + pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", + i, (unsigned int)virt_to_phys(ep), + (unsigned int)x, (unsigned int)(x >> 32), + ep->basic.des2, ep->basic.des3); + ep++; + } else { + x = *(u64 *) p; + pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x", + i, (unsigned int)virt_to_phys(p), + (unsigned int)x, (unsigned int)(x >> 32), + p->des2, p->des3); + p++; + } pr_info("\n"); } } +static void stmmac_display_rings(struct stmmac_priv *priv) +{ + unsigned int txsize = priv->dma_tx_size; + unsigned int rxsize = priv->dma_rx_size; + + if (priv->extend_desc) { + pr_info("Extended RX descriptor ring:\n"); + stmmac_display_ring((void *)priv->dma_erx, rxsize, 1); + pr_info("Extended TX descriptor ring:\n"); + stmmac_display_ring((void *)priv->dma_etx, txsize, 1); + } else { + pr_info("RX descriptor ring:\n"); + stmmac_display_ring((void *)priv->dma_rx, rxsize, 0); + pr_info("TX descriptor ring:\n"); + stmmac_display_ring((void *)priv->dma_tx, txsize, 0); + } +} + static int stmmac_set_bfsize(int mtu, int bufsize) { int ret = bufsize; @@ -501,6 +932,65 @@ static int stmmac_set_bfsize(int mtu, int bufsize) } /** + * stmmac_clear_descriptors: clear descriptors + * @priv: driver private structure + * Description: this function is called to clear the tx and rx descriptors + * in case of both basic and extended descriptors are used. 
+ */ +static void stmmac_clear_descriptors(struct stmmac_priv *priv) +{ + int i; + unsigned int txsize = priv->dma_tx_size; + unsigned int rxsize = priv->dma_rx_size; + + /* Clear the Rx/Tx descriptors */ + for (i = 0; i < rxsize; i++) + if (priv->extend_desc) + priv->hw->desc->init_rx_desc(&priv->dma_erx[i].basic, + priv->use_riwt, priv->mode, + (i == rxsize - 1)); + else + priv->hw->desc->init_rx_desc(&priv->dma_rx[i], + priv->use_riwt, priv->mode, + (i == rxsize - 1)); + for (i = 0; i < txsize; i++) + if (priv->extend_desc) + priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic, + priv->mode, + (i == txsize - 1)); + else + priv->hw->desc->init_tx_desc(&priv->dma_tx[i], + priv->mode, + (i == txsize - 1)); +} + +static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, + int i) +{ + struct sk_buff *skb; + + skb = __netdev_alloc_skb(priv->dev, priv->dma_buf_sz + NET_IP_ALIGN, + GFP_KERNEL); + if (unlikely(skb == NULL)) { + pr_err("%s: Rx init fails; skb is NULL\n", __func__); + return 1; + } + skb_reserve(skb, NET_IP_ALIGN); + priv->rx_skbuff[i] = skb; + priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data, + priv->dma_buf_sz, + DMA_FROM_DEVICE); + + p->des2 = priv->rx_skbuff_dma[i]; + + if ((priv->mode == STMMAC_RING_MODE) && + (priv->dma_buf_sz == BUF_SIZE_16KiB)) + priv->hw->ring->init_desc3(p); + + return 0; +} + +/** * init_dma_desc_rings - init the RX/TX descriptor rings * @dev: net device structure * Description: this function initializes the DMA RX/TX descriptors @@ -511,110 +1001,114 @@ static void init_dma_desc_rings(struct net_device *dev) { int i; struct stmmac_priv *priv = netdev_priv(dev); - struct sk_buff *skb; unsigned int txsize = priv->dma_tx_size; unsigned int rxsize = priv->dma_rx_size; - unsigned int bfsize; - int dis_ic = 0; - int des3_as_data_buf = 0; + unsigned int bfsize = 0; /* Set the max buffer size according to the DESC mode - * and the MTU. Note that RING mode allows 16KiB bsize. 
*/ - bfsize = priv->hw->ring->set_16kib_bfsize(dev->mtu); + * and the MTU. Note that RING mode allows 16KiB bsize. + */ + if (priv->mode == STMMAC_RING_MODE) + bfsize = priv->hw->ring->set_16kib_bfsize(dev->mtu); - if (bfsize == BUF_SIZE_16KiB) - des3_as_data_buf = 1; - else + if (bfsize < BUF_SIZE_16KiB) bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz); DBG(probe, INFO, "stmmac: txsize %d, rxsize %d, bfsize %d\n", txsize, rxsize, bfsize); - priv->rx_skbuff_dma = kmalloc_array(rxsize, sizeof(dma_addr_t), - GFP_KERNEL); - priv->rx_skbuff = kmalloc_array(rxsize, sizeof(struct sk_buff *), - GFP_KERNEL); - priv->dma_rx = - (struct dma_desc *)dma_alloc_coherent(priv->device, - rxsize * + if (priv->extend_desc) { + priv->dma_erx = dma_alloc_coherent(priv->device, rxsize * + sizeof(struct + dma_extended_desc), + &priv->dma_rx_phy, + GFP_KERNEL); + priv->dma_etx = dma_alloc_coherent(priv->device, txsize * + sizeof(struct + dma_extended_desc), + &priv->dma_tx_phy, + GFP_KERNEL); + if ((!priv->dma_erx) || (!priv->dma_etx)) + return; + } else { + priv->dma_rx = dma_alloc_coherent(priv->device, rxsize * sizeof(struct dma_desc), &priv->dma_rx_phy, GFP_KERNEL); - priv->tx_skbuff = kmalloc_array(txsize, sizeof(struct sk_buff *), - GFP_KERNEL); - priv->dma_tx = - (struct dma_desc *)dma_alloc_coherent(priv->device, - txsize * + priv->dma_tx = dma_alloc_coherent(priv->device, txsize * sizeof(struct dma_desc), &priv->dma_tx_phy, GFP_KERNEL); - - if ((priv->dma_rx == NULL) || (priv->dma_tx == NULL)) { - pr_err("%s:ERROR allocating the DMA Tx/Rx desc\n", __func__); - return; + if ((!priv->dma_rx) || (!priv->dma_tx)) + return; } - DBG(probe, INFO, "stmmac (%s) DMA desc: virt addr (Rx %p, " - "Tx %p)\n\tDMA phy addr (Rx 0x%08x, Tx 0x%08x)\n", - dev->name, priv->dma_rx, priv->dma_tx, - (unsigned int)priv->dma_rx_phy, (unsigned int)priv->dma_tx_phy); + priv->rx_skbuff_dma = kmalloc_array(rxsize, sizeof(dma_addr_t), + GFP_KERNEL); + priv->rx_skbuff = kmalloc_array(rxsize, 
sizeof(struct sk_buff *), + GFP_KERNEL); + priv->tx_skbuff_dma = kmalloc_array(txsize, sizeof(dma_addr_t), + GFP_KERNEL); + priv->tx_skbuff = kmalloc_array(txsize, sizeof(struct sk_buff *), + GFP_KERNEL); + if (netif_msg_drv(priv)) + pr_debug("(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n", __func__, + (u32) priv->dma_rx_phy, (u32) priv->dma_tx_phy); /* RX INITIALIZATION */ - DBG(probe, INFO, "stmmac: SKB addresses:\n" - "skb\t\tskb data\tdma data\n"); - + DBG(probe, INFO, "stmmac: SKB addresses:\nskb\t\tskb data\tdma data\n"); for (i = 0; i < rxsize; i++) { - struct dma_desc *p = priv->dma_rx + i; + struct dma_desc *p; + if (priv->extend_desc) + p = &((priv->dma_erx + i)->basic); + else + p = priv->dma_rx + i; - skb = __netdev_alloc_skb(dev, bfsize + NET_IP_ALIGN, - GFP_KERNEL); - if (unlikely(skb == NULL)) { - pr_err("%s: Rx init fails; skb is NULL\n", __func__); + if (stmmac_init_rx_buffers(priv, p, i)) break; - } - skb_reserve(skb, NET_IP_ALIGN); - priv->rx_skbuff[i] = skb; - priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data, - bfsize, DMA_FROM_DEVICE); - - p->des2 = priv->rx_skbuff_dma[i]; - - priv->hw->ring->init_desc3(des3_as_data_buf, p); DBG(probe, INFO, "[%p]\t[%p]\t[%x]\n", priv->rx_skbuff[i], - priv->rx_skbuff[i]->data, priv->rx_skbuff_dma[i]); + priv->rx_skbuff[i]->data, priv->rx_skbuff_dma[i]); } priv->cur_rx = 0; priv->dirty_rx = (unsigned int)(i - rxsize); priv->dma_buf_sz = bfsize; buf_sz = bfsize; + /* Setup the chained descriptor addresses */ + if (priv->mode == STMMAC_CHAIN_MODE) { + if (priv->extend_desc) { + priv->hw->chain->init(priv->dma_erx, priv->dma_rx_phy, + rxsize, 1); + priv->hw->chain->init(priv->dma_etx, priv->dma_tx_phy, + txsize, 1); + } else { + priv->hw->chain->init(priv->dma_rx, priv->dma_rx_phy, + rxsize, 0); + priv->hw->chain->init(priv->dma_tx, priv->dma_tx_phy, + txsize, 0); + } + } + /* TX INITIALIZATION */ for (i = 0; i < txsize; i++) { + struct dma_desc *p; + if (priv->extend_desc) + p = &((priv->dma_etx + 
i)->basic); + else + p = priv->dma_tx + i; + p->des2 = 0; + priv->tx_skbuff_dma[i] = 0; priv->tx_skbuff[i] = NULL; - priv->dma_tx[i].des2 = 0; } - /* In case of Chained mode this sets the des3 to the next - * element in the chain */ - priv->hw->ring->init_dma_chain(priv->dma_rx, priv->dma_rx_phy, rxsize); - priv->hw->ring->init_dma_chain(priv->dma_tx, priv->dma_tx_phy, txsize); - priv->dirty_tx = 0; priv->cur_tx = 0; - if (priv->use_riwt) - dis_ic = 1; - /* Clear the Rx/Tx descriptors */ - priv->hw->desc->init_rx_desc(priv->dma_rx, rxsize, dis_ic); - priv->hw->desc->init_tx_desc(priv->dma_tx, txsize); + stmmac_clear_descriptors(priv); - if (netif_msg_hw(priv)) { - pr_info("RX descriptor ring:\n"); - display_ring(priv->dma_rx, rxsize); - pr_info("TX descriptor ring:\n"); - display_ring(priv->dma_tx, txsize); - } + if (netif_msg_hw(priv)) + stmmac_display_rings(priv); } static void dma_free_rx_skbufs(struct stmmac_priv *priv) @@ -637,13 +1131,20 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv) for (i = 0; i < priv->dma_tx_size; i++) { if (priv->tx_skbuff[i] != NULL) { - struct dma_desc *p = priv->dma_tx + i; - if (p->des2) - dma_unmap_single(priv->device, p->des2, + struct dma_desc *p; + if (priv->extend_desc) + p = &((priv->dma_etx + i)->basic); + else + p = priv->dma_tx + i; + + if (priv->tx_skbuff_dma[i]) + dma_unmap_single(priv->device, + priv->tx_skbuff_dma[i], priv->hw->desc->get_tx_len(p), DMA_TO_DEVICE); dev_kfree_skb_any(priv->tx_skbuff[i]); priv->tx_skbuff[i] = NULL; + priv->tx_skbuff_dma[i] = 0; } } } @@ -654,29 +1155,38 @@ static void free_dma_desc_resources(struct stmmac_priv *priv) dma_free_rx_skbufs(priv); dma_free_tx_skbufs(priv); - /* Free the region of consistent memory previously allocated for - * the DMA */ - dma_free_coherent(priv->device, - priv->dma_tx_size * sizeof(struct dma_desc), - priv->dma_tx, priv->dma_tx_phy); - dma_free_coherent(priv->device, - priv->dma_rx_size * sizeof(struct dma_desc), - priv->dma_rx, priv->dma_rx_phy); + 
/* Free DMA regions of consistent memory previously allocated */ + if (!priv->extend_desc) { + dma_free_coherent(priv->device, + priv->dma_tx_size * sizeof(struct dma_desc), + priv->dma_tx, priv->dma_tx_phy); + dma_free_coherent(priv->device, + priv->dma_rx_size * sizeof(struct dma_desc), + priv->dma_rx, priv->dma_rx_phy); + } else { + dma_free_coherent(priv->device, priv->dma_tx_size * + sizeof(struct dma_extended_desc), + priv->dma_etx, priv->dma_tx_phy); + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_extended_desc), + priv->dma_erx, priv->dma_rx_phy); + } kfree(priv->rx_skbuff_dma); kfree(priv->rx_skbuff); + kfree(priv->tx_skbuff_dma); kfree(priv->tx_skbuff); } /** * stmmac_dma_operation_mode - HW DMA operation mode - * @priv : pointer to the private device structure. + * @priv: driver private structure * Description: it sets the DMA operation mode: tx/rx DMA thresholds * or Store-And-Forward capability. */ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) { if (likely(priv->plat->force_sf_dma_mode || - ((priv->plat->tx_coe) && (!priv->no_csum_insertion)))) { + ((priv->plat->tx_coe) && (!priv->no_csum_insertion)))) { /* * In case of GMAC, SF mode can be enabled * to perform the TX COE in HW. This depends on: @@ -684,8 +1194,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) * 2) There is no bugged Jumbo frame support * that needs to not insert csum in the TDES. */ - priv->hw->dma->dma_mode(priv->ioaddr, - SF_DMA_MODE, SF_DMA_MODE); + priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE); tc = SF_DMA_MODE; } else priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); @@ -693,7 +1202,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) /** * stmmac_tx_clean: - * @priv: private data pointer + * @priv: driver private structure * Description: it reclaims resources after transmission completes. 
*/ static void stmmac_tx_clean(struct stmmac_priv *priv) @@ -708,40 +1217,50 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) int last; unsigned int entry = priv->dirty_tx % txsize; struct sk_buff *skb = priv->tx_skbuff[entry]; - struct dma_desc *p = priv->dma_tx + entry; + struct dma_desc *p; + + if (priv->extend_desc) + p = (struct dma_desc *)(priv->dma_etx + entry); + else + p = priv->dma_tx + entry; /* Check if the descriptor is owned by the DMA. */ if (priv->hw->desc->get_tx_owner(p)) break; - /* Verify tx error by looking at the last segment */ + /* Verify tx error by looking at the last segment. */ last = priv->hw->desc->get_tx_ls(p); if (likely(last)) { int tx_error = - priv->hw->desc->tx_status(&priv->dev->stats, - &priv->xstats, p, - priv->ioaddr); + priv->hw->desc->tx_status(&priv->dev->stats, + &priv->xstats, p, + priv->ioaddr); if (likely(tx_error == 0)) { priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; } else priv->dev->stats.tx_errors++; + + stmmac_get_tx_hwtstamp(priv, entry, skb); } TX_DBG("%s: curr %d, dirty %d\n", __func__, - priv->cur_tx, priv->dirty_tx); + priv->cur_tx, priv->dirty_tx); - if (likely(p->des2)) - dma_unmap_single(priv->device, p->des2, + if (likely(priv->tx_skbuff_dma[entry])) { + dma_unmap_single(priv->device, + priv->tx_skbuff_dma[entry], priv->hw->desc->get_tx_len(p), DMA_TO_DEVICE); - priv->hw->ring->clean_desc3(p); + priv->tx_skbuff_dma[entry] = 0; + } + priv->hw->ring->clean_desc3(priv, p); if (likely(skb != NULL)) { dev_kfree_skb(skb); priv->tx_skbuff[entry] = NULL; } - priv->hw->desc->release_tx_desc(p); + priv->hw->desc->release_tx_desc(p, priv->mode); priv->dirty_tx++; } @@ -749,7 +1268,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv))) { netif_tx_lock(priv->dev); if (netif_queue_stopped(priv->dev) && - stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv)) { + stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv)) { TX_DBG("%s: restart transmit\n", __func__); 
netif_wake_queue(priv->dev); } @@ -773,20 +1292,29 @@ static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv) priv->hw->dma->disable_dma_irq(priv->ioaddr); } - /** - * stmmac_tx_err: - * @priv: pointer to the private device structure + * stmmac_tx_err: irq tx error mng function + * @priv: driver private structure * Description: it cleans the descriptors and restarts the transmission * in case of errors. */ static void stmmac_tx_err(struct stmmac_priv *priv) { + int i; + int txsize = priv->dma_tx_size; netif_stop_queue(priv->dev); priv->hw->dma->stop_tx(priv->ioaddr); dma_free_tx_skbufs(priv); - priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); + for (i = 0; i < txsize; i++) + if (priv->extend_desc) + priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic, + priv->mode, + (i == txsize - 1)); + else + priv->hw->desc->init_tx_desc(&priv->dma_tx[i], + priv->mode, + (i == txsize - 1)); priv->dirty_tx = 0; priv->cur_tx = 0; priv->hw->dma->start_tx(priv->ioaddr); @@ -795,6 +1323,14 @@ static void stmmac_tx_err(struct stmmac_priv *priv) netif_wake_queue(priv->dev); } +/** + * stmmac_dma_interrupt: DMA ISR + * @priv: driver private structure + * Description: this is the DMA ISR. It is called by the main ISR. + * It calls the dwmac dma routine to understand which type of interrupt + * happened. In case of there is a Normal interrupt and either TX or RX + * interrupt happened so the NAPI is scheduled. + */ static void stmmac_dma_interrupt(struct stmmac_priv *priv) { int status; @@ -817,13 +1353,16 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) stmmac_tx_err(priv); } +/** + * stmmac_mmc_setup: setup the Mac Management Counters (MMC) + * @priv: driver private structure + * Description: this masks the MMC irq, in fact, the counters are managed in SW. 
+ */ static void stmmac_mmc_setup(struct stmmac_priv *priv) { unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET | - MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET; + MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET; - /* Mask MMC irq, counters are managed in SW and registers - * are cleared on each READ eventually. */ dwmac_mmc_intr_all_mask(priv->ioaddr); if (priv->dma_cap.rmon) { @@ -837,8 +1376,7 @@ static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv) { u32 hwid = priv->hw->synopsys_uid; - /* Only check valid Synopsys Id because old MAC chips - * have no HW registers where get the ID */ + /* Check Synopsys Id (not available on old chips) */ if (likely(hwid)) { u32 uid = ((hwid & 0x0000ff00) >> 8); u32 synid = (hwid & 0x000000ff); @@ -852,14 +1390,24 @@ static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv) } /** - * stmmac_selec_desc_mode - * @priv : private structure - * Description: select the Enhanced/Alternate or Normal descriptors + * stmmac_selec_desc_mode: to select among: normal/alternate/extend descriptors + * @priv: driver private structure + * Description: select the Enhanced/Alternate or Normal descriptors. + * In case of Enhanced/Alternate, it looks at the extended descriptors are + * supported by the HW cap. register. */ static void stmmac_selec_desc_mode(struct stmmac_priv *priv) { if (priv->plat->enh_desc) { pr_info(" Enhanced/Alternate descriptors\n"); + + /* GMAC older than 3.50 has no extended descriptors */ + if (priv->synopsys_id >= DWMAC_CORE_3_50) { + pr_info("\tEnabled extended descriptors\n"); + priv->extend_desc = 1; + } else + pr_warn("Extended descriptors not supported\n"); + priv->hw->desc = &enh_desc_ops; } else { pr_info(" Normal descriptors\n"); @@ -868,8 +1416,8 @@ static void stmmac_selec_desc_mode(struct stmmac_priv *priv) } /** - * stmmac_get_hw_features - * @priv : private device pointer + * stmmac_get_hw_features: get MAC capabilities from the HW cap. register. 
+ * @priv: driver private structure * Description: * new GMAC chip generations have a new register to indicate the * presence of the optional feature/functions. @@ -887,69 +1435,78 @@ static int stmmac_get_hw_features(struct stmmac_priv *priv) priv->dma_cap.mbps_1000 = (hw_cap & DMA_HW_FEAT_GMIISEL) >> 1; priv->dma_cap.half_duplex = (hw_cap & DMA_HW_FEAT_HDSEL) >> 2; priv->dma_cap.hash_filter = (hw_cap & DMA_HW_FEAT_HASHSEL) >> 4; - priv->dma_cap.multi_addr = - (hw_cap & DMA_HW_FEAT_ADDMACADRSEL) >> 5; + priv->dma_cap.multi_addr = (hw_cap & DMA_HW_FEAT_ADDMAC) >> 5; priv->dma_cap.pcs = (hw_cap & DMA_HW_FEAT_PCSSEL) >> 6; priv->dma_cap.sma_mdio = (hw_cap & DMA_HW_FEAT_SMASEL) >> 8; priv->dma_cap.pmt_remote_wake_up = - (hw_cap & DMA_HW_FEAT_RWKSEL) >> 9; + (hw_cap & DMA_HW_FEAT_RWKSEL) >> 9; priv->dma_cap.pmt_magic_frame = - (hw_cap & DMA_HW_FEAT_MGKSEL) >> 10; + (hw_cap & DMA_HW_FEAT_MGKSEL) >> 10; /* MMC */ priv->dma_cap.rmon = (hw_cap & DMA_HW_FEAT_MMCSEL) >> 11; - /* IEEE 1588-2002*/ + /* IEEE 1588-2002 */ priv->dma_cap.time_stamp = - (hw_cap & DMA_HW_FEAT_TSVER1SEL) >> 12; - /* IEEE 1588-2008*/ + (hw_cap & DMA_HW_FEAT_TSVER1SEL) >> 12; + /* IEEE 1588-2008 */ priv->dma_cap.atime_stamp = - (hw_cap & DMA_HW_FEAT_TSVER2SEL) >> 13; + (hw_cap & DMA_HW_FEAT_TSVER2SEL) >> 13; /* 802.3az - Energy-Efficient Ethernet (EEE) */ priv->dma_cap.eee = (hw_cap & DMA_HW_FEAT_EEESEL) >> 14; priv->dma_cap.av = (hw_cap & DMA_HW_FEAT_AVSEL) >> 15; /* TX and RX csum */ priv->dma_cap.tx_coe = (hw_cap & DMA_HW_FEAT_TXCOESEL) >> 16; priv->dma_cap.rx_coe_type1 = - (hw_cap & DMA_HW_FEAT_RXTYP1COE) >> 17; + (hw_cap & DMA_HW_FEAT_RXTYP1COE) >> 17; priv->dma_cap.rx_coe_type2 = - (hw_cap & DMA_HW_FEAT_RXTYP2COE) >> 18; + (hw_cap & DMA_HW_FEAT_RXTYP2COE) >> 18; priv->dma_cap.rxfifo_over_2048 = - (hw_cap & DMA_HW_FEAT_RXFIFOSIZE) >> 19; + (hw_cap & DMA_HW_FEAT_RXFIFOSIZE) >> 19; /* TX and RX number of channels */ priv->dma_cap.number_rx_channel = - (hw_cap & DMA_HW_FEAT_RXCHCNT) >> 20; + (hw_cap 
& DMA_HW_FEAT_RXCHCNT) >> 20; priv->dma_cap.number_tx_channel = - (hw_cap & DMA_HW_FEAT_TXCHCNT) >> 22; - /* Alternate (enhanced) DESC mode*/ - priv->dma_cap.enh_desc = - (hw_cap & DMA_HW_FEAT_ENHDESSEL) >> 24; + (hw_cap & DMA_HW_FEAT_TXCHCNT) >> 22; + /* Alternate (enhanced) DESC mode */ + priv->dma_cap.enh_desc = (hw_cap & DMA_HW_FEAT_ENHDESSEL) >> 24; } return hw_cap; } +/** + * stmmac_check_ether_addr: check if the MAC addr is valid + * @priv: driver private structure + * Description: + * it is to verify if the MAC address is valid, in case of failures it + * generates a random MAC address + */ static void stmmac_check_ether_addr(struct stmmac_priv *priv) { - /* verify if the MAC address is valid, in case of failures it - * generates a random MAC address */ if (!is_valid_ether_addr(priv->dev->dev_addr)) { priv->hw->mac->get_umac_addr((void __iomem *) priv->dev->base_addr, priv->dev->dev_addr, 0); - if (!is_valid_ether_addr(priv->dev->dev_addr)) + if (!is_valid_ether_addr(priv->dev->dev_addr)) eth_hw_addr_random(priv->dev); } - pr_warning("%s: device MAC address %pM\n", priv->dev->name, - priv->dev->dev_addr); + pr_warn("%s: device MAC address %pM\n", priv->dev->name, + priv->dev->dev_addr); } +/** + * stmmac_init_dma_engine: DMA init. + * @priv: driver private structure + * Description: + * It inits the DMA invoking the specific MAC/GMAC callback. + * Some DMA parameters can be passed from the platform; + * in case of these are not passed a default is kept for the MAC or GMAC. + */ static int stmmac_init_dma_engine(struct stmmac_priv *priv) { int pbl = DEFAULT_DMA_PBL, fixed_burst = 0, burst_len = 0; int mixed_burst = 0; + int atds = 0; - /* Some DMA parameters can be passed from the platform; - * in case of these are not passed we keep a default - * (good for all the chips) and init the DMA! 
*/ if (priv->plat->dma_cfg) { pbl = priv->plat->dma_cfg->pbl; fixed_burst = priv->plat->dma_cfg->fixed_burst; @@ -957,13 +1514,16 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) burst_len = priv->plat->dma_cfg->burst_len; } + if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE)) + atds = 1; + return priv->hw->dma->init(priv->ioaddr, pbl, fixed_burst, mixed_burst, burst_len, priv->dma_tx_phy, - priv->dma_rx_phy); + priv->dma_rx_phy, atds); } /** - * stmmac_tx_timer: + * stmmac_tx_timer: mitigation sw timer for tx. * @data: data pointer * Description: * This is the timer handler to directly invoke the stmmac_tx_clean. @@ -976,8 +1536,8 @@ static void stmmac_tx_timer(unsigned long data) } /** - * stmmac_tx_timer: - * @priv: private data structure + * stmmac_init_tx_coalesce: init tx mitigation options. + * @priv: driver private structure * Description: * This inits the transmit coalesce parameters: i.e. timer rate, * timer handler and default threshold used for enabling the @@ -1012,10 +1572,14 @@ static int stmmac_open(struct net_device *dev) stmmac_check_ether_addr(priv); - ret = stmmac_init_phy(dev); - if (unlikely(ret)) { - pr_err("%s: Cannot attach to PHY (error: %d)\n", __func__, ret); - goto open_error; + if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI && + priv->pcs != STMMAC_PCS_RTBI) { + ret = stmmac_init_phy(dev); + if (ret) { + pr_err("%s: Cannot attach to PHY (error: %d)\n", + __func__, ret); + goto open_error; + } } /* Create and initialize the TX/RX descriptors chains. 
*/ @@ -1043,7 +1607,7 @@ static int stmmac_open(struct net_device *dev) /* Request the IRQ lines */ ret = request_irq(dev->irq, stmmac_interrupt, - IRQF_SHARED, dev->name, dev); + IRQF_SHARED, dev->name, dev); if (unlikely(ret < 0)) { pr_err("%s: ERROR: allocating the IRQ %d (error: %d)\n", __func__, dev->irq, ret); @@ -1055,8 +1619,8 @@ static int stmmac_open(struct net_device *dev) ret = request_irq(priv->wol_irq, stmmac_interrupt, IRQF_SHARED, dev->name, dev); if (unlikely(ret < 0)) { - pr_err("%s: ERROR: allocating the ext WoL IRQ %d " - "(error: %d)\n", __func__, priv->wol_irq, ret); + pr_err("%s: ERROR: allocating the WoL IRQ %d (%d)\n", + __func__, priv->wol_irq, ret); goto open_error_wolirq; } } @@ -1084,10 +1648,14 @@ static int stmmac_open(struct net_device *dev) stmmac_mmc_setup(priv); + ret = stmmac_init_ptp(priv); + if (ret) + pr_warn("%s: failed PTP initialisation\n", __func__); + #ifdef CONFIG_STMMAC_DEBUG_FS ret = stmmac_init_fs(dev); if (ret < 0) - pr_warning("%s: failed debugFS registration\n", __func__); + pr_warn("%s: failed debugFS registration\n", __func__); #endif /* Start the ball rolling... */ DBG(probe, DEBUG, "%s: DMA RX/TX processes started...\n", dev->name); @@ -1104,7 +1672,13 @@ static int stmmac_open(struct net_device *dev) phy_start(priv->phydev); priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS_TIMER; - priv->eee_enabled = stmmac_eee_init(priv); + + /* Using PCS we cannot dial with the phy registers at this stage + * so we do not support extra feature like EEE. 
+ */ + if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI && + priv->pcs != STMMAC_PCS_RTBI) + priv->eee_enabled = stmmac_eee_init(priv); stmmac_init_tx_coalesce(priv); @@ -1113,6 +1687,9 @@ static int stmmac_open(struct net_device *dev) priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT); } + if (priv->pcs && priv->hw->mac->ctrl_ane) + priv->hw->mac->ctrl_ane(priv->ioaddr, 0); + napi_enable(&priv->napi); netif_start_queue(dev); @@ -1184,21 +1761,25 @@ static int stmmac_release(struct net_device *dev) #endif clk_disable_unprepare(priv->stmmac_clk); + stmmac_release_ptp(priv); + return 0; } /** - * stmmac_xmit: + * stmmac_xmit: Tx entry point of the driver * @skb : the socket buffer * @dev : device pointer - * Description : Tx entry point of the driver. + * Description : this is the tx entry point of the driver. + * It programs the chain or the ring and supports oversized frames + * and SG feature. */ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); unsigned int txsize = priv->dma_tx_size; unsigned int entry; - int i, csum_insertion = 0; + int i, csum_insertion = 0, is_jumbo = 0; int nfrags = skb_shinfo(skb)->nr_frags; struct dma_desc *desc, *first; unsigned int nopaged_len = skb_headlen(skb); @@ -1207,8 +1788,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (!netif_queue_stopped(dev)) { netif_stop_queue(dev); /* This is a hard error, log it. */ - pr_err("%s: BUG! 
Tx Ring full when queue awake\n", - __func__); + pr_err("%s: Tx Ring full when queue awake\n", __func__); } return NETDEV_TX_BUSY; } @@ -1222,10 +1802,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef STMMAC_XMIT_DEBUG if ((skb->len > ETH_FRAME_LEN) || nfrags) - pr_debug("stmmac xmit: [entry %d]\n" - "\tskb addr %p - len: %d - nopaged_len: %d\n" + pr_debug("%s: [entry %d]: skb addr %p len: %d nopagedlen: %d\n" "\tn_frags: %d - ip_summed: %d - %s gso\n" - "\ttx_count_frames %d\n", entry, + "\ttx_count_frames %d\n", __func__, entry, skb, skb->len, nopaged_len, nfrags, skb->ip_summed, !skb_is_gso(skb) ? "isn't" : "is", priv->tx_count_frames); @@ -1233,7 +1812,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL); - desc = priv->dma_tx + entry; + if (priv->extend_desc) + desc = (struct dma_desc *)(priv->dma_etx + entry); + else + desc = priv->dma_tx + entry; + first = desc; #ifdef STMMAC_XMIT_DEBUG @@ -1244,28 +1827,46 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) #endif priv->tx_skbuff[entry] = skb; - if (priv->hw->ring->is_jumbo_frm(skb->len, priv->plat->enh_desc)) { - entry = priv->hw->ring->jumbo_frm(priv, skb, csum_insertion); - desc = priv->dma_tx + entry; + /* To program the descriptors according to the size of the frame */ + if (priv->mode == STMMAC_RING_MODE) { + is_jumbo = priv->hw->ring->is_jumbo_frm(skb->len, + priv->plat->enh_desc); + if (unlikely(is_jumbo)) + entry = priv->hw->ring->jumbo_frm(priv, skb, + csum_insertion); } else { + is_jumbo = priv->hw->chain->is_jumbo_frm(skb->len, + priv->plat->enh_desc); + if (unlikely(is_jumbo)) + entry = priv->hw->chain->jumbo_frm(priv, skb, + csum_insertion); + } + if (likely(!is_jumbo)) { desc->des2 = dma_map_single(priv->device, skb->data, - nopaged_len, DMA_TO_DEVICE); + nopaged_len, DMA_TO_DEVICE); + priv->tx_skbuff_dma[entry] = desc->des2; 
priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, - csum_insertion); - } + csum_insertion, priv->mode); + } else + desc = first; for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int len = skb_frag_size(frag); entry = (++priv->cur_tx) % txsize; - desc = priv->dma_tx + entry; + if (priv->extend_desc) + desc = (struct dma_desc *)(priv->dma_etx + entry); + else + desc = priv->dma_tx + entry; TX_DBG("\t[entry %d] segment len: %d\n", entry, len); desc->des2 = skb_frag_dma_map(priv->device, frag, 0, len, DMA_TO_DEVICE); + priv->tx_skbuff_dma[entry] = desc->des2; priv->tx_skbuff[entry] = NULL; - priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion); + priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion, + priv->mode); wmb(); priv->hw->desc->set_tx_owner(desc); wmb(); @@ -1298,11 +1899,14 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef STMMAC_XMIT_DEBUG if (netif_msg_pktdata(priv)) { - pr_info("stmmac xmit: current=%d, dirty=%d, entry=%d, " - "first=%p, nfrags=%d\n", - (priv->cur_tx % txsize), (priv->dirty_tx % txsize), - entry, first, nfrags); - display_ring(priv->dma_tx, txsize); + pr_info("%s: curr %d dirty=%d entry=%d, first=%p, nfrags=%d" + __func__, (priv->cur_tx % txsize), + (priv->dirty_tx % txsize), entry, first, nfrags); + if (priv->extend_desc) + stmmac_display_ring((void *)priv->dma_etx, txsize, 1); + else + stmmac_display_ring((void *)priv->dma_tx, txsize, 0); + pr_info(">>> frame to be transmitted: "); print_pkt(skb->data, skb->len); } @@ -1314,7 +1918,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; - skb_tx_timestamp(skb); + if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + /* declare that device is doing timestamping */ + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + priv->hw->desc->enable_tx_timestamp(first); + } + + if (!priv->hwts_tx_en) + 
skb_tx_timestamp(skb); priv->hw->dma->enable_dma_transmission(priv->ioaddr); @@ -1323,14 +1935,26 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +/** + * stmmac_rx_refill: refill used skb preallocated buffers + * @priv: driver private structure + * Description : this is to reallocate the skb for the reception process + * that is based on zero-copy. + */ static inline void stmmac_rx_refill(struct stmmac_priv *priv) { unsigned int rxsize = priv->dma_rx_size; int bfsize = priv->dma_buf_sz; - struct dma_desc *p = priv->dma_rx; for (; priv->cur_rx - priv->dirty_rx > 0; priv->dirty_rx++) { unsigned int entry = priv->dirty_rx % rxsize; + struct dma_desc *p; + + if (priv->extend_desc) + p = (struct dma_desc *)(priv->dma_erx + entry); + else + p = priv->dma_rx + entry; + if (likely(priv->rx_skbuff[entry] == NULL)) { struct sk_buff *skb; @@ -1344,80 +1968,116 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) dma_map_single(priv->device, skb->data, bfsize, DMA_FROM_DEVICE); - (p + entry)->des2 = priv->rx_skbuff_dma[entry]; + p->des2 = priv->rx_skbuff_dma[entry]; - if (unlikely(priv->plat->has_gmac)) - priv->hw->ring->refill_desc3(bfsize, p + entry); + priv->hw->ring->refill_desc3(priv, p); RX_DBG(KERN_INFO "\trefill entry #%d\n", entry); } wmb(); - priv->hw->desc->set_rx_owner(p + entry); + priv->hw->desc->set_rx_owner(p); wmb(); } } +/** + * stmmac_rx_refill: refill used skb preallocated buffers + * @priv: driver private structure + * @limit: napi bugget. + * Description : this the function called by the napi poll method. + * It gets all the frames inside the ring. 
+ */ static int stmmac_rx(struct stmmac_priv *priv, int limit) { unsigned int rxsize = priv->dma_rx_size; unsigned int entry = priv->cur_rx % rxsize; unsigned int next_entry; unsigned int count = 0; - struct dma_desc *p = priv->dma_rx + entry; - struct dma_desc *p_next; + int coe = priv->plat->rx_coe; #ifdef STMMAC_RX_DEBUG if (netif_msg_hw(priv)) { pr_debug(">>> stmmac_rx: descriptor ring:\n"); - display_ring(priv->dma_rx, rxsize); + if (priv->extend_desc) + stmmac_display_ring((void *)priv->dma_erx, rxsize, 1); + else + stmmac_display_ring((void *)priv->dma_rx, rxsize, 0); } #endif - while (!priv->hw->desc->get_rx_owner(p)) { + while (count < limit) { int status; + struct dma_desc *p; - if (count >= limit) + if (priv->extend_desc) + p = (struct dma_desc *)(priv->dma_erx + entry); + else + p = priv->dma_rx + entry; + + if (priv->hw->desc->get_rx_owner(p)) break; count++; next_entry = (++priv->cur_rx) % rxsize; - p_next = priv->dma_rx + next_entry; - prefetch(p_next); + if (priv->extend_desc) + prefetch(priv->dma_erx + next_entry); + else + prefetch(priv->dma_rx + next_entry); /* read the status of the incoming frame */ - status = (priv->hw->desc->rx_status(&priv->dev->stats, - &priv->xstats, p)); - if (unlikely(status == discard_frame)) + status = priv->hw->desc->rx_status(&priv->dev->stats, + &priv->xstats, p); + if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status)) + priv->hw->desc->rx_extended_status(&priv->dev->stats, + &priv->xstats, + priv->dma_erx + + entry); + if (unlikely(status == discard_frame)) { priv->dev->stats.rx_errors++; - else { + if (priv->hwts_rx_en && !priv->extend_desc) { + /* DESC2 & DESC3 will be overwitten by device + * with timestamp value, hence reinitialize + * them in stmmac_rx_refill() function so that + * device can reuse it. 
+ */ + priv->rx_skbuff[entry] = NULL; + dma_unmap_single(priv->device, + priv->rx_skbuff_dma[entry], + priv->dma_buf_sz, + DMA_FROM_DEVICE); + } + } else { struct sk_buff *skb; int frame_len; - frame_len = priv->hw->desc->get_rx_frame_len(p, - priv->plat->rx_coe); + frame_len = priv->hw->desc->get_rx_frame_len(p, coe); + /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 - * Type frames (LLC/LLC-SNAP) */ + * Type frames (LLC/LLC-SNAP) + */ if (unlikely(status != llc_snap)) frame_len -= ETH_FCS_LEN; #ifdef STMMAC_RX_DEBUG if (frame_len > ETH_FRAME_LEN) pr_debug("\tRX frame size %d, COE status: %d\n", - frame_len, status); + frame_len, status); if (netif_msg_hw(priv)) pr_debug("\tdesc: %p [entry %d] buff=0x%x\n", - p, entry, p->des2); + p, entry, p->des2); #endif skb = priv->rx_skbuff[entry]; if (unlikely(!skb)) { pr_err("%s: Inconsistent Rx descriptor chain\n", - priv->dev->name); + priv->dev->name); priv->dev->stats.rx_dropped++; break; } prefetch(skb->data - NET_IP_ALIGN); priv->rx_skbuff[entry] = NULL; + stmmac_get_rx_hwtstamp(priv, entry, skb); + skb_put(skb, frame_len); dma_unmap_single(priv->device, priv->rx_skbuff_dma[entry], @@ -1430,7 +2090,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) #endif skb->protocol = eth_type_trans(skb, priv->dev); - if (unlikely(!priv->plat->rx_coe)) + if (unlikely(!coe)) skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1441,7 +2101,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) priv->dev->stats.rx_bytes += frame_len; } entry = next_entry; - p = p_next; /* use prefetched values */ } stmmac_rx_refill(priv); @@ -1499,18 +2158,16 @@ static int stmmac_config(struct net_device *dev, struct ifmap *map) /* Don't allow changing the I/O address */ if (map->base_addr != dev->base_addr) { - pr_warning("%s: can't change I/O address\n", dev->name); + pr_warn("%s: can't change I/O address\n", dev->name); return -EOPNOTSUPP; } /* Don't allow changing the IRQ */ if (map->irq != 
dev->irq) { - pr_warning("%s: can't change IRQ number %d\n", - dev->name, dev->irq); + pr_warn("%s: not change IRQ number %d\n", dev->name, dev->irq); return -EOPNOTSUPP; } - /* ignore other fields */ return 0; } @@ -1570,7 +2227,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) } static netdev_features_t stmmac_fix_features(struct net_device *dev, - netdev_features_t features) + netdev_features_t features) { struct stmmac_priv *priv = netdev_priv(dev); @@ -1584,13 +2241,22 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, /* Some GMAC devices have a bugged Jumbo frame support that * needs to have the Tx COE disabled for oversized frames * (due to limited buffer sizes). In this case we disable - * the TX csum insertionin the TDES and not use SF. */ + * the TX csum insertionin the TDES and not use SF. + */ if (priv->plat->bugged_jumbo && (dev->mtu > ETH_DATA_LEN)) features &= ~NETIF_F_ALL_CSUM; return features; } +/** + * stmmac_interrupt - main ISR + * @irq: interrupt number. + * @dev_id: to pass the net device pointer. + * Description: this is the main driver interrupt service routine. + * It calls the DMA ISR and also the core ISR to manage PMT, MMC, LPI + * interrupts. 
+ */ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; @@ -1604,30 +2270,14 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) /* To handle GMAC own interrupts */ if (priv->plat->has_gmac) { int status = priv->hw->mac->host_irq_status((void __iomem *) - dev->base_addr); + dev->base_addr, + &priv->xstats); if (unlikely(status)) { - if (status & core_mmc_tx_irq) - priv->xstats.mmc_tx_irq_n++; - if (status & core_mmc_rx_irq) - priv->xstats.mmc_rx_irq_n++; - if (status & core_mmc_rx_csum_offload_irq) - priv->xstats.mmc_rx_csum_offload_irq_n++; - if (status & core_irq_receive_pmt_irq) - priv->xstats.irq_receive_pmt_irq_n++; - /* For LPI we need to save the tx status */ - if (status & core_irq_tx_path_in_lpi_mode) { - priv->xstats.irq_tx_path_in_lpi_mode_n++; + if (status & CORE_IRQ_TX_PATH_IN_LPI_MODE) priv->tx_path_in_lpi_mode = true; - } - if (status & core_irq_tx_path_exit_lpi_mode) { - priv->xstats.irq_tx_path_exit_lpi_mode_n++; + if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE) priv->tx_path_in_lpi_mode = false; - } - if (status & core_irq_rx_path_in_lpi_mode) - priv->xstats.irq_rx_path_in_lpi_mode_n++; - if (status & core_irq_rx_path_exit_lpi_mode) - priv->xstats.irq_rx_path_exit_lpi_mode_n++; } } @@ -1639,7 +2289,8 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) #ifdef CONFIG_NET_POLL_CONTROLLER /* Polling receive - used by NETCONSOLE and other diagnostic tools - * to allow network I/O with interrupts disabled. */ + * to allow network I/O with interrupts disabled. + */ static void stmmac_poll_controller(struct net_device *dev) { disable_irq(dev->irq); @@ -1655,21 +2306,30 @@ static void stmmac_poll_controller(struct net_device *dev) * a proprietary structure used to pass information to the driver. * @cmd: IOCTL command * Description: - * Currently there are no special functionality supported in IOCTL, just the - * phy_mii_ioctl(...) can be invoked. 
+ * Currently it supports the phy_mii_ioctl(...) and HW time stamping. */ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct stmmac_priv *priv = netdev_priv(dev); - int ret; + int ret = -EOPNOTSUPP; if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) - return -EINVAL; - - ret = phy_mii_ioctl(priv->phydev, rq, cmd); + switch (cmd) { + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: + if (!priv->phydev) + return -EINVAL; + ret = phy_mii_ioctl(priv->phydev, rq, cmd); + break; + case SIOCSHWTSTAMP: + ret = stmmac_hwtstamp_ioctl(dev, rq); + break; + default: + break; + } return ret; } @@ -1679,40 +2339,51 @@ static struct dentry *stmmac_fs_dir; static struct dentry *stmmac_rings_status; static struct dentry *stmmac_dma_cap; -static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v) +static void sysfs_display_ring(void *head, int size, int extend_desc, + struct seq_file *seq) { - struct tmp_s { - u64 a; - unsigned int b; - unsigned int c; - }; int i; - struct net_device *dev = seq->private; - struct stmmac_priv *priv = netdev_priv(dev); + struct dma_extended_desc *ep = (struct dma_extended_desc *)head; + struct dma_desc *p = (struct dma_desc *)head; - seq_printf(seq, "=======================\n"); - seq_printf(seq, " RX descriptor ring\n"); - seq_printf(seq, "=======================\n"); - - for (i = 0; i < priv->dma_rx_size; i++) { - struct tmp_s *x = (struct tmp_s *)(priv->dma_rx + i); - seq_printf(seq, "[%d] DES0=0x%x DES1=0x%x BUF1=0x%x BUF2=0x%x", - i, (unsigned int)(x->a), - (unsigned int)((x->a) >> 32), x->b, x->c); + for (i = 0; i < size; i++) { + u64 x; + if (extend_desc) { + x = *(u64 *) ep; + seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", + i, (unsigned int)virt_to_phys(ep), + (unsigned int)x, (unsigned int)(x >> 32), + ep->basic.des2, ep->basic.des3); + ep++; + } else { + x = *(u64 *) p; + seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", + i, (unsigned int)virt_to_phys(ep), + (unsigned int)x, 
(unsigned int)(x >> 32), + p->des2, p->des3); + p++; + } seq_printf(seq, "\n"); } +} - seq_printf(seq, "\n"); - seq_printf(seq, "=======================\n"); - seq_printf(seq, " TX descriptor ring\n"); - seq_printf(seq, "=======================\n"); +static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v) +{ + struct net_device *dev = seq->private; + struct stmmac_priv *priv = netdev_priv(dev); + unsigned int txsize = priv->dma_tx_size; + unsigned int rxsize = priv->dma_rx_size; - for (i = 0; i < priv->dma_tx_size; i++) { - struct tmp_s *x = (struct tmp_s *)(priv->dma_tx + i); - seq_printf(seq, "[%d] DES0=0x%x DES1=0x%x BUF1=0x%x BUF2=0x%x", - i, (unsigned int)(x->a), - (unsigned int)((x->a) >> 32), x->b, x->c); - seq_printf(seq, "\n"); + if (priv->extend_desc) { + seq_printf(seq, "Extended RX descriptor ring:\n"); + sysfs_display_ring((void *)priv->dma_erx, rxsize, 1, seq); + seq_printf(seq, "Extended TX descriptor ring:\n"); + sysfs_display_ring((void *)priv->dma_etx, txsize, 1, seq); + } else { + seq_printf(seq, "RX descriptor ring:\n"); + sysfs_display_ring((void *)priv->dma_rx, rxsize, 0, seq); + seq_printf(seq, "TX descriptor ring:\n"); + sysfs_display_ring((void *)priv->dma_tx, txsize, 0, seq); } return 0; @@ -1817,8 +2488,8 @@ static int stmmac_init_fs(struct net_device *dev) /* Entry to report DMA RX/TX rings */ stmmac_rings_status = debugfs_create_file("descriptors_status", - S_IRUGO, stmmac_fs_dir, dev, - &stmmac_rings_status_fops); + S_IRUGO, stmmac_fs_dir, dev, + &stmmac_rings_status_fops); if (!stmmac_rings_status || IS_ERR(stmmac_rings_status)) { pr_info("ERROR creating stmmac ring debugfs file\n"); @@ -1868,7 +2539,7 @@ static const struct net_device_ops stmmac_netdev_ops = { /** * stmmac_hw_init - Init the MAC device - * @priv : pointer to the private device structure. 
+ * @priv: driver private structure * Description: this function detects which MAC device * (GMAC/MAC10-100) has to attached, checks the HW capability * (if supported) and sets the driver's features (for example @@ -1877,7 +2548,7 @@ static const struct net_device_ops stmmac_netdev_ops = { */ static int stmmac_hw_init(struct stmmac_priv *priv) { - int ret = 0; + int ret; struct mac_device_info *mac; /* Identify the MAC HW device */ @@ -1892,12 +2563,23 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->hw = mac; - /* To use the chained or ring mode */ - priv->hw->ring = &ring_mode_ops; - /* Get and dump the chip ID */ priv->synopsys_id = stmmac_get_synopsys_id(priv); + /* To use alternate (extended) or normal descriptor structures */ + stmmac_selec_desc_mode(priv); + + /* To use the chained or ring mode */ + if (chain_mode) { + priv->hw->chain = &chain_mode_ops; + pr_info(" Chain mode enabled\n"); + priv->mode = STMMAC_CHAIN_MODE; + } else { + priv->hw->ring = &ring_mode_ops; + pr_info(" Ring mode enabled\n"); + priv->mode = STMMAC_RING_MODE; + } + /* Get the HW capability (new GMAC newer than 3.50a) */ priv->hw_cap_support = stmmac_get_hw_features(priv); if (priv->hw_cap_support) { @@ -1921,14 +2603,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv) } else pr_info(" No HW DMA feature register supported"); - /* Select the enhnaced/normal descriptor structures */ - stmmac_selec_desc_mode(priv); - - /* Enable the IPC (Checksum Offload) and check if the feature has been - * enabled during the core configuration. 
*/ ret = priv->hw->mac->rx_ipc(priv->ioaddr); if (!ret) { - pr_warning(" RX IPC Checksum Offload not configured.\n"); + pr_warn(" RX IPC Checksum Offload not configured.\n"); priv->plat->rx_coe = STMMAC_RX_COE_NONE; } @@ -1943,7 +2620,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv) device_set_wakeup_capable(priv->device, 1); } - return ret; + return 0; } /** @@ -1984,12 +2661,15 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, stmmac_verify_args(); /* Override with kernel parameters if supplied XXX CRS XXX - * this needs to have multiple instances */ + * this needs to have multiple instances + */ if ((phyaddr >= 0) && (phyaddr <= 31)) priv->plat->phy_addr = phyaddr; /* Init MAC and get the capabilities */ - stmmac_hw_init(priv); + ret = stmmac_hw_init(priv); + if (ret) + goto error_free_netdev; ndev->netdev_ops = &stmmac_netdev_ops; @@ -1999,7 +2679,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, ndev->watchdog_timeo = msecs_to_jiffies(watchdog); #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ - ndev->features |= NETIF_F_HW_VLAN_RX; + ndev->features |= NETIF_F_HW_VLAN_CTAG_RX; #endif priv->msg_enable = netif_msg_init(debug, default_msg_level); @@ -2029,7 +2709,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, priv->stmmac_clk = clk_get(priv->device, STMMAC_RESOURCE_NAME); if (IS_ERR(priv->stmmac_clk)) { - pr_warning("%s: warning: cannot get CSR clock\n", __func__); + pr_warn("%s: warning: cannot get CSR clock\n", __func__); goto error_clk_get; } @@ -2044,12 +2724,17 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, else priv->clk_csr = priv->plat->clk_csr; - /* MDIO bus Registration */ - ret = stmmac_mdio_register(ndev); - if (ret < 0) { - pr_debug("%s: MDIO bus (id: %d) registration failed", - __func__, priv->plat->bus_id); - goto error_mdio_register; + stmmac_check_pcs_mode(priv); + + if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI && + 
priv->pcs != STMMAC_PCS_RTBI) { + /* MDIO bus Registration */ + ret = stmmac_mdio_register(ndev); + if (ret < 0) { + pr_debug("%s: MDIO bus (id: %d) registration failed", + __func__, priv->plat->bus_id); + goto error_mdio_register; + } } return priv; @@ -2060,6 +2745,7 @@ error_clk_get: unregister_netdev(ndev); error_netdev_register: netif_napi_del(&priv->napi); +error_free_netdev: free_netdev(ndev); return NULL; @@ -2081,7 +2767,9 @@ int stmmac_dvr_remove(struct net_device *ndev) priv->hw->dma->stop_tx(priv->ioaddr); stmmac_set_mac(priv->ioaddr, false); - stmmac_mdio_unregister(ndev); + if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI && + priv->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); netif_carrier_off(ndev); unregister_netdev(ndev); free_netdev(ndev); @@ -2093,7 +2781,6 @@ int stmmac_dvr_remove(struct net_device *ndev) int stmmac_suspend(struct net_device *ndev) { struct stmmac_priv *priv = netdev_priv(ndev); - int dis_ic = 0; unsigned long flags; if (!ndev || !netif_running(ndev)) @@ -2107,18 +2794,13 @@ int stmmac_suspend(struct net_device *ndev) netif_device_detach(ndev); netif_stop_queue(ndev); - if (priv->use_riwt) - dis_ic = 1; - napi_disable(&priv->napi); /* Stop TX/RX DMA */ priv->hw->dma->stop_tx(priv->ioaddr); priv->hw->dma->stop_rx(priv->ioaddr); - /* Clear the Rx/Tx descriptors */ - priv->hw->desc->init_rx_desc(priv->dma_rx, priv->dma_rx_size, - dis_ic); - priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); + + stmmac_clear_descriptors(priv); /* Enable Power down mode by programming the PMT regs */ if (device_may_wakeup(priv->device)) @@ -2146,7 +2828,8 @@ int stmmac_resume(struct net_device *ndev) * automatically as soon as a magic packet or a Wake-up frame * is received. Anyway, it's better to manually clear * this bit because it can generate problems while resuming - * from another devices (e.g. serial console). */ + * from another devices (e.g. serial console). 
+ */ if (device_may_wakeup(priv->device)) priv->hw->mac->pmt(priv->ioaddr, 0); else @@ -2257,6 +2940,9 @@ static int __init stmmac_cmdline_opt(char *str) } else if (!strncmp(opt, "eee_timer:", 10)) { if (kstrtoint(opt + 10, 0, &eee_timer)) goto err; + } else if (!strncmp(opt, "chain_mode:", 11)) { + if (kstrtoint(opt + 11, 0, &chain_mode)) + goto err; } } return 0; @@ -2267,7 +2953,7 @@ err: } __setup("stmmaceth=", stmmac_cmdline_opt); -#endif +#endif /* MODULE */ MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet device driver"); MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 0b9829fe3eea..cc15039eaa47 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -177,7 +177,7 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->write = &stmmac_mdio_write; new_bus->reset = &stmmac_mdio_reset; snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x", - new_bus->name, priv->plat->bus_id); + new_bus->name, priv->plat->bus_id); new_bus->priv = ndev; new_bus->irq = irqlist; new_bus->phy_mask = mdio_bus_data->phy_mask; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 19b3a2567a46..023b7c29cb2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -88,7 +88,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, continue; addr = pci_iomap(pdev, i, 0); if (addr == NULL) { - pr_err("%s: ERROR: cannot map register memory, aborting", + pr_err("%s: ERROR: cannot map register memory aborting", __func__); ret = -EIO; goto err_out_map_failed; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index b43d68b40e50..1d3780f55ba2 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -88,11 +88,9 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) if (!res) return -ENODEV; - addr = devm_request_and_ioremap(dev, res); - if (!addr) { - pr_err("%s: ERROR: memory mapping failed", __func__); - return -ENOMEM; - } + addr = devm_ioremap_resource(dev, res); + if (IS_ERR(addr)) + return PTR_ERR(addr); if (pdev->dev.of_node) { plat_dat = devm_kzalloc(&pdev->dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c new file mode 100644 index 000000000000..b8b0eeed0f92 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -0,0 +1,211 @@ +/******************************************************************************* + PTP 1588 clock using the STMMAC. + + Copyright (C) 2013 Vayavya Labs Pvt Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". 
+ + Author: Rayagond Kokatanur <rayagond@vayavyalabs.com> +*******************************************************************************/ +#include "stmmac.h" +#include "stmmac_ptp.h" + +/** + * stmmac_adjust_freq + * + * @ptp: pointer to ptp_clock_info structure + * @ppb: desired period change in parts ber billion + * + * Description: this function will adjust the frequency of hardware clock. + */ +static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) +{ + struct stmmac_priv *priv = + container_of(ptp, struct stmmac_priv, ptp_clock_ops); + unsigned long flags; + u32 diff, addend; + int neg_adj = 0; + u64 adj; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + + addend = priv->default_addend; + adj = addend; + adj *= ppb; + diff = div_u64(adj, 1000000000ULL); + addend = neg_adj ? (addend - diff) : (addend + diff); + + spin_lock_irqsave(&priv->ptp_lock, flags); + + priv->hw->ptp->config_addend(priv->ioaddr, addend); + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +/** + * stmmac_adjust_time + * + * @ptp: pointer to ptp_clock_info structure + * @delta: desired change in nanoseconds + * + * Description: this function will shift/adjust the hardware clock time. 
+ */ +static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) +{ + struct stmmac_priv *priv = + container_of(ptp, struct stmmac_priv, ptp_clock_ops); + unsigned long flags; + u32 sec, nsec; + u32 quotient, reminder; + int neg_adj = 0; + + if (delta < 0) { + neg_adj = 1; + delta = -delta; + } + + quotient = div_u64_rem(delta, 1000000000ULL, &reminder); + sec = quotient; + nsec = reminder; + + spin_lock_irqsave(&priv->ptp_lock, flags); + + priv->hw->ptp->adjust_systime(priv->ioaddr, sec, nsec, neg_adj); + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +/** + * stmmac_get_time + * + * @ptp: pointer to ptp_clock_info structure + * @ts: pointer to hold time/result + * + * Description: this function will read the current time from the + * hardware clock and store it in @ts. + */ +static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec *ts) +{ + struct stmmac_priv *priv = + container_of(ptp, struct stmmac_priv, ptp_clock_ops); + unsigned long flags; + u64 ns; + u32 reminder; + + spin_lock_irqsave(&priv->ptp_lock, flags); + + ns = priv->hw->ptp->get_systime(priv->ioaddr); + + spin_unlock_irqrestore(&priv->ptp_lock, flags); + + ts->tv_sec = div_u64_rem(ns, 1000000000ULL, &reminder); + ts->tv_nsec = reminder; + + return 0; +} + +/** + * stmmac_set_time + * + * @ptp: pointer to ptp_clock_info structure + * @ts: time value to set + * + * Description: this function will set the current time on the + * hardware clock. 
+ */ +static int stmmac_set_time(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct stmmac_priv *priv = + container_of(ptp, struct stmmac_priv, ptp_clock_ops); + unsigned long flags; + + spin_lock_irqsave(&priv->ptp_lock, flags); + + priv->hw->ptp->init_systime(priv->ioaddr, ts->tv_sec, ts->tv_nsec); + + spin_unlock_irqrestore(&priv->ptp_lock, flags); + + return 0; +} + +static int stmmac_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +/* structure describing a PTP hardware clock */ +static struct ptp_clock_info stmmac_ptp_clock_ops = { + .owner = THIS_MODULE, + .name = "stmmac_ptp_clock", + .max_adj = 62500000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .pps = 0, + .adjfreq = stmmac_adjust_freq, + .adjtime = stmmac_adjust_time, + .gettime = stmmac_get_time, + .settime = stmmac_set_time, + .enable = stmmac_enable, +}; + +/** + * stmmac_ptp_register + * @priv: driver private structure + * Description: this function will register the ptp clock driver + * to kernel. It also does some house keeping work. + */ +int stmmac_ptp_register(struct stmmac_priv *priv) +{ + spin_lock_init(&priv->ptp_lock); + priv->ptp_clock_ops = stmmac_ptp_clock_ops; + + priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops, + priv->device); + if (IS_ERR(priv->ptp_clock)) { + priv->ptp_clock = NULL; + pr_err("ptp_clock_register() failed on %s\n", priv->dev->name); + } else + pr_debug("Added PTP HW clock successfully on %s\n", + priv->dev->name); + + return 0; +} + +/** + * stmmac_ptp_unregister + * @priv: driver private structure + * Description: this function will remove/unregister the ptp clock driver + * from the kernel. 
+ */ +void stmmac_ptp_unregister(struct stmmac_priv *priv) +{ + if (priv->ptp_clock) { + ptp_clock_unregister(priv->ptp_clock); + pr_debug("Removed PTP HW clock successfully on %s\n", + priv->dev->name); + } +} diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h new file mode 100644 index 000000000000..3dbc047622fa --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -0,0 +1,74 @@ +/****************************************************************************** + PTP Header file + + Copyright (C) 2013 Vayavya Labs Pvt Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". 
+ + Author: Rayagond Kokatanur <rayagond@vayavyalabs.com> +******************************************************************************/ + +#ifndef __STMMAC_PTP_H__ +#define __STMMAC_PTP_H__ + +#define STMMAC_SYSCLOCK 62500000 + +/* IEEE 1588 PTP register offsets */ +#define PTP_TCR 0x0700 /* Timestamp Control Reg */ +#define PTP_SSIR 0x0704 /* Sub-Second Increment Reg */ +#define PTP_STSR 0x0708 /* System Time – Seconds Regr */ +#define PTP_STNSR 0x070C /* System Time – Nanoseconds Reg */ +#define PTP_STSUR 0x0710 /* System Time – Seconds Update Reg */ +#define PTP_STNSUR 0x0714 /* System Time – Nanoseconds Update Reg */ +#define PTP_TAR 0x0718 /* Timestamp Addend Reg */ +#define PTP_TTSR 0x071C /* Target Time Seconds Reg */ +#define PTP_TTNSR 0x0720 /* Target Time Nanoseconds Reg */ +#define PTP_STHWSR 0x0724 /* System Time - Higher Word Seconds Reg */ +#define PTP_TSR 0x0728 /* Timestamp Status */ + +#define PTP_STNSUR_ADDSUB_SHIFT 31 + +/* PTP TCR defines */ +#define PTP_TCR_TSENA 0x00000001 /* Timestamp Enable */ +#define PTP_TCR_TSCFUPDT 0x00000002 /* Timestamp Fine/Coarse Update */ +#define PTP_TCR_TSINIT 0x00000004 /* Timestamp Initialize */ +#define PTP_TCR_TSUPDT 0x00000008 /* Timestamp Update */ +/* Timestamp Interrupt Trigger Enable */ +#define PTP_TCR_TSTRIG 0x00000010 +#define PTP_TCR_TSADDREG 0x00000020 /* Addend Reg Update */ +#define PTP_TCR_TSENALL 0x00000100 /* Enable Timestamp for All Frames */ +/* Timestamp Digital or Binary Rollover Control */ +#define PTP_TCR_TSCTRLSSR 0x00000200 + +/* Enable PTP packet Processing for Version 2 Format */ +#define PTP_TCR_TSVER2ENA 0x00000400 +/* Enable Processing of PTP over Ethernet Frames */ +#define PTP_TCR_TSIPENA 0x00000800 +/* Enable Processing of PTP Frames Sent over IPv6-UDP */ +#define PTP_TCR_TSIPV6ENA 0x00001000 +/* Enable Processing of PTP Frames Sent over IPv4-UDP */ +#define PTP_TCR_TSIPV4ENA 0x00002000 +/* Enable Timestamp Snapshot for Event Messages */ +#define PTP_TCR_TSEVNTENA 0x00004000 
+/* Enable Snapshot for Messages Relevant to Master */ +#define PTP_TCR_TSMSTRENA 0x00008000 +/* Select PTP packets for Taking Snapshots */ +#define PTP_TCR_SNAPTYPSEL_1 0x00010000 +/* Enable MAC address for PTP Frame Filtering */ +#define PTP_TCR_TSENMACADDR 0x00040000 + +#endif /* __STMMAC_PTP_H__ */ diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index e4c1c88e4c2a..95cff98d8a34 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6618,7 +6618,7 @@ static u64 niu_compute_tx_flags(struct sk_buff *skb, struct ethhdr *ehdr, (len << TXHDR_LEN_SHIFT) | ((l3off / 2) << TXHDR_L3START_SHIFT) | (ihl << TXHDR_IHL_SHIFT) | - ((eth_proto_inner < 1536) ? TXHDR_LLC : 0) | + ((eth_proto_inner < ETH_P_802_3_MIN) ? TXHDR_LLC : 0) | ((eth_proto == ETH_P_8021Q) ? TXHDR_VLAN : 0) | (ipv6 ? TXHDR_IP_VER : 0) | csum_bits); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 5fafca065305..054975939a18 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -1169,10 +1169,8 @@ static int bigmac_ether_init(struct platform_device *op, bp->bmac_block = dma_alloc_coherent(&bp->bigmac_op->dev, PAGE_SIZE, &bp->bblock_dvma, GFP_ATOMIC); - if (bp->bmac_block == NULL || bp->bblock_dvma == 0) { - printk(KERN_ERR "BIGMAC: Cannot allocate consistent DMA.\n"); + if (bp->bmac_block == NULL || bp->bblock_dvma == 0) goto fail_and_cleanup; - } /* Get the board revision of this BigMAC. 
*/ bp->board_rev = of_getintprop_default(bp->bigmac_op->dev.of_node, diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index a1bff49a8155..436fa9d5a071 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -2752,10 +2752,8 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe) &hp->hblock_dvma, GFP_ATOMIC); err = -ENOMEM; - if (!hp->happy_block) { - printk(KERN_ERR "happymeal: Cannot allocate descriptors.\n"); + if (!hp->happy_block) goto err_out_iounmap; - } /* Force check of the link first time we are brought up. */ hp->linkcheck = 0; @@ -3068,14 +3066,11 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, hp->happy_bursts = DMA_BURSTBITS; #endif - hp->happy_block = (struct hmeal_init_block *) - dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &hp->hblock_dvma, GFP_KERNEL); - + hp->happy_block = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, + &hp->hblock_dvma, GFP_KERNEL); err = -ENODEV; - if (!hp->happy_block) { - printk(KERN_ERR "happymeal(PCI): Cannot get hme init block.\n"); + if (!hp->happy_block) goto err_out_iounmap; - } hp->linkcheck = 0; hp->timer_state = asleep; diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 49bf3e2eb652..8182591bc187 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -414,7 +414,7 @@ static void qe_rx(struct sunqe *qep) struct qe_rxd *this; struct sunqe_buffers *qbufs = qep->buffers; __u32 qbufs_dvma = qep->buffers_dvma; - int elem = qep->rx_new, drops = 0; + int elem = qep->rx_new; u32 flags; this = &rxbase[elem]; @@ -436,7 +436,6 @@ static void qe_rx(struct sunqe *qep) } else { skb = netdev_alloc_skb(dev, len + 2); if (skb == NULL) { - drops++; dev->stats.rx_dropped++; } else { skb_reserve(skb, 2); @@ -456,8 +455,6 @@ static void qe_rx(struct sunqe *qep) this = &rxbase[elem]; } qep->rx_new = elem; - if (drops) - printk(KERN_NOTICE "%s: Memory squeeze, deferring 
packet.\n", qep->dev->name); } static void qe_tx_reclaim(struct sunqe *qep); diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index e15cc71b826d..571452e786d5 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -733,7 +733,7 @@ static void __bdx_vlan_rx_vid(struct net_device *ndev, uint16_t vid, int enable) * @ndev: network device * @vid: VLAN vid to add */ -static int bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid) +static int bdx_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { __bdx_vlan_rx_vid(ndev, vid, 1); return 0; @@ -744,7 +744,7 @@ static int bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid) * @ndev: network device * @vid: VLAN vid to kill */ -static int bdx_vlan_rx_kill_vid(struct net_device *ndev, unsigned short vid) +static int bdx_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { __bdx_vlan_rx_vid(ndev, vid, 0); return 0; @@ -1102,10 +1102,9 @@ static void bdx_rx_alloc_skbs(struct bdx_priv *priv, struct rxf_fifo *f) dno = bdx_rxdb_available(db) - 1; while (dno > 0) { skb = netdev_alloc_skb(priv->ndev, f->m.pktsz + NET_IP_ALIGN); - if (!skb) { - pr_err("NO MEM: netdev_alloc_skb failed\n"); + if (!skb) break; - } + skb_reserve(skb, NET_IP_ALIGN); idx = bdx_rxdb_alloc_elem(db); @@ -1149,7 +1148,7 @@ NETIF_RX_MUX(struct bdx_priv *priv, u32 rxd_val1, u16 rxd_vlan, priv->ndev->name, GET_RXD_VLAN_ID(rxd_vlan), GET_RXD_VTAG(rxd_val1)); - __vlan_hwaccel_put_tag(skb, GET_RXD_VLAN_TCI(rxd_vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), GET_RXD_VLAN_TCI(rxd_vlan)); } netif_receive_skb(skb); } @@ -2018,12 +2017,12 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * so we can have them same for all ports of the board */ ndev->if_port = port; ndev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO - | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER | NETIF_F_RXCSUM + | 
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM /*| NETIF_F_FRAGLIST */ ; ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_HW_VLAN_TX; + NETIF_F_TSO | NETIF_F_HW_VLAN_CTAG_TX; if (pci_using_dac) ndev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 01ffbc486982..25c364209a21 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -126,6 +126,13 @@ do { \ #define CPSW_FIFO_DUAL_MAC_MODE (1 << 15) #define CPSW_FIFO_RATE_LIMIT_MODE (2 << 15) +#define CPSW_INTPACEEN (0x3f << 16) +#define CPSW_INTPRESCALE_MASK (0x7FF << 0) +#define CPSW_CMINTMAX_CNT 63 +#define CPSW_CMINTMIN_CNT 2 +#define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT) +#define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1) + #define cpsw_enable_irq(priv) \ do { \ u32 i; \ @@ -139,6 +146,10 @@ do { \ disable_irq_nosync(priv->irqs_table[i]); \ } while (0); +#define cpsw_slave_index(priv) \ + ((priv->data.dual_emac) ? 
priv->emac_port : \ + priv->data.active_slave) + static int debug_level; module_param(debug_level, int, 0); MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)"); @@ -160,6 +171,15 @@ struct cpsw_wr_regs { u32 rx_en; u32 tx_en; u32 misc_en; + u32 mem_allign1[8]; + u32 rx_thresh_stat; + u32 rx_stat; + u32 tx_stat; + u32 misc_stat; + u32 mem_allign2[8]; + u32 rx_imax; + u32 tx_imax; + }; struct cpsw_ss_regs { @@ -314,6 +334,8 @@ struct cpsw_priv { struct cpsw_host_regs __iomem *host_port_regs; u32 msg_enable; u32 version; + u32 coal_intvl; + u32 bus_freq_mhz; struct net_device_stats stats; int rx_packet_max; int host_port; @@ -436,7 +458,7 @@ void cpsw_tx_handler(void *token, int len, int status) * queue is stopped then start the queue as we have free desc for tx */ if (unlikely(netif_queue_stopped(ndev))) - netif_start_queue(ndev); + netif_wake_queue(ndev); cpts_tx_timestamp(priv->cpts, skb); priv->stats.tx_packets++; priv->stats.tx_bytes += len; @@ -612,6 +634,77 @@ static void cpsw_adjust_link(struct net_device *ndev) } } +static int cpsw_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + coal->rx_coalesce_usecs = priv->coal_intvl; + return 0; +} + +static int cpsw_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + u32 int_ctrl; + u32 num_interrupts = 0; + u32 prescale = 0; + u32 addnl_dvdr = 1; + u32 coal_intvl = 0; + + if (!coal->rx_coalesce_usecs) + return -EINVAL; + + coal_intvl = coal->rx_coalesce_usecs; + + int_ctrl = readl(&priv->wr_regs->int_control); + prescale = priv->bus_freq_mhz * 4; + + if (coal_intvl < CPSW_CMINTMIN_INTVL) + coal_intvl = CPSW_CMINTMIN_INTVL; + + if (coal_intvl > CPSW_CMINTMAX_INTVL) { + /* Interrupt pacer works with 4us Pulse, we can + * throttle further by dilating the 4us pulse. 
+ */ + addnl_dvdr = CPSW_INTPRESCALE_MASK / prescale; + + if (addnl_dvdr > 1) { + prescale *= addnl_dvdr; + if (coal_intvl > (CPSW_CMINTMAX_INTVL * addnl_dvdr)) + coal_intvl = (CPSW_CMINTMAX_INTVL + * addnl_dvdr); + } else { + addnl_dvdr = 1; + coal_intvl = CPSW_CMINTMAX_INTVL; + } + } + + num_interrupts = (1000 * addnl_dvdr) / coal_intvl; + writel(num_interrupts, &priv->wr_regs->rx_imax); + writel(num_interrupts, &priv->wr_regs->tx_imax); + + int_ctrl |= CPSW_INTPACEEN; + int_ctrl &= (~CPSW_INTPRESCALE_MASK); + int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK); + writel(int_ctrl, &priv->wr_regs->int_control); + + cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl); + if (priv->data.dual_emac) { + int i; + + for (i = 0; i < priv->data.slaves; i++) { + priv = netdev_priv(priv->slaves[i].ndev); + priv->coal_intvl = coal_intvl; + } + } else { + priv->coal_intvl = coal_intvl; + } + + return 0; +} + static inline int __show_stat(char *buf, int maxlen, const char *name, u32 val) { static char *leader = "........................................"; @@ -834,6 +927,14 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); } + /* Enable Interrupt pacing if configured */ + if (priv->coal_intvl != 0) { + struct ethtool_coalesce coal; + + coal.rx_coalesce_usecs = (priv->coal_intvl << 4); + cpsw_set_coalesce(ndev, &coal); + } + cpdma_ctlr_start(priv->dma); cpsw_intr_enable(priv); napi_enable(&priv->napi); @@ -905,7 +1006,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, /* If there is no more tx desc left free then we need to * tell the kernel to stop sending us tx frames. 
*/ - if (unlikely(cpdma_check_free_tx_desc(priv->txch))) + if (unlikely(!cpdma_check_free_tx_desc(priv->txch))) netif_stop_queue(ndev); return NETDEV_TX_OK; @@ -942,7 +1043,7 @@ static void cpsw_ndo_change_rx_flags(struct net_device *ndev, int flags) static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) { - struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave]; + struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave]; u32 ts_en, seq_id; if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) { @@ -971,7 +1072,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) if (priv->data.dual_emac) slave = &priv->slaves[priv->emac_port]; else - slave = &priv->slaves[priv->data.cpts_active_slave]; + slave = &priv->slaves[priv->data.active_slave]; ctrl = slave_read(slave, CPSW2_CONTROL); ctrl &= ~CTRL_ALL_TS_MASK; @@ -1056,14 +1157,26 @@ static int cpsw_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { + struct cpsw_priv *priv = netdev_priv(dev); + struct mii_ioctl_data *data = if_mii(req); + int slave_no = cpsw_slave_index(priv); + if (!netif_running(dev)) return -EINVAL; + switch (cmd) { #ifdef CONFIG_TI_CPTS - if (cmd == SIOCSHWTSTAMP) + case SIOCSHWTSTAMP: return cpsw_hwtstamp_ioctl(dev, req); #endif - return -ENOTSUPP; + case SIOCGMIIPHY: + data->phy_id = priv->slaves[slave_no].phy->addr; + break; + default: + return -ENOTSUPP; + } + + return 0; } static void cpsw_ndo_tx_timeout(struct net_device *ndev) @@ -1138,7 +1251,7 @@ clean_vid: } static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, - unsigned short vid) + __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); @@ -1150,7 +1263,7 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, } static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, - unsigned short vid) + __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); int ret; @@ -1244,12 +1357,39 @@ 
static int cpsw_get_ts_info(struct net_device *ndev, return 0; } +static int cpsw_get_settings(struct net_device *ndev, + struct ethtool_cmd *ecmd) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + int slave_no = cpsw_slave_index(priv); + + if (priv->slaves[slave_no].phy) + return phy_ethtool_gset(priv->slaves[slave_no].phy, ecmd); + else + return -EOPNOTSUPP; +} + +static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + int slave_no = cpsw_slave_index(priv); + + if (priv->slaves[slave_no].phy) + return phy_ethtool_sset(priv->slaves[slave_no].phy, ecmd); + else + return -EOPNOTSUPP; +} + static const struct ethtool_ops cpsw_ethtool_ops = { .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, .set_msglevel = cpsw_set_msglevel, .get_link = ethtool_op_get_link, .get_ts_info = cpsw_get_ts_info, + .get_settings = cpsw_get_settings, + .set_settings = cpsw_set_settings, + .get_coalesce = cpsw_get_coalesce, + .set_coalesce = cpsw_set_coalesce, }; static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, @@ -1282,12 +1422,12 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->slaves = prop; - if (of_property_read_u32(node, "cpts_active_slave", &prop)) { - pr_err("Missing cpts_active_slave property in the DT.\n"); + if (of_property_read_u32(node, "active_slave", &prop)) { + pr_err("Missing active_slave property in the DT.\n"); ret = -EINVAL; goto error_ret; } - data->cpts_active_slave = prop; + data->active_slave = prop; if (of_property_read_u32(node, "cpts_clock_mult", &prop)) { pr_err("Missing cpts_clock_mult property in the DT.\n"); @@ -1364,7 +1504,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, struct platform_device *mdio; parp = of_get_property(slave_node, "phy_id", &lenp); - if ((parp == NULL) && (lenp != (sizeof(void *) * 2))) { + if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) { pr_err("Missing slave[%d] phy_id 
property\n", i); ret = -EINVAL; goto error_ret; @@ -1380,7 +1520,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); if (data->dual_emac) { - if (of_property_read_u32(node, "dual_emac_res_vlan", + if (of_property_read_u32(slave_node, "dual_emac_res_vlan", &prop)) { pr_err("Missing dual_emac_res_vlan in DT.\n"); slave_data->dual_emac_res_vlan = i+1; @@ -1437,6 +1577,9 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, priv_sl2->slaves = priv->slaves; priv_sl2->clk = priv->clk; + priv_sl2->coal_intvl = 0; + priv_sl2->bus_freq_mhz = priv->bus_freq_mhz; + priv_sl2->cpsw_res = priv->cpsw_res; priv_sl2->regs = priv->regs; priv_sl2->host_port = priv->host_port; @@ -1456,7 +1599,7 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, priv_sl2->num_irqs = priv->num_irqs; } - ndev->features |= NETIF_F_HW_VLAN_FILTER; + ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); @@ -1546,6 +1689,8 @@ static int cpsw_probe(struct platform_device *pdev) ret = -ENODEV; goto clean_slave_ret; } + priv->coal_intvl = 0; + priv->bus_freq_mhz = clk_get_rate(priv->clk) / 1000000; priv->cpsw_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!priv->cpsw_res) { @@ -1692,7 +1837,7 @@ static int cpsw_probe(struct platform_device *pdev) k++; } - ndev->features |= NETIF_F_HW_VLAN_FILTER; + ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 52c05366599a..6a0b47715a84 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1053,7 +1053,7 @@ static void emac_tx_handler(void *token, int len, int status) * queue is stopped then start the queue as we have free desc for tx */ if (unlikely(netif_queue_stopped(ndev))) - 
netif_start_queue(ndev); + netif_wake_queue(ndev); ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; dev_kfree_skb_any(skb); @@ -1102,7 +1102,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev) /* If there is no more tx desc left free then we need to * tell the kernel to stop sending us tx frames. */ - if (unlikely(cpdma_check_free_tx_desc(priv->txchan))) + if (unlikely(!cpdma_check_free_tx_desc(priv->txchan))) netif_stop_queue(ndev); return NETDEV_TX_OK; @@ -1438,7 +1438,7 @@ static int emac_poll(struct napi_struct *napi, int budget) * Polled functionality used by netconsole and others in non interrupt mode * */ -void emac_poll_controller(struct net_device *ndev) +static void emac_poll_controller(struct net_device *ndev) { struct emac_priv *priv = netdev_priv(ndev); @@ -1865,21 +1865,18 @@ static int davinci_emac_probe(struct platform_device *pdev) /* obtain emac clock from kernel */ - emac_clk = clk_get(&pdev->dev, NULL); + emac_clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(emac_clk)) { dev_err(&pdev->dev, "failed to get EMAC clock\n"); return -EBUSY; } emac_bus_frequency = clk_get_rate(emac_clk); - clk_put(emac_clk); /* TODO: Probe PHY here if possible */ ndev = alloc_etherdev(sizeof(struct emac_priv)); - if (!ndev) { - rc = -ENOMEM; - goto no_ndev; - } + if (!ndev) + return -ENOMEM; platform_set_drvdata(pdev, ndev); priv = netdev_priv(ndev); @@ -1893,7 +1890,7 @@ static int davinci_emac_probe(struct platform_device *pdev) if (!pdata) { dev_err(&pdev->dev, "no platform data\n"); rc = -ENODEV; - goto probe_quit; + goto no_pdata; } /* MAC addr and PHY mask , RMII enable info from platform_data */ @@ -1913,23 +1910,23 @@ static int davinci_emac_probe(struct platform_device *pdev) if (!res) { dev_err(&pdev->dev,"error getting res\n"); rc = -ENOENT; - goto probe_quit; + goto no_pdata; } priv->emac_base_phys = res->start + pdata->ctrl_reg_offset; size = resource_size(res); - if (!request_mem_region(res->start, size, ndev->name)) { + if 
(!devm_request_mem_region(&pdev->dev, res->start, + size, ndev->name)) { dev_err(&pdev->dev, "failed request_mem_region() for regs\n"); rc = -ENXIO; - goto probe_quit; + goto no_pdata; } - priv->remap_addr = ioremap(res->start, size); + priv->remap_addr = devm_ioremap(&pdev->dev, res->start, size); if (!priv->remap_addr) { dev_err(&pdev->dev, "unable to map IO\n"); rc = -ENOMEM; - release_mem_region(res->start, size); - goto probe_quit; + goto no_pdata; } priv->emac_base = priv->remap_addr + pdata->ctrl_reg_offset; ndev->base_addr = (unsigned long)priv->remap_addr; @@ -1962,7 +1959,7 @@ static int davinci_emac_probe(struct platform_device *pdev) if (!priv->dma) { dev_err(&pdev->dev, "error initializing DMA\n"); rc = -ENOMEM; - goto no_dma; + goto no_pdata; } priv->txchan = cpdma_chan_create(priv->dma, tx_chan_num(EMAC_DEF_TX_CH), @@ -1971,14 +1968,14 @@ static int davinci_emac_probe(struct platform_device *pdev) emac_rx_handler); if (WARN_ON(!priv->txchan || !priv->rxchan)) { rc = -ENOMEM; - goto no_irq_res; + goto no_cpdma_chan; } res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res) { dev_err(&pdev->dev, "error getting irq res\n"); rc = -ENOENT; - goto no_irq_res; + goto no_cpdma_chan; } ndev->irq = res->start; @@ -2000,7 +1997,7 @@ static int davinci_emac_probe(struct platform_device *pdev) if (rc) { dev_err(&pdev->dev, "error in register_netdev\n"); rc = -ENODEV; - goto no_irq_res; + goto no_cpdma_chan; } @@ -2015,20 +2012,14 @@ static int davinci_emac_probe(struct platform_device *pdev) return 0; -no_irq_res: +no_cpdma_chan: if (priv->txchan) cpdma_chan_destroy(priv->txchan); if (priv->rxchan) cpdma_chan_destroy(priv->rxchan); cpdma_ctlr_destroy(priv->dma); -no_dma: - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - iounmap(priv->remap_addr); - -probe_quit: +no_pdata: free_netdev(ndev); -no_ndev: return rc; } @@ -2041,14 +2032,12 @@ no_ndev: */ static int davinci_emac_remove(struct 
platform_device *pdev) { - struct resource *res; struct net_device *ndev = platform_get_drvdata(pdev); struct emac_priv *priv = netdev_priv(ndev); dev_notice(&ndev->dev, "DaVinci EMAC: davinci_emac_remove()\n"); platform_set_drvdata(pdev, NULL); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (priv->txchan) cpdma_chan_destroy(priv->txchan); @@ -2056,10 +2045,7 @@ static int davinci_emac_remove(struct platform_device *pdev) cpdma_chan_destroy(priv->rxchan); cpdma_ctlr_destroy(priv->dma); - release_mem_region(res->start, resource_size(res)); - unregister_netdev(ndev); - iounmap(priv->remap_addr); free_netdev(ndev); return 0; diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 22725386c5de..60c400f6d01f 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -320,6 +320,7 @@ static void tlan_remove_one(struct pci_dev *pdev) free_netdev(dev); pci_set_drvdata(pdev, NULL); + cancel_work_sync(&priv->tlan_tqueue); } static void tlan_start(struct net_device *dev) @@ -1911,10 +1912,8 @@ static void tlan_reset_lists(struct net_device *dev) list->frame_size = TLAN_MAX_FRAME_SIZE; list->buffer[0].count = TLAN_MAX_FRAME_SIZE | TLAN_LAST_BUFFER; skb = netdev_alloc_skb_ip_align(dev, TLAN_MAX_FRAME_SIZE + 5); - if (!skb) { - netdev_err(dev, "Out of memory for received data\n"); + if (!skb) break; - } list->buffer[0].address = pci_map_single(priv->pci_dev, skb->data, diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 445c0595c997..ad32af67e618 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -58,13 +58,6 @@ MODULE_DESCRIPTION("Gelic Network driver"); MODULE_LICENSE("GPL"); -static inline void gelic_card_enable_rxdmac(struct gelic_card *card); -static inline void gelic_card_disable_rxdmac(struct gelic_card *card); -static inline void gelic_card_disable_txdmac(struct gelic_card *card); -static 
inline void gelic_card_reset_chain(struct gelic_card *card, - struct gelic_descr_chain *chain, - struct gelic_descr *start_descr); - /* set irq_mask */ int gelic_card_set_irq_mask(struct gelic_card *card, u64 mask) { @@ -78,12 +71,12 @@ int gelic_card_set_irq_mask(struct gelic_card *card, u64 mask) return status; } -static inline void gelic_card_rx_irq_on(struct gelic_card *card) +static void gelic_card_rx_irq_on(struct gelic_card *card) { card->irq_mask |= GELIC_CARD_RXINT; gelic_card_set_irq_mask(card, card->irq_mask); } -static inline void gelic_card_rx_irq_off(struct gelic_card *card) +static void gelic_card_rx_irq_off(struct gelic_card *card) { card->irq_mask &= ~GELIC_CARD_RXINT; gelic_card_set_irq_mask(card, card->irq_mask); @@ -127,6 +120,120 @@ static int gelic_card_set_link_mode(struct gelic_card *card, int mode) return 0; } +/** + * gelic_card_disable_txdmac - disables the transmit DMA controller + * @card: card structure + * + * gelic_card_disable_txdmac terminates processing on the DMA controller by + * turing off DMA and issuing a force end + */ +static void gelic_card_disable_txdmac(struct gelic_card *card) +{ + int status; + + /* this hvc blocks until the DMA in progress really stopped */ + status = lv1_net_stop_tx_dma(bus_id(card), dev_id(card)); + if (status) + dev_err(ctodev(card), + "lv1_net_stop_tx_dma failed, status=%d\n", status); +} + +/** + * gelic_card_enable_rxdmac - enables the receive DMA controller + * @card: card structure + * + * gelic_card_enable_rxdmac enables the DMA controller by setting RX_DMA_EN + * in the GDADMACCNTR register + */ +static void gelic_card_enable_rxdmac(struct gelic_card *card) +{ + int status; + +#ifdef DEBUG + if (gelic_descr_get_status(card->rx_chain.head) != + GELIC_DESCR_DMA_CARDOWNED) { + printk(KERN_ERR "%s: status=%x\n", __func__, + be32_to_cpu(card->rx_chain.head->dmac_cmd_status)); + printk(KERN_ERR "%s: nextphy=%x\n", __func__, + be32_to_cpu(card->rx_chain.head->next_descr_addr)); + printk(KERN_ERR 
"%s: head=%p\n", __func__, + card->rx_chain.head); + } +#endif + status = lv1_net_start_rx_dma(bus_id(card), dev_id(card), + card->rx_chain.head->bus_addr, 0); + if (status) + dev_info(ctodev(card), + "lv1_net_start_rx_dma failed, status=%d\n", status); +} + +/** + * gelic_card_disable_rxdmac - disables the receive DMA controller + * @card: card structure + * + * gelic_card_disable_rxdmac terminates processing on the DMA controller by + * turing off DMA and issuing a force end + */ +static void gelic_card_disable_rxdmac(struct gelic_card *card) +{ + int status; + + /* this hvc blocks until the DMA in progress really stopped */ + status = lv1_net_stop_rx_dma(bus_id(card), dev_id(card)); + if (status) + dev_err(ctodev(card), + "lv1_net_stop_rx_dma failed, %d\n", status); +} + +/** + * gelic_descr_set_status -- sets the status of a descriptor + * @descr: descriptor to change + * @status: status to set in the descriptor + * + * changes the status to the specified value. Doesn't change other bits + * in the status + */ +static void gelic_descr_set_status(struct gelic_descr *descr, + enum gelic_descr_dma_status status) +{ + descr->dmac_cmd_status = cpu_to_be32(status | + (be32_to_cpu(descr->dmac_cmd_status) & + ~GELIC_DESCR_DMA_STAT_MASK)); + /* + * dma_cmd_status field is used to indicate whether the descriptor + * is valid or not. + * Usually caller of this function wants to inform that to the + * hardware, so we assure here the hardware sees the change. 
+ */ + wmb(); +} + +/** + * gelic_card_reset_chain - reset status of a descriptor chain + * @card: card structure + * @chain: address of chain + * @start_descr: address of descriptor array + * + * Reset the status of dma descriptors to ready state + * and re-initialize the hardware chain for later use + */ +static void gelic_card_reset_chain(struct gelic_card *card, + struct gelic_descr_chain *chain, + struct gelic_descr *start_descr) +{ + struct gelic_descr *descr; + + for (descr = start_descr; start_descr != descr->next; descr++) { + gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED); + descr->next_descr_addr = cpu_to_be32(descr->next->bus_addr); + } + + chain->head = start_descr; + chain->tail = (descr - 1); + + (descr - 1)->next_descr_addr = 0; +} + void gelic_card_up(struct gelic_card *card) { pr_debug("%s: called\n", __func__); @@ -183,29 +290,6 @@ gelic_descr_get_status(struct gelic_descr *descr) } /** - * gelic_descr_set_status -- sets the status of a descriptor - * @descr: descriptor to change - * @status: status to set in the descriptor - * - * changes the status to the specified value. Doesn't change other bits - * in the status - */ -static void gelic_descr_set_status(struct gelic_descr *descr, - enum gelic_descr_dma_status status) -{ - descr->dmac_cmd_status = cpu_to_be32(status | - (be32_to_cpu(descr->dmac_cmd_status) & - ~GELIC_DESCR_DMA_STAT_MASK)); - /* - * dma_cmd_status field is used to indicate whether the descriptor - * is valid or not. - * Usually caller of this function wants to inform that to the - * hardware, so we assure here the hardware sees the change. 
- */ - wmb(); -} - -/** * gelic_card_free_chain - free descriptor chain * @card: card structure * @descr_in: address of desc @@ -286,31 +370,6 @@ iommu_error: } /** - * gelic_card_reset_chain - reset status of a descriptor chain - * @card: card structure - * @chain: address of chain - * @start_descr: address of descriptor array - * - * Reset the status of dma descriptors to ready state - * and re-initialize the hardware chain for later use - */ -static void gelic_card_reset_chain(struct gelic_card *card, - struct gelic_descr_chain *chain, - struct gelic_descr *start_descr) -{ - struct gelic_descr *descr; - - for (descr = start_descr; start_descr != descr->next; descr++) { - gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED); - descr->next_descr_addr = cpu_to_be32(descr->next->bus_addr); - } - - chain->head = start_descr; - chain->tail = (descr - 1); - - (descr - 1)->next_descr_addr = 0; -} -/** * gelic_descr_prepare_rx - reinitializes a rx descriptor * @card: card structure * @descr: descriptor to re-init @@ -599,71 +658,6 @@ void gelic_net_set_multi(struct net_device *netdev) } /** - * gelic_card_enable_rxdmac - enables the receive DMA controller - * @card: card structure - * - * gelic_card_enable_rxdmac enables the DMA controller by setting RX_DMA_EN - * in the GDADMACCNTR register - */ -static inline void gelic_card_enable_rxdmac(struct gelic_card *card) -{ - int status; - -#ifdef DEBUG - if (gelic_descr_get_status(card->rx_chain.head) != - GELIC_DESCR_DMA_CARDOWNED) { - printk(KERN_ERR "%s: status=%x\n", __func__, - be32_to_cpu(card->rx_chain.head->dmac_cmd_status)); - printk(KERN_ERR "%s: nextphy=%x\n", __func__, - be32_to_cpu(card->rx_chain.head->next_descr_addr)); - printk(KERN_ERR "%s: head=%p\n", __func__, - card->rx_chain.head); - } -#endif - status = lv1_net_start_rx_dma(bus_id(card), dev_id(card), - card->rx_chain.head->bus_addr, 0); - if (status) - dev_info(ctodev(card), - "lv1_net_start_rx_dma failed, status=%d\n", status); -} - -/** - * 
gelic_card_disable_rxdmac - disables the receive DMA controller - * @card: card structure - * - * gelic_card_disable_rxdmac terminates processing on the DMA controller by - * turing off DMA and issuing a force end - */ -static inline void gelic_card_disable_rxdmac(struct gelic_card *card) -{ - int status; - - /* this hvc blocks until the DMA in progress really stopped */ - status = lv1_net_stop_rx_dma(bus_id(card), dev_id(card)); - if (status) - dev_err(ctodev(card), - "lv1_net_stop_rx_dma failed, %d\n", status); -} - -/** - * gelic_card_disable_txdmac - disables the transmit DMA controller - * @card: card structure - * - * gelic_card_disable_txdmac terminates processing on the DMA controller by - * turing off DMA and issuing a force end - */ -static inline void gelic_card_disable_txdmac(struct gelic_card *card) -{ - int status; - - /* this hvc blocks until the DMA in progress really stopped */ - status = lv1_net_stop_tx_dma(bus_id(card), dev_id(card)); - if (status) - dev_err(ctodev(card), - "lv1_net_stop_tx_dma failed, status=%d\n", status); -} - -/** * gelic_net_stop - called upon ifconfig down * @netdev: interface device structure * @@ -746,7 +740,7 @@ static void gelic_descr_set_tx_cmdstat(struct gelic_descr *descr, } } -static inline struct sk_buff *gelic_put_vlan_tag(struct sk_buff *skb, +static struct sk_buff *gelic_put_vlan_tag(struct sk_buff *skb, unsigned short tag) { struct vlan_ethhdr *veth; diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index f1b91fd7e41c..c655fe60121e 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -352,8 +352,7 @@ spider_net_init_chain(struct spider_net_card *card, alloc_size = chain->num_desc * sizeof(struct spider_net_hw_descr); chain->hwring = dma_alloc_coherent(&card->pdev->dev, alloc_size, - &chain->dma_addr, GFP_KERNEL); - + &chain->dma_addr, GFP_KERNEL); if (!chain->hwring) return -ENOMEM; @@ -2330,8 +2329,8 @@ 
spider_net_setup_netdev(struct spider_net_card *card) if (SPIDER_NET_RX_CSUM_DEFAULT) netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_IP_CSUM | NETIF_F_LLTX; - /* some time: NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - * NETIF_F_HW_VLAN_FILTER */ + /* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + * NETIF_F_HW_VLAN_CTAG_FILTER */ netdev->irq = card->pdev->irq; card->num_rx_ints = 0; diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 8fa947a2d929..3c69a0460832 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1308,27 +1308,16 @@ static int tsi108_open(struct net_device *dev) data->id, dev->irq, dev->name); } - data->rxring = dma_alloc_coherent(NULL, rxring_size, - &data->rxdma, GFP_KERNEL); - - if (!data->rxring) { - printk(KERN_DEBUG - "TSI108_ETH: failed to allocate memory for rxring!\n"); + data->rxring = dma_alloc_coherent(NULL, rxring_size, &data->rxdma, + GFP_KERNEL | __GFP_ZERO); + if (!data->rxring) return -ENOMEM; - } else { - memset(data->rxring, 0, rxring_size); - } - - data->txring = dma_alloc_coherent(NULL, txring_size, - &data->txdma, GFP_KERNEL); + data->txring = dma_alloc_coherent(NULL, txring_size, &data->txdma, + GFP_KERNEL | __GFP_ZERO); if (!data->txring) { - printk(KERN_DEBUG - "TSI108_ETH: failed to allocate memory for txring!\n"); pci_free_consistent(0, rxring_size, data->rxring, data->rxdma); return -ENOMEM; - } else { - memset(data->txring, 0, txring_size); } for (i = 0; i < TSI108_RXRING_LEN; i++) { diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 185c721c52d7..ca98acabf1b4 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -508,8 +508,10 @@ static struct rtnl_link_stats64 *rhine_get_stats64(struct net_device *dev, static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static const struct 
ethtool_ops netdev_ethtool_ops; static int rhine_close(struct net_device *dev); -static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid); -static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid); +static int rhine_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid); +static int rhine_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid); static void rhine_restart_tx(struct net_device *dev); static void rhine_wait_bit(struct rhine_private *rp, u8 reg, u8 mask, bool low) @@ -1026,8 +1028,9 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; if (pdev->revision >= VT6105M) - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; /* dev->name not defined before register_netdev()! */ rc = register_netdev(dev); @@ -1414,7 +1417,7 @@ static void rhine_update_vcam(struct net_device *dev) rhine_set_vlan_cam_mask(ioaddr, vCAMmask); } -static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int rhine_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct rhine_private *rp = netdev_priv(dev); @@ -1425,7 +1428,7 @@ static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int rhine_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct rhine_private *rp = netdev_priv(dev); @@ -1933,7 +1936,7 @@ static int rhine_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); if (unlikely(desc_length & DescTag)) - __vlan_hwaccel_put_tag(skb, vlan_tci); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); netif_receive_skb(skb); u64_stats_update_begin(&rp->rx_stats.syncp); diff --git 
a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 1bc7f9fd2583..fb6248956ee2 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -525,7 +525,8 @@ static void velocity_init_cam_filter(struct velocity_info *vptr) mac_set_vlan_cam_mask(regs, vptr->vCAMmask); } -static int velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int velocity_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct velocity_info *vptr = netdev_priv(dev); @@ -536,7 +537,8 @@ static int velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int velocity_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct velocity_info *vptr = netdev_priv(dev); @@ -2078,7 +2080,7 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx) if (rd->rdesc0.RSR & RSR_DETAG) { u16 vid = swab16(le16_to_cpu(rd->rdesc1.PQTAG)); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } netif_rx(skb); @@ -2810,9 +2812,10 @@ static int velocity_found1(struct pci_dev *pdev, dev->ethtool_ops = &velocity_ethtool_ops; netif_napi_add(dev, &vptr->napi, velocity_poll, VELOCITY_NAPI_WEIGHT); - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HW_VLAN_TX; - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER | - NETIF_F_HW_VLAN_RX | NETIF_F_IP_CSUM; + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | + NETIF_F_HW_VLAN_CTAG_TX; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_IP_CSUM; ret = register_netdev(dev); if (ret < 0) diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index 545043cc4c0b..a518dcab396e 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ 
-754,7 +754,7 @@ static int w5100_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int w5100_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -787,7 +787,7 @@ static int w5100_resume(struct device *dev) } return 0; } -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(w5100_pm_ops, w5100_suspend, w5100_resume); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 7cbd0e6fc6f3..6e00e3f94ce4 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -666,7 +666,7 @@ static int w5300_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int w5300_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -699,7 +699,7 @@ static int w5300_resume(struct device *dev) } return 0; } -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(w5300_pm_ops, w5300_suspend, w5300_resume); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 9fc2ada4c3c2..57c2e5ef2804 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -245,39 +245,30 @@ static int temac_dma_bd_init(struct net_device *ndev) /* returns a virtual address and a physical address. 
*/ lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, sizeof(*lp->tx_bd_v) * TX_BD_NUM, - &lp->tx_bd_p, GFP_KERNEL); - if (!lp->tx_bd_v) { - dev_err(&ndev->dev, - "unable to allocate DMA TX buffer descriptors"); + &lp->tx_bd_p, GFP_KERNEL | __GFP_ZERO); + if (!lp->tx_bd_v) goto out; - } + lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, sizeof(*lp->rx_bd_v) * RX_BD_NUM, - &lp->rx_bd_p, GFP_KERNEL); - if (!lp->rx_bd_v) { - dev_err(&ndev->dev, - "unable to allocate DMA RX buffer descriptors"); + &lp->rx_bd_p, GFP_KERNEL | __GFP_ZERO); + if (!lp->rx_bd_v) goto out; - } - memset(lp->tx_bd_v, 0, sizeof(*lp->tx_bd_v) * TX_BD_NUM); for (i = 0; i < TX_BD_NUM; i++) { lp->tx_bd_v[i].next = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * ((i + 1) % TX_BD_NUM); } - memset(lp->rx_bd_v, 0, sizeof(*lp->rx_bd_v) * RX_BD_NUM); for (i = 0; i < RX_BD_NUM; i++) { lp->rx_bd_v[i].next = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * ((i + 1) % RX_BD_NUM); skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE); - - if (skb == 0) { - dev_err(&ndev->dev, "alloc_skb error %d\n", i); + if (!skb) goto out; - } + lp->rx_skb[i] = skb; /* returns physical address of skb->data */ lp->rx_bd_v[i].phys = dma_map_single(ndev->dev.parent, @@ -789,9 +780,7 @@ static void ll_temac_recv(struct net_device *ndev) new_skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE); - - if (new_skb == 0) { - dev_err(&ndev->dev, "no memory for new sk_buff\n"); + if (!new_skb) { spin_unlock_irqrestore(&lp->rx_lock, flags); return; } @@ -1029,9 +1018,9 @@ static int temac_of_probe(struct platform_device *op) ndev->features |= NETIF_F_HW_CSUM; /* Can checksum all the packets. */ ndev->features |= NETIF_F_IPV6_CSUM; /* Can checksum IPV6 TCP/UDP */ ndev->features |= NETIF_F_HIGHDMA; /* Can DMA to high memory. 
*/ - ndev->features |= NETIF_F_HW_VLAN_TX; /* Transmit VLAN hw accel */ - ndev->features |= NETIF_F_HW_VLAN_RX; /* Receive VLAN hw acceleration */ - ndev->features |= NETIF_F_HW_VLAN_FILTER; /* Receive VLAN filtering */ + ndev->features |= NETIF_F_HW_VLAN_CTAG_TX; /* Transmit VLAN hw accel */ + ndev->features |= NETIF_F_HW_VLAN_CTAG_RX; /* Receive VLAN hw acceleration */ + ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; /* Receive VLAN filtering */ ndev->features |= NETIF_F_VLAN_CHALLENGED; /* cannot handle VLAN pkts */ ndev->features |= NETIF_F_GSO; /* Enable software GSO. */ ndev->features |= NETIF_F_MULTI_QUEUE; /* Has multiple TX/RX queues */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 278c9db3b5b8..24748e8367a1 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -204,41 +204,31 @@ static int axienet_dma_bd_init(struct net_device *ndev) lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, sizeof(*lp->tx_bd_v) * TX_BD_NUM, &lp->tx_bd_p, - GFP_KERNEL); - if (!lp->tx_bd_v) { - dev_err(&ndev->dev, "unable to allocate DMA Tx buffer " - "descriptors"); + GFP_KERNEL | __GFP_ZERO); + if (!lp->tx_bd_v) goto out; - } lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, sizeof(*lp->rx_bd_v) * RX_BD_NUM, &lp->rx_bd_p, - GFP_KERNEL); - if (!lp->rx_bd_v) { - dev_err(&ndev->dev, "unable to allocate DMA Rx buffer " - "descriptors"); + GFP_KERNEL | __GFP_ZERO); + if (!lp->rx_bd_v) goto out; - } - memset(lp->tx_bd_v, 0, sizeof(*lp->tx_bd_v) * TX_BD_NUM); for (i = 0; i < TX_BD_NUM; i++) { lp->tx_bd_v[i].next = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * ((i + 1) % TX_BD_NUM); } - memset(lp->rx_bd_v, 0, sizeof(*lp->rx_bd_v) * RX_BD_NUM); for (i = 0; i < RX_BD_NUM; i++) { lp->rx_bd_v[i].next = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * ((i + 1) % RX_BD_NUM); skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); - if (!skb) { - dev_err(&ndev->dev, 
"alloc_skb error %d\n", i); + if (!skb) goto out; - } lp->rx_bd_v[i].sw_id_offset = (u32) skb; lp->rx_bd_v[i].phys = dma_map_single(ndev->dev.parent, @@ -777,10 +767,9 @@ static void axienet_recv(struct net_device *ndev) packets++; new_skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); - if (!new_skb) { - dev_err(&ndev->dev, "no memory for new sk_buff\n"); + if (!new_skb) return; - } + cur_p->phys = dma_map_single(ndev->dev.parent, new_skb->data, lp->max_frm_size, DMA_FROM_DEVICE); diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index 98e09d0d3ce2..76210abf2e9b 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -1041,7 +1041,6 @@ xirc2ps_interrupt(int irq, void *dev_id) /* 1 extra so we can use insw */ skb = netdev_alloc_skb(dev, pktlen + 3); if (!skb) { - pr_notice("low memory, packet dropped (size=%u)\n", pktlen); dev->stats.rx_dropped++; } else { /* okay get the packet */ skb_reserve(skb, 2); diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 502c8ff1d985..4c8ddc944d51 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -1070,13 +1070,10 @@ static int dfx_driver_init(struct net_device *dev, const char *print_name, (PI_ALIGN_K_DESC_BLK - 1); bp->kmalloced = top_v = dma_alloc_coherent(bp->bus_dev, alloc_size, &bp->kmalloced_dma, - GFP_ATOMIC); - if (top_v == NULL) { - printk("%s: Could not allocate memory for host buffers " - "and structures!\n", print_name); + GFP_ATOMIC | __GFP_ZERO); + if (top_v == NULL) return DFX_K_FAILURE; - } - memset(top_v, 0, alloc_size); /* zero out memory before continuing */ + top_p = bp->kmalloced_dma; /* get physical address of buffer */ /* diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 4cf8f1017aad..b2d863f2ea42 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -866,7 +866,7 @@ static int yam_open(struct net_device *dev) 
printk(KERN_INFO "Trying %s at iobase 0x%lx irq %u\n", dev->name, dev->base_addr, dev->irq); - if (!dev || !yp->bitrate) + if (!yp->bitrate) return -ENXIO; if (!dev->base_addr || dev->base_addr > 0x1000 - YAM_EXTENT || dev->irq < 2 || dev->irq > 15) { diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index e5b19b056909..3c4d6274bb9b 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -202,6 +202,9 @@ static int rr_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; out: + if (rrpriv->evt_ring) + pci_free_consistent(pdev, EVT_RING_SIZE, rrpriv->evt_ring, + rrpriv->evt_ring_dma); if (rrpriv->rx_ring) pci_free_consistent(pdev, RX_TOTAL_SIZE, rrpriv->rx_ring, rrpriv->rx_ring_dma); diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1cd77483da50..f5f0f09e4cc5 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -470,8 +470,10 @@ static void netvsc_send_completion(struct hv_device *device, packet->trans_id; /* Notify the layer above us */ - nvsc_packet->completion.send.send_completion( - nvsc_packet->completion.send.send_completion_ctx); + if (nvsc_packet) + nvsc_packet->completion.send.send_completion( + nvsc_packet->completion.send. 
+ send_completion_ctx); num_outstanding_sends = atomic_dec_return(&net_device->num_outstanding_sends); @@ -498,6 +500,7 @@ int netvsc_send(struct hv_device *device, int ret = 0; struct nvsp_message sendMessage; struct net_device *ndev; + u64 req_id; net_device = get_outbound_net_device(device); if (!net_device) @@ -518,20 +521,24 @@ int netvsc_send(struct hv_device *device, 0xFFFFFFFF; sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + if (packet->completion.send.send_completion) + req_id = (u64)packet; + else + req_id = 0; + if (packet->page_buf_cnt) { ret = vmbus_sendpacket_pagebuffer(device->channel, packet->page_buf, packet->page_buf_cnt, &sendMessage, sizeof(struct nvsp_message), - (unsigned long)packet); + req_id); } else { ret = vmbus_sendpacket(device->channel, &sendMessage, sizeof(struct nvsp_message), - (unsigned long)packet, + req_id, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - } if (ret == 0) { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5f85205cd12b..088c55496191 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -241,13 +241,11 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, if (status == 1) { netif_carrier_on(net); - netif_wake_queue(net); ndev_ctx = netdev_priv(net); schedule_delayed_work(&ndev_ctx->dwork, 0); schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); } else { netif_carrier_off(net); - netif_tx_disable(net); } } @@ -431,7 +429,7 @@ static int netvsc_probe(struct hv_device *dev, /* TODO: Add GSO and Checksum offload */ net->hw_features = NETIF_F_SG; - net->features = NETIF_F_SG | NETIF_F_HW_VLAN_TX; + net->features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_TX; SET_ETHTOOL_OPS(net, &ethtool_ops); SET_NETDEV_DEV(net, &dev->device); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 2b657d4d63a8..0775f0aefd1e 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ 
b/drivers/net/hyperv/rndis_filter.c @@ -61,9 +61,6 @@ struct rndis_request { static void rndis_filter_send_completion(void *ctx); -static void rndis_filter_send_request_completion(void *ctx); - - static struct rndis_device *get_rndis_device(void) { @@ -241,10 +238,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->page_buf[0].len; } - packet->completion.send.send_completion_ctx = req;/* packet; */ - packet->completion.send.send_completion = - rndis_filter_send_request_completion; - packet->completion.send.send_completion_tid = (unsigned long)dev; + packet->completion.send.send_completion = NULL; ret = netvsc_send(dev->net_dev->dev, packet); return ret; @@ -999,9 +993,3 @@ static void rndis_filter_send_completion(void *ctx) /* Pass it back to the original handler */ filter_pkt->completion(filter_pkt->completion_ctx); } - - -static void rndis_filter_send_request_completion(void *ctx) -{ - /* Noop */ -} diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index fc1687ea4a42..6f10b4964726 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -51,7 +51,7 @@ struct at86rf230_local { struct ieee802154_dev *dev; spinlock_t lock; - bool irq_disabled; + bool irq_busy; bool is_tx; }; @@ -219,6 +219,9 @@ struct at86rf230_local { #define IRQ_PLL_UNL (1 << 1) #define IRQ_PLL_LOCK (1 << 0) +#define IRQ_ACTIVE_HIGH 0 +#define IRQ_ACTIVE_LOW 1 + #define STATE_P_ON 0x00 /* BUSY */ #define STATE_BUSY_RX 0x01 #define STATE_BUSY_TX 0x02 @@ -233,8 +236,8 @@ struct at86rf230_local { #define STATE_SLEEP 0x0F #define STATE_BUSY_RX_AACK 0x11 #define STATE_BUSY_TX_ARET 0x12 -#define STATE_BUSY_RX_AACK_ON 0x16 -#define STATE_BUSY_TX_ARET_ON 0x19 +#define STATE_RX_AACK_ON 0x16 +#define STATE_TX_ARET_ON 0x19 #define STATE_RX_ON_NOCLK 0x1C #define STATE_RX_AACK_ON_NOCLK 0x1D #define STATE_BUSY_RX_AACK_NOCLK 0x1E @@ -544,7 +547,7 @@ at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb) 
unsigned long flags; spin_lock(&lp->lock); - if (lp->irq_disabled) { + if (lp->irq_busy) { spin_unlock(&lp->lock); return -EBUSY; } @@ -619,6 +622,52 @@ err: return -EINVAL; } +static int +at86rf230_set_hw_addr_filt(struct ieee802154_dev *dev, + struct ieee802154_hw_addr_filt *filt, + unsigned long changed) +{ + struct at86rf230_local *lp = dev->priv; + + if (changed & IEEE802515_AFILT_SADDR_CHANGED) { + dev_vdbg(&lp->spi->dev, + "at86rf230_set_hw_addr_filt called for saddr\n"); + __at86rf230_write(lp, RG_SHORT_ADDR_0, filt->short_addr); + __at86rf230_write(lp, RG_SHORT_ADDR_1, filt->short_addr >> 8); + } + + if (changed & IEEE802515_AFILT_PANID_CHANGED) { + dev_vdbg(&lp->spi->dev, + "at86rf230_set_hw_addr_filt called for pan id\n"); + __at86rf230_write(lp, RG_PAN_ID_0, filt->pan_id); + __at86rf230_write(lp, RG_PAN_ID_1, filt->pan_id >> 8); + } + + if (changed & IEEE802515_AFILT_IEEEADDR_CHANGED) { + dev_vdbg(&lp->spi->dev, + "at86rf230_set_hw_addr_filt called for IEEE addr\n"); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_0, filt->ieee_addr[7]); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_1, filt->ieee_addr[6]); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_2, filt->ieee_addr[5]); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_3, filt->ieee_addr[4]); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_4, filt->ieee_addr[3]); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_5, filt->ieee_addr[2]); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_6, filt->ieee_addr[1]); + at86rf230_write_subreg(lp, SR_IEEE_ADDR_7, filt->ieee_addr[0]); + } + + if (changed & IEEE802515_AFILT_PANC_CHANGED) { + dev_vdbg(&lp->spi->dev, + "at86rf230_set_hw_addr_filt called for panc change\n"); + if (filt->pan_coord) + at86rf230_write_subreg(lp, SR_AACK_I_AM_COORD, 1); + else + at86rf230_write_subreg(lp, SR_AACK_I_AM_COORD, 0); + } + + return 0; +} + static struct ieee802154_ops at86rf230_ops = { .owner = THIS_MODULE, .xmit = at86rf230_xmit, @@ -626,6 +675,7 @@ static struct ieee802154_ops at86rf230_ops = { .set_channel = 
at86rf230_channel, .start = at86rf230_start, .stop = at86rf230_stop, + .set_hw_addr_filt = at86rf230_set_hw_addr_filt, }; static void at86rf230_irqwork(struct work_struct *work) @@ -658,8 +708,16 @@ static void at86rf230_irqwork(struct work_struct *work) } spin_lock_irqsave(&lp->lock, flags); - lp->irq_disabled = 0; + lp->irq_busy = 0; spin_unlock_irqrestore(&lp->lock, flags); +} + +static void at86rf230_irqwork_level(struct work_struct *work) +{ + struct at86rf230_local *lp = + container_of(work, struct at86rf230_local, irqwork); + + at86rf230_irqwork(work); enable_irq(lp->spi->irq); } @@ -668,10 +726,8 @@ static irqreturn_t at86rf230_isr(int irq, void *data) { struct at86rf230_local *lp = data; - disable_irq_nosync(irq); - spin_lock(&lp->lock); - lp->irq_disabled = 1; + lp->irq_busy = 1; spin_unlock(&lp->lock); schedule_work(&lp->irqwork); @@ -679,11 +735,23 @@ static irqreturn_t at86rf230_isr(int irq, void *data) return IRQ_HANDLED; } +static irqreturn_t at86rf230_isr_level(int irq, void *data) +{ + disable_irq_nosync(irq); + + return at86rf230_isr(irq, data); +} + +static int at86rf230_irq_polarity(struct at86rf230_local *lp, int pol) +{ + return at86rf230_write_subreg(lp, SR_IRQ_POLARITY, pol); +} static int at86rf230_hw_init(struct at86rf230_local *lp) { + struct at86rf230_platform_data *pdata = lp->spi->dev.platform_data; + int rc, irq_pol; u8 status; - int rc; rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status); if (rc) @@ -701,12 +769,17 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) dev_info(&lp->spi->dev, "Status: %02x\n", status); } - rc = at86rf230_write_subreg(lp, SR_IRQ_MASK, 0xff); /* IRQ_TRX_UR | - * IRQ_CCA_ED | - * IRQ_TRX_END | - * IRQ_PLL_UNL | - * IRQ_PLL_LOCK - */ + /* configure irq polarity, defaults to high active */ + if (pdata->irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW)) + irq_pol = IRQ_ACTIVE_LOW; + else + irq_pol = IRQ_ACTIVE_HIGH; + + rc = at86rf230_irq_polarity(lp, irq_pol); + if (rc) + return rc; + + rc = 
at86rf230_write_subreg(lp, SR_IRQ_MASK, IRQ_TRX_END); if (rc) return rc; @@ -751,37 +824,38 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) return 0; } -static int at86rf230_fill_data(struct spi_device *spi) +static void at86rf230_fill_data(struct spi_device *spi) { struct at86rf230_local *lp = spi_get_drvdata(spi); struct at86rf230_platform_data *pdata = spi->dev.platform_data; - if (!pdata) { - dev_err(&spi->dev, "no platform_data\n"); - return -EINVAL; - } - lp->rstn = pdata->rstn; lp->slp_tr = pdata->slp_tr; lp->dig2 = pdata->dig2; - - return 0; } static int at86rf230_probe(struct spi_device *spi) { + struct at86rf230_platform_data *pdata; struct ieee802154_dev *dev; struct at86rf230_local *lp; - u8 man_id_0, man_id_1; - int rc; + u8 man_id_0, man_id_1, status; + irq_handler_t irq_handler; + work_func_t irq_worker; + int rc, supported = 0; const char *chip; - int supported = 0; if (!spi->irq) { dev_err(&spi->dev, "no IRQ specified\n"); return -EINVAL; } + pdata = spi->dev.platform_data; + if (!pdata) { + dev_err(&spi->dev, "no platform_data\n"); + return -EINVAL; + } + dev = ieee802154_alloc_device(sizeof(*lp), &at86rf230_ops); if (!dev) return -ENOMEM; @@ -791,23 +865,28 @@ static int at86rf230_probe(struct spi_device *spi) lp->spi = spi; - dev->priv = lp; dev->parent = &spi->dev; dev->extra_tx_headroom = 0; /* We do support only 2.4 Ghz */ dev->phy->channels_supported[0] = 0x7FFF800; dev->flags = IEEE802154_HW_OMIT_CKSUM; + if (pdata->irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { + irq_worker = at86rf230_irqwork; + irq_handler = at86rf230_isr; + } else { + irq_worker = at86rf230_irqwork_level; + irq_handler = at86rf230_isr_level; + } + mutex_init(&lp->bmux); - INIT_WORK(&lp->irqwork, at86rf230_irqwork); + INIT_WORK(&lp->irqwork, irq_worker); spin_lock_init(&lp->lock); init_completion(&lp->tx_complete); spi_set_drvdata(spi, lp); - rc = at86rf230_fill_data(spi); - if (rc) - goto err_fill; + at86rf230_fill_data(spi); rc = 
gpio_request(lp->rstn, "rstn"); if (rc) @@ -882,18 +961,23 @@ static int at86rf230_probe(struct spi_device *spi) if (rc) goto err_gpio_dir; - rc = request_irq(spi->irq, at86rf230_isr, IRQF_SHARED, + rc = request_irq(spi->irq, irq_handler, + IRQF_SHARED | pdata->irq_type, dev_name(&spi->dev), lp); if (rc) goto err_gpio_dir; + /* Read irq status register to reset irq line */ + rc = at86rf230_read_subreg(lp, RG_IRQ_STATUS, 0xff, 0, &status); + if (rc) + goto err_irq; + rc = ieee802154_register_device(lp->dev); if (rc) goto err_irq; return rc; - ieee802154_unregister_device(lp->dev); err_irq: free_irq(spi->irq, lp); flush_work(&lp->irqwork); @@ -903,7 +987,6 @@ err_gpio_dir: err_slp_tr: gpio_free(lp->rstn); err_rstn: -err_fill: spi_set_drvdata(spi, NULL); mutex_destroy(&lp->bmux); ieee802154_free_device(lp->dev); diff --git a/drivers/net/ieee802154/fakehard.c b/drivers/net/ieee802154/fakehard.c index 8f1c25676d44..bf0d55e2dd63 100644 --- a/drivers/net/ieee802154/fakehard.c +++ b/drivers/net/ieee802154/fakehard.c @@ -106,26 +106,6 @@ static u8 fake_get_dsn(const struct net_device *dev) } /** - * fake_get_bsn - Retrieve the BSN of the device. - * @dev: The network device to retrieve the BSN for. - * - * Returns the IEEE 802.15.4 BSN for the network device. - * The BSN is the sequence number which will be added to each - * beacon frame sent by the MAC. - * - * BSN means 'Beacon Sequence Number'. - * - * Note: This is in section 7.2.1.2 of the IEEE 802.15.4-2006 - * document. - */ -static u8 fake_get_bsn(const struct net_device *dev) -{ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - return 0x00; /* BSN are implemented in HW, so return just 0 */ -} - -/** * fake_assoc_req - Make an association request to the HW. * @dev: The network device which we are associating to a network. * @addr: The coordinator with which we wish to associate. 
@@ -264,7 +244,6 @@ static struct ieee802154_mlme_ops fake_mlme = { .get_pan_id = fake_get_pan_id, .get_short_addr = fake_get_short_addr, .get_dsn = fake_get_dsn, - .get_bsn = fake_get_bsn, }; static int ieee802154_fake_open(struct net_device *dev) diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 3f2c7aaf28c4..ede3ce4912f9 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -22,8 +22,10 @@ #include <linux/spi/spi.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/pinctrl/consumer.h> #include <net/wpan-phy.h> #include <net/mac802154.h> +#include <net/ieee802154.h> /* MRF24J40 Short Address Registers */ #define REG_RXMCR 0x00 /* Receive MAC control */ @@ -91,9 +93,8 @@ struct mrf24j40 { #define MRF24J40_READLONG(reg) (1 << 15 | (reg) << 5) #define MRF24J40_WRITELONG(reg) (1 << 15 | (reg) << 5 | 1 << 4) -/* Maximum speed to run the device at. TODO: Get the real max value from - * someone at Microchip since it isn't in the datasheet. */ -#define MAX_SPI_SPEED_HZ 1000000 +/* The datasheet indicates the theoretical maximum for SCK to be 10MHz */ +#define MAX_SPI_SPEED_HZ 10000000 #define printdev(X) (&X->spi->dev) @@ -349,7 +350,9 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) if (ret) goto err; val |= 0x1; - val &= ~0x4; + /* Set TXNACKREQ if the ACK bit is set in the packet. */ + if (skb->data[0] & IEEE802154_FC_ACK_REQ) + val |= 0x4; write_short_reg(devrec, REG_TXNCON, val); INIT_COMPLETION(devrec->tx_complete); @@ -361,6 +364,7 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) if (ret == -ERESTARTSYS) goto err; if (ret == 0) { + dev_warn(printdev(devrec), "Timeout waiting for TX interrupt\n"); ret = -ETIMEDOUT; goto err; } @@ -370,7 +374,7 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) if (ret) goto err; if (val & 0x1) { - dev_err(printdev(devrec), "Error Sending. 
Retry count exceeded\n"); + dev_dbg(printdev(devrec), "Error Sending. Retry count exceeded\n"); ret = -ECOMM; /* TODO: Better error code ? */ } else dev_dbg(printdev(devrec), "Packet Sent\n"); @@ -477,7 +481,7 @@ static int mrf24j40_filter(struct ieee802154_dev *dev, int i; for (i = 0; i < 8; i++) write_short_reg(devrec, REG_EADR0+i, - filt->ieee_addr[i]); + filt->ieee_addr[7-i]); #ifdef DEBUG printk(KERN_DEBUG "Set long addr to: "); @@ -623,6 +627,7 @@ static int mrf24j40_probe(struct spi_device *spi) int ret = -ENOMEM; u8 val; struct mrf24j40 *devrec; + struct pinctrl *pinctrl; printk(KERN_INFO "mrf24j40: probe(). IRQ: %d\n", spi->irq); @@ -633,6 +638,11 @@ static int mrf24j40_probe(struct spi_device *spi) if (!devrec->buf) goto err_buf; + pinctrl = devm_pinctrl_get_select_default(&spi->dev); + if (IS_ERR(pinctrl)) + dev_warn(&spi->dev, + "pinctrl pins are not configured from the driver"); + spi->mode = SPI_MODE_0; /* TODO: Is this appropriate for right here? */ if (spi->max_speed_hz > MAX_SPI_SPEED_HZ) spi->max_speed_hz = MAX_SPI_SPEED_HZ; @@ -641,7 +651,7 @@ static int mrf24j40_probe(struct spi_device *spi) init_completion(&devrec->tx_complete); INIT_WORK(&devrec->irqwork, mrf24j40_isrwork); devrec->spi = spi; - dev_set_drvdata(&spi->dev, devrec); + spi_set_drvdata(spi, devrec); /* Register with the 802154 subsystem */ @@ -713,7 +723,7 @@ err_devrec: static int mrf24j40_remove(struct spi_device *spi) { - struct mrf24j40 *devrec = dev_get_drvdata(&spi->dev); + struct mrf24j40 *devrec = spi_get_drvdata(spi); dev_dbg(printdev(devrec), "remove\n"); @@ -725,7 +735,7 @@ static int mrf24j40_remove(struct spi_device *spi) * complete? */ /* Clean up the SPI stuff. 
*/ - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); kfree(devrec->buf); kfree(devrec); return 0; @@ -749,18 +759,7 @@ static struct spi_driver mrf24j40_driver = { .remove = mrf24j40_remove, }; -static int __init mrf24j40_init(void) -{ - return spi_register_driver(&mrf24j40_driver); -} - -static void __exit mrf24j40_exit(void) -{ - spi_unregister_driver(&mrf24j40_driver); -} - -module_init(mrf24j40_init); -module_exit(mrf24j40_exit); +module_spi_driver(mrf24j40_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alan Ott"); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 82164381f778..dc9f6a45515d 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -166,7 +166,8 @@ static const struct net_device_ops ifb_netdev_ops = { #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \ NETIF_F_TSO_ECN | NETIF_F_TSO | NETIF_F_TSO6 | \ - NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX) + NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_TX) static void ifb_setup(struct net_device *dev) { diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index 9cea451a6081..3adb43ce138f 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -352,21 +352,19 @@ static int ali_ircc_open(int i, chipio_t *info) /* Allocate memory if needed */ self->rx_buff.head = dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL); + &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto err_out2; } - memset(self->rx_buff.head, 0, self->rx_buff.truesize); self->tx_buff.head = dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL); + &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto err_out3; } - memset(self->tx_buff.head, 0, self->tx_buff.truesize); self->rx_buff.in_frame = FALSE; self->rx_buff.state = OUTSIDE_FRAME; diff --git a/drivers/net/irda/au1k_ir.c 
b/drivers/net/irda/au1k_ir.c index b5151e4ced61..7a1f684edcb5 100644 --- a/drivers/net/irda/au1k_ir.c +++ b/drivers/net/irda/au1k_ir.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/types.h> +#include <linux/ioport.h> #include <net/irda/irda.h> #include <net/irda/irmod.h> @@ -882,12 +883,12 @@ static int au1k_irda_probe(struct platform_device *pdev) goto out; err = -EBUSY; - aup->ioarea = request_mem_region(r->start, r->end - r->start + 1, + aup->ioarea = request_mem_region(r->start, resource_size(r), pdev->name); if (!aup->ioarea) goto out; - aup->iobase = ioremap_nocache(r->start, r->end - r->start + 1); + aup->iobase = ioremap_nocache(r->start, resource_size(r)); if (!aup->iobase) goto out2; @@ -952,18 +953,7 @@ static struct platform_driver au1k_irda_driver = { .remove = au1k_irda_remove, }; -static int __init au1k_irda_load(void) -{ - return platform_driver_register(&au1k_irda_driver); -} - -static void __exit au1k_irda_unload(void) -{ - return platform_driver_unregister(&au1k_irda_driver); -} +module_platform_driver(au1k_irda_driver); MODULE_AUTHOR("Pete Popov <ppopov@mvista.com>"); MODULE_DESCRIPTION("Au1000 IrDA Device Driver"); - -module_init(au1k_irda_load); -module_exit(au1k_irda_unload); diff --git a/drivers/net/irda/bfin_sir.c b/drivers/net/irda/bfin_sir.c index fed4a05d55c7..a06fca61c9a0 100644 --- a/drivers/net/irda/bfin_sir.c +++ b/drivers/net/irda/bfin_sir.c @@ -389,7 +389,8 @@ static int bfin_sir_startup(struct bfin_sir_port *port, struct net_device *dev) set_dma_callback(port->rx_dma_channel, bfin_sir_dma_rx_int, dev); set_dma_callback(port->tx_dma_channel, bfin_sir_dma_tx_int, dev); - port->rx_dma_buf.buf = (unsigned char *)dma_alloc_coherent(NULL, PAGE_SIZE, &dma_handle, GFP_DMA); + port->rx_dma_buf.buf = dma_alloc_coherent(NULL, PAGE_SIZE, + &dma_handle, GFP_DMA); port->rx_dma_buf.head = 0; port->rx_dma_buf.tail = 0; port->rx_dma_nrows = 0; diff --git a/drivers/net/irda/nsc-ircc.c 
b/drivers/net/irda/nsc-ircc.c index 2a4f2f153244..9cf836b57c49 100644 --- a/drivers/net/irda/nsc-ircc.c +++ b/drivers/net/irda/nsc-ircc.c @@ -431,22 +431,20 @@ static int __init nsc_ircc_open(chipio_t *info) /* Allocate memory if needed */ self->rx_buff.head = dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL); + &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto out2; } - memset(self->rx_buff.head, 0, self->rx_buff.truesize); self->tx_buff.head = dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL); + &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto out3; } - memset(self->tx_buff.head, 0, self->tx_buff.truesize); self->rx_buff.in_frame = FALSE; self->rx_buff.state = OUTSIDE_FRAME; diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 858de05bdb7d..964b116a0ab7 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -700,12 +700,12 @@ static int pxa_irda_start(struct net_device *dev) err = -ENOMEM; si->dma_rx_buff = dma_alloc_coherent(si->dev, IRDA_FRAME_SIZE_LIMIT, - &si->dma_rx_buff_phy, GFP_KERNEL ); + &si->dma_rx_buff_phy, GFP_KERNEL); if (!si->dma_rx_buff) goto err_dma_rx_buff; si->dma_tx_buff = dma_alloc_coherent(si->dev, IRDA_FRAME_SIZE_LIMIT, - &si->dma_tx_buff_phy, GFP_KERNEL ); + &si->dma_tx_buff_phy, GFP_KERNEL); if (!si->dma_tx_buff) goto err_dma_tx_buff; diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c index 5290952b60c2..aa05dad75335 100644 --- a/drivers/net/irda/smsc-ircc2.c +++ b/drivers/net/irda/smsc-ircc2.c @@ -563,24 +563,15 @@ static int smsc_ircc_open(unsigned int fir_base, unsigned int sir_base, u8 dma, self->rx_buff.head = dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL); - if (self->rx_buff.head == NULL) { - IRDA_ERROR("%s, Can't allocate memory for receive buffer!\n", - 
driver_name); + &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); + if (self->rx_buff.head == NULL) goto err_out2; - } self->tx_buff.head = dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL); - if (self->tx_buff.head == NULL) { - IRDA_ERROR("%s, Can't allocate memory for transmit buffer!\n", - driver_name); + &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); + if (self->tx_buff.head == NULL) goto err_out3; - } - - memset(self->rx_buff.head, 0, self->rx_buff.truesize); - memset(self->tx_buff.head, 0, self->tx_buff.truesize); self->rx_buff.in_frame = FALSE; self->rx_buff.state = OUTSIDE_FRAME; diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c index f9033c6a888c..51f2bc376101 100644 --- a/drivers/net/irda/via-ircc.c +++ b/drivers/net/irda/via-ircc.c @@ -364,21 +364,19 @@ static int via_ircc_open(struct pci_dev *pdev, chipio_t *info, unsigned int id) /* Allocate memory if needed */ self->rx_buff.head = dma_alloc_coherent(&pdev->dev, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL); + &self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto err_out2; } - memset(self->rx_buff.head, 0, self->rx_buff.truesize); self->tx_buff.head = dma_alloc_coherent(&pdev->dev, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL); + &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto err_out3; } - memset(self->tx_buff.head, 0, self->tx_buff.truesize); self->rx_buff.in_frame = FALSE; self->rx_buff.state = OUTSIDE_FRAME; diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c index f5bb92f15880..bb8857a158a6 100644 --- a/drivers/net/irda/w83977af_ir.c +++ b/drivers/net/irda/w83977af_ir.c @@ -216,22 +216,19 @@ static int w83977af_open(int i, unsigned int iobase, unsigned int irq, /* Allocate memory if needed */ self->rx_buff.head = dma_alloc_coherent(NULL, self->rx_buff.truesize, - &self->rx_buff_dma, GFP_KERNEL); + 
&self->rx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->rx_buff.head == NULL) { err = -ENOMEM; goto err_out1; } - memset(self->rx_buff.head, 0, self->rx_buff.truesize); - self->tx_buff.head = dma_alloc_coherent(NULL, self->tx_buff.truesize, - &self->tx_buff_dma, GFP_KERNEL); + &self->tx_buff_dma, GFP_KERNEL | __GFP_ZERO); if (self->tx_buff.head == NULL) { err = -ENOMEM; goto err_out2; } - memset(self->tx_buff.head, 0, self->tx_buff.truesize); self->rx_buff.in_frame = FALSE; self->rx_buff.state = OUTSIDE_FRAME; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 417b2af1aa80..d5a141c7c4e7 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -46,9 +46,16 @@ struct macvlan_port { static void macvlan_port_destroy(struct net_device *dev); -#define macvlan_port_get_rcu(dev) \ - ((struct macvlan_port *) rcu_dereference(dev->rx_handler_data)) -#define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data) +static struct macvlan_port *macvlan_port_get_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler_data); +} + +static struct macvlan_port *macvlan_port_get_rtnl(const struct net_device *dev) +{ + return rtnl_dereference(dev->rx_handler_data); +} + #define macvlan_port_exists(dev) (dev->priv_flags & IFF_MACVLAN_PORT) static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, @@ -464,7 +471,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ - NETIF_F_HW_VLAN_FILTER) + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) @@ -560,21 +567,21 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, } static int macvlan_vlan_rx_add_vid(struct net_device *dev, - 
unsigned short vid) + __be16 proto, u16 vid) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - return vlan_vid_add(lowerdev, vid); + return vlan_vid_add(lowerdev, proto, vid); } static int macvlan_vlan_rx_kill_vid(struct net_device *dev, - unsigned short vid) + __be16 proto, u16 vid) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - vlan_vid_del(lowerdev, vid); + vlan_vid_del(lowerdev, proto, vid); return 0; } @@ -660,6 +667,7 @@ void macvlan_common_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); + dev->priv_flags |= IFF_UNICAST_FLT; dev->netdev_ops = &macvlan_netdev_ops; dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, @@ -702,7 +710,7 @@ static int macvlan_port_create(struct net_device *dev) static void macvlan_port_destroy(struct net_device *dev) { - struct macvlan_port *port = macvlan_port_get(dev); + struct macvlan_port *port = macvlan_port_get_rtnl(dev); dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); @@ -771,7 +779,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; } - port = macvlan_port_get(lowerdev); + port = macvlan_port_get_rtnl(lowerdev); /* Only 1 macvlan device can be created in passthru mode */ if (port->passthru) @@ -920,7 +928,7 @@ static int macvlan_device_event(struct notifier_block *unused, if (!macvlan_port_exists(dev)) return NOTIFY_DONE; - port = macvlan_port_get(dev); + port = macvlan_port_get_rtnl(dev); switch (event) { case NETDEV_CHANGE: diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a449439bd653..59e9605de316 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -725,6 +725,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, goto err_kfree; } + skb_probe_transport_header(skb, ETH_HLEN); + rcu_read_lock_bh(); vlan = 
rcu_dereference_bh(q->vlan); /* copy skb_ubuf_info for callback when skb has no error */ diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 37add21a3d7d..59ac143dec25 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -666,6 +666,7 @@ static int netconsole_netdev_event(struct notifier_block *this, goto done; spin_lock_irqsave(&target_list_lock, flags); +restart: list_for_each_entry(nt, &target_list, list) { netconsole_target_get(nt); if (nt->np.dev == dev) { @@ -678,15 +679,17 @@ static int netconsole_netdev_event(struct notifier_block *this, case NETDEV_UNREGISTER: /* * rtnl_lock already held + * we might sleep in __netpoll_cleanup() */ - if (nt->np.dev) { - __netpoll_cleanup(&nt->np); - dev_put(nt->np.dev); - nt->np.dev = NULL; - } + spin_unlock_irqrestore(&target_list_lock, flags); + __netpoll_cleanup(&nt->np); + spin_lock_irqsave(&target_list_lock, flags); + dev_put(nt->np.dev); + nt->np.dev = NULL; nt->enabled = 0; stopped = true; - break; + netconsole_target_put(nt); + goto restart; } } netconsole_target_put(nt); diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index ec40ba882f61..ff2e45e9cb54 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -159,7 +159,7 @@ static int lxt973a2_update_link(struct phy_device *phydev) return 0; } -int lxt973a2_read_status(struct phy_device *phydev) +static int lxt973a2_read_status(struct phy_device *phydev) { int adv; int err; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 22dec9c7ef05..202fe1ff1987 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -7,6 +7,8 @@ * * Copyright (c) 2004 Freescale Semiconductor, Inc. 
* + * Copyright (c) 2013 Michael Stapelberg <michael@stapelberg.de> + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your @@ -80,6 +82,28 @@ #define MII_88E1318S_PHY_MSCR1_REG 16 #define MII_88E1318S_PHY_MSCR1_PAD_ODD BIT(6) +/* Copper Specific Interrupt Enable Register */ +#define MII_88E1318S_PHY_CSIER 0x12 +/* WOL Event Interrupt Enable */ +#define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7) + +/* LED Timer Control Register */ +#define MII_88E1318S_PHY_LED_PAGE 0x03 +#define MII_88E1318S_PHY_LED_TCR 0x12 +#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15) +#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7) +#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW BIT(11) + +/* Magic Packet MAC address registers */ +#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2 0x17 +#define MII_88E1318S_PHY_MAGIC_PACKET_WORD1 0x18 +#define MII_88E1318S_PHY_MAGIC_PACKET_WORD0 0x19 + +#define MII_88E1318S_PHY_WOL_PAGE 0x11 +#define MII_88E1318S_PHY_WOL_CTRL 0x10 +#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12) +#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14) + #define MII_88E1121_PHY_LED_CTRL 16 #define MII_88E1121_PHY_LED_PAGE 3 #define MII_88E1121_PHY_LED_DEF 0x0030 @@ -696,6 +720,107 @@ static int m88e1121_did_interrupt(struct phy_device *phydev) return 0; } +static void m88e1318_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + wol->supported = WAKE_MAGIC; + wol->wolopts = 0; + + if (phy_write(phydev, MII_MARVELL_PHY_PAGE, + MII_88E1318S_PHY_WOL_PAGE) < 0) + return; + + if (phy_read(phydev, MII_88E1318S_PHY_WOL_CTRL) & + MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE) + wol->wolopts |= WAKE_MAGIC; + + if (phy_write(phydev, MII_MARVELL_PHY_PAGE, 0x00) < 0) + return; +} + +static int m88e1318_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + int err, 
oldpage, temp; + + oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE); + + if (wol->wolopts & WAKE_MAGIC) { + /* Explicitly switch to page 0x00, just to be sure */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0x00); + if (err < 0) + return err; + + /* Enable the WOL interrupt */ + temp = phy_read(phydev, MII_88E1318S_PHY_CSIER); + temp |= MII_88E1318S_PHY_CSIER_WOL_EIE; + err = phy_write(phydev, MII_88E1318S_PHY_CSIER, temp); + if (err < 0) + return err; + + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, + MII_88E1318S_PHY_LED_PAGE); + if (err < 0) + return err; + + /* Setup LED[2] as interrupt pin (active low) */ + temp = phy_read(phydev, MII_88E1318S_PHY_LED_TCR); + temp &= ~MII_88E1318S_PHY_LED_TCR_FORCE_INT; + temp |= MII_88E1318S_PHY_LED_TCR_INTn_ENABLE; + temp |= MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW; + err = phy_write(phydev, MII_88E1318S_PHY_LED_TCR, temp); + if (err < 0) + return err; + + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, + MII_88E1318S_PHY_WOL_PAGE); + if (err < 0) + return err; + + /* Store the device address for the magic packet */ + err = phy_write(phydev, MII_88E1318S_PHY_MAGIC_PACKET_WORD2, + ((phydev->attached_dev->dev_addr[5] << 8) | + phydev->attached_dev->dev_addr[4])); + if (err < 0) + return err; + err = phy_write(phydev, MII_88E1318S_PHY_MAGIC_PACKET_WORD1, + ((phydev->attached_dev->dev_addr[3] << 8) | + phydev->attached_dev->dev_addr[2])); + if (err < 0) + return err; + err = phy_write(phydev, MII_88E1318S_PHY_MAGIC_PACKET_WORD0, + ((phydev->attached_dev->dev_addr[1] << 8) | + phydev->attached_dev->dev_addr[0])); + if (err < 0) + return err; + + /* Clear WOL status and enable magic packet matching */ + temp = phy_read(phydev, MII_88E1318S_PHY_WOL_CTRL); + temp |= MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS; + temp |= MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE; + err = phy_write(phydev, MII_88E1318S_PHY_WOL_CTRL, temp); + if (err < 0) + return err; + } else { + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, + 
MII_88E1318S_PHY_WOL_PAGE); + if (err < 0) + return err; + + /* Clear WOL status and disable magic packet matching */ + temp = phy_read(phydev, MII_88E1318S_PHY_WOL_CTRL); + temp |= MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS; + temp &= ~MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE; + err = phy_write(phydev, MII_88E1318S_PHY_WOL_CTRL, temp); + if (err < 0) + return err; + } + + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage); + if (err < 0) + return err; + + return 0; +} + static struct phy_driver marvell_drivers[] = { { .phy_id = MARVELL_PHY_ID_88E1101, @@ -772,6 +897,8 @@ static struct phy_driver marvell_drivers[] = { .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .did_interrupt = &m88e1121_did_interrupt, + .get_wol = &m88e1318_get_wol, + .set_wol = &m88e1318_set_wol, .driver = { .owner = THIS_MODULE }, }, { diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 27274986ab56..a47f9236d966 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -235,17 +235,7 @@ static struct platform_driver mdio_gpio_driver = { }, }; -static int __init mdio_gpio_init(void) -{ - return platform_driver_register(&mdio_gpio_driver); -} -module_init(mdio_gpio_init); - -static void __exit mdio_gpio_exit(void) -{ - platform_driver_unregister(&mdio_gpio_driver); -} -module_exit(mdio_gpio_exit); +module_platform_driver(mdio_gpio_driver); MODULE_ALIAS("platform:mdio-gpio"); MODULE_AUTHOR("Laurent Pinchart, Paulius Zaleckas"); diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c index 09297fe05ae5..b51fa1f469b0 100644 --- a/drivers/net/phy/mdio-octeon.c +++ b/drivers/net/phy/mdio-octeon.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2009,2011 Cavium, Inc. + * Copyright (C) 2009-2012 Cavium, Inc. 
*/ #include <linux/platform_device.h> @@ -27,30 +27,98 @@ #define SMI_CLK 0x18 #define SMI_EN 0x20 +enum octeon_mdiobus_mode { + UNINIT = 0, + C22, + C45 +}; + struct octeon_mdiobus { struct mii_bus *mii_bus; u64 register_base; resource_size_t mdio_phys; resource_size_t regsize; + enum octeon_mdiobus_mode mode; int phy_irq[PHY_MAX_ADDR]; }; +static void octeon_mdiobus_set_mode(struct octeon_mdiobus *p, + enum octeon_mdiobus_mode m) +{ + union cvmx_smix_clk smi_clk; + + if (m == p->mode) + return; + + smi_clk.u64 = cvmx_read_csr(p->register_base + SMI_CLK); + smi_clk.s.mode = (m == C45) ? 1 : 0; + smi_clk.s.preamble = 1; + cvmx_write_csr(p->register_base + SMI_CLK, smi_clk.u64); + p->mode = m; +} + +static int octeon_mdiobus_c45_addr(struct octeon_mdiobus *p, + int phy_id, int regnum) +{ + union cvmx_smix_cmd smi_cmd; + union cvmx_smix_wr_dat smi_wr; + int timeout = 1000; + + octeon_mdiobus_set_mode(p, C45); + + smi_wr.u64 = 0; + smi_wr.s.dat = regnum & 0xffff; + cvmx_write_csr(p->register_base + SMI_WR_DAT, smi_wr.u64); + + regnum = (regnum >> 16) & 0x1f; + + smi_cmd.u64 = 0; + smi_cmd.s.phy_op = 0; /* MDIO_CLAUSE_45_ADDRESS */ + smi_cmd.s.phy_adr = phy_id; + smi_cmd.s.reg_adr = regnum; + cvmx_write_csr(p->register_base + SMI_CMD, smi_cmd.u64); + + do { + /* Wait 1000 clocks so we don't saturate the RSL bus + * doing reads. 
+ */ + __delay(1000); + smi_wr.u64 = cvmx_read_csr(p->register_base + SMI_WR_DAT); + } while (smi_wr.s.pending && --timeout); + + if (timeout <= 0) + return -EIO; + return 0; +} + static int octeon_mdiobus_read(struct mii_bus *bus, int phy_id, int regnum) { struct octeon_mdiobus *p = bus->priv; union cvmx_smix_cmd smi_cmd; union cvmx_smix_rd_dat smi_rd; + unsigned int op = 1; /* MDIO_CLAUSE_22_READ */ int timeout = 1000; + if (regnum & MII_ADDR_C45) { + int r = octeon_mdiobus_c45_addr(p, phy_id, regnum); + if (r < 0) + return r; + + regnum = (regnum >> 16) & 0x1f; + op = 3; /* MDIO_CLAUSE_45_READ */ + } else { + octeon_mdiobus_set_mode(p, C22); + } + + smi_cmd.u64 = 0; - smi_cmd.s.phy_op = 1; /* MDIO_CLAUSE_22_READ */ + smi_cmd.s.phy_op = op; smi_cmd.s.phy_adr = phy_id; smi_cmd.s.reg_adr = regnum; cvmx_write_csr(p->register_base + SMI_CMD, smi_cmd.u64); do { - /* - * Wait 1000 clocks so we don't saturate the RSL bus + /* Wait 1000 clocks so we don't saturate the RSL bus * doing reads. */ __delay(1000); @@ -69,21 +137,33 @@ static int octeon_mdiobus_write(struct mii_bus *bus, int phy_id, struct octeon_mdiobus *p = bus->priv; union cvmx_smix_cmd smi_cmd; union cvmx_smix_wr_dat smi_wr; + unsigned int op = 0; /* MDIO_CLAUSE_22_WRITE */ int timeout = 1000; + + if (regnum & MII_ADDR_C45) { + int r = octeon_mdiobus_c45_addr(p, phy_id, regnum); + if (r < 0) + return r; + + regnum = (regnum >> 16) & 0x1f; + op = 1; /* MDIO_CLAUSE_45_WRITE */ + } else { + octeon_mdiobus_set_mode(p, C22); + } + smi_wr.u64 = 0; smi_wr.s.dat = val; cvmx_write_csr(p->register_base + SMI_WR_DAT, smi_wr.u64); smi_cmd.u64 = 0; - smi_cmd.s.phy_op = 0; /* MDIO_CLAUSE_22_WRITE */ + smi_cmd.s.phy_op = op; smi_cmd.s.phy_adr = phy_id; smi_cmd.s.reg_adr = regnum; cvmx_write_csr(p->register_base + SMI_CMD, smi_cmd.u64); do { - /* - * Wait 1000 clocks so we don't saturate the RSL bus + /* Wait 1000 clocks so we don't saturate the RSL bus * doing reads. 
*/ __delay(1000); @@ -197,18 +277,7 @@ void octeon_mdiobus_force_mod_depencency(void) } EXPORT_SYMBOL(octeon_mdiobus_force_mod_depencency); -static int __init octeon_mdiobus_mod_init(void) -{ - return platform_driver_register(&octeon_mdiobus_driver); -} - -static void __exit octeon_mdiobus_mod_exit(void) -{ - platform_driver_unregister(&octeon_mdiobus_driver); -} - -module_init(octeon_mdiobus_mod_init); -module_exit(octeon_mdiobus_mod_exit); +module_platform_driver(octeon_mdiobus_driver); MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_VERSION(DRV_VERSION); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index abf7b6153d00..2510435f34ed 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -53,6 +53,18 @@ #define KS8737_CTRL_INT_ACTIVE_HIGH (1 << 14) #define KSZ8051_RMII_50MHZ_CLK (1 << 7) +static int ksz_config_flags(struct phy_device *phydev) +{ + int regval; + + if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK) { + regval = phy_read(phydev, MII_KSZPHY_CTRL); + regval |= KSZ8051_RMII_50MHZ_CLK; + return phy_write(phydev, MII_KSZPHY_CTRL, regval); + } + return 0; +} + static int kszphy_ack_interrupt(struct phy_device *phydev) { /* bit[7..0] int status, which is a read and clear register. */ @@ -114,22 +126,19 @@ static int kszphy_config_init(struct phy_device *phydev) static int ksz8021_config_init(struct phy_device *phydev) { + int rc; const u16 val = KSZPHY_OMSO_B_CAST_OFF | KSZPHY_OMSO_RMII_OVERRIDE; phy_write(phydev, MII_KSZPHY_OMSO, val); - return 0; + rc = ksz_config_flags(phydev); + return rc < 0 ? rc : 0; } static int ks8051_config_init(struct phy_device *phydev) { - int regval; - - if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK) { - regval = phy_read(phydev, MII_KSZPHY_CTRL); - regval |= KSZ8051_RMII_50MHZ_CLK; - phy_write(phydev, MII_KSZPHY_CTRL, regval); - } + int rc; - return 0; + rc = ksz_config_flags(phydev); + return rc < 0 ? 
rc : 0; } #define KSZ8873MLL_GLOBAL_CONTROL_4 0x06 @@ -192,6 +201,19 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, }, { + .phy_id = PHY_ID_KSZ8031, + .phy_id_mask = 0x00ffffff, + .name = "Micrel KSZ8031", + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = ksz8021_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { .phy_id = PHY_ID_KSZ8041, .phy_id_mask = 0x00fffff0, .name = "Micrel KSZ8041", @@ -325,6 +347,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ8001, 0x00ffffff }, { PHY_ID_KS8737, 0x00fffff0 }, { PHY_ID_KSZ8021, 0x00ffffff }, + { PHY_ID_KSZ8031, 0x00ffffff }, { PHY_ID_KSZ8041, 0x00fffff0 }, { PHY_ID_KSZ8051, 0x00fffff0 }, { PHY_ID_KSZ8061, 0x00fffff0 }, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ef9ea9248223..c14f14741b3f 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -463,33 +463,6 @@ void phy_stop_machine(struct phy_device *phydev) } /** - * phy_force_reduction - reduce PHY speed/duplex settings by one step - * @phydev: target phy_device struct - * - * Description: Reduces the speed/duplex settings by one notch, - * in this order-- - * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF. - * The function bottoms out at 10/HALF. - */ -static void phy_force_reduction(struct phy_device *phydev) -{ - int idx; - - idx = phy_find_setting(phydev->speed, phydev->duplex); - - idx++; - - idx = phy_find_valid(idx, phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - pr_info("Trying %d/%s\n", - phydev->speed, DUPLEX_FULL == phydev->duplex ? 
"FULL" : "HALF"); -} - - -/** * phy_error - enter HALTED state for this PHY device * @phydev: target phy_device struct * @@ -818,30 +791,11 @@ void phy_state_machine(struct work_struct *work) phydev->adjust_link(phydev->attached_dev); } else if (0 == phydev->link_timeout--) { - int idx; - needs_aneg = 1; /* If we have the magic_aneg bit, * we try again */ if (phydev->drv->flags & PHY_HAS_MAGICANEG) break; - - /* The timer expired, and we still - * don't have a setting, so we try - * forcing it until we find one that - * works, starting from the fastest speed, - * and working our way down */ - idx = phy_find_valid(0, phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - phydev->autoneg = AUTONEG_DISABLE; - - pr_info("Trying %d/%s\n", - phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "FULL" : "HALF"); } break; case PHY_NOLINK: @@ -866,10 +820,8 @@ void phy_state_machine(struct work_struct *work) phydev->state = PHY_RUNNING; netif_carrier_on(phydev->attached_dev); } else { - if (0 == phydev->link_timeout--) { - phy_force_reduction(phydev); + if (0 == phydev->link_timeout--) needs_aneg = 1; - } } phydev->adjust_link(phydev->attached_dev); @@ -1188,3 +1140,19 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data) return 0; } EXPORT_SYMBOL(phy_ethtool_set_eee); + +int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + if (phydev->drv->set_wol) + return phydev->drv->set_wol(phydev, wol); + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(phy_ethtool_set_wol); + +void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + if (phydev->drv->get_wol) + phydev->drv->get_wol(phydev, wol); +} +EXPORT_SYMBOL(phy_ethtool_get_wol); diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 5c87eef40bf9..d11c93e69e03 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -281,7 +281,7 @@ static int 
ks8995_probe(struct spi_device *spi) mutex_init(&ks->lock); ks->pdata = pdata; ks->spi = spi_dev_get(spi); - dev_set_drvdata(&spi->dev, ks); + spi_set_drvdata(spi, ks); spi->mode = SPI_MODE_0; spi->bits_per_word = 8; @@ -325,7 +325,7 @@ static int ks8995_probe(struct spi_device *spi) return 0; err_drvdata: - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); kfree(ks); return err; } @@ -334,10 +334,10 @@ static int ks8995_remove(struct spi_device *spi) { struct ks8995_data *ks8995; - ks8995 = dev_get_drvdata(&spi->dev); + ks8995 = spi_get_drvdata(spi); sysfs_remove_bin_file(&spi->dev.kobj, &ks8995_registers_attr); - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); kfree(ks8995); return 0; diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 2585c383e623..3492b5391273 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -61,7 +61,7 @@ MODULE_DESCRIPTION("Vitesse PHY driver"); MODULE_AUTHOR("Kriston Carson"); MODULE_LICENSE("GPL"); -int vsc824x_add_skew(struct phy_device *phydev) +static int vsc824x_add_skew(struct phy_device *phydev) { int err; int extcon; @@ -81,7 +81,6 @@ int vsc824x_add_skew(struct phy_device *phydev) return err; } -EXPORT_SYMBOL(vsc824x_add_skew); static int vsc824x_config_init(struct phy_device *phydev) { diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index bed62d9c53c8..1f7bef90b467 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -560,7 +560,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev) * so don't forget to remove it. 
*/ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; rawp = skb->data; diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 1a12033d2efa..090c834d7dbd 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -105,64 +105,15 @@ static const struct ppp_channel_ops sync_ops = { }; /* - * Utility procedures to print a buffer in hex/ascii + * Utility procedure to print a buffer in hex/ascii */ static void -ppp_print_hex (register __u8 * out, const __u8 * in, int count) -{ - register __u8 next_ch; - static const char hex[] = "0123456789ABCDEF"; - - while (count-- > 0) { - next_ch = *in++; - *out++ = hex[(next_ch >> 4) & 0x0F]; - *out++ = hex[next_ch & 0x0F]; - ++out; - } -} - -static void -ppp_print_char (register __u8 * out, const __u8 * in, int count) -{ - register __u8 next_ch; - - while (count-- > 0) { - next_ch = *in++; - - if (next_ch < 0x20 || next_ch > 0x7e) - *out++ = '.'; - else { - *out++ = next_ch; - if (next_ch == '%') /* printk/syslogd has a bug !! 
*/ - *out++ = '%'; - } - } - *out = '\0'; -} - -static void ppp_print_buffer (const char *name, const __u8 *buf, int count) { - __u8 line[44]; - if (name != NULL) printk(KERN_DEBUG "ppp_synctty: %s, count = %d\n", name, count); - while (count > 8) { - memset (line, 32, 44); - ppp_print_hex (line, buf, 8); - ppp_print_char (&line[8 * 3], buf, 8); - printk(KERN_DEBUG "%s\n", line); - count -= 8; - buf += 8; - } - - if (count > 0) { - memset (line, 32, 44); - ppp_print_hex (line, buf, count); - ppp_print_char (&line[8 * 3], buf, count); - printk(KERN_DEBUG "%s\n", line); - } + print_hex_dump_bytes("", DUMP_PREFIX_NONE, buf, count); } diff --git a/drivers/net/team/Kconfig b/drivers/net/team/Kconfig index c3011af68e91..c853d84fd99f 100644 --- a/drivers/net/team/Kconfig +++ b/drivers/net/team/Kconfig @@ -37,6 +37,18 @@ config NET_TEAM_MODE_ROUNDROBIN To compile this team mode as a module, choose M here: the module will be called team_mode_roundrobin. +config NET_TEAM_MODE_RANDOM + tristate "Random mode support" + depends on NET_TEAM + ---help--- + Basic mode where port used for transmitting packets is selected + randomly. + + All added ports are setup to have team's device address. + + To compile this team mode as a module, choose M here: the module + will be called team_mode_random. 
+ config NET_TEAM_MODE_ACTIVEBACKUP tristate "Active-backup mode support" depends on NET_TEAM diff --git a/drivers/net/team/Makefile b/drivers/net/team/Makefile index 975763014e5a..c57e85889751 100644 --- a/drivers/net/team/Makefile +++ b/drivers/net/team/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_NET_TEAM) += team.o obj-$(CONFIG_NET_TEAM_MODE_BROADCAST) += team_mode_broadcast.o obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o +obj-$(CONFIG_NET_TEAM_MODE_RANDOM) += team_mode_random.o obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o obj-$(CONFIG_NET_TEAM_MODE_LOADBALANCE) += team_mode_loadbalance.o diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 05c5efe84591..7c43261975bd 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -73,11 +73,24 @@ static int team_port_set_orig_dev_addr(struct team_port *port) return __set_port_dev_addr(port->dev, port->orig.dev_addr); } -int team_port_set_team_dev_addr(struct team_port *port) +static int team_port_set_team_dev_addr(struct team *team, + struct team_port *port) +{ + return __set_port_dev_addr(port->dev, team->dev->dev_addr); +} + +int team_modeop_port_enter(struct team *team, struct team_port *port) +{ + return team_port_set_team_dev_addr(team, port); +} +EXPORT_SYMBOL(team_modeop_port_enter); + +void team_modeop_port_change_dev_addr(struct team *team, + struct team_port *port) { - return __set_port_dev_addr(port->dev, port->team->dev->dev_addr); + team_port_set_team_dev_addr(team, port); } -EXPORT_SYMBOL(team_port_set_team_dev_addr); +EXPORT_SYMBOL(team_modeop_port_change_dev_addr); static void team_refresh_port_linkup(struct team_port *port) { @@ -490,9 +503,9 @@ static bool team_dummy_transmit(struct team *team, struct sk_buff *skb) return false; } -rx_handler_result_t team_dummy_receive(struct team *team, - struct team_port *port, - struct sk_buff *skb) +static rx_handler_result_t team_dummy_receive(struct team *team, + struct team_port *port, + struct 
sk_buff *skb) { return RX_HANDLER_ANOTHER; } @@ -1138,6 +1151,8 @@ static int team_port_del(struct team *team, struct net_device *port_dev) netdev_upper_dev_unlink(port_dev, dev); team_port_disable_netpoll(port); vlan_vids_del_by_dev(port_dev, dev); + dev_uc_unsync(port_dev, dev); + dev_mc_unsync(port_dev, dev); dev_close(port_dev); team_port_leave(team, port); @@ -1489,8 +1504,8 @@ static void team_set_rx_mode(struct net_device *dev) rcu_read_lock(); list_for_each_entry_rcu(port, &team->port_list, list) { - dev_uc_sync(port->dev, dev); - dev_mc_sync(port->dev, dev); + dev_uc_sync_multiple(port->dev, dev); + dev_mc_sync_multiple(port->dev, dev); } rcu_read_unlock(); } @@ -1583,7 +1598,7 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) return stats; } -static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid) +static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct team *team = netdev_priv(dev); struct team_port *port; @@ -1595,7 +1610,7 @@ static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid) */ mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { - err = vlan_vid_add(port->dev, vid); + err = vlan_vid_add(port->dev, proto, vid); if (err) goto unwind; } @@ -1605,20 +1620,20 @@ static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid) unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) - vlan_vid_del(port->dev, vid); + vlan_vid_del(port->dev, proto, vid); mutex_unlock(&team->lock); return err; } -static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid) +static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct team *team = netdev_priv(dev); struct team_port *port; rcu_read_lock(); list_for_each_entry_rcu(port, &team->port_list, list) - vlan_vid_del(port->dev, vid); + vlan_vid_del(port->dev, proto, vid); rcu_read_unlock(); return 0; @@ -1826,9 +1841,9 @@ static void 
team_setup(struct net_device *dev) dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GRO; dev->hw_features = TEAM_VLAN_FEATURES | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); dev->features |= dev->hw_features; diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c index c5db428e73fa..c366cd299c06 100644 --- a/drivers/net/team/team_mode_broadcast.c +++ b/drivers/net/team/team_mode_broadcast.c @@ -46,20 +46,10 @@ static bool bc_transmit(struct team *team, struct sk_buff *skb) return sum_ret; } -static int bc_port_enter(struct team *team, struct team_port *port) -{ - return team_port_set_team_dev_addr(port); -} - -static void bc_port_change_dev_addr(struct team *team, struct team_port *port) -{ - team_port_set_team_dev_addr(port); -} - static const struct team_mode_ops bc_mode_ops = { .transmit = bc_transmit, - .port_enter = bc_port_enter, - .port_change_dev_addr = bc_port_change_dev_addr, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, }; static const struct team_mode bc_mode = { diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c new file mode 100644 index 000000000000..9eabfaa22f3e --- /dev/null +++ b/drivers/net/team/team_mode_random.c @@ -0,0 +1,71 @@ +/* + * drivers/net/team/team_mode_random.c - Random mode for team + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/reciprocal_div.h> +#include <linux/if_team.h> + +static u32 random_N(unsigned int N) +{ + return reciprocal_divide(random32(), N); +} + +static bool rnd_transmit(struct team *team, struct sk_buff *skb) +{ + struct team_port *port; + int port_index; + + port_index = random_N(team->en_port_count); + port = team_get_port_by_index_rcu(team, port_index); + port = team_get_first_port_txable_rcu(team, port); + if (unlikely(!port)) + goto drop; + if (team_dev_queue_xmit(team, port, skb)) + return false; + return true; + +drop: + dev_kfree_skb_any(skb); + return false; +} + +static const struct team_mode_ops rnd_mode_ops = { + .transmit = rnd_transmit, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, +}; + +static const struct team_mode rnd_mode = { + .kind = "random", + .owner = THIS_MODULE, + .ops = &rnd_mode_ops, +}; + +static int __init rnd_init_module(void) +{ + return team_mode_register(&rnd_mode); +} + +static void __exit rnd_cleanup_module(void) +{ + team_mode_unregister(&rnd_mode); +} + +module_init(rnd_init_module); +module_exit(rnd_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("Random mode for team"); +MODULE_ALIAS("team-mode-random"); diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index 105135aa8f05..d268e4de781b 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -25,26 +25,6 @@ static struct rr_priv *rr_priv(struct team *team) return (struct rr_priv *) &team->mode_priv; } -static struct team_port *__get_first_port_up(struct team *team, - struct team_port *port) -{ - struct team_port *cur; - - if (team_port_txable(port)) - return port; - cur = port; - list_for_each_entry_continue_rcu(cur, 
&team->port_list, list) - if (team_port_txable(port)) - return cur; - list_for_each_entry_rcu(cur, &team->port_list, list) { - if (cur == port) - break; - if (team_port_txable(port)) - return cur; - } - return NULL; -} - static bool rr_transmit(struct team *team, struct sk_buff *skb) { struct team_port *port; @@ -52,7 +32,7 @@ static bool rr_transmit(struct team *team, struct sk_buff *skb) port_index = rr_priv(team)->sent_packets++ % team->en_port_count; port = team_get_port_by_index_rcu(team, port_index); - port = __get_first_port_up(team, port); + port = team_get_first_port_txable_rcu(team, port); if (unlikely(!port)) goto drop; if (team_dev_queue_xmit(team, port, skb)) @@ -64,20 +44,10 @@ drop: return false; } -static int rr_port_enter(struct team *team, struct team_port *port) -{ - return team_port_set_team_dev_addr(port); -} - -static void rr_port_change_dev_addr(struct team *team, struct team_port *port) -{ - team_port_set_team_dev_addr(port); -} - static const struct team_mode_ops rr_mode_ops = { .transmit = rr_transmit, - .port_enter = rr_port_enter, - .port_change_dev_addr = rr_port_change_dev_addr, + .port_enter = team_modeop_port_enter, + .port_change_dev_addr = team_modeop_port_change_dev_addr, }; static const struct team_mode rr_mode = { diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2c6a22e278ea..66109a2ad886 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -409,14 +409,12 @@ static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; struct tun_struct *tun; - struct net_device *dev; tun = rtnl_dereference(tfile->tun); if (tun && !tfile->detached) { u16 index = tfile->queue_index; BUG_ON(index >= tun->numqueues); - dev = tun->dev; rcu_assign_pointer(tun->tfiles[index], tun->tfiles[tun->numqueues - 1]); @@ -747,6 +745,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; skb_orphan(skb); + nf_reset(skb); + /* Enqueue packet */ 
skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb); @@ -1203,6 +1203,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } skb_reset_network_header(skb); + skb_probe_transport_header(skb, 0); + rxhash = skb_get_rxhash(skb); netif_rx_ni(skb); @@ -1592,7 +1594,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tun->flags & TUN_TAP_MQ && (tun->numqueues + tun->numdisabled > 1)) - return err; + return -EBUSY; } else { char *name; @@ -1654,6 +1656,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES; dev->features = dev->hw_features; + dev->vlan_features = dev->features; INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file); diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 3b6e9b83342d..7c769d8e25ad 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -268,7 +268,7 @@ config USB_NET_SMSC75XX select CRC16 select CRC32 help - This option adds support for SMSC LAN95XX based USB 2.0 + This option adds support for SMSC LAN75XX based USB 2.0 Gigabit Ethernet adapters. 
config USB_NET_SMSC95XX diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 709753469099..ad5d1e4384db 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -55,11 +55,7 @@ static void asix_status(struct usbnet *dev, struct urb *urb) event = urb->transfer_buffer; link = event->link & 0x01; if (netif_carrier_ok(dev->net) != link) { - if (link) { - netif_carrier_on(dev->net); - usbnet_defer_kevent (dev, EVENT_LINK_RESET ); - } else - netif_carrier_off(dev->net); + usbnet_link_change(dev, link, 1); netdev_dbg(dev->net, "Link Status is: %d\n", link); } } diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 71c27d8d214f..bd8758fa38c1 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -352,11 +352,7 @@ static void ax88179_status(struct usbnet *dev, struct urb *urb) link = (((__force u32)event->intdata1) & AX_INT_PPLS_LINK) >> 16; if (netif_carrier_ok(dev->net) != link) { - if (link) - usbnet_defer_kevent(dev, EVENT_LINK_RESET); - else - netif_carrier_off(dev->net); - + usbnet_link_change(dev, link, 1); netdev_info(dev->net, "ax88179 - Link status is: %d\n", link); } } @@ -455,7 +451,7 @@ static int ax88179_resume(struct usb_interface *intf) u16 tmp16; u8 tmp8; - netif_carrier_off(dev->net); + usbnet_link_change(dev, 0, 0); /* Power up ethernet PHY */ tmp16 = 0; @@ -1068,7 +1064,7 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) /* Restart autoneg */ mii_nway_restart(&dev->mii); - netif_carrier_off(dev->net); + usbnet_link_change(dev, 0, 0); return 0; } @@ -1356,7 +1352,7 @@ static int ax88179_reset(struct usbnet *dev) /* Restart autoneg */ mii_nway_restart(&dev->mii); - netif_carrier_off(dev->net); + usbnet_link_change(dev, 0, 0); return 0; } diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 57136dc1b887..4ff71d619cd8 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ 
-406,10 +406,7 @@ void usbnet_cdc_status(struct usbnet *dev, struct urb *urb) case USB_CDC_NOTIFY_NETWORK_CONNECTION: netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", event->wValue ? "on" : "off"); - if (event->wValue) - netif_carrier_on(dev->net); - else - netif_carrier_off(dev->net); + usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: /* tx/rx rates */ netif_dbg(dev, timer, dev->net, "CDC: speed change (len %d)\n", diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 248d2dc765a5..c96454434f7b 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -68,18 +68,9 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) struct cdc_ncm_ctx *ctx; struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; - u8 data_altsetting = CDC_NCM_DATA_ALTSETTING_NCM; + u8 data_altsetting = cdc_ncm_select_altsetting(dev, intf); struct cdc_mbim_state *info = (void *)&dev->data; - /* see if interface supports MBIM alternate setting */ - if (intf->num_altsetting == 2) { - if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) - usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_MBIM); - data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM; - } - /* Probably NCM, defer for cdc_ncm_bind */ if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; @@ -110,7 +101,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->flags |= IFF_NOARP; /* no need to put the VLAN tci in the packet headers */ - dev->net->features |= NETIF_F_HW_VLAN_TX; + dev->net->features |= NETIF_F_HW_VLAN_CTAG_TX; err: return ret; } @@ -143,7 +134,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb goto error; if (skb) { - if (skb->len <= sizeof(ETH_HLEN)) + if (skb->len <= ETH_HLEN) goto error; /* mapping VLANs to MBIM sessions: @@ -230,7 +221,7 @@ static struct sk_buff 
*cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_ /* map MBIM session to VLAN */ if (tci) - vlan_put_tag(skb, tci); + vlan_put_tag(skb, htons(ETH_P_8021Q), tci); err: return skb; } diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 61b74a2b89ac..43afde8f48d2 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -55,6 +55,14 @@ #define DRIVER_VERSION "14-Mar-2012" +#if IS_ENABLED(CONFIG_USB_NET_CDC_MBIM) +static bool prefer_mbim = true; +#else +static bool prefer_mbim; +#endif +module_param(prefer_mbim, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(prefer_mbim, "Prefer MBIM setting on dual NCM/MBIM functions"); + static void cdc_ncm_txpath_bh(unsigned long param); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); @@ -354,8 +362,8 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ u8 iface_no; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (ctx == NULL) - return -ENODEV; + if (!ctx) + return -ENOMEM; hrtimer_init(&ctx->tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ctx->tx_timer.function = &cdc_ncm_tx_timer_cb; @@ -550,9 +558,12 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) } EXPORT_SYMBOL_GPL(cdc_ncm_unbind); -static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) +/* Select the MBIM altsetting iff it is preferred and available, + * returning the number of the corresponding data interface altsetting + */ +u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf) { - int ret; + struct usb_host_interface *alt; /* The MBIM spec defines a NCM compatible default altsetting, * which we may have matched: @@ -568,23 +579,27 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) * endpoint descriptors, shall be constructed according to * the rules given in section 6 (USB Device Model) of this * specification." 
- * - * Do not bind to such interfaces, allowing cdc_mbim to handle - * them */ -#if IS_ENABLED(CONFIG_USB_NET_CDC_MBIM) - if ((intf->num_altsetting == 2) && - !usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_MBIM)) { - if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) - return -ENODEV; - else - usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_NCM); + if (prefer_mbim && intf->num_altsetting == 2) { + alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM); + if (alt && cdc_ncm_comm_intf_is_mbim(alt) && + !usb_set_interface(dev->udev, + intf->cur_altsetting->desc.bInterfaceNumber, + CDC_NCM_COMM_ALTSETTING_MBIM)) + return CDC_NCM_DATA_ALTSETTING_MBIM; } -#endif + return CDC_NCM_DATA_ALTSETTING_NCM; +} +EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting); + +static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int ret; + + /* MBIM backwards compatible function? */ + cdc_ncm_select_altsetting(dev, intf); + if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) + return -ENODEV; /* NCM data altsetting is always 1 */ ret = cdc_ncm_bind_common(dev, intf, 1); @@ -595,7 +610,7 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) * (carrier is OFF) during attach, so the IP network stack does not * start IPv6 negotiation and more. */ - netif_carrier_off(dev->net); + usbnet_link_change(dev, 0, 0); return ret; } @@ -1091,12 +1106,9 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) " %sconnected\n", ctx->netdev->name, ctx->connected ? 
"" : "dis"); - if (ctx->connected) - netif_carrier_on(dev->net); - else { - netif_carrier_off(dev->net); + usbnet_link_change(dev, ctx->connected, 0); + if (!ctx->connected) ctx->tx_speed = ctx->rx_speed = 0; - } break; case USB_CDC_NOTIFY_SPEED_CHANGE: @@ -1109,8 +1121,9 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) break; default: - dev_err(&dev->udev->dev, "NCM: unexpected " - "notification 0x%02x!\n", event->bNotificationType); + dev_dbg(&dev->udev->dev, + "NCM: unexpected notification 0x%02x!\n", + event->bNotificationType); break; } } diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 174e5ecea4cc..2dbb9460349d 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -524,12 +524,7 @@ static void dm9601_status(struct usbnet *dev, struct urb *urb) link = !!(buf[0] & 0x40); if (netif_carrier_ok(dev->net) != link) { - if (link) { - netif_carrier_on(dev->net); - usbnet_defer_kevent (dev, EVENT_LINK_RESET); - } - else - netif_carrier_off(dev->net); + usbnet_link_change(dev, link, 1); netdev_dbg(dev->net, "Link Status is: %d\n", link); } } diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 3f3f566afa0b..03832d3780aa 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -576,11 +576,7 @@ static void mcs7830_status(struct usbnet *dev, struct urb *urb) */ if (data->link_counter > 20) { data->link_counter = 0; - if (link) { - netif_carrier_on(dev->net); - usbnet_defer_kevent(dev, EVENT_LINK_RESET); - } else - netif_carrier_off(dev->net); + usbnet_link_change(dev, link, 0); netdev_dbg(dev->net, "Link Status is: %d\n", link); } } else diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index efb5c7c33a28..2a3579f67910 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ethtool.h> +#include <linux/etherdevice.h> #include <linux/mii.h> #include 
<linux/usb.h> #include <linux/usb/cdc.h> @@ -52,6 +53,96 @@ struct qmi_wwan_state { struct usb_interface *data; }; +/* default ethernet address used by the modem */ +static const u8 default_modem_addr[ETH_ALEN] = {0x02, 0x50, 0xf3}; + +/* Make up an ethernet header if the packet doesn't have one. + * + * A firmware bug common among several devices cause them to send raw + * IP packets under some circumstances. There is no way for the + * driver/host to know when this will happen. And even when the bug + * hits, some packets will still arrive with an intact header. + * + * The supported devices are only capably of sending IPv4, IPv6 and + * ARP packets on a point-to-point link. Any packet with an ethernet + * header will have either our address or a broadcast/multicast + * address as destination. ARP packets will always have a header. + * + * This means that this function will reliably add the appropriate + * header iff necessary, provided our hardware address does not start + * with 4 or 6. + * + * Another common firmware bug results in all packets being addressed + * to 00:a0:c6:00:00:00 despite the host address being different. + * This function will also fixup such packets. 
+ */ +static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +{ + __be16 proto; + + /* usbnet rx_complete guarantees that skb->len is at least + * hard_header_len, so we can inspect the dest address without + * checking skb->len + */ + switch (skb->data[0] & 0xf0) { + case 0x40: + proto = htons(ETH_P_IP); + break; + case 0x60: + proto = htons(ETH_P_IPV6); + break; + case 0x00: + if (is_multicast_ether_addr(skb->data)) + return 1; + /* possibly bogus destination - rewrite just in case */ + skb_reset_mac_header(skb); + goto fix_dest; + default: + /* pass along other packets without modifications */ + return 1; + } + if (skb_headroom(skb) < ETH_HLEN) + return 0; + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + eth_hdr(skb)->h_proto = proto; + memset(eth_hdr(skb)->h_source, 0, ETH_ALEN); +fix_dest: + memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN); + return 1; +} + +/* very simplistic detection of IPv4 or IPv6 headers */ +static bool possibly_iphdr(const char *data) +{ + return (data[0] & 0xd0) == 0x40; +} + +/* disallow addresses which may be confused with IP headers */ +static int qmi_wwan_mac_addr(struct net_device *dev, void *p) +{ + int ret; + struct sockaddr *addr = p; + + ret = eth_prepare_mac_addr_change(dev, p); + if (ret < 0) + return ret; + if (possibly_iphdr(addr->sa_data)) + return -EADDRNOTAVAIL; + eth_commit_mac_addr_change(dev, p); + return 0; +} + +static const struct net_device_ops qmi_wwan_netdev_ops = { + .ndo_open = usbnet_open, + .ndo_stop = usbnet_stop, + .ndo_start_xmit = usbnet_start_xmit, + .ndo_tx_timeout = usbnet_tx_timeout, + .ndo_change_mtu = usbnet_change_mtu, + .ndo_set_mac_address = qmi_wwan_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + /* using a counter to merge subdriver requests with our own into a combined state */ static int qmi_wwan_manage_power(struct usbnet *dev, int on) { @@ -139,16 +230,9 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) 
BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct qmi_wwan_state))); - /* control and data is shared? */ - if (intf->cur_altsetting->desc.bNumEndpoints == 3) { - info->control = intf; - info->data = intf; - goto shared; - } - - /* else require a single interrupt status endpoint on control intf */ - if (intf->cur_altsetting->desc.bNumEndpoints != 1) - goto err; + /* set up initial state */ + info->control = intf; + info->data = intf; /* and a number of CDC descriptors */ while (len > 3) { @@ -207,25 +291,14 @@ next_desc: buf += h->bLength; } - /* did we find all the required ones? */ - if (!(found & (1 << USB_CDC_HEADER_TYPE)) || - !(found & (1 << USB_CDC_UNION_TYPE))) { - dev_err(&intf->dev, "CDC functional descriptors missing\n"); - goto err; - } - - /* verify CDC Union */ - if (desc->bInterfaceNumber != cdc_union->bMasterInterface0) { - dev_err(&intf->dev, "bogus CDC Union: master=%u\n", cdc_union->bMasterInterface0); - goto err; - } - - /* need to save these for unbind */ - info->control = intf; - info->data = usb_ifnum_to_if(dev->udev, cdc_union->bSlaveInterface0); - if (!info->data) { - dev_err(&intf->dev, "bogus CDC Union: slave=%u\n", cdc_union->bSlaveInterface0); - goto err; + /* Use separate control and data interfaces if we found a CDC Union */ + if (cdc_union) { + info->data = usb_ifnum_to_if(dev->udev, cdc_union->bSlaveInterface0); + if (desc->bInterfaceNumber != cdc_union->bMasterInterface0 || !info->data) { + dev_err(&intf->dev, "bogus CDC Union: master=%u, slave=%u\n", + cdc_union->bMasterInterface0, cdc_union->bSlaveInterface0); + goto err; + } } /* errors aren't fatal - we can live with the dynamic address */ @@ -235,17 +308,30 @@ next_desc: } /* claim data interface and set it up */ - status = usb_driver_claim_interface(driver, info->data, dev); - if (status < 0) - goto err; + if (info->control != info->data) { + status = usb_driver_claim_interface(driver, info->data, dev); + if (status < 0) + goto err; + } -shared: status = 
qmi_wwan_register_subdriver(dev); if (status < 0 && info->control != info->data) { usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver, info->data); } + /* Never use the same address on both ends of the link, even + * if the buggy firmware told us to. + */ + if (!compare_ether_addr(dev->net->dev_addr, default_modem_addr)) + eth_hw_addr_random(dev->net); + + /* make MAC addr easily distinguishable from an IP header */ + if (possibly_iphdr(dev->net->dev_addr)) { + dev->net->dev_addr[0] |= 0x02; /* set local assignment bit */ + dev->net->dev_addr[0] &= 0xbf; /* clear "IP" bit */ + } + dev->net->netdev_ops = &qmi_wwan_netdev_ops; err: return status; } @@ -324,6 +410,7 @@ static const struct driver_info qmi_wwan_info = { .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power, + .rx_fixup = qmi_wwan_rx_fixup, }; #define HUAWEI_VENDOR_ID 0x12D1 diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 79ab2435d9d3..a923d61c6fc5 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -413,11 +413,10 @@ static void sierra_net_handle_lsi(struct usbnet *dev, char *data, if (link_up) { sierra_net_set_ctx_index(priv, hh->msgspecific.byte); priv->link_up = 1; - netif_carrier_on(dev->net); } else { priv->link_up = 0; - netif_carrier_off(dev->net); } + usbnet_link_change(dev, link_up, 0); } static void sierra_net_dosync(struct usbnet *dev) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 9abe51710f22..1a15ec14c386 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -914,8 +914,12 @@ static int smsc75xx_set_rx_max_frame_length(struct usbnet *dev, int size) static int smsc75xx_change_mtu(struct net_device *netdev, int new_mtu) { struct usbnet *dev = netdev_priv(netdev); + int ret; + + if (new_mtu > MAX_SINGLE_PACKET_SIZE) + return -EINVAL; - int ret = smsc75xx_set_rx_max_frame_length(dev, new_mtu); + ret = 
smsc75xx_set_rx_max_frame_length(dev, new_mtu + ETH_HLEN); if (ret < 0) { netdev_warn(dev->net, "Failed to set mac rx frame length\n"); return ret; @@ -1324,7 +1328,7 @@ static int smsc75xx_reset(struct usbnet *dev) netif_dbg(dev, ifup, dev->net, "FCT_TX_CTL set to 0x%08x\n", buf); - ret = smsc75xx_set_rx_max_frame_length(dev, 1514); + ret = smsc75xx_set_rx_max_frame_length(dev, dev->net->mtu + ETH_HLEN); if (ret < 0) { netdev_warn(dev->net, "Failed to set max rx frame length\n"); return ret; @@ -2134,8 +2138,8 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) else if (rx_cmd_a & (RX_CMD_A_LONG | RX_CMD_A_RUNT)) dev->net->stats.rx_frame_errors++; } else { - /* ETH_FRAME_LEN + 4(CRC) + 2(COE) + 4(Vlan) */ - if (unlikely(size > (ETH_FRAME_LEN + 12))) { + /* MAX_SINGLE_PACKET_SIZE + 4(CRC) + 2(COE) + 4(Vlan) */ + if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12))) { netif_dbg(dev, rx_err, dev->net, "size err rx_cmd_a=0x%08x\n", rx_cmd_a); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 51f3192f3931..1e5a9b72650e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -938,6 +938,27 @@ static const struct ethtool_ops usbnet_ethtool_ops = { /*-------------------------------------------------------------------------*/ +static void __handle_link_change(struct usbnet *dev) +{ + if (!test_bit(EVENT_DEV_OPEN, &dev->flags)) + return; + + if (!netif_carrier_ok(dev->net)) { + /* kill URBs for reading packets to save bus bandwidth */ + unlink_urbs(dev, &dev->rxq); + + /* + * tx_timeout will unlink URBs for sending packets and + * tx queue is stopped by netcore after link becomes off + */ + } else { + /* submitting URBs for reading packets */ + tasklet_schedule(&dev->bh); + } + + clear_bit(EVENT_LINK_CHANGE, &dev->flags); +} + /* work that cannot be done in interrupt context uses keventd. 
* * NOTE: with 2.5 we could do more of this using completion callbacks, @@ -1035,8 +1056,14 @@ skip_reset: } else { usb_autopm_put_interface(dev->intf); } + + /* handle link change from link resetting */ + __handle_link_change(dev); } + if (test_bit (EVENT_LINK_CHANGE, &dev->flags)) + __handle_link_change(dev); + if (dev->flags) netdev_dbg(dev->net, "kevent done, flags = 0x%lx\n", dev->flags); } @@ -1286,6 +1313,7 @@ static void usbnet_bh (unsigned long param) // or are we maybe short a few urbs? } else if (netif_running (dev->net) && netif_device_present (dev->net) && + netif_carrier_ok(dev->net) && !timer_pending (&dev->delay) && !test_bit (EVENT_RX_HALT, &dev->flags)) { int temp = dev->rxq.qlen; @@ -1521,7 +1549,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) netif_device_attach (net); if (dev->driver_info->flags & FLAG_LINK_INTR) - netif_carrier_off(net); + usbnet_link_change(dev, 0, 0); return 0; @@ -1653,6 +1681,21 @@ int usbnet_manage_power(struct usbnet *dev, int on) } EXPORT_SYMBOL(usbnet_manage_power); +void usbnet_link_change(struct usbnet *dev, bool link, bool need_reset) +{ + /* update link after link is reseted */ + if (link && !need_reset) + netif_carrier_on(dev->net); + else + netif_carrier_off(dev->net); + + if (need_reset && link) + usbnet_defer_kevent(dev, EVENT_LINK_RESET); + else + usbnet_defer_kevent(dev, EVENT_LINK_CHANGE); +} +EXPORT_SYMBOL(usbnet_link_change); + /*-------------------------------------------------------------------------*/ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, void *data, u16 size) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 07a4af0aa3dc..177f911f5946 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -255,7 +255,8 @@ static const struct net_device_ops veth_netdev_ops = { #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \ - 
NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX) + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) static void veth_setup(struct net_device *dev) { diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 57ac4b0294bc..50077753a0e5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -154,7 +154,7 @@ struct padded_vnet_hdr { */ static int vq2txq(struct virtqueue *vq) { - return (virtqueue_get_queue_index(vq) - 1) / 2; + return (vq->index - 1) / 2; } static int txq2vq(int txq) @@ -164,7 +164,7 @@ static int txq2vq(int txq) static int vq2rxq(struct virtqueue *vq) { - return virtqueue_get_queue_index(vq) / 2; + return vq->index / 2; } static int rxq2vq(int rxq) @@ -1006,7 +1006,8 @@ static void virtnet_set_rx_mode(struct net_device *dev) kfree(buf); } -static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid) +static int virtnet_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; @@ -1019,7 +1020,8 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid) return 0; } -static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid) +static int virtnet_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; @@ -1376,7 +1378,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) if (vi->has_cvq) { vi->cvq = vqs[total_vqs - 1]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) - vi->dev->features |= NETIF_F_HW_VLAN_FILTER; + vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } for (i = 0; i < vi->max_queue_pairs; i++) { @@ -1511,6 +1513,8 @@ static int virtnet_probe(struct virtio_device *vdev) /* (!csum && gso) case will be fixed by register_netdev() */ } + dev->vlan_features = dev->features; + /* Configuration may specify what MAC to use. Otherwise random. 
*/ if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC, offsetof(struct virtio_net_config, mac), diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 4aad350e4dae..55a62cae2cb4 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1293,7 +1293,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, skb->protocol = eth_type_trans(skb, adapter->netdev); if (unlikely(rcd->ts)) - __vlan_hwaccel_put_tag(skb, rcd->tci); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci); if (adapter->netdev->features & NETIF_F_LRO) netif_receive_skb(skb); @@ -1931,7 +1931,7 @@ vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter) static int -vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); @@ -1953,7 +1953,7 @@ vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid) static int -vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); @@ -2107,7 +2107,7 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) devRead->misc.uptFeatures |= UPT1_F_LRO; devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS); } - if (adapter->netdev->features & NETIF_F_HW_VLAN_RX) + if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) devRead->misc.uptFeatures |= UPT1_F_RXVLAN; devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu); @@ -2669,14 +2669,15 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) struct net_device *netdev = adapter->netdev; netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_LRO; if (dma64) 
netdev->hw_features |= NETIF_F_HIGHDMA; netdev->vlan_features = netdev->hw_features & - ~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX); - netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_FILTER; + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX); + netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; } @@ -2958,6 +2959,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->num_rx_queues = num_rx_queues; adapter->num_tx_queues = num_tx_queues; + adapter->rx_buf_per_pkt = 1; size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues; size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues; diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index a0feb17a0238..600ab56c0008 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -263,7 +263,8 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) unsigned long flags; netdev_features_t changed = features ^ netdev->features; - if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_HW_VLAN_RX)) { + if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO | + NETIF_F_HW_VLAN_CTAG_RX)) { if (features & NETIF_F_RXCSUM) adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXCSUM; @@ -279,7 +280,7 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) adapter->shared->devRead.misc.uptFeatures &= ~UPT1_F_LRO; - if (features & NETIF_F_HW_VLAN_RX) + if (features & NETIF_F_HW_VLAN_CTAG_RX) adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXVLAN; else @@ -472,6 +473,12 @@ vmxnet3_set_ringparam(struct net_device *netdev, VMXNET3_RX_RING_MAX_SIZE) return -EINVAL; + /* if adapter not yet initialized, do nothing */ + if (adapter->rx_buf_per_pkt == 0) { + netdev_err(netdev, "adapter not completely initialized, " + "ring size cannot be changed yet\n"); + return -EOPNOTSUPP; + } /* round it up to a multiple of VMXNET3_RING_SIZE_ALIGN */ new_tx_ring_size = 
(param->tx_pending + VMXNET3_RING_SIZE_MASK) & diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 3198384689d9..35418146fa17 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -70,10 +70,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.1.29.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.1.30.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01011D00 +#define VMXNET3_DRIVER_VERSION_NUM 0x01011E00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c3e3d2929ee3..a7fd9a089a35 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -33,7 +33,7 @@ #include <net/arp.h> #include <net/ndisc.h> #include <net/ip.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/icmp.h> #include <net/udp.h> #include <net/rtnetlink.h> @@ -81,35 +81,33 @@ struct vxlan_net { struct hlist_head vni_list[VNI_HASH_SIZE]; }; +struct vxlan_rdst { + struct rcu_head rcu; + __be32 remote_ip; + __be16 remote_port; + u32 remote_vni; + u32 remote_ifindex; + struct vxlan_rdst *remote_next; +}; + /* Forwarding table entry */ struct vxlan_fdb { struct hlist_node hlist; /* linked list of entries */ struct rcu_head rcu; unsigned long updated; /* jiffies */ unsigned long used; - __be32 remote_ip; + struct vxlan_rdst remote; u16 state; /* see ndm_state */ + u8 flags; /* see ndm_flags */ u8 eth_addr[ETH_ALEN]; }; -/* Per-cpu network traffic stats */ -struct vxlan_stats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - /* Pseudo network device */ struct vxlan_dev { struct hlist_node hlist; struct net_device *dev; - struct vxlan_stats __percpu *stats; - __u32 vni; /* virtual network id */ - __be32 gaddr; /* multicast group */ + struct vxlan_rdst default_dst; /* default destination 
*/ __be32 saddr; /* source address */ - unsigned int link; /* link to multicast over */ __u16 port_min; /* source port range */ __u16 port_max; __u8 tos; /* TOS override */ @@ -147,7 +145,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) struct vxlan_dev *vxlan; hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) { - if (vxlan->vni == id) + if (vxlan->default_dst.remote_vni == id) return vxlan; } @@ -157,7 +155,8 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) /* Fill in neighbour message in skbuff. */ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, const struct vxlan_fdb *fdb, - u32 portid, u32 seq, int type, unsigned int flags) + u32 portid, u32 seq, int type, unsigned int flags, + const struct vxlan_rdst *rdst) { unsigned long now = jiffies; struct nda_cacheinfo ci; @@ -176,19 +175,29 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (type == RTM_GETNEIGH) { ndm->ndm_family = AF_INET; - send_ip = fdb->remote_ip != 0; + send_ip = rdst->remote_ip != htonl(INADDR_ANY); send_eth = !is_zero_ether_addr(fdb->eth_addr); } else ndm->ndm_family = AF_BRIDGE; ndm->ndm_state = fdb->state; ndm->ndm_ifindex = vxlan->dev->ifindex; - ndm->ndm_flags = NTF_SELF; + ndm->ndm_flags = fdb->flags; ndm->ndm_type = NDA_DST; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; - if (send_ip && nla_put_be32(skb, NDA_DST, fdb->remote_ip)) + if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip)) + goto nla_put_failure; + + if (rdst->remote_port && rdst->remote_port != vxlan_port && + nla_put_be16(skb, NDA_PORT, rdst->remote_port)) + goto nla_put_failure; + if (rdst->remote_vni != vxlan->default_dst.remote_vni && + nla_put_be32(skb, NDA_VNI, rdst->remote_vni)) + goto nla_put_failure; + if (rdst->remote_ifindex && + nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); @@ -211,6 +220,9 
@@ static inline size_t vxlan_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + nla_total_size(sizeof(__be32)) /* NDA_DST */ + + nla_total_size(sizeof(__be32)) /* NDA_PORT */ + + nla_total_size(sizeof(__be32)) /* NDA_VNI */ + + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -225,7 +237,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan, if (skb == NULL) goto errout; - err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0); + err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote); if (err < 0) { /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -247,7 +259,8 @@ static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) memset(&f, 0, sizeof f); f.state = NUD_STALE; - f.remote_ip = ipa; /* goes to NDA_DST */ + f.remote.remote_ip = ipa; /* goes to NDA_DST */ + f.remote.remote_vni = VXLAN_N_VID; vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); } @@ -300,10 +313,39 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, return NULL; } +/* Add/update destinations for multicast */ +static int vxlan_fdb_append(struct vxlan_fdb *f, + __be32 ip, __u32 port, __u32 vni, __u32 ifindex) +{ + struct vxlan_rdst *rd_prev, *rd; + + rd_prev = NULL; + for (rd = &f->remote; rd; rd = rd->remote_next) { + if (rd->remote_ip == ip && + rd->remote_port == port && + rd->remote_vni == vni && + rd->remote_ifindex == ifindex) + return 0; + rd_prev = rd; + } + rd = kmalloc(sizeof(*rd), GFP_ATOMIC); + if (rd == NULL) + return -ENOBUFS; + rd->remote_ip = ip; + rd->remote_port = port; + rd->remote_vni = vni; + rd->remote_ifindex = ifindex; + rd->remote_next = NULL; + rd_prev->remote_next = rd; + return 1; +} + /* Add new entry to forwarding table -- assumes lock held */ static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, __be32 ip, - __u16 state, __u16 flags) + __u16 state, __u16 flags, + __u32 port, __u32 vni, __u32 
ifindex, + __u8 ndm_flags) { struct vxlan_fdb *f; int notify = 0; @@ -320,6 +362,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, f->updated = jiffies; notify = 1; } + if (f->flags != ndm_flags) { + f->flags = ndm_flags; + f->updated = jiffies; + notify = 1; + } + if ((flags & NLM_F_APPEND) && + is_multicast_ether_addr(f->eth_addr)) { + int rc = vxlan_fdb_append(f, ip, port, vni, ifindex); + + if (rc < 0) + return rc; + notify |= rc; + } } else { if (!(flags & NLM_F_CREATE)) return -ENOENT; @@ -333,8 +388,13 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, return -ENOMEM; notify = 1; - f->remote_ip = ip; + f->remote.remote_ip = ip; + f->remote.remote_port = port; + f->remote.remote_vni = vni; + f->remote.remote_ifindex = ifindex; + f->remote.remote_next = NULL; f->state = state; + f->flags = ndm_flags; f->updated = f->used = jiffies; memcpy(f->eth_addr, mac, ETH_ALEN); @@ -349,6 +409,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, return 0; } +static void vxlan_fdb_free(struct rcu_head *head) +{ + struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); + + while (f->remote.remote_next) { + struct vxlan_rdst *rd = f->remote.remote_next; + + f->remote.remote_next = rd->remote_next; + kfree(rd); + } + kfree(f); +} + static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) { netdev_dbg(vxlan->dev, @@ -358,7 +431,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH); hlist_del_rcu(&f->hlist); - kfree_rcu(f, rcu); + call_rcu(&f->rcu, vxlan_fdb_free); } /* Add static entry (via netlink) */ @@ -367,7 +440,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, u16 flags) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct net *net = dev_net(vxlan->dev); __be32 ip; + u32 port, vni, ifindex; int err; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { @@ -384,8 +459,36 @@ static int vxlan_fdb_add(struct 
ndmsg *ndm, struct nlattr *tb[], ip = nla_get_be32(tb[NDA_DST]); + if (tb[NDA_PORT]) { + if (nla_len(tb[NDA_PORT]) != sizeof(u32)) + return -EINVAL; + port = nla_get_u32(tb[NDA_PORT]); + } else + port = vxlan_port; + + if (tb[NDA_VNI]) { + if (nla_len(tb[NDA_VNI]) != sizeof(u32)) + return -EINVAL; + vni = nla_get_u32(tb[NDA_VNI]); + } else + vni = vxlan->default_dst.remote_vni; + + if (tb[NDA_IFINDEX]) { + struct net_device *tdev; + + if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) + return -EINVAL; + ifindex = nla_get_u32(tb[NDA_IFINDEX]); + tdev = dev_get_by_index(net, ifindex); + if (!tdev) + return -EADDRNOTAVAIL; + dev_put(tdev); + } else + ifindex = 0; + spin_lock_bh(&vxlan->hash_lock); - err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags); + err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, port, + vni, ifindex, ndm->ndm_flags); spin_unlock_bh(&vxlan->hash_lock); return err; @@ -423,18 +526,21 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, int err; hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { - if (idx < cb->args[0]) - goto skip; - - err = vxlan_fdb_info(skb, vxlan, f, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - NLM_F_MULTI); - if (err < 0) - break; + struct vxlan_rdst *rd; + for (rd = &f->remote; rd; rd = rd->remote_next) { + if (idx < cb->args[0]) + goto skip; + + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI, rd); + if (err < 0) + break; skip: - ++idx; + ++idx; + } } } @@ -454,22 +560,25 @@ static void vxlan_snoop(struct net_device *dev, f = vxlan_find_mac(vxlan, src_mac); if (likely(f)) { f->used = jiffies; - if (likely(f->remote_ip == src_ip)) + if (likely(f->remote.remote_ip == src_ip)) return; if (net_ratelimit()) netdev_info(dev, "%pM migrated from %pI4 to %pI4\n", - src_mac, &f->remote_ip, &src_ip); + src_mac, &f->remote.remote_ip, &src_ip); - f->remote_ip = src_ip; + f->remote.remote_ip 
= src_ip; f->updated = jiffies; } else { /* learned new entry */ spin_lock(&vxlan->hash_lock); err = vxlan_fdb_create(vxlan, src_mac, src_ip, NUD_REACHABLE, - NLM_F_EXCL|NLM_F_CREATE); + NLM_F_EXCL|NLM_F_CREATE, + vxlan_port, + vxlan->default_dst.remote_vni, + 0, NTF_SELF); spin_unlock(&vxlan->hash_lock); } } @@ -490,7 +599,7 @@ static bool vxlan_group_used(struct vxlan_net *vn, if (!netif_running(vxlan->dev)) continue; - if (vxlan->gaddr == this->gaddr) + if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip) return true; } @@ -504,8 +613,8 @@ static int vxlan_join_group(struct net_device *dev) struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct sock *sk = vn->sock->sk; struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = vxlan->gaddr, - .imr_ifindex = vxlan->link, + .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, + .imr_ifindex = vxlan->default_dst.remote_ifindex, }; int err; @@ -532,8 +641,8 @@ static int vxlan_leave_group(struct net_device *dev) int err = 0; struct sock *sk = vn->sock->sk; struct ip_mreqn mreq = { - .imr_multiaddr.s_addr = vxlan->gaddr, - .imr_ifindex = vxlan->link, + .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, + .imr_ifindex = vxlan->default_dst.remote_ifindex, }; /* Only leave group when last vxlan is done. 
*/ @@ -556,7 +665,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct iphdr *oip; struct vxlanhdr *vxh; struct vxlan_dev *vxlan; - struct vxlan_stats *stats; + struct pcpu_tstats *stats; __u32 vni; int err; @@ -632,7 +741,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } } - stats = this_cpu_ptr(vxlan->stats); + stats = this_cpu_ptr(vxlan->dev->tstats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; stats->rx_bytes += skb->len; @@ -691,7 +800,6 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) n = neigh_lookup(&arp_tbl, &tip, dev); if (n) { - struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; struct sk_buff *reply; @@ -701,7 +809,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) } f = vxlan_find_mac(vxlan, n->ha); - if (f && f->remote_ip == 0) { + if (f && f->remote.remote_ip == htonl(INADDR_ANY)) { /* bridge-local neighbor */ neigh_release(n); goto out; @@ -763,28 +871,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -/* Extract dsfield from inner protocol */ -static inline u8 vxlan_get_dsfield(const struct iphdr *iph, - const struct sk_buff *skb) -{ - if (skb->protocol == htons(ETH_P_IP)) - return iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - return ipv6_get_dsfield((const struct ipv6hdr *)iph); - else - return 0; -} - -/* Propogate ECN bits out */ -static inline u8 vxlan_ecn_encap(u8 tos, - const struct iphdr *iph, - const struct sk_buff *skb) -{ - u8 inner = vxlan_get_dsfield(iph, skb); - - return INET_ECN_encapsulate(tos, inner); -} - static void vxlan_sock_free(struct sk_buff *skb) { sock_put(skb->sk); @@ -820,68 +906,75 @@ static u16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb) return (((u64) hash * range) >> 32) + vxlan->port_min; } -/* Transmit local packets over Vxlan - * - * Outer IP header inherits ECN and DF from inner header. 
- * Outer UDP destination is the VXLAN assigned port. - * source port is based on hash of flow - */ -static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) +static int handle_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) { + int err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + return err; + + skb_shinfo(skb)->gso_type |= (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP); + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return 0; +} + +/* Bypass encapsulation if the destination is local */ +static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, + struct vxlan_dev *dst_vxlan) +{ + struct pcpu_tstats *tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); + struct pcpu_tstats *rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats); + + skb->pkt_type = PACKET_HOST; + skb->encapsulation = 0; + skb->dev = dst_vxlan->dev; + __skb_pull(skb, skb_network_offset(skb)); + + if (dst_vxlan->flags & VXLAN_F_LEARN) + vxlan_snoop(skb->dev, htonl(INADDR_LOOPBACK), + eth_hdr(skb)->h_source); + + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->tx_packets++; + tx_stats->tx_bytes += skb->len; + u64_stats_update_end(&tx_stats->syncp); + + if (netif_rx(skb) == NET_RX_SUCCESS) { + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->rx_packets++; + rx_stats->rx_bytes += skb->len; + u64_stats_update_end(&rx_stats->syncp); + } else { + skb->dev->stats.rx_dropped++; + } +} + +static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, + struct vxlan_rdst *rdst, bool did_rsc) { struct vxlan_dev *vxlan = netdev_priv(dev); struct rtable *rt; const struct iphdr *old_iph; - struct ethhdr *eth; struct iphdr *iph; struct vxlanhdr *vxh; struct udphdr *uh; struct flowi4 fl4; - unsigned int pkt_len = skb->len; __be32 dst; - __u16 src_port; + __u16 src_port, dst_port; + u32 vni; __be16 df = 0; __u8 tos, ttl; - int err; - bool did_rsc = false; - const struct vxlan_fdb *f; - - skb_reset_mac_header(skb); - 
eth = eth_hdr(skb); - - if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) - return arp_reduce(dev, skb); - else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP) - did_rsc = route_shortcircuit(dev, skb); - f = vxlan_find_mac(vxlan, eth->h_dest); - if (f == NULL) { - did_rsc = false; - dst = vxlan->gaddr; - if (!dst && (vxlan->flags & VXLAN_F_L2MISS) && - !is_multicast_ether_addr(eth->h_dest)) - vxlan_fdb_miss(vxlan, eth->h_dest); - } else - dst = f->remote_ip; + dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port; + vni = rdst->remote_vni; + dst = rdst->remote_ip; if (!dst) { if (did_rsc) { - __skb_pull(skb, skb_network_offset(skb)); - skb->ip_summed = CHECKSUM_NONE; - skb->pkt_type = PACKET_HOST; - /* short-circuited back to local bridge */ - if (netif_rx(skb) == NET_RX_SUCCESS) { - struct vxlan_stats *stats = - this_cpu_ptr(vxlan->stats); - - u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += pkt_len; - u64_stats_update_end(&stats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + vxlan_encap_bypass(skb, vxlan, vxlan); return NETDEV_TX_OK; } goto drop; @@ -904,12 +997,12 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) tos = vxlan->tos; if (tos == 1) - tos = vxlan_get_dsfield(old_iph, skb); + tos = ip_tunnel_get_dsfield(old_iph, skb); src_port = vxlan_src_port(vxlan, skb); memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = vxlan->link; + fl4.flowi4_oif = rdst->remote_ifindex; fl4.flowi4_tos = RT_TOS(tos); fl4.daddr = dst; fl4.saddr = vxlan->saddr; @@ -928,6 +1021,19 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error; } + /* Bypass encapsulation if the destination is local */ + if (rt->rt_flags & RTCF_LOCAL && + !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { + struct vxlan_dev *dst_vxlan; + + ip_rt_put(rt); + dst_vxlan = vxlan_find_vni(dev_net(dev), vni); + if (!dst_vxlan) + 
goto tx_error; + vxlan_encap_bypass(skb, vxlan, dst_vxlan); + return NETDEV_TX_OK; + } + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); @@ -936,13 +1042,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); - vxh->vx_vni = htonl(vxlan->vni << 8); + vxh->vx_vni = htonl(vni << 8); __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); uh = udp_hdr(skb); - uh->dest = htons(vxlan_port); + uh->dest = htons(dst_port); uh->source = htons(src_port); uh->len = htons(skb->len); @@ -955,7 +1061,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; iph->protocol = IPPROTO_UDP; - iph->tos = vxlan_ecn_encap(tos, old_iph, skb); + iph->tos = ip_tunnel_ecn_encap(tos, old_iph, skb); iph->daddr = dst; iph->saddr = fl4.saddr; iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); @@ -965,22 +1071,10 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) vxlan_set_owner(dev, skb); - /* See iptunnel_xmit() */ - if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - struct vxlan_stats *stats = this_cpu_ptr(vxlan->stats); + if (handle_offloads(skb)) + goto drop; - u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += pkt_len; - u64_stats_update_end(&stats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; drop: @@ -994,6 +1088,65 @@ tx_free: return NETDEV_TX_OK; } +/* Transmit local packets over Vxlan + * + * Outer IP header inherits ECN and DF from inner header. + * Outer UDP destination is the VXLAN assigned port. 
+ * source port is based on hash of flow + */ +static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct ethhdr *eth; + bool did_rsc = false; + struct vxlan_rdst *rdst0, *rdst; + struct vxlan_fdb *f; + int rc1, rc; + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) + return arp_reduce(dev, skb); + + f = vxlan_find_mac(vxlan, eth->h_dest); + did_rsc = false; + + if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) && + ntohs(eth->h_proto) == ETH_P_IP) { + did_rsc = route_shortcircuit(dev, skb); + if (did_rsc) + f = vxlan_find_mac(vxlan, eth->h_dest); + } + + if (f == NULL) { + rdst0 = &vxlan->default_dst; + + if (rdst0->remote_ip == htonl(INADDR_ANY) && + (vxlan->flags & VXLAN_F_L2MISS) && + !is_multicast_ether_addr(eth->h_dest)) + vxlan_fdb_miss(vxlan, eth->h_dest); + } else + rdst0 = &f->remote; + + rc = NETDEV_TX_OK; + + /* if there are multiple destinations, send copies */ + for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) { + struct sk_buff *skb1; + + skb1 = skb_clone(skb, GFP_ATOMIC); + rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc); + if (rc == NETDEV_TX_OK) + rc = rc1; + } + + rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc); + if (rc == NETDEV_TX_OK) + rc = rc1; + return rc; +} + /* Walk the forwarding table and purge stale entries */ static void vxlan_cleanup(unsigned long arg) { @@ -1034,10 +1187,8 @@ static void vxlan_cleanup(unsigned long arg) /* Setup stats when device is created */ static int vxlan_init(struct net_device *dev) { - struct vxlan_dev *vxlan = netdev_priv(dev); - - vxlan->stats = alloc_percpu(struct vxlan_stats); - if (!vxlan->stats) + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) return -ENOMEM; return 0; @@ -1049,7 +1200,7 @@ static int vxlan_open(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; - if 
(vxlan->gaddr) { + if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { err = vxlan_join_group(dev); if (err) return err; @@ -1083,7 +1234,7 @@ static int vxlan_stop(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - if (vxlan->gaddr) + if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) vxlan_leave_group(dev); del_timer_sync(&vxlan->age_timer); @@ -1093,49 +1244,6 @@ static int vxlan_stop(struct net_device *dev) return 0; } -/* Merge per-cpu statistics */ -static struct rtnl_link_stats64 *vxlan_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_stats tmp, sum = { 0 }; - unsigned int cpu; - - for_each_possible_cpu(cpu) { - unsigned int start; - const struct vxlan_stats *stats - = per_cpu_ptr(vxlan->stats, cpu); - - do { - start = u64_stats_fetch_begin_bh(&stats->syncp); - memcpy(&tmp, stats, sizeof(tmp)); - } while (u64_stats_fetch_retry_bh(&stats->syncp, start)); - - sum.tx_bytes += tmp.tx_bytes; - sum.tx_packets += tmp.tx_packets; - sum.rx_bytes += tmp.rx_bytes; - sum.rx_packets += tmp.rx_packets; - } - - stats->tx_bytes = sum.tx_bytes; - stats->tx_packets = sum.tx_packets; - stats->rx_bytes = sum.rx_bytes; - stats->rx_packets = sum.rx_packets; - - stats->multicast = dev->stats.multicast; - stats->rx_length_errors = dev->stats.rx_length_errors; - stats->rx_frame_errors = dev->stats.rx_frame_errors; - stats->rx_errors = dev->stats.rx_errors; - - stats->tx_dropped = dev->stats.tx_dropped; - stats->tx_carrier_errors = dev->stats.tx_carrier_errors; - stats->tx_aborted_errors = dev->stats.tx_aborted_errors; - stats->collisions = dev->stats.collisions; - stats->tx_errors = dev->stats.tx_errors; - - return stats; -} - /* Stub, nothing needs to be done. 
*/ static void vxlan_set_multicast_list(struct net_device *dev) { @@ -1146,7 +1254,7 @@ static const struct net_device_ops vxlan_netdev_ops = { .ndo_open = vxlan_open, .ndo_stop = vxlan_stop, .ndo_start_xmit = vxlan_xmit, - .ndo_get_stats64 = vxlan_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_set_rx_mode = vxlan_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -1163,9 +1271,7 @@ static struct device_type vxlan_type = { static void vxlan_free(struct net_device *dev) { - struct vxlan_dev *vxlan = netdev_priv(dev); - - free_percpu(vxlan->stats); + free_percpu(dev->tstats); free_netdev(dev); } @@ -1189,8 +1295,10 @@ static void vxlan_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -1212,7 +1320,7 @@ static void vxlan_setup(struct net_device *dev) static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_ID] = { .type = NLA_U32 }, - [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VXLAN_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, @@ -1250,14 +1358,6 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) return -ERANGE; } - if (data[IFLA_VXLAN_GROUP]) { - __be32 gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]); - if (!IN_MULTICAST(ntohl(gaddr))) { - pr_debug("group address is not IPv4 multicast\n"); - return -EADDRNOTAVAIL; - } - } - if (data[IFLA_VXLAN_PORT_RANGE]) { const struct ifla_vxlan_port_range *p = nla_data(data[IFLA_VXLAN_PORT_RANGE]); @@ -1288,6 
+1388,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; __u32 vni; int err; @@ -1299,21 +1400,21 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, pr_info("duplicate VNI %u\n", vni); return -EEXIST; } - vxlan->vni = vni; + dst->remote_vni = vni; - if (data[IFLA_VXLAN_GROUP]) - vxlan->gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]); + if (data[IFLA_VXLAN_REMOTE]) + dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_REMOTE]); if (data[IFLA_VXLAN_LOCAL]) vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]); if (data[IFLA_VXLAN_LINK] && - (vxlan->link = nla_get_u32(data[IFLA_VXLAN_LINK]))) { + (dst->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]))) { struct net_device *lowerdev - = __dev_get_by_index(net, vxlan->link); + = __dev_get_by_index(net, dst->remote_ifindex); if (!lowerdev) { - pr_info("ifindex %d does not exist\n", vxlan->link); + pr_info("ifindex %d does not exist\n", dst->remote_ifindex); return -ENODEV; } @@ -1365,7 +1466,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, err = register_netdevice(dev); if (!err) - hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni)); + hlist_add_head_rcu(&vxlan->hlist, vni_head(net, dst->remote_vni)); return err; } @@ -1383,7 +1484,7 @@ static size_t vxlan_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ - nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */ + nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_REMOTE */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ nla_total_size(sizeof(__be32))+ /* IFLA_VXLAN_LOCAL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ @@ -1402,18 +1503,19 @@ static size_t vxlan_get_size(const struct net_device *dev) static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) { const struct vxlan_dev *vxlan = 
netdev_priv(dev); + const struct vxlan_rdst *dst = &vxlan->default_dst; struct ifla_vxlan_port_range ports = { .low = htons(vxlan->port_min), .high = htons(vxlan->port_max), }; - if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni)) + if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) goto nla_put_failure; - if (vxlan->gaddr && nla_put_be32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr)) + if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_REMOTE, dst->remote_ip)) goto nla_put_failure; - if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link)) + if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex)) goto nla_put_failure; if (vxlan->saddr && nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr)) @@ -1506,6 +1608,14 @@ static __net_init int vxlan_init_net(struct net *net) static __net_exit void vxlan_exit_net(struct net *net) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_dev *vxlan; + unsigned h; + + rtnl_lock(); + for (h = 0; h < VNI_HASH_SIZE; ++h) + hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) + dev_close(vxlan->dev); + rtnl_unlock(); if (vn->sock) { sk_release_kernel(vn->sock->sk); @@ -1547,6 +1657,7 @@ static void __exit vxlan_cleanup_module(void) { rtnl_link_unregister(&vxlan_link_ops); unregister_pernet_device(&vxlan_net_ops); + rcu_barrier(); } module_exit(vxlan_cleanup_module); diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index 91454a4ec8bf..797024507c71 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -80,8 +80,6 @@ static int wil_vring_alloc(struct wil6210_priv *wil, struct vring *vring) */ vring->va = dma_alloc_coherent(dev, sz, &vring->pa, GFP_KERNEL); if (!vring->va) { - wil_err(wil, "vring_alloc [%d] failed to alloc DMA mem\n", - vring->size); kfree(vring->ctx); vring->ctx = NULL; return -ENOMEM; diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c index 
122146943bf2..523355b87659 100644 --- a/drivers/net/wireless/b43/dma.c +++ b/drivers/net/wireless/b43/dma.c @@ -419,8 +419,6 @@ static inline static int alloc_ringmemory(struct b43_dmaring *ring) { - gfp_t flags = GFP_KERNEL; - /* The specs call for 4K buffers for 30- and 32-bit DMA with 4K * alignment and 8K buffers for 64-bit DMA with 8K alignment. * In practice we could use smaller buffers for the latter, but the @@ -435,12 +433,9 @@ static int alloc_ringmemory(struct b43_dmaring *ring) ring->descbase = dma_alloc_coherent(ring->dev->dev->dma_dev, ring_mem_size, &(ring->dmabase), - flags); - if (!ring->descbase) { - b43err(ring->dev->wl, "DMA ringmemory allocation failed\n"); + GFP_KERNEL | __GFP_ZERO); + if (!ring->descbase) return -ENOMEM; - } - memset(ring->descbase, 0, ring_mem_size); return 0; } diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c index 2d3c6644f82d..faeafe219c57 100644 --- a/drivers/net/wireless/b43legacy/dma.c +++ b/drivers/net/wireless/b43legacy/dma.c @@ -334,13 +334,9 @@ static int alloc_ringmemory(struct b43legacy_dmaring *ring) ring->descbase = dma_alloc_coherent(ring->dev->dev->dma_dev, B43legacy_DMA_RINGMEMSIZE, &(ring->dmabase), - GFP_KERNEL); - if (!ring->descbase) { - b43legacyerr(ring->dev->wl, "DMA ringmemory allocation" - " failed\n"); + GFP_KERNEL | __GFP_ZERO); + if (!ring->descbase) return -ENOMEM; - } - memset(ring->descbase, 0, B43legacy_DMA_RINGMEMSIZE); return 0; } diff --git a/drivers/net/wireless/iwlegacy/3945.c b/drivers/net/wireless/iwlegacy/3945.c index e0b9d7fa5de0..dc1e6da9976a 100644 --- a/drivers/net/wireless/iwlegacy/3945.c +++ b/drivers/net/wireless/iwlegacy/3945.c @@ -2379,10 +2379,8 @@ il3945_hw_set_hw_params(struct il_priv *il) il->_3945.shared_virt = dma_alloc_coherent(&il->pci_dev->dev, sizeof(struct il3945_shared), &il->_3945.shared_phys, GFP_KERNEL); - if (!il->_3945.shared_virt) { - IL_ERR("failed to allocate pci memory\n"); + if (!il->_3945.shared_virt) return 
-ENOMEM; - } il->hw_params.bcast_id = IL3945_BROADCAST_ID; diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 431ae6cc5f8f..b8f82e688c72 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -1921,8 +1921,8 @@ drop_unlock: static inline int il4965_alloc_dma_ptr(struct il_priv *il, struct il_dma_ptr *ptr, size_t size) { - ptr->addr = - dma_alloc_coherent(&il->pci_dev->dev, size, &ptr->dma, GFP_KERNEL); + ptr->addr = dma_alloc_coherent(&il->pci_dev->dev, size, &ptr->dma, + GFP_KERNEL); if (!ptr->addr) return -ENOMEM; ptr->size = size; diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index 65becfe57a7d..592d0aa634a8 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -2566,15 +2566,13 @@ il_rx_queue_alloc(struct il_priv *il) INIT_LIST_HEAD(&rxq->rx_used); /* Alloc the circular buffer of Read Buffer Descriptors (RBDs) */ - rxq->bd = - dma_alloc_coherent(dev, 4 * RX_QUEUE_SIZE, &rxq->bd_dma, - GFP_KERNEL); + rxq->bd = dma_alloc_coherent(dev, 4 * RX_QUEUE_SIZE, &rxq->bd_dma, + GFP_KERNEL); if (!rxq->bd) goto err_bd; - rxq->rb_stts = - dma_alloc_coherent(dev, sizeof(struct il_rb_status), - &rxq->rb_stts_dma, GFP_KERNEL); + rxq->rb_stts = dma_alloc_coherent(dev, sizeof(struct il_rb_status), + &rxq->rb_stts_dma, GFP_KERNEL); if (!rxq->rb_stts) goto err_rb; @@ -2941,10 +2939,9 @@ il_tx_queue_alloc(struct il_priv *il, struct il_tx_queue *txq, u32 id) * shared with device */ txq->tfds = dma_alloc_coherent(dev, tfd_sz, &txq->q.dma_addr, GFP_KERNEL); - if (!txq->tfds) { - IL_ERR("Fail to alloc TFDs\n"); + if (!txq->tfds) goto error; - } + txq->q.id = id; return 0; @@ -4890,7 +4887,7 @@ il_add_beacon_time(struct il_priv *il, u32 base, u32 addon, } EXPORT_SYMBOL(il_add_beacon_time); -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int il_pci_suspend(struct device *device) @@ -4941,7 +4938,7 
@@ il_pci_resume(struct device *device) SIMPLE_DEV_PM_OPS(il_pm_ops, il_pci_suspend, il_pci_resume); EXPORT_SYMBOL(il_pm_ops); -#endif /* CONFIG_PM */ +#endif /* CONFIG_PM_SLEEP */ static void il_update_qos(struct il_priv *il) diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h index 728aa1306ab8..f8246f2d88f9 100644 --- a/drivers/net/wireless/iwlegacy/common.h +++ b/drivers/net/wireless/iwlegacy/common.h @@ -2231,9 +2231,8 @@ il_alloc_fw_desc(struct pci_dev *pci_dev, struct fw_desc *desc) return -EINVAL; } - desc->v_addr = - dma_alloc_coherent(&pci_dev->dev, desc->len, &desc->p_addr, - GFP_KERNEL); + desc->v_addr = dma_alloc_coherent(&pci_dev->dev, desc->len, + &desc->p_addr, GFP_KERNEL); return (desc->v_addr != NULL) ? 0 : -ENOMEM; } diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index d97c1fad7bc3..c5e30294c5ac 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -501,10 +501,8 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, * shared with device */ txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz, &txq->q.dma_addr, GFP_KERNEL); - if (!txq->tfds) { - IWL_ERR(trans, "dma_alloc_coherent(%zd) failed\n", tfd_sz); + if (!txq->tfds) goto error; - } BUILD_BUG_ON(IWL_HCMD_SCRATCHBUF_SIZE != sizeof(*txq->scratchbufs)); BUILD_BUG_ON(offsetof(struct iwl_pcie_txq_scratch_buf, scratch) != diff --git a/drivers/net/wireless/p54/p54spi.c b/drivers/net/wireless/p54/p54spi.c index 4fd49a007b51..978e7eb26567 100644 --- a/drivers/net/wireless/p54/p54spi.c +++ b/drivers/net/wireless/p54/p54spi.c @@ -396,7 +396,7 @@ static int p54spi_rx(struct p54s_priv *priv) static irqreturn_t p54spi_interrupt(int irq, void *config) { struct spi_device *spi = config; - struct p54s_priv *priv = dev_get_drvdata(&spi->dev); + struct p54s_priv *priv = spi_get_drvdata(spi); ieee80211_queue_work(priv->hw, &priv->work); @@ -609,7 +609,7 @@ static int 
p54spi_probe(struct spi_device *spi) priv = hw->priv; priv->hw = hw; - dev_set_drvdata(&spi->dev, priv); + spi_set_drvdata(spi, priv); priv->spi = spi; spi->bits_per_word = 16; @@ -685,7 +685,7 @@ err_free: static int p54spi_remove(struct spi_device *spi) { - struct p54s_priv *priv = dev_get_drvdata(&spi->dev); + struct p54s_priv *priv = spi_get_drvdata(spi); p54_unregister_common(priv->hw); diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 4775b5d172d5..ebada812b3a5 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -953,7 +953,7 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, unsigned char *data, int len) { __be16 proto = ((struct ethhdr *)data)->h_proto; - if (ntohs(proto) >= 1536) { /* DIX II ethernet frame */ + if (ntohs(proto) >= ETH_P_802_3_MIN) { /* DIX II ethernet frame */ pr_debug("ray_cs translate_frame DIX II\n"); /* Copy LLC header to card buffer */ memcpy_toio(&ptx->var, eth2_llc, sizeof(eth2_llc)); diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index 3b266d3231a3..4c67c2f9ea71 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -257,7 +257,7 @@ static int wl1251_spi_probe(struct spi_device *spi) wl = hw->priv; SET_IEEE80211_DEV(hw, &spi->dev); - dev_set_drvdata(&spi->dev, wl); + spi_set_drvdata(spi, wl); wl->if_priv = spi; wl->if_ops = &wl1251_spi_ops; @@ -311,7 +311,7 @@ static int wl1251_spi_probe(struct spi_device *spi) static int wl1251_spi_remove(struct spi_device *spi) { - struct wl1251 *wl = dev_get_drvdata(&spi->dev); + struct wl1251 *wl = spi_get_drvdata(spi); free_irq(wl->irq, wl); wl1251_free_hw(wl); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index cd49ba949636..a2865f17c667 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -47,11 +47,25 @@ #include <asm/xen/hypercall.h> #include 
<asm/xen/page.h> +/* + * This is the maximum slots a skb can have. If a guest sends a skb + * which exceeds this limit it is considered malicious. + */ +#define MAX_SKB_SLOTS_DEFAULT 20 +static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT; +module_param(max_skb_slots, uint, 0444); + +typedef unsigned int pending_ring_idx_t; +#define INVALID_PENDING_RING_IDX (~0U) + struct pending_tx_info { - struct xen_netif_tx_request req; + struct xen_netif_tx_request req; /* coalesced tx request */ struct xenvif *vif; + pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX + * if it is head of one or more tx + * reqs + */ }; -typedef unsigned int pending_ring_idx_t; struct netbk_rx_meta { int id; @@ -102,7 +116,11 @@ struct xen_netbk { atomic_t netfront_count; struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; - struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; + /* Coalescing tx requests before copying makes number of grant + * copy ops greater or equal to number of slots required. In + * worst case a tx request consumes 2 gnttab_copy. + */ + struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; u16 pending_ring[MAX_PENDING_REQS]; @@ -118,6 +136,16 @@ struct xen_netbk { static struct xen_netbk *xen_netbk; static int xen_netbk_group_nr; +/* + * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of + * one or more merged tx requests, otherwise it is the continuation of + * previous tx request. 
+ */ +static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx) +{ + return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; +} + void xen_netbk_add_xenvif(struct xenvif *vif) { int i; @@ -250,6 +278,7 @@ static int max_required_rx_slots(struct xenvif *vif) { int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); + /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ if (vif->can_sg || vif->gso || vif->gso_prefix) max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ @@ -657,6 +686,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) __skb_queue_tail(&rxq, skb); /* Filled the batch queue? */ + /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE) break; } @@ -898,51 +928,91 @@ static void netbk_fatal_tx_err(struct xenvif *vif) static int netbk_count_requests(struct xenvif *vif, struct xen_netif_tx_request *first, + RING_IDX first_idx, struct xen_netif_tx_request *txp, int work_to_do) { RING_IDX cons = vif->tx.req_cons; - int frags = 0; + int slots = 0; + int drop_err = 0; if (!(first->flags & XEN_NETTXF_more_data)) return 0; do { - if (frags >= work_to_do) { - netdev_err(vif->dev, "Need more frags\n"); + if (slots >= work_to_do) { + netdev_err(vif->dev, + "Asked for %d slots but exceeds this limit\n", + work_to_do); netbk_fatal_tx_err(vif); return -ENODATA; } - if (unlikely(frags >= MAX_SKB_FRAGS)) { - netdev_err(vif->dev, "Too many frags\n"); + /* This guest is really using too many slots and + * considered malicious. + */ + if (unlikely(slots >= max_skb_slots)) { + netdev_err(vif->dev, + "Malicious frontend using %d slots, threshold %u\n", + slots, max_skb_slots); netbk_fatal_tx_err(vif); return -E2BIG; } - memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), + /* Xen network protocol had implicit dependency on + * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the + * historical MAX_SKB_FRAGS value 18 to honor the same + * behavior as before. 
Any packet using more than 18 + * slots but less than max_skb_slots slots is dropped + */ + if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) { + if (net_ratelimit()) + netdev_dbg(vif->dev, + "Too many slots (%d) exceeding limit (%d), dropping packet\n", + slots, XEN_NETIF_NR_SLOTS_MIN); + drop_err = -E2BIG; + } + + memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), sizeof(*txp)); - if (txp->size > first->size) { - netdev_err(vif->dev, "Frag is bigger than frame.\n"); - netbk_fatal_tx_err(vif); - return -EIO; + + /* If the guest submitted a frame >= 64 KiB then + * first->size overflowed and following slots will + * appear to be larger than the frame. + * + * This cannot be fatal error as there are buggy + * frontends that do this. + * + * Consume all slots and drop the packet. + */ + if (!drop_err && txp->size > first->size) { + if (net_ratelimit()) + netdev_dbg(vif->dev, + "Invalid tx request, slot size %u > remaining size %u\n", + txp->size, first->size); + drop_err = -EIO; } first->size -= txp->size; - frags++; + slots++; if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { - netdev_err(vif->dev, "txp->offset: %x, size: %u\n", + netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", txp->offset, txp->size); netbk_fatal_tx_err(vif); return -EINVAL; } } while ((txp++)->flags & XEN_NETTXF_more_data); - return frags; + + if (drop_err) { + netbk_tx_err(vif, first, first_idx + slots); + return drop_err; + } + + return slots; } static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk, - struct sk_buff *skb, u16 pending_idx) { struct page *page; @@ -963,48 +1033,114 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; u16 pending_idx = *((u16 *)skb->data); - int i, start; + u16 head_idx = 0; + int slot, start; + struct page *page; + pending_ring_idx_t index, start_idx = 0; + uint16_t dst_offset; + unsigned int nr_slots; + struct 
pending_tx_info *first = NULL; + + /* At this point shinfo->nr_frags is in fact the number of + * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN. + */ + nr_slots = shinfo->nr_frags; /* Skip first skb fragment if it is on same page as header fragment. */ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); - for (i = start; i < shinfo->nr_frags; i++, txp++) { - struct page *page; - pending_ring_idx_t index; + /* Coalesce tx requests, at this point the packet passed in + * should be <= 64K. Any packets larger than 64K have been + * handled in netbk_count_requests(). + */ + for (shinfo->nr_frags = slot = start; slot < nr_slots; + shinfo->nr_frags++) { struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; - index = pending_index(netbk->pending_cons++); - pending_idx = netbk->pending_ring[index]; - page = xen_netbk_alloc_page(netbk, skb, pending_idx); + page = alloc_page(GFP_KERNEL|__GFP_COLD); if (!page) goto err; - gop->source.u.ref = txp->gref; - gop->source.domid = vif->domid; - gop->source.offset = txp->offset; - - gop->dest.u.gmfn = virt_to_mfn(page_address(page)); - gop->dest.domid = DOMID_SELF; - gop->dest.offset = txp->offset; - - gop->len = txp->size; - gop->flags = GNTCOPY_source_gref; + dst_offset = 0; + first = NULL; + while (dst_offset < PAGE_SIZE && slot < nr_slots) { + gop->flags = GNTCOPY_source_gref; + + gop->source.u.ref = txp->gref; + gop->source.domid = vif->domid; + gop->source.offset = txp->offset; + + gop->dest.domid = DOMID_SELF; + + gop->dest.offset = dst_offset; + gop->dest.u.gmfn = virt_to_mfn(page_address(page)); + + if (dst_offset + txp->size > PAGE_SIZE) { + /* This page can only merge a portion + * of tx request. Do not increment any + * pointer / counter here. The txp + * will be dealt with in future + * rounds, eventually hitting the + * `else` branch. 
+ */ + gop->len = PAGE_SIZE - dst_offset; + txp->offset += gop->len; + txp->size -= gop->len; + dst_offset += gop->len; /* quit loop */ + } else { + /* This tx request can be merged in the page */ + gop->len = txp->size; + dst_offset += gop->len; + + index = pending_index(netbk->pending_cons++); + + pending_idx = netbk->pending_ring[index]; + + memcpy(&pending_tx_info[pending_idx].req, txp, + sizeof(*txp)); + xenvif_get(vif); + + pending_tx_info[pending_idx].vif = vif; + + /* Poison these fields, corresponding + * fields for head tx req will be set + * to correct values after the loop. + */ + netbk->mmap_pages[pending_idx] = (void *)(~0UL); + pending_tx_info[pending_idx].head = + INVALID_PENDING_RING_IDX; + + if (!first) { + first = &pending_tx_info[pending_idx]; + start_idx = index; + head_idx = pending_idx; + } + + txp++; + slot++; + } - gop++; + gop++; + } - memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); - xenvif_get(vif); - pending_tx_info[pending_idx].vif = vif; - frag_set_pending_idx(&frags[i], pending_idx); + first->req.offset = 0; + first->req.size = dst_offset; + first->head = start_idx; + set_page_ext(page, netbk, head_idx); + netbk->mmap_pages[head_idx] = page; + frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); } + BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); + return gop; err: /* Unwind, freeing all pages and sending error responses. */ - while (i-- > start) { - xen_netbk_idx_release(netbk, frag_get_pending_idx(&frags[i]), - XEN_NETIF_RSP_ERROR); + while (shinfo->nr_frags-- > start) { + xen_netbk_idx_release(netbk, + frag_get_pending_idx(&frags[shinfo->nr_frags]), + XEN_NETIF_RSP_ERROR); } /* The head too, if necessary. 
*/ if (start) @@ -1020,8 +1156,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, struct gnttab_copy *gop = *gopp; u16 pending_idx = *((u16 *)skb->data); struct skb_shared_info *shinfo = skb_shinfo(skb); + struct pending_tx_info *tx_info; int nr_frags = shinfo->nr_frags; int i, err, start; + u16 peek; /* peek into next tx request */ /* Check status of header. */ err = gop->status; @@ -1033,11 +1171,20 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, for (i = start; i < nr_frags; i++) { int j, newerr; + pending_ring_idx_t head; pending_idx = frag_get_pending_idx(&shinfo->frags[i]); + tx_info = &netbk->pending_tx_info[pending_idx]; + head = tx_info->head; /* Check error status: if okay then remember grant handle. */ - newerr = (++gop)->status; + do { + newerr = (++gop)->status; + if (newerr) + break; + peek = netbk->pending_ring[pending_index(++head)]; + } while (!pending_tx_is_head(netbk, peek)); + if (likely(!newerr)) { /* Had a previous error? Invalidate this fragment. 
*/ if (unlikely(err)) @@ -1157,7 +1304,6 @@ static int netbk_set_skb_gso(struct xenvif *vif, static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) { struct iphdr *iph; - unsigned char *th; int err = -EPROTO; int recalculate_partial_csum = 0; @@ -1181,27 +1327,26 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) goto out; iph = (void *)skb->data; - th = skb->data + 4 * iph->ihl; - if (th >= skb_tail_pointer(skb)) - goto out; - - skb->csum_start = th - skb->head; switch (iph->protocol) { case IPPROTO_TCP: - skb->csum_offset = offsetof(struct tcphdr, check); + if (!skb_partial_csum_set(skb, 4 * iph->ihl, + offsetof(struct tcphdr, check))) + goto out; if (recalculate_partial_csum) { - struct tcphdr *tcph = (struct tcphdr *)th; + struct tcphdr *tcph = tcp_hdr(skb); tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - iph->ihl*4, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - skb->csum_offset = offsetof(struct udphdr, check); + if (!skb_partial_csum_set(skb, 4 * iph->ihl, + offsetof(struct udphdr, check))) + goto out; if (recalculate_partial_csum) { - struct udphdr *udph = (struct udphdr *)th; + struct udphdr *udph = udp_hdr(skb); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - iph->ihl*4, IPPROTO_UDP, 0); @@ -1215,9 +1360,6 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) goto out; } - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) - goto out; - err = 0; out: @@ -1262,11 +1404,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) struct sk_buff *skb; int ret; - while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + < MAX_PENDING_REQS) && !list_empty(&netbk->net_schedule_list)) { struct xenvif *vif; struct xen_netif_tx_request txreq; - struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; + struct xen_netif_tx_request txfrags[max_skb_slots]; struct page *page; struct 
xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; u16 pending_idx; @@ -1327,7 +1470,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) continue; } - ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do); + ret = netbk_count_requests(vif, &txreq, idx, + txfrags, work_to_do); if (unlikely(ret < 0)) continue; @@ -1354,7 +1498,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) pending_idx = netbk->pending_ring[index]; data_len = (txreq.size > PKT_PROT_LEN && - ret < MAX_SKB_FRAGS) ? + ret < XEN_NETIF_NR_SLOTS_MIN) ? PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, @@ -1381,7 +1525,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) } /* XXX could copy straight to head */ - page = xen_netbk_alloc_page(netbk, skb, pending_idx); + page = xen_netbk_alloc_page(netbk, pending_idx); if (!page) { kfree_skb(skb); netbk_tx_err(vif, &txreq, idx); @@ -1404,6 +1548,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) memcpy(&netbk->pending_tx_info[pending_idx].req, &txreq, sizeof(txreq)); netbk->pending_tx_info[pending_idx].vif = vif; + netbk->pending_tx_info[pending_idx].head = index; *((u16 *)skb->data) = pending_idx; __skb_put(skb, data_len); @@ -1496,6 +1641,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) skb->dev = vif->dev; skb->protocol = eth_type_trans(skb, skb->dev); + skb_reset_network_header(skb); if (checksum_setup(vif, skb)) { netdev_dbg(vif->dev, @@ -1504,6 +1650,8 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) continue; } + skb_probe_transport_header(skb, 0); + vif->dev->stats.rx_bytes += skb->len; vif->dev->stats.rx_packets++; @@ -1531,7 +1679,10 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, { struct xenvif *vif; struct pending_tx_info *pending_tx_info; - pending_ring_idx_t index; + pending_ring_idx_t head; + u16 peek; /* peek into next tx request */ + + 
BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL)); /* Already complete? */ if (netbk->mmap_pages[pending_idx] == NULL) @@ -1540,19 +1691,40 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, pending_tx_info = &netbk->pending_tx_info[pending_idx]; vif = pending_tx_info->vif; + head = pending_tx_info->head; + + BUG_ON(!pending_tx_is_head(netbk, head)); + BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx); + + do { + pending_ring_idx_t index; + pending_ring_idx_t idx = pending_index(head); + u16 info_idx = netbk->pending_ring[idx]; - make_tx_response(vif, &pending_tx_info->req, status); + pending_tx_info = &netbk->pending_tx_info[info_idx]; + make_tx_response(vif, &pending_tx_info->req, status); - index = pending_index(netbk->pending_prod++); - netbk->pending_ring[index] = pending_idx; + /* Setting any number other than + * INVALID_PENDING_RING_IDX indicates this slot is + * starting a new packet / ending a previous packet. + */ + pending_tx_info->head = 0; - xenvif_put(vif); + index = pending_index(netbk->pending_prod++); + netbk->pending_ring[index] = netbk->pending_ring[info_idx]; + + xenvif_put(vif); + + peek = netbk->pending_ring[pending_index(++head)]; + + } while (!pending_tx_is_head(netbk, peek)); netbk->mmap_pages[pending_idx]->mapping = 0; put_page(netbk->mmap_pages[pending_idx]); netbk->mmap_pages[pending_idx] = NULL; } + static void make_tx_response(struct xenvif *vif, struct xen_netif_tx_request *txp, s8 st) @@ -1605,8 +1777,9 @@ static inline int rx_work_todo(struct xen_netbk *netbk) static inline int tx_work_todo(struct xen_netbk *netbk) { - if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && - !list_empty(&netbk->net_schedule_list)) + if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) return 1; return 0; @@ -1689,6 +1862,13 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; + if (max_skb_slots < 
XEN_NETIF_NR_SLOTS_MIN) { + printk(KERN_INFO + "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n", + max_skb_slots, XEN_NETIF_NR_SLOTS_MIN); + max_skb_slots = XEN_NETIF_NR_SLOTS_MIN; + } + xen_netbk_group_nr = num_online_cpus(); xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); if (!xen_netbk) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7ffa43bd7cf9..1db101415069 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -36,7 +36,7 @@ #include <linux/skbuff.h> #include <linux/ethtool.h> #include <linux/if_ether.h> -#include <linux/tcp.h> +#include <net/tcp.h> #include <linux/udp.h> #include <linux/moduleparam.h> #include <linux/mm.h> @@ -537,7 +537,6 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netfront_info *np = netdev_priv(dev); struct netfront_stats *stats = this_cpu_ptr(np->stats); struct xen_netif_tx_request *tx; - struct xen_netif_extra_info *extra; char *data = skb->data; RING_IDX i; grant_ref_t ref; @@ -548,6 +547,16 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int len = skb_headlen(skb); unsigned long flags; + /* If skb->len is too big for wire format, drop skb and alert + * user about misconfiguration. 
+ */ + if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) { + net_alert_ratelimited( + "xennet: skb->len = %u, too big for wire format\n", + skb->len); + goto drop; + } + slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) + xennet_count_skb_frag_slots(skb); if (unlikely(slots > MAX_SKB_FRAGS + 1)) { @@ -581,7 +590,6 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; - extra = NULL; tx->flags = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -597,10 +605,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) gso = (struct xen_netif_extra_info *) RING_GET_REQUEST(&np->tx, ++i); - if (extra) - extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; - else - tx->flags |= XEN_NETTXF_extra_info; + tx->flags |= XEN_NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; @@ -609,7 +614,6 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; - extra = gso; } np->tx.req_prod_pvt = i + 1; @@ -718,7 +722,7 @@ static int xennet_get_responses(struct netfront_info *np, struct sk_buff *skb = xennet_get_rx_skb(np, cons); grant_ref_t ref = xennet_get_rx_ref(np, cons); int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); - int frags = 1; + int slots = 1; int err = 0; unsigned long ret; @@ -741,7 +745,7 @@ static int xennet_get_responses(struct netfront_info *np, /* * This definitely indicates a bug, either in this driver or in * the backend driver. In future this should flag the bad - * situation to the system controller to reboot the backed. + * situation to the system controller to reboot the backend. 
*/ if (ref == GRANT_INVALID_REF) { if (net_ratelimit()) @@ -762,27 +766,27 @@ next: if (!(rx->flags & XEN_NETRXF_more_data)) break; - if (cons + frags == rp) { + if (cons + slots == rp) { if (net_ratelimit()) - dev_warn(dev, "Need more frags\n"); + dev_warn(dev, "Need more slots\n"); err = -ENOENT; break; } - rx = RING_GET_RESPONSE(&np->rx, cons + frags); - skb = xennet_get_rx_skb(np, cons + frags); - ref = xennet_get_rx_ref(np, cons + frags); - frags++; + rx = RING_GET_RESPONSE(&np->rx, cons + slots); + skb = xennet_get_rx_skb(np, cons + slots); + ref = xennet_get_rx_ref(np, cons + slots); + slots++; } - if (unlikely(frags > max)) { + if (unlikely(slots > max)) { if (net_ratelimit()) - dev_warn(dev, "Too many frags\n"); + dev_warn(dev, "Too many slots\n"); err = -E2BIG; } if (unlikely(err)) - np->rx.rsp_cons = cons + frags; + np->rx.rsp_cons = cons + slots; return err; } @@ -1064,7 +1068,8 @@ err: static int xennet_change_mtu(struct net_device *dev, int mtu) { - int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; + int max = xennet_can_sg(dev) ? + XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN; if (mtu > max) return -EINVAL; @@ -1368,6 +1373,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); SET_NETDEV_DEV(netdev, &dev->dev); + netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); + np->netdev = netdev; netif_carrier_off(netdev); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index e3a8b22ef9dd..23049aeca662 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -34,7 +34,10 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { struct phy_device *phy; struct device_node *child; - int rc, i; + const __be32 *paddr; + u32 addr; + bool is_c45, scanphys = false; + int rc, i, len; /* Mask out all PHYs from auto probing. 
Instead the PHYs listed in * the device tree are populated after the bus has been registered */ @@ -54,14 +57,10 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) /* Loop over the child nodes and register a phy_device for each one */ for_each_available_child_of_node(np, child) { - const __be32 *paddr; - u32 addr; - int len; - bool is_c45; - /* A PHY must have a reg property in the range [0-31] */ paddr = of_get_property(child, "reg", &len); if (!paddr || len < sizeof(*paddr)) { + scanphys = true; dev_err(&mdio->dev, "%s has invalid PHY address\n", child->full_name); continue; @@ -111,6 +110,59 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) child->name, addr); } + if (!scanphys) + return 0; + + /* auto scan for PHYs with empty reg property */ + for_each_available_child_of_node(np, child) { + /* Skip PHYs with reg property set */ + paddr = of_get_property(child, "reg", &len); + if (paddr) + continue; + + is_c45 = of_device_is_compatible(child, + "ethernet-phy-ieee802.3-c45"); + + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + /* skip already registered PHYs */ + if (mdio->phy_map[addr]) + continue; + + /* be noisy to encourage people to set reg property */ + dev_info(&mdio->dev, "scan phy %s at address %i\n", + child->name, addr); + + phy = get_phy_device(mdio, addr, is_c45); + if (!phy || IS_ERR(phy)) + continue; + + if (mdio->irq) { + mdio->irq[addr] = + irq_of_parse_and_map(child, 0); + if (!mdio->irq[addr]) + mdio->irq[addr] = PHY_POLL; + } + + /* Associate the OF node with the device structure so it + * can be looked up later */ + of_node_get(child); + phy->dev.of_node = child; + + /* All data is now stored in the phy struct; + * register it */ + rc = phy_device_register(phy); + if (rc) { + phy_device_free(phy); + of_node_put(child); + continue; + } + + dev_info(&mdio->dev, "registered phy %s at address %i\n", + child->name, addr); + break; + } + } + return 0; } EXPORT_SYMBOL(of_mdiobus_register); diff --git 
a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 445ffda715ad..7c12d9c2b230 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -276,6 +276,7 @@ static struct file_system_type oprofilefs_type = { .mount = oprofilefs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("oprofilefs"); int __init oprofilefs_register(void) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 39c937f9b426..5147c210df52 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -53,14 +53,15 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context) return; } - if (!pci_dev->pm_cap || !pci_dev->pme_support - || pci_check_pme_status(pci_dev)) { - if (pci_dev->pme_poll) - pci_dev->pme_poll = false; + /* Clear PME Status if set. */ + if (pci_dev->pme_support) + pci_check_pme_status(pci_dev); - pci_wakeup_event(pci_dev); - pm_runtime_resume(&pci_dev->dev); - } + if (pci_dev->pme_poll) + pci_dev->pme_poll = false; + + pci_wakeup_event(pci_dev); + pm_runtime_resume(&pci_dev->dev); if (pci_dev->subordinate) pci_pme_wakeup_bus(pci_dev->subordinate); @@ -331,8 +332,14 @@ static void pci_acpi_cleanup(struct device *dev) } } +static bool pci_acpi_bus_match(struct device *dev) +{ + return dev->bus == &pci_bus_type; +} + static struct acpi_bus_type acpi_pci_bus = { - .bus = &pci_bus_type, + .name = "PCI", + .match = pci_acpi_bus_match, .find_device = acpi_pci_find_device, .setup = pci_acpi_setup, .cleanup = pci_acpi_cleanup, diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 1fa1e482a999..79277fb36c6b 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -390,9 +390,10 @@ static void pci_device_shutdown(struct device *dev) /* * Turn off Bus Master bit on the device to tell it to not - * continue to do DMA + * continue to do DMA. Don't touch devices in D3cold or unknown states. 
*/ - pci_clear_master(pci_dev); + if (pci_dev->current_state <= PCI_D3hot) + pci_clear_master(pci_dev); } #ifdef CONFIG_PM diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 08c243ab034e..ed4d09498337 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -185,14 +185,6 @@ static const struct dev_pm_ops pcie_portdrv_pm_ops = { #endif /* !PM */ /* - * PCIe port runtime suspend is broken for some chipsets, so use a - * black list to disable runtime PM for these chipsets. - */ -static const struct pci_device_id port_runtime_pm_black_list[] = { - { /* end: all zeroes */ } -}; - -/* * pcie_portdrv_probe - Probe PCI-Express port devices * @dev: PCI-Express port device being probed * @@ -225,16 +217,11 @@ static int pcie_portdrv_probe(struct pci_dev *dev, * it by default. */ dev->d3cold_allowed = false; - if (!pci_match_id(port_runtime_pm_black_list, dev)) - pm_runtime_put_noidle(&dev->dev); - return 0; } static void pcie_portdrv_remove(struct pci_dev *dev) { - if (!pci_match_id(port_runtime_pm_black_list, dev)) - pm_runtime_get_noresume(&dev->dev); pcie_port_device_remove(dev); pci_disable_device(dev); } diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index ab886b7ee327..c5d0a08a8747 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -118,17 +118,11 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) void __iomem *rom; /* - * Some devices may provide ROMs via a source other than the BAR - */ - if (pdev->rom && pdev->romlen) { - *size = pdev->romlen; - return phys_to_virt(pdev->rom); - /* * IORESOURCE_ROM_SHADOW set on x86, x86_64 and IA64 supports legacy * memory map if the VGA enable bit of the Bridge Control register is * set for embedded VGA. 
*/ - } else if (res->flags & IORESOURCE_ROM_SHADOW) { + if (res->flags & IORESOURCE_ROM_SHADOW) { /* primary video rom always starts here */ start = (loff_t)0xC0000; *size = 0x20000; /* cover C000:0 through E000:0 */ @@ -187,8 +181,7 @@ void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom) if (res->flags & (IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY)) return; - if (!pdev->rom || !pdev->romlen) - iounmap(rom); + iounmap(rom); /* Disable again before continuing, leave enabled if pci=rom */ if (!(res->flags & (IORESOURCE_ROM_ENABLE | IORESOURCE_ROM_SHADOW))) @@ -212,7 +205,24 @@ void pci_cleanup_rom(struct pci_dev *pdev) } } +/** + * pci_platform_rom - provides a pointer to any ROM image provided by the + * platform + * @pdev: pointer to pci device struct + * @size: pointer to receive size of pci window over ROM + */ +void __iomem *pci_platform_rom(struct pci_dev *pdev, size_t *size) +{ + if (pdev->rom && pdev->romlen) { + *size = pdev->romlen; + return phys_to_virt((phys_addr_t)pdev->rom); + } + + return NULL; +} + EXPORT_SYMBOL(pci_map_rom); EXPORT_SYMBOL(pci_unmap_rom); EXPORT_SYMBOL_GPL(pci_enable_rom); EXPORT_SYMBOL_GPL(pci_disable_rom); +EXPORT_SYMBOL(pci_platform_rom); diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c index c689c04a4f52..2d2f0a43d36b 100644 --- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c +++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c @@ -620,7 +620,7 @@ int mvebu_pinctrl_probe(struct platform_device *pdev) /* special soc specific control */ if (ctrl->mpp_get || ctrl->mpp_set) { - if (!ctrl->name || !ctrl->mpp_set || !ctrl->mpp_set) { + if (!ctrl->name || !ctrl->mpp_get || !ctrl->mpp_set) { dev_err(&pdev->dev, "wrong soc control info\n"); return -EINVAL; } diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index ac8d382a79bb..d611ecfcbf70 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -622,7 +622,7 @@ static const struct file_operations 
pinconf_dbg_pinname_fops = { static int pinconf_dbg_state_print(struct seq_file *s, void *d) { if (strlen(dbg_state_name)) - seq_printf(s, "%s\n", dbg_pinname); + seq_printf(s, "%s\n", dbg_state_name); else seq_printf(s, "No pin state set\n"); return 0; diff --git a/drivers/pinctrl/pinconf.h b/drivers/pinctrl/pinconf.h index e3ed8cb072a5..bfda73d64eed 100644 --- a/drivers/pinctrl/pinconf.h +++ b/drivers/pinctrl/pinconf.h @@ -90,7 +90,7 @@ static inline void pinconf_init_device_debugfs(struct dentry *devroot, * pin config. */ -#ifdef CONFIG_GENERIC_PINCONF +#if defined(CONFIG_GENERIC_PINCONF) && defined(CONFIG_DEBUG_FS) void pinconf_generic_dump_pin(struct pinctrl_dev *pctldev, struct seq_file *s, unsigned pin); diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c index caecdd373061..c542a97c82f3 100644 --- a/drivers/pinctrl/pinctrl-abx500.c +++ b/drivers/pinctrl/pinctrl-abx500.c @@ -422,7 +422,7 @@ static u8 abx500_get_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip, } /* check if pin use AlternateFunction register */ - if ((af.alt_bit1 == UNUSED) && (af.alt_bit1 == UNUSED)) + if ((af.alt_bit1 == UNUSED) && (af.alt_bit2 == UNUSED)) return mode; /* * if pin GPIOSEL bit is set and pin supports alternate function, diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 75933a6aa828..efb7f10e902a 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1277,21 +1277,80 @@ static int alt_gpio_irq_type(struct irq_data *d, unsigned type) } #ifdef CONFIG_PM + +static u32 wakeups[MAX_GPIO_BANKS]; +static u32 backups[MAX_GPIO_BANKS]; + static int gpio_irq_set_wake(struct irq_data *d, unsigned state) { struct at91_gpio_chip *at91_gpio = irq_data_get_irq_chip_data(d); unsigned bank = at91_gpio->pioc_idx; + unsigned mask = 1 << d->hwirq; if (unlikely(bank >= MAX_GPIO_BANKS)) return -EINVAL; + if (state) + wakeups[bank] |= mask; + else + wakeups[bank] &= ~mask; + 
irq_set_irq_wake(at91_gpio->pioc_virq, state); return 0; } + +void at91_pinctrl_gpio_suspend(void) +{ + int i; + + for (i = 0; i < gpio_banks; i++) { + void __iomem *pio; + + if (!gpio_chips[i]) + continue; + + pio = gpio_chips[i]->regbase; + + backups[i] = __raw_readl(pio + PIO_IMR); + __raw_writel(backups[i], pio + PIO_IDR); + __raw_writel(wakeups[i], pio + PIO_IER); + + if (!wakeups[i]) { + clk_unprepare(gpio_chips[i]->clock); + clk_disable(gpio_chips[i]->clock); + } else { + printk(KERN_DEBUG "GPIO-%c may wake for %08x\n", + 'A'+i, wakeups[i]); + } + } +} + +void at91_pinctrl_gpio_resume(void) +{ + int i; + + for (i = 0; i < gpio_banks; i++) { + void __iomem *pio; + + if (!gpio_chips[i]) + continue; + + pio = gpio_chips[i]->regbase; + + if (!wakeups[i]) { + if (clk_prepare(gpio_chips[i]->clock) == 0) + clk_enable(gpio_chips[i]->clock); + } + + __raw_writel(wakeups[i], pio + PIO_IDR); + __raw_writel(backups[i], pio + PIO_IER); + } +} + #else #define gpio_irq_set_wake NULL -#endif +#endif /* CONFIG_PM */ static struct irq_chip gpio_irqchip = { .name = "GPIO", diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 1a00658b3ea0..bd83c8b01cd1 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -194,6 +194,11 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin, } if (!gpio_range) { + /* + * A pin should not be freed more times than allocated. 
+ */ + if (WARN_ON(!desc->mux_usecount)) + return NULL; desc->mux_usecount--; if (desc->mux_usecount) return NULL; diff --git a/drivers/platform/x86/chromeos_laptop.c b/drivers/platform/x86/chromeos_laptop.c index 93d66809355a..3e5b4497a1d0 100644 --- a/drivers/platform/x86/chromeos_laptop.c +++ b/drivers/platform/x86/chromeos_laptop.c @@ -23,6 +23,9 @@ #include <linux/dmi.h> #include <linux/i2c.h> +#include <linux/i2c/atmel_mxt_ts.h> +#include <linux/input.h> +#include <linux/interrupt.h> #include <linux/module.h> #define ATMEL_TP_I2C_ADDR 0x4b @@ -67,15 +70,49 @@ static struct i2c_board_info __initdata tsl2563_als_device = { I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR), }; +static struct mxt_platform_data atmel_224s_tp_platform_data = { + .x_line = 18, + .y_line = 12, + .x_size = 102*20, + .y_size = 68*20, + .blen = 0x80, /* Gain setting is in upper 4 bits */ + .threshold = 0x32, + .voltage = 0, /* 3.3V */ + .orient = MXT_VERTICAL_FLIP, + .irqflags = IRQF_TRIGGER_FALLING, + .is_tp = true, + .key_map = { KEY_RESERVED, + KEY_RESERVED, + KEY_RESERVED, + BTN_LEFT }, + .config = NULL, + .config_length = 0, +}; + static struct i2c_board_info __initdata atmel_224s_tp_device = { I2C_BOARD_INFO("atmel_mxt_tp", ATMEL_TP_I2C_ADDR), - .platform_data = NULL, + .platform_data = &atmel_224s_tp_platform_data, .flags = I2C_CLIENT_WAKE, }; +static struct mxt_platform_data atmel_1664s_platform_data = { + .x_line = 32, + .y_line = 50, + .x_size = 1700, + .y_size = 2560, + .blen = 0x89, /* Gain setting is in upper 4 bits */ + .threshold = 0x28, + .voltage = 0, /* 3.3V */ + .orient = MXT_ROTATED_90_COUNTER, + .irqflags = IRQF_TRIGGER_FALLING, + .is_tp = false, + .config = NULL, + .config_length = 0, +}; + static struct i2c_board_info __initdata atmel_1664s_device = { I2C_BOARD_INFO("atmel_mxt_ts", ATMEL_TS_I2C_ADDR), - .platform_data = NULL, + .platform_data = &atmel_1664s_platform_data, .flags = I2C_CLIENT_WAKE, }; diff --git a/drivers/platform/x86/hp-wmi.c 
b/drivers/platform/x86/hp-wmi.c index 45cacf79f3a7..1a779bbfb87d 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -134,7 +134,6 @@ static const struct key_entry hp_wmi_keymap[] = { { KE_KEY, 0x2142, { KEY_MEDIA } }, { KE_KEY, 0x213b, { KEY_INFO } }, { KE_KEY, 0x2169, { KEY_DIRECTION } }, - { KE_KEY, 0x216a, { KEY_SETUP } }, { KE_KEY, 0x231b, { KEY_HELP } }, { KE_END, 0 } }; @@ -925,9 +924,6 @@ static int __init hp_wmi_init(void) err = hp_wmi_input_setup(); if (err) return err; - - //Enable magic for hotkeys that run on the SMBus - ec_write(0xe6,0x6e); } if (bios_capable) { diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 9a907567f41e..edec135b1685 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1964,9 +1964,6 @@ struct tp_nvram_state { /* kthread for the hotkey poller */ static struct task_struct *tpacpi_hotkey_task; -/* Acquired while the poller kthread is running, use to sync start/stop */ -static struct mutex hotkey_thread_mutex; - /* * Acquire mutex to write poller control variables as an * atomic block. 
@@ -2462,8 +2459,6 @@ static int hotkey_kthread(void *data) unsigned int poll_freq; bool was_frozen; - mutex_lock(&hotkey_thread_mutex); - if (tpacpi_lifecycle == TPACPI_LIFE_EXITING) goto exit; @@ -2523,7 +2518,6 @@ static int hotkey_kthread(void *data) } exit: - mutex_unlock(&hotkey_thread_mutex); return 0; } @@ -2533,9 +2527,6 @@ static void hotkey_poll_stop_sync(void) if (tpacpi_hotkey_task) { kthread_stop(tpacpi_hotkey_task); tpacpi_hotkey_task = NULL; - mutex_lock(&hotkey_thread_mutex); - /* at this point, the thread did exit */ - mutex_unlock(&hotkey_thread_mutex); } } @@ -3234,7 +3225,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) mutex_init(&hotkey_mutex); #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL - mutex_init(&hotkey_thread_mutex); mutex_init(&hotkey_thread_data_mutex); #endif diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 8813fc03aa09..55cd459a3908 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -353,8 +353,14 @@ static int __init acpi_pnp_find_device(struct device *dev, acpi_handle * handle) /* complete initialization of a PNPACPI device includes having * pnpdev->dev.archdata.acpi_handle point to its ACPI sibling. */ +static bool acpi_pnp_bus_match(struct device *dev) +{ + return dev->bus == &pnp_bus_type; +} + static struct acpi_bus_type __initdata acpi_pnp_bus = { - .bus = &pnp_bus_type, + .name = "PNP", + .match = acpi_pnp_bus_match, .find_device = acpi_pnp_find_device, }; diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 79f4bce061bd..4a8c388364ca 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include <linux/bitops.h> +#include <linux/idr.h> #include <linux/device.h> #include <linux/err.h> #include <linux/init.h> @@ -32,7 +32,6 @@ #include "ptp_private.h" #define PTP_MAX_ALARMS 4 -#define PTP_MAX_CLOCKS 8 #define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT) #define PTP_PPS_EVENT PPS_CAPTUREASSERT #define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC) @@ -42,8 +41,7 @@ static dev_t ptp_devt; static struct class *ptp_class; -static DECLARE_BITMAP(ptp_clocks_map, PTP_MAX_CLOCKS); -static DEFINE_MUTEX(ptp_clocks_mutex); /* protects 'ptp_clocks_map' */ +static DEFINE_IDA(ptp_clocks_map); /* time stamp event queue operations */ @@ -171,12 +169,7 @@ static void delete_ptp_clock(struct posix_clock *pc) struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); mutex_destroy(&ptp->tsevq_mux); - - /* Remove the clock from the bit map. */ - mutex_lock(&ptp_clocks_mutex); - clear_bit(ptp->index, ptp_clocks_map); - mutex_unlock(&ptp_clocks_mutex); - + ida_simple_remove(&ptp_clocks_map, ptp->index); kfree(ptp); } @@ -191,21 +184,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, if (info->n_alarm > PTP_MAX_ALARMS) return ERR_PTR(-EINVAL); - /* Find a free clock slot and reserve it. */ - err = -EBUSY; - mutex_lock(&ptp_clocks_mutex); - index = find_first_zero_bit(ptp_clocks_map, PTP_MAX_CLOCKS); - if (index < PTP_MAX_CLOCKS) - set_bit(index, ptp_clocks_map); - else - goto no_slot; - /* Initialize a clock structure. 
*/ err = -ENOMEM; ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL); if (ptp == NULL) goto no_memory; + index = ida_simple_get(&ptp_clocks_map, 0, MINORMASK + 1, GFP_KERNEL); + if (index < 0) { + err = index; + goto no_slot; + } + ptp->clock.ops = ptp_clock_ops; ptp->clock.release = delete_ptp_clock; ptp->info = info; @@ -248,7 +238,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, goto no_clock; } - mutex_unlock(&ptp_clocks_mutex); return ptp; no_clock: @@ -260,11 +249,9 @@ no_sysfs: device_destroy(ptp_class, ptp->devid); no_device: mutex_destroy(&ptp->tsevq_mux); +no_slot: kfree(ptp); no_memory: - clear_bit(index, ptp_clocks_map); -no_slot: - mutex_unlock(&ptp_clocks_mutex); return ERR_PTR(err); } EXPORT_SYMBOL(ptp_clock_register); @@ -323,7 +310,8 @@ EXPORT_SYMBOL(ptp_clock_index); static void __exit ptp_exit(void) { class_destroy(ptp_class); - unregister_chrdev_region(ptp_devt, PTP_MAX_CLOCKS); + unregister_chrdev_region(ptp_devt, MINORMASK + 1); + ida_destroy(&ptp_clocks_map); } static int __init ptp_init(void) @@ -336,7 +324,7 @@ static int __init ptp_init(void) return PTR_ERR(ptp_class); } - err = alloc_chrdev_region(&ptp_devt, 0, PTP_MAX_CLOCKS, "ptp"); + err = alloc_chrdev_region(&ptp_devt, 0, MINORMASK + 1, "ptp"); if (err < 0) { pr_err("ptp: failed to allocate device region\n"); goto no_region; diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index 1367655eee39..bea94510ad2d 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -118,7 +118,7 @@ struct pch_ts_regs { * struct pch_dev - Driver private data */ struct pch_dev { - struct pch_ts_regs *regs; + struct pch_ts_regs __iomem *regs; struct ptp_clock *ptp_clock; struct ptp_clock_info caps; int exts0_enabled; @@ -154,7 +154,7 @@ static inline void pch_eth_enable_set(struct pch_dev *chip) iowrite32(val, (&chip->regs->ts_sel)); } -static u64 pch_systime_read(struct pch_ts_regs *regs) +static u64 pch_systime_read(struct pch_ts_regs __iomem *regs) { u64 ns; u32 
lo, hi; @@ -169,7 +169,7 @@ static u64 pch_systime_read(struct pch_ts_regs *regs) return ns; } -static void pch_systime_write(struct pch_ts_regs *regs, u64 ns) +static void pch_systime_write(struct pch_ts_regs __iomem *regs, u64 ns) { u32 hi, lo; @@ -315,7 +315,7 @@ int pch_set_station_address(u8 *addr, struct pci_dev *pdev) struct pch_dev *chip = pci_get_drvdata(pdev); /* Verify the parameter */ - if ((chip->regs == 0) || addr == (u8 *)NULL) { + if ((chip->regs == NULL) || addr == (u8 *)NULL) { dev_err(&pdev->dev, "invalid params returning PCH_INVALIDPARAM\n"); return PCH_INVALIDPARAM; @@ -361,7 +361,7 @@ EXPORT_SYMBOL(pch_set_station_address); static irqreturn_t isr(int irq, void *priv) { struct pch_dev *pch_dev = priv; - struct pch_ts_regs *regs = pch_dev->regs; + struct pch_ts_regs __iomem *regs = pch_dev->regs; struct ptp_clock_event event; u32 ack = 0, lo, hi, val; @@ -415,7 +415,7 @@ static int ptp_pch_adjfreq(struct ptp_clock_info *ptp, s32 ppb) u32 diff, addend; int neg_adj = 0; struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps); - struct pch_ts_regs *regs = pch_dev->regs; + struct pch_ts_regs __iomem *regs = pch_dev->regs; if (ppb < 0) { neg_adj = 1; @@ -438,7 +438,7 @@ static int ptp_pch_adjtime(struct ptp_clock_info *ptp, s64 delta) s64 now; unsigned long flags; struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps); - struct pch_ts_regs *regs = pch_dev->regs; + struct pch_ts_regs __iomem *regs = pch_dev->regs; spin_lock_irqsave(&pch_dev->register_lock, flags); now = pch_systime_read(regs); @@ -455,7 +455,7 @@ static int ptp_pch_gettime(struct ptp_clock_info *ptp, struct timespec *ts) u32 remainder; unsigned long flags; struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps); - struct pch_ts_regs *regs = pch_dev->regs; + struct pch_ts_regs __iomem *regs = pch_dev->regs; spin_lock_irqsave(&pch_dev->register_lock, flags); ns = pch_systime_read(regs); @@ -472,7 +472,7 @@ static int ptp_pch_settime(struct ptp_clock_info 
*ptp, u64 ns; unsigned long flags; struct pch_dev *pch_dev = container_of(ptp, struct pch_dev, caps); - struct pch_ts_regs *regs = pch_dev->regs; + struct pch_ts_regs __iomem *regs = pch_dev->regs; ns = ts->tv_sec * 1000000000ULL; ns += ts->tv_nsec; @@ -567,9 +567,9 @@ static void pch_remove(struct pci_dev *pdev) free_irq(pdev->irq, chip); /* unmap the virtual IO memory space */ - if (chip->regs != 0) { + if (chip->regs != NULL) { iounmap(chip->regs); - chip->regs = 0; + chip->regs = NULL; } /* release the reserved IO memory space */ if (chip->mem_base != 0) { @@ -670,7 +670,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_req_irq: ptp_clock_unregister(chip->ptp_clock); iounmap(chip->regs); - chip->regs = 0; + chip->regs = NULL; err_ioremap: release_mem_region(chip->mem_base, chip->mem_size); @@ -723,9 +723,10 @@ static s32 __init ptp_pch_init(void) module_init(ptp_pch_init); module_exit(ptp_pch_exit); -module_param_string(station, pch_param.station, sizeof pch_param.station, 0444); +module_param_string(station, + pch_param.station, sizeof(pch_param.station), 0444); MODULE_PARM_DESC(station, - "IEEE 1588 station address to use - column separated hex values"); + "IEEE 1588 station address to use - colon separated hex values"); MODULE_AUTHOR("LAPIS SEMICONDUCTOR, <tshimizu818@gmail.com>"); MODULE_DESCRIPTION("PTP clock using the EG20T timer"); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index da9782bd27d0..e3661c20cf38 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2830,7 +2830,7 @@ EXPORT_SYMBOL_GPL(regulator_get_bypass_regmap); * regulator_allow_bypass - allow the regulator to go into bypass mode * * @regulator: Regulator to configure - * @allow: enable or disable bypass mode + * @enable: enable or disable bypass mode * * Allow the regulator to go into bypass mode if all other consumers * for the regulator also enable bypass mode and the machine @@ -3057,9 +3057,13 @@ int 
regulator_bulk_enable(int num_consumers, return 0; err: - pr_err("Failed to enable %s: %d\n", consumers[i].supply, ret); - while (--i >= 0) - regulator_disable(consumers[i].consumer); + for (i = 0; i < num_consumers; i++) { + if (consumers[i].ret < 0) + pr_err("Failed to enable %s: %d\n", consumers[i].supply, + consumers[i].ret); + else + regulator_disable(consumers[i].consumer); + } return ret; } diff --git a/drivers/regulator/db8500-prcmu.c b/drivers/regulator/db8500-prcmu.c index 219d162b651e..a53c11a529d5 100644 --- a/drivers/regulator/db8500-prcmu.c +++ b/drivers/regulator/db8500-prcmu.c @@ -528,7 +528,7 @@ static int db8500_regulator_probe(struct platform_device *pdev) return 0; } -static int __exit db8500_regulator_remove(struct platform_device *pdev) +static int db8500_regulator_remove(struct platform_device *pdev) { int i; @@ -553,7 +553,7 @@ static struct platform_driver db8500_regulator_driver = { .owner = THIS_MODULE, }, .probe = db8500_regulator_probe, - .remove = __exit_p(db8500_regulator_remove), + .remove = db8500_regulator_remove, }; static int __init db8500_regulator_init(void) diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index cde13bb5a8fb..39cf14606784 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -4,6 +4,7 @@ * Copyright 2011-2012 Texas Instruments Inc. 
* * Author: Graeme Gregory <gg@slimlogic.co.uk> + * Author: Ian Lartey <ian@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -156,7 +157,7 @@ static const struct regs_info palmas_regs_info[] = { * * So they are basically (maxV-minV)/stepV */ -#define PALMAS_SMPS_NUM_VOLTAGES 116 +#define PALMAS_SMPS_NUM_VOLTAGES 117 #define PALMAS_SMPS10_NUM_VOLTAGES 2 #define PALMAS_LDO_NUM_VOLTAGES 50 diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 74508cc62d67..f705d25b437c 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -471,24 +471,23 @@ twl4030ldo_set_voltage_sel(struct regulator_dev *rdev, unsigned selector) selector); } -static int twl4030ldo_get_voltage(struct regulator_dev *rdev) +static int twl4030ldo_get_voltage_sel(struct regulator_dev *rdev) { struct twlreg_info *info = rdev_get_drvdata(rdev); - int vsel = twlreg_read(info, TWL_MODULE_PM_RECEIVER, - VREG_VOLTAGE); + int vsel = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_VOLTAGE); if (vsel < 0) return vsel; vsel &= info->table_len - 1; - return LDO_MV(info->table[vsel]) * 1000; + return vsel; } static struct regulator_ops twl4030ldo_ops = { .list_voltage = twl4030ldo_list_voltage, .set_voltage_sel = twl4030ldo_set_voltage_sel, - .get_voltage = twl4030ldo_get_voltage, + .get_voltage_sel = twl4030ldo_get_voltage_sel, .enable = twl4030reg_enable, .disable = twl4030reg_disable, diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index cc1f7bf53fd0..c6d77e20622c 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -4,7 +4,7 @@ menu "Remoteproc drivers" config REMOTEPROC tristate depends on HAS_DMA - select FW_CONFIG + select FW_LOADER select VIRTIO config OMAP_REMOTEPROC diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 
29387df4bfc9..8edb4aed5d36 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -217,7 +217,7 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) * TODO: support predefined notifyids (via resource table) */ ret = idr_alloc(&rproc->notifyids, rvring, 0, 0, GFP_KERNEL); - if (ret) { + if (ret < 0) { dev_err(dev, "idr_alloc failed: %d\n", ret); dma_free_coherent(dev->parent, size, va, dma); return ret; @@ -366,10 +366,12 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc, /* it is now safe to add the virtio device */ ret = rproc_add_virtio_dev(rvdev, rsc->id); if (ret) - goto free_rvdev; + goto remove_rvdev; return 0; +remove_rvdev: + list_del(&rvdev->node); free_rvdev: kfree(rvdev); return ret; diff --git a/drivers/remoteproc/ste_modem_rproc.c b/drivers/remoteproc/ste_modem_rproc.c index a7743c069339..fb95c4220052 100644 --- a/drivers/remoteproc/ste_modem_rproc.c +++ b/drivers/remoteproc/ste_modem_rproc.c @@ -240,6 +240,8 @@ static int sproc_drv_remove(struct platform_device *pdev) /* Unregister as remoteproc device */ rproc_del(sproc->rproc); + dma_free_coherent(sproc->rproc->dev.parent, SPROC_FW_SIZE, + sproc->fw_addr, sproc->fw_dma_addr); rproc_put(sproc->rproc); mdev->drv_data = NULL; @@ -297,10 +299,13 @@ static int sproc_probe(struct platform_device *pdev) /* Register as a remoteproc device */ err = rproc_add(rproc); if (err) - goto free_rproc; + goto free_mem; return 0; +free_mem: + dma_free_coherent(rproc->dev.parent, SPROC_FW_SIZE, + sproc->fw_addr, sproc->fw_dma_addr); free_rproc: /* Reset device data upon error */ mdev->drv_data = NULL; diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index 0dde688ca09b..969abbad7fe3 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -239,11 +239,9 @@ static int da9052_rtc_probe(struct platform_device *pdev) rtc->da9052 = dev_get_drvdata(pdev->dev.parent); platform_set_drvdata(pdev, rtc); - rtc->irq = 
platform_get_irq_byname(pdev, "ALM"); - ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, - da9052_rtc_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - "ALM", rtc); + rtc->irq = DA9052_IRQ_ALARM; + ret = da9052_request_irq(rtc->da9052, rtc->irq, "ALM", + da9052_rtc_irq, rtc); if (ret != 0) { rtc_err(rtc->da9052, "irq registration failed: %d\n", ret); return ret; diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c index 57233c885998..8f87fec27ce7 100644 --- a/drivers/rtc/rtc-mv.c +++ b/drivers/rtc/rtc-mv.c @@ -14,6 +14,7 @@ #include <linux/platform_device.h> #include <linux/of.h> #include <linux/delay.h> +#include <linux/clk.h> #include <linux/gfp.h> #include <linux/module.h> @@ -41,6 +42,7 @@ struct rtc_plat_data { struct rtc_device *rtc; void __iomem *ioaddr; int irq; + struct clk *clk; }; static int mv_rtc_set_time(struct device *dev, struct rtc_time *tm) @@ -221,6 +223,7 @@ static int mv_rtc_probe(struct platform_device *pdev) struct rtc_plat_data *pdata; resource_size_t size; u32 rtc_time; + int ret = 0; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) @@ -239,11 +242,17 @@ static int mv_rtc_probe(struct platform_device *pdev) if (!pdata->ioaddr) return -ENOMEM; + pdata->clk = devm_clk_get(&pdev->dev, NULL); + /* Not all SoCs require a clock.*/ + if (!IS_ERR(pdata->clk)) + clk_prepare_enable(pdata->clk); + /* make sure the 24 hours mode is enabled */ rtc_time = readl(pdata->ioaddr + RTC_TIME_REG_OFFS); if (rtc_time & RTC_HOURS_12H_MODE) { dev_err(&pdev->dev, "24 Hours mode not supported.\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } /* make sure it is actually functional */ @@ -252,7 +261,8 @@ static int mv_rtc_probe(struct platform_device *pdev) rtc_time = readl(pdata->ioaddr + RTC_TIME_REG_OFFS); if (rtc_time == 0x01000000) { dev_err(&pdev->dev, "internal RTC not ticking\n"); - return -ENODEV; + ret = -ENODEV; + goto out; } } @@ -268,8 +278,10 @@ static int mv_rtc_probe(struct platform_device *pdev) } else pdata->rtc = 
rtc_device_register(pdev->name, &pdev->dev, &mv_rtc_ops, THIS_MODULE); - if (IS_ERR(pdata->rtc)) - return PTR_ERR(pdata->rtc); + if (IS_ERR(pdata->rtc)) { + ret = PTR_ERR(pdata->rtc); + goto out; + } if (pdata->irq >= 0) { writel(0, pdata->ioaddr + RTC_ALARM_INTERRUPT_MASK_REG_OFFS); @@ -282,6 +294,11 @@ static int mv_rtc_probe(struct platform_device *pdev) } return 0; +out: + if (!IS_ERR(pdata->clk)) + clk_disable_unprepare(pdata->clk); + + return ret; } static int __exit mv_rtc_remove(struct platform_device *pdev) @@ -292,6 +309,9 @@ static int __exit mv_rtc_remove(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); rtc_device_unregister(pdata->rtc); + if (!IS_ERR(pdata->clk)) + clk_disable_unprepare(pdata->clk); + return 0; } diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 9978ad4433cb..e9b9c8392832 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -135,6 +135,11 @@ static const struct block_device_operations scm_blk_devops = { .release = scm_release, }; +static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req) +{ + return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT; +} + static void scm_request_prepare(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; @@ -195,14 +200,18 @@ void scm_request_requeue(struct scm_request *scmrq) scm_release_cluster(scmrq); blk_requeue_request(bdev->rq, scmrq->request); + atomic_dec(&bdev->queued_reqs); scm_request_done(scmrq); scm_ensure_queue_restart(bdev); } void scm_request_finish(struct scm_request *scmrq) { + struct scm_blk_dev *bdev = scmrq->bdev; + scm_release_cluster(scmrq); blk_end_request_all(scmrq->request, scmrq->error); + atomic_dec(&bdev->queued_reqs); scm_request_done(scmrq); } @@ -218,6 +227,10 @@ static void scm_blk_request(struct request_queue *rq) if (req->cmd_type != REQ_TYPE_FS) continue; + if (!scm_permit_request(bdev, req)) { + scm_ensure_queue_restart(bdev); + return; + } scmrq = 
scm_request_fetch(); if (!scmrq) { SCM_LOG(5, "no request"); @@ -231,11 +244,13 @@ static void scm_blk_request(struct request_queue *rq) return; } if (scm_need_cluster_request(scmrq)) { + atomic_inc(&bdev->queued_reqs); blk_start_request(req); scm_initiate_cluster_request(scmrq); return; } scm_request_prepare(scmrq); + atomic_inc(&bdev->queued_reqs); blk_start_request(req); ret = scm_start_aob(scmrq->aob); @@ -244,7 +259,6 @@ static void scm_blk_request(struct request_queue *rq) scm_request_requeue(scmrq); return; } - atomic_inc(&bdev->queued_reqs); } } @@ -280,6 +294,38 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, int error) tasklet_hi_schedule(&bdev->tasklet); } +static void scm_blk_handle_error(struct scm_request *scmrq) +{ + struct scm_blk_dev *bdev = scmrq->bdev; + unsigned long flags; + + if (scmrq->error != -EIO) + goto restart; + + /* For -EIO the response block is valid. */ + switch (scmrq->aob->response.eqc) { + case EQC_WR_PROHIBIT: + spin_lock_irqsave(&bdev->lock, flags); + if (bdev->state != SCM_WR_PROHIBIT) + pr_info("%lx: Write access to the SCM increment is suspended\n", + (unsigned long) bdev->scmdev->address); + bdev->state = SCM_WR_PROHIBIT; + spin_unlock_irqrestore(&bdev->lock, flags); + goto requeue; + default: + break; + } + +restart: + if (!scm_start_aob(scmrq->aob)) + return; + +requeue: + spin_lock_irqsave(&bdev->rq_lock, flags); + scm_request_requeue(scmrq); + spin_unlock_irqrestore(&bdev->rq_lock, flags); +} + static void scm_blk_tasklet(struct scm_blk_dev *bdev) { struct scm_request *scmrq; @@ -293,11 +339,8 @@ static void scm_blk_tasklet(struct scm_blk_dev *bdev) spin_unlock_irqrestore(&bdev->lock, flags); if (scmrq->error && scmrq->retries-- > 0) { - if (scm_start_aob(scmrq->aob)) { - spin_lock_irqsave(&bdev->rq_lock, flags); - scm_request_requeue(scmrq); - spin_unlock_irqrestore(&bdev->rq_lock, flags); - } + scm_blk_handle_error(scmrq); + /* Request restarted or requeued, handle next. 
*/ spin_lock_irqsave(&bdev->lock, flags); continue; @@ -310,7 +353,6 @@ static void scm_blk_tasklet(struct scm_blk_dev *bdev) } scm_request_finish(scmrq); - atomic_dec(&bdev->queued_reqs); spin_lock_irqsave(&bdev->lock, flags); } spin_unlock_irqrestore(&bdev->lock, flags); @@ -332,6 +374,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) } bdev->scmdev = scmdev; + bdev->state = SCM_OPER; spin_lock_init(&bdev->rq_lock); spin_lock_init(&bdev->lock); INIT_LIST_HEAD(&bdev->finished_requests); @@ -396,6 +439,18 @@ void scm_blk_dev_cleanup(struct scm_blk_dev *bdev) put_disk(bdev->gendisk); } +void scm_blk_set_available(struct scm_blk_dev *bdev) +{ + unsigned long flags; + + spin_lock_irqsave(&bdev->lock, flags); + if (bdev->state == SCM_WR_PROHIBIT) + pr_info("%lx: Write access to the SCM increment is restored\n", + (unsigned long) bdev->scmdev->address); + bdev->state = SCM_OPER; + spin_unlock_irqrestore(&bdev->lock, flags); +} + static int __init scm_blk_init(void) { int ret = -EINVAL; @@ -408,12 +463,15 @@ static int __init scm_blk_init(void) goto out; scm_major = ret; - if (scm_alloc_rqs(nr_requests)) + ret = scm_alloc_rqs(nr_requests); + if (ret) goto out_unreg; scm_debug = debug_register("scm_log", 16, 1, 16); - if (!scm_debug) + if (!scm_debug) { + ret = -ENOMEM; goto out_free; + } debug_register_view(scm_debug, &debug_hex_ascii_view); debug_set_level(scm_debug, 2); diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h index 3c1ccf494647..8b387b32fd62 100644 --- a/drivers/s390/block/scm_blk.h +++ b/drivers/s390/block/scm_blk.h @@ -21,6 +21,7 @@ struct scm_blk_dev { spinlock_t rq_lock; /* guard the request queue */ spinlock_t lock; /* guard the rest of the blockdev */ atomic_t queued_reqs; + enum {SCM_OPER, SCM_WR_PROHIBIT} state; struct list_head finished_requests; #ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE struct list_head cluster_list; @@ -48,6 +49,7 @@ struct scm_request { int scm_blk_dev_setup(struct scm_blk_dev *, 
struct scm_device *); void scm_blk_dev_cleanup(struct scm_blk_dev *); +void scm_blk_set_available(struct scm_blk_dev *); void scm_blk_irq(struct scm_device *, void *, int); void scm_request_finish(struct scm_request *); diff --git a/drivers/s390/block/scm_drv.c b/drivers/s390/block/scm_drv.c index 9fa0a908607b..c98cf52d78d1 100644 --- a/drivers/s390/block/scm_drv.c +++ b/drivers/s390/block/scm_drv.c @@ -13,12 +13,23 @@ #include <asm/eadm.h> #include "scm_blk.h" -static void notify(struct scm_device *scmdev) +static void scm_notify(struct scm_device *scmdev, enum scm_event event) { - pr_info("%lu: The capabilities of the SCM increment changed\n", - (unsigned long) scmdev->address); - SCM_LOG(2, "State changed"); - SCM_LOG_STATE(2, scmdev); + struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev); + + switch (event) { + case SCM_CHANGE: + pr_info("%lx: The capabilities of the SCM increment changed\n", + (unsigned long) scmdev->address); + SCM_LOG(2, "State changed"); + SCM_LOG_STATE(2, scmdev); + break; + case SCM_AVAIL: + SCM_LOG(2, "Increment available"); + SCM_LOG_STATE(2, scmdev); + scm_blk_set_available(bdev); + break; + } } static int scm_probe(struct scm_device *scmdev) @@ -64,7 +75,7 @@ static struct scm_driver scm_drv = { .name = "scm_block", .owner = THIS_MODULE, }, - .notify = notify, + .notify = scm_notify, .probe = scm_probe, .remove = scm_remove, .handler = scm_blk_irq, diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 30a2255389e5..cd798386b622 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -627,6 +627,8 @@ static int __init sclp_detect_standby_memory(void) struct read_storage_sccb *sccb; int i, id, assigned, rc; + if (OLDMEM_BASE) /* No standby memory in kdump mode */ + return 0; if (!early_read_info_sccb_valid) return 0; if ((sclp_facilities & 0xe00000000000ULL) != 0xe00000000000ULL) diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index b907dba24025..cee69dac3e18 
100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -915,7 +915,7 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) int i, rc; /* Check if the tty3270 is already there. */ - view = raw3270_find_view(&tty3270_fn, tty->index); + view = raw3270_find_view(&tty3270_fn, tty->index + RAW3270_FIRSTMINOR); if (!IS_ERR(view)) { tp = container_of(view, struct tty3270, view); tty->driver_data = tp; @@ -927,15 +927,16 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) tp->inattr = TF_INPUT; return tty_port_install(&tp->port, driver, tty); } - if (tty3270_max_index < tty->index) - tty3270_max_index = tty->index; + if (tty3270_max_index < tty->index + 1) + tty3270_max_index = tty->index + 1; /* Allocate tty3270 structure on first open. */ tp = tty3270_alloc_view(); if (IS_ERR(tp)) return PTR_ERR(tp); - rc = raw3270_add_view(&tp->view, &tty3270_fn, tty->index); + rc = raw3270_add_view(&tp->view, &tty3270_fn, + tty->index + RAW3270_FIRSTMINOR); if (rc) { tty3270_free_view(tp); return rc; @@ -1846,12 +1847,12 @@ static const struct tty_operations tty3270_ops = { void tty3270_create_cb(int minor) { - tty_register_device(tty3270_driver, minor, NULL); + tty_register_device(tty3270_driver, minor - RAW3270_FIRSTMINOR, NULL); } void tty3270_destroy_cb(int minor) { - tty_unregister_device(tty3270_driver, minor); + tty_unregister_device(tty3270_driver, minor - RAW3270_FIRSTMINOR); } struct raw3270_notifier tty3270_notifier = @@ -1884,7 +1885,8 @@ static int __init tty3270_init(void) driver->driver_name = "tty3270"; driver->name = "3270/tty"; driver->major = IBM_TTY3270_MAJOR; - driver->minor_start = 0; + driver->minor_start = RAW3270_FIRSTMINOR; + driver->name_base = RAW3270_FIRSTMINOR; driver->type = TTY_DRIVER_TYPE_SYSTEM; driver->subtype = SYSTEM_TYPE_TTY; driver->init_termios = tty_std_termios; diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 31ceef1beb8b..e16c553f6556 100644 --- 
a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -433,6 +433,20 @@ static void chsc_process_sei_scm_change(struct chsc_sei_nt0_area *sei_area) " failed (rc=%d).\n", ret); } +static void chsc_process_sei_scm_avail(struct chsc_sei_nt0_area *sei_area) +{ + int ret; + + CIO_CRW_EVENT(4, "chsc: scm available information\n"); + if (sei_area->rs != 7) + return; + + ret = scm_process_availability_information(); + if (ret) + CIO_CRW_EVENT(0, "chsc: process availability information" + " failed (rc=%d).\n", ret); +} + static void chsc_process_sei_nt2(struct chsc_sei_nt2_area *sei_area) { switch (sei_area->cc) { @@ -468,6 +482,9 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area) case 12: /* scm change notification */ chsc_process_sei_scm_change(sei_area); break; + case 14: /* scm available notification */ + chsc_process_sei_scm_avail(sei_area); + break; default: /* other stuff */ CIO_CRW_EVENT(2, "chsc: sei nt0 unhandled cc=%d\n", sei_area->cc); diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 227e05f674b3..349d5fc47196 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -156,8 +156,10 @@ int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token); #ifdef CONFIG_SCM_BUS int scm_update_information(void); +int scm_process_availability_information(void); #else /* CONFIG_SCM_BUS */ static inline int scm_update_information(void) { return 0; } +static inline int scm_process_availability_information(void) { return 0; } #endif /* CONFIG_SCM_BUS */ diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index bcf20f3aa51b..46ec25632e8b 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -211,7 +211,7 @@ static void scmdev_update(struct scm_device *scmdev, struct sale *sale) goto out; scmdrv = to_scm_drv(scmdev->dev.driver); if (changed && scmdrv->notify) - scmdrv->notify(scmdev); + scmdrv->notify(scmdev, SCM_CHANGE); out: device_unlock(&scmdev->dev); if (changed) @@ -297,6 +297,22 @@ int 
scm_update_information(void) return ret; } +static int scm_dev_avail(struct device *dev, void *unused) +{ + struct scm_driver *scmdrv = to_scm_drv(dev->driver); + struct scm_device *scmdev = to_scm_dev(dev); + + if (dev->driver && scmdrv->notify) + scmdrv->notify(scmdev, SCM_AVAIL); + + return 0; +} + +int scm_process_availability_information(void) +{ + return bus_for_each_dev(&scm_bus_type, NULL, NULL, scm_dev_avail); +} + static int __init scm_init(void) { int ret; diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 2029b6caa595..fb877b59ec57 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -166,7 +166,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq) vcdev = to_vc_device(info->vq->vdev); ccw_device_get_schid(vcdev->cdev, &schid); - do_kvm_notify(schid, virtqueue_get_queue_index(vq)); + do_kvm_notify(schid, vq->index); } static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, @@ -188,7 +188,7 @@ static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) unsigned long flags; unsigned long size; int ret; - unsigned int index = virtqueue_get_queue_index(vq); + unsigned int index = vq->index; /* Remove from our list. 
*/ spin_lock_irqsave(&vcdev->lock, flags); @@ -610,7 +610,7 @@ static struct virtqueue *virtio_ccw_vq_by_ind(struct virtio_ccw_device *vcdev, vq = NULL; spin_lock_irqsave(&vcdev->lock, flags); list_for_each_entry(info, &vcdev->virtqueues, node) { - if (virtqueue_get_queue_index(info->vq) == index) { + if (info->vq->index == index) { vq = info->vq; break; } diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index d87961d4c0de..c4f392d5db4c 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -769,6 +769,7 @@ struct qeth_card { unsigned long thread_start_mask; unsigned long thread_allowed_mask; unsigned long thread_running_mask; + struct task_struct *recovery_task; spinlock_t ip_lock; struct list_head ip_list; struct list_head *ip_tbd_list; @@ -862,6 +863,8 @@ extern struct qeth_card_list_struct qeth_core_card_list; extern struct kmem_cache *qeth_core_header_cache; extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS]; +void qeth_set_recovery_task(struct qeth_card *); +void qeth_clear_recovery_task(struct qeth_card *); void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int); int qeth_threads_running(struct qeth_card *, unsigned long); int qeth_wait_for_threads(struct qeth_card *, unsigned long); @@ -915,7 +918,8 @@ int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *, int (*reply_cb)(struct qeth_card *, struct qeth_reply*, unsigned long), void *reply_param); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); -int qeth_get_elements_no(struct qeth_card *, void *, struct sk_buff *, int); +int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int); +int qeth_get_elements_for_frags(struct sk_buff *); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, int, int); int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *, @@ -928,7 +932,7 @@ void qeth_core_get_drvinfo(struct 
net_device *, struct ethtool_drvinfo *); void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...); int qeth_core_ethtool_get_settings(struct net_device *, struct ethtool_cmd *); int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback); -int qeth_hdr_chk_and_bounce(struct sk_buff *, int); +int qeth_hdr_chk_and_bounce(struct sk_buff *, struct qeth_hdr **, int); int qeth_configure_cq(struct qeth_card *, enum qeth_cq); int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 0d8cdff81813..6cd0fc1b203a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -177,6 +177,23 @@ const char *qeth_get_cardname_short(struct qeth_card *card) return "n/a"; } +void qeth_set_recovery_task(struct qeth_card *card) +{ + card->recovery_task = current; +} +EXPORT_SYMBOL_GPL(qeth_set_recovery_task); + +void qeth_clear_recovery_task(struct qeth_card *card) +{ + card->recovery_task = NULL; +} +EXPORT_SYMBOL_GPL(qeth_clear_recovery_task); + +static bool qeth_is_recovery_task(const struct qeth_card *card) +{ + return card->recovery_task == current; +} + void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask) { @@ -205,6 +222,8 @@ EXPORT_SYMBOL_GPL(qeth_threads_running); int qeth_wait_for_threads(struct qeth_card *card, unsigned long threads) { + if (qeth_is_recovery_task(card)) + return 0; return wait_event_interruptible(card->wait_q, qeth_threads_running(card, threads) == 0); } @@ -316,7 +335,7 @@ static inline int qeth_alloc_cq(struct qeth_card *card) card->qdio.no_in_queues = 2; - card->qdio.out_bufstates = (struct qdio_outbuf_state *) + card->qdio.out_bufstates = kzalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_outbuf_state), GFP_KERNEL); @@ -3679,14 +3698,34 @@ int 
qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(qeth_get_priority_queue); -int qeth_get_elements_no(struct qeth_card *card, void *hdr, +int qeth_get_elements_for_frags(struct sk_buff *skb) +{ + int cnt, length, e, elements = 0; + struct skb_frag_struct *frag; + char *data; + + for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) { + frag = &skb_shinfo(skb)->frags[cnt]; + data = (char *)page_to_phys(skb_frag_page(frag)) + + frag->page_offset; + length = frag->size; + e = PFN_UP((unsigned long)data + length - 1) - + PFN_DOWN((unsigned long)data); + elements += e; + } + return elements; +} +EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); + +int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, int elems) { int dlen = skb->len - skb->data_len; int elements_needed = PFN_UP((unsigned long)skb->data + dlen - 1) - PFN_DOWN((unsigned long)skb->data); - elements_needed += skb_shinfo(skb)->nr_frags; + elements_needed += qeth_get_elements_for_frags(skb); + if ((elements_needed + elems) > QETH_MAX_BUFFER_ELEMENTS(card)) { QETH_DBF_MESSAGE(2, "Invalid size of IP packet " "(Number=%d / Length=%d). 
Discarded.\n", @@ -3697,7 +3736,7 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr, } EXPORT_SYMBOL_GPL(qeth_get_elements_no); -int qeth_hdr_chk_and_bounce(struct sk_buff *skb, int len) +int qeth_hdr_chk_and_bounce(struct sk_buff *skb, struct qeth_hdr **hdr, int len) { int hroom, inpage, rest; @@ -3710,6 +3749,8 @@ int qeth_hdr_chk_and_bounce(struct sk_buff *skb, int len) return 1; memmove(skb->data - rest, skb->data, skb->len - skb->data_len); skb->data -= rest; + skb->tail -= rest; + *hdr = (struct qeth_hdr *)skb->data; QETH_DBF_MESSAGE(2, "skb bounce len: %d rest: %d\n", len, rest); } return 0; @@ -3771,12 +3812,23 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb, for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) { frag = &skb_shinfo(skb)->frags[cnt]; - buffer->element[element].addr = (char *) - page_to_phys(skb_frag_page(frag)) - + frag->page_offset; - buffer->element[element].length = frag->size; - buffer->element[element].eflags = SBAL_EFLAGS_MIDDLE_FRAG; - element++; + data = (char *)page_to_phys(skb_frag_page(frag)) + + frag->page_offset; + length = frag->size; + while (length > 0) { + length_here = PAGE_SIZE - + ((unsigned long) data % PAGE_SIZE); + if (length < length_here) + length_here = length; + + buffer->element[element].addr = data; + buffer->element[element].length = length_here; + buffer->element[element].eflags = + SBAL_EFLAGS_MIDDLE_FRAG; + length -= length_here; + data += length_here; + element++; + } } if (buffer->element[element - 1].eflags) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d690166efeaf..ec8ccdae7aba 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -302,7 +302,8 @@ static void qeth_l2_process_vlans(struct qeth_card *card) spin_unlock_bh(&card->vlanlock); } -static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct 
qeth_card *card = dev->ml_priv; struct qeth_vlan_vid *id; @@ -331,7 +332,8 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct qeth_vlan_vid *id, *tmpid = NULL; struct qeth_card *card = dev->ml_priv; @@ -771,8 +773,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - elements = qeth_get_elements_no(card, (void *)hdr, new_skb, - elements_needed); + elements = qeth_get_elements_no(card, new_skb, elements_needed); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); @@ -780,7 +781,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (card->info.type != QETH_CARD_TYPE_IQD) { - if (qeth_hdr_chk_and_bounce(new_skb, + if (qeth_hdr_chk_and_bounce(new_skb, &hdr, sizeof(struct qeth_hdr_layer2))) goto tx_drop; rc = qeth_do_send_packet(card, queue, new_skb, hdr, @@ -959,7 +960,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) SET_ETHTOOL_OPS(card->dev, &qeth_l2_ethtool_ops); else SET_ETHTOOL_OPS(card->dev, &qeth_l2_osn_ops); - card->dev->features |= NETIF_F_HW_VLAN_FILTER; + card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); SET_NETDEV_DEV(card->dev, &card->gdev->dev); @@ -1143,6 +1144,7 @@ static int qeth_l2_recover(void *ptr) QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); + qeth_set_recovery_task(card); __qeth_l2_set_offline(card->gdev, 1); rc = __qeth_l2_set_online(card->gdev, 1); if (!rc) @@ -1153,6 +1155,7 @@ static int qeth_l2_recover(void *ptr) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } + qeth_clear_recovery_task(card); 
qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 091ca0efa1c5..c1b0b2761f8d 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -623,7 +623,7 @@ static int qeth_l3_send_setrouting(struct qeth_card *card, return rc; } -static void qeth_l3_correct_routing_type(struct qeth_card *card, +static int qeth_l3_correct_routing_type(struct qeth_card *card, enum qeth_routing_types *type, enum qeth_prot_versions prot) { if (card->info.type == QETH_CARD_TYPE_IQD) { @@ -632,7 +632,7 @@ static void qeth_l3_correct_routing_type(struct qeth_card *card, case PRIMARY_CONNECTOR: case SECONDARY_CONNECTOR: case MULTICAST_ROUTER: - return; + return 0; default: goto out_inval; } @@ -641,17 +641,18 @@ static void qeth_l3_correct_routing_type(struct qeth_card *card, case NO_ROUTER: case PRIMARY_ROUTER: case SECONDARY_ROUTER: - return; + return 0; case MULTICAST_ROUTER: if (qeth_is_ipafunc_supported(card, prot, IPA_OSA_MC_ROUTER)) - return; + return 0; default: goto out_inval; } } out_inval: *type = NO_ROUTER; + return -EINVAL; } int qeth_l3_setrouting_v4(struct qeth_card *card) @@ -660,8 +661,10 @@ int qeth_l3_setrouting_v4(struct qeth_card *card) QETH_CARD_TEXT(card, 3, "setrtg4"); - qeth_l3_correct_routing_type(card, &card->options.route4.type, + rc = qeth_l3_correct_routing_type(card, &card->options.route4.type, QETH_PROT_IPV4); + if (rc) + return rc; rc = qeth_l3_send_setrouting(card, card->options.route4.type, QETH_PROT_IPV4); @@ -683,8 +686,10 @@ int qeth_l3_setrouting_v6(struct qeth_card *card) if (!qeth_is_supported(card, IPA_IPV6)) return 0; - qeth_l3_correct_routing_type(card, &card->options.route6.type, + rc = qeth_l3_correct_routing_type(card, &card->options.route6.type, QETH_PROT_IPV6); + if (rc) + return rc; rc = qeth_l3_send_setrouting(card, card->options.route6.type, QETH_PROT_IPV6); 
@@ -1654,7 +1659,8 @@ static void qeth_l3_add_vlan_mc(struct qeth_card *card) for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) { struct net_device *netdev; - netdev = __vlan_find_dev_deep(card->dev, vid); + netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + vid); if (netdev == NULL || !(netdev->flags & IFF_UP)) continue; @@ -1715,7 +1721,8 @@ static void qeth_l3_add_vlan_mc6(struct qeth_card *card) for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) { struct net_device *netdev; - netdev = __vlan_find_dev_deep(card->dev, vid); + netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + vid); if (netdev == NULL || !(netdev->flags & IFF_UP)) continue; @@ -1759,7 +1766,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr4"); - netdev = __vlan_find_dev_deep(card->dev, vid); + netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid); if (!netdev) return; in_dev = in_dev_get(netdev); @@ -1789,7 +1796,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr6"); - netdev = __vlan_find_dev_deep(card->dev, vid); + netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid); if (!netdev) return; in6_dev = in6_dev_get(netdev); @@ -1819,7 +1826,8 @@ static void qeth_l3_free_vlan_addresses(struct qeth_card *card, rcu_read_unlock(); } -static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; @@ -1827,7 +1835,8 @@ static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) return 0; } -static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; unsigned long flags; @@ -1970,7 +1979,8 @@ static int qeth_l3_process_inbound_buffer(struct 
qeth_card *card, &vlan_tag); len = skb->len; if (is_vlan && !card->options.sniffer) - __vlan_hwaccel_put_tag(skb, vlan_tag); + __vlan_hwaccel_put_tag(skb, + htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(&card->napi, skb); } break; @@ -2079,7 +2089,8 @@ static int qeth_l3_verify_vlan_dev(struct net_device *dev, struct net_device *netdev; rcu_read_lock(); - netdev = __vlan_find_dev_deep(card->dev, vid); + netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + vid); rcu_read_unlock(); if (netdev == dev) { rc = QETH_VLAN_CARD; @@ -2898,7 +2909,9 @@ static inline int qeth_l3_tso_elements(struct sk_buff *skb) tcp_hdr(skb)->doff * 4; int tcpd_len = skb->len - (tcpd - (unsigned long)skb->data); int elements = PFN_UP(tcpd + tcpd_len - 1) - PFN_DOWN(tcpd); - elements += skb_shinfo(skb)->nr_frags; + + elements += qeth_get_elements_for_frags(skb); + return elements; } @@ -3024,8 +3037,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) qeth_l3_hdr_csum(card, hdr, new_skb); } - elems = qeth_get_elements_no(card, (void *)hdr, new_skb, - elements_needed); + elems = qeth_get_elements_no(card, new_skb, elements_needed); if (!elems) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); @@ -3043,7 +3055,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) else len = sizeof(struct qeth_hdr_layer3); - if (qeth_hdr_chk_and_bounce(new_skb, len)) + if (qeth_hdr_chk_and_bounce(new_skb, &hdr, len)) goto tx_drop; rc = qeth_do_send_packet(card, queue, new_skb, hdr, elements_needed); @@ -3287,9 +3299,9 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; SET_ETHTOOL_OPS(card->dev, &qeth_l3_ethtool_ops); - card->dev->features |= NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER; + card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; 
card->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; card->dev->gso_max_size = 15 * PAGE_SIZE; @@ -3348,7 +3360,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) rc = -ENODEV; goto out_remove; } - qeth_trace_features(card); if (!card->dev && qeth_l3_setup_netdev(card)) { rc = -ENODEV; @@ -3425,6 +3436,7 @@ contin: qeth_l3_set_multicast_list(card->dev); rtnl_unlock(); } + qeth_trace_features(card); /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); @@ -3508,6 +3520,7 @@ static int qeth_l3_recover(void *ptr) QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); + qeth_set_recovery_task(card); __qeth_l3_set_offline(card->gdev, 1); rc = __qeth_l3_set_online(card->gdev, 1); if (!rc) @@ -3518,6 +3531,7 @@ static int qeth_l3_recover(void *ptr) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } + qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index ebc379486267..e70af2406ff9 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -87,6 +87,8 @@ static ssize_t qeth_l3_dev_route_store(struct qeth_card *card, rc = qeth_l3_setrouting_v6(card); } out: + if (rc) + route->type = old_route_type; mutex_unlock(&card->conf_mutex); return rc ? 
rc : count; } diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 1a9d1e3ce64c..c1441ed282eb 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -282,7 +282,7 @@ static irqreturn_t bbc_i2c_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void __init reset_one_i2c(struct bbc_i2c_bus *bp) +static void reset_one_i2c(struct bbc_i2c_bus *bp) { writeb(I2C_PCF_PIN, bp->i2c_control_regs + 0x0); writeb(bp->own, bp->i2c_control_regs + 0x1); @@ -291,7 +291,7 @@ static void __init reset_one_i2c(struct bbc_i2c_bus *bp) writeb(I2C_PCF_IDLE, bp->i2c_control_regs + 0x0); } -static struct bbc_i2c_bus * __init attach_one_i2c(struct platform_device *op, int index) +static struct bbc_i2c_bus * attach_one_i2c(struct platform_device *op, int index) { struct bbc_i2c_bus *bp; struct device_node *dp; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 2daf4b0da434..90bc7bd00966 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -940,6 +940,7 @@ static int bnx2fc_libfc_config(struct fc_lport *lport) fc_exch_init(lport); fc_rport_init(lport); fc_disc_init(lport); + fc_disc_config(lport, lport); return 0; } @@ -2133,6 +2134,7 @@ static int _bnx2fc_create(struct net_device *netdev, } ctlr = bnx2fc_to_ctlr(interface); + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); interface->vlan_id = vlan_id; interface->timer_work_queue = @@ -2143,7 +2145,7 @@ static int _bnx2fc_create(struct net_device *netdev, goto ifput_err; } - lport = bnx2fc_if_create(interface, &interface->hba->pcidev->dev, 0); + lport = bnx2fc_if_create(interface, &cdev->dev, 0); if (!lport) { printk(KERN_ERR PFX "Failed to create interface (%s)\n", netdev->name); @@ -2159,8 +2161,6 @@ static int _bnx2fc_create(struct net_device *netdev, /* Make this master N_port */ ctlr->lp = lport; - cdev = fcoe_ctlr_to_ctlr_dev(ctlr); - if (link_state == BNX2FC_CREATE_LINK_UP) cdev->enabled = FCOE_CTLR_ENABLED; else 
diff --git a/drivers/scsi/csiostor/Makefile b/drivers/scsi/csiostor/Makefile index b581966c88f9..913b9a92fb06 100644 --- a/drivers/scsi/csiostor/Makefile +++ b/drivers/scsi/csiostor/Makefile @@ -8,4 +8,5 @@ ccflags-y += -I$(srctree)/drivers/net/ethernet/chelsio/cxgb4 obj-$(CONFIG_SCSI_CHELSIO_FCOE) += csiostor.o csiostor-objs := csio_attr.o csio_init.o csio_lnode.o csio_scsi.o \ - csio_hw.o csio_isr.o csio_mb.o csio_rnode.o csio_wr.o + csio_hw.o csio_hw_t4.o csio_hw_t5.o csio_isr.o \ + csio_mb.o csio_rnode.o csio_wr.o diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index bdd78fb4fc70..a0b4c8991deb 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -61,7 +61,7 @@ int csio_msi = 2; static int dev_num; /* FCoE Adapter types & its description */ -static const struct csio_adap_desc csio_fcoe_adapters[] = { +static const struct csio_adap_desc csio_t4_fcoe_adapters[] = { {"T440-Dbg 10G", "Chelsio T440-Dbg 10G [FCoE]"}, {"T420-CR 10G", "Chelsio T420-CR 10G [FCoE]"}, {"T422-CR 10G/1G", "Chelsio T422-CR 10G/1G [FCoE]"}, @@ -77,7 +77,38 @@ static const struct csio_adap_desc csio_fcoe_adapters[] = { {"B404-BT 1G", "Chelsio B404-BT 1G [FCoE]"}, {"T480-CR 10G", "Chelsio T480-CR 10G [FCoE]"}, {"T440-LP-CR 10G", "Chelsio T440-LP-CR 10G [FCoE]"}, - {"T4 FPGA", "Chelsio T4 FPGA [FCoE]"} + {"AMSTERDAM 10G", "Chelsio AMSTERDAM 10G [FCoE]"}, + {"HUAWEI T480 10G", "Chelsio HUAWEI T480 10G [FCoE]"}, + {"HUAWEI T440 10G", "Chelsio HUAWEI T440 10G [FCoE]"}, + {"HUAWEI STG 10G", "Chelsio HUAWEI STG 10G [FCoE]"}, + {"ACROMAG XAUI 10G", "Chelsio ACROMAG XAUI 10G [FCoE]"}, + {"ACROMAG SFP+ 10G", "Chelsio ACROMAG SFP+ 10G [FCoE]"}, + {"QUANTA SFP+ 10G", "Chelsio QUANTA SFP+ 10G [FCoE]"}, + {"HUAWEI 10Gbase-T", "Chelsio HUAWEI 10Gbase-T [FCoE]"}, + {"HUAWEI T4TOE 10G", "Chelsio HUAWEI T4TOE 10G [FCoE]"} +}; + +static const struct csio_adap_desc csio_t5_fcoe_adapters[] = { + {"T580-Dbg 10G", "Chelsio T580-Dbg 10G [FCoE]"}, + 
{"T520-CR 10G", "Chelsio T520-CR 10G [FCoE]"}, + {"T522-CR 10G/1G", "Chelsio T452-CR 10G/1G [FCoE]"}, + {"T540-CR 10G", "Chelsio T540-CR 10G [FCoE]"}, + {"T520-BCH 10G", "Chelsio T520-BCH 10G [FCoE]"}, + {"T540-BCH 10G", "Chelsio T540-BCH 10G [FCoE]"}, + {"T540-CH 10G", "Chelsio T540-CH 10G [FCoE]"}, + {"T520-SO 10G", "Chelsio T520-SO 10G [FCoE]"}, + {"T520-CX4 10G", "Chelsio T520-CX4 10G [FCoE]"}, + {"T520-BT 10G", "Chelsio T520-BT 10G [FCoE]"}, + {"T504-BT 1G", "Chelsio T504-BT 1G [FCoE]"}, + {"B520-SR 10G", "Chelsio B520-SR 10G [FCoE]"}, + {"B504-BT 1G", "Chelsio B504-BT 1G [FCoE]"}, + {"T580-CR 10G", "Chelsio T580-CR 10G [FCoE]"}, + {"T540-LP-CR 10G", "Chelsio T540-LP-CR 10G [FCoE]"}, + {"AMSTERDAM 10G", "Chelsio AMSTERDAM 10G [FCoE]"}, + {"T580-LP-CR 40G", "Chelsio T580-LP-CR 40G [FCoE]"}, + {"T520-LL-CR 10G", "Chelsio T520-LL-CR 10G [FCoE]"}, + {"T560-CR 40G", "Chelsio T560-CR 40G [FCoE]"}, + {"T580-CR 40G", "Chelsio T580-CR 40G [FCoE]"} }; static void csio_mgmtm_cleanup(struct csio_mgmtm *); @@ -124,7 +155,7 @@ int csio_is_hw_removing(struct csio_hw *hw) * at the time it indicated completion is stored there. Returns 0 if the * operation completes and -EAGAIN otherwise. */ -static int +int csio_hw_wait_op_done_val(struct csio_hw *hw, int reg, uint32_t mask, int polarity, int attempts, int delay, uint32_t *valp) { @@ -145,6 +176,24 @@ csio_hw_wait_op_done_val(struct csio_hw *hw, int reg, uint32_t mask, } } +/* + * csio_hw_tp_wr_bits_indirect - set/clear bits in an indirect TP register + * @hw: the adapter + * @addr: the indirect TP register address + * @mask: specifies the field within the register to modify + * @val: new value for the field + * + * Sets a field of an indirect TP register to the given value. 
+ */ +void +csio_hw_tp_wr_bits_indirect(struct csio_hw *hw, unsigned int addr, + unsigned int mask, unsigned int val) +{ + csio_wr_reg32(hw, addr, TP_PIO_ADDR); + val |= csio_rd_reg32(hw, TP_PIO_DATA) & ~mask; + csio_wr_reg32(hw, val, TP_PIO_DATA); +} + void csio_set_reg_field(struct csio_hw *hw, uint32_t reg, uint32_t mask, uint32_t value) @@ -157,242 +206,22 @@ csio_set_reg_field(struct csio_hw *hw, uint32_t reg, uint32_t mask, } -/* - * csio_hw_mc_read - read from MC through backdoor accesses - * @hw: the hw module - * @addr: address of first byte requested - * @data: 64 bytes of data containing the requested address - * @ecc: where to store the corresponding 64-bit ECC word - * - * Read 64 bytes of data from MC starting at a 64-byte-aligned address - * that covers the requested address @addr. If @parity is not %NULL it - * is assigned the 64-bit ECC word for the read data. - */ -int -csio_hw_mc_read(struct csio_hw *hw, uint32_t addr, __be32 *data, - uint64_t *ecc) -{ - int i; - - if (csio_rd_reg32(hw, MC_BIST_CMD) & START_BIST) - return -EBUSY; - csio_wr_reg32(hw, addr & ~0x3fU, MC_BIST_CMD_ADDR); - csio_wr_reg32(hw, 64, MC_BIST_CMD_LEN); - csio_wr_reg32(hw, 0xc, MC_BIST_DATA_PATTERN); - csio_wr_reg32(hw, BIST_OPCODE(1) | START_BIST | BIST_CMD_GAP(1), - MC_BIST_CMD); - i = csio_hw_wait_op_done_val(hw, MC_BIST_CMD, START_BIST, - 0, 10, 1, NULL); - if (i) - return i; - -#define MC_DATA(i) MC_BIST_STATUS_REG(MC_BIST_STATUS_RDATA, i) - - for (i = 15; i >= 0; i--) - *data++ = htonl(csio_rd_reg32(hw, MC_DATA(i))); - if (ecc) - *ecc = csio_rd_reg64(hw, MC_DATA(16)); -#undef MC_DATA - return 0; -} - -/* - * csio_hw_edc_read - read from EDC through backdoor accesses - * @hw: the hw module - * @idx: which EDC to access - * @addr: address of first byte requested - * @data: 64 bytes of data containing the requested address - * @ecc: where to store the corresponding 64-bit ECC word - * - * Read 64 bytes of data from EDC starting at a 64-byte-aligned address - * that covers 
the requested address @addr. If @parity is not %NULL it - * is assigned the 64-bit ECC word for the read data. - */ -int -csio_hw_edc_read(struct csio_hw *hw, int idx, uint32_t addr, __be32 *data, - uint64_t *ecc) -{ - int i; - - idx *= EDC_STRIDE; - if (csio_rd_reg32(hw, EDC_BIST_CMD + idx) & START_BIST) - return -EBUSY; - csio_wr_reg32(hw, addr & ~0x3fU, EDC_BIST_CMD_ADDR + idx); - csio_wr_reg32(hw, 64, EDC_BIST_CMD_LEN + idx); - csio_wr_reg32(hw, 0xc, EDC_BIST_DATA_PATTERN + idx); - csio_wr_reg32(hw, BIST_OPCODE(1) | BIST_CMD_GAP(1) | START_BIST, - EDC_BIST_CMD + idx); - i = csio_hw_wait_op_done_val(hw, EDC_BIST_CMD + idx, START_BIST, - 0, 10, 1, NULL); - if (i) - return i; - -#define EDC_DATA(i) (EDC_BIST_STATUS_REG(EDC_BIST_STATUS_RDATA, i) + idx) - - for (i = 15; i >= 0; i--) - *data++ = htonl(csio_rd_reg32(hw, EDC_DATA(i))); - if (ecc) - *ecc = csio_rd_reg64(hw, EDC_DATA(16)); -#undef EDC_DATA - return 0; -} - -/* - * csio_mem_win_rw - read/write memory through PCIE memory window - * @hw: the adapter - * @addr: address of first byte requested - * @data: MEMWIN0_APERTURE bytes of data containing the requested address - * @dir: direction of transfer 1 => read, 0 => write - * - * Read/write MEMWIN0_APERTURE bytes of data from MC starting at a - * MEMWIN0_APERTURE-byte-aligned address that covers the requested - * address @addr. - */ -static int -csio_mem_win_rw(struct csio_hw *hw, u32 addr, u32 *data, int dir) -{ - int i; - - /* - * Setup offset into PCIE memory window. Address must be a - * MEMWIN0_APERTURE-byte-aligned address. (Read back MA register to - * ensure that changes propagate before we attempt to use the new - * values.) 
- */ - csio_wr_reg32(hw, addr & ~(MEMWIN0_APERTURE - 1), - PCIE_MEM_ACCESS_OFFSET); - csio_rd_reg32(hw, PCIE_MEM_ACCESS_OFFSET); - - /* Collecting data 4 bytes at a time upto MEMWIN0_APERTURE */ - for (i = 0; i < MEMWIN0_APERTURE; i = i + sizeof(__be32)) { - if (dir) - *data++ = csio_rd_reg32(hw, (MEMWIN0_BASE + i)); - else - csio_wr_reg32(hw, *data++, (MEMWIN0_BASE + i)); - } - - return 0; -} - -/* - * csio_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window - * @hw: the csio_hw - * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC - * @addr: address within indicated memory type - * @len: amount of memory to transfer - * @buf: host memory buffer - * @dir: direction of transfer 1 => read, 0 => write - * - * Reads/writes an [almost] arbitrary memory region in the firmware: the - * firmware memory address, length and host buffer must be aligned on - * 32-bit boudaries. The memory is transferred as a raw byte sequence - * from/to the firmware's memory. If this memory contains data - * structures which contain multi-byte integers, it's the callers - * responsibility to perform appropriate byte order conversions. - */ -static int -csio_memory_rw(struct csio_hw *hw, int mtype, u32 addr, u32 len, - uint32_t *buf, int dir) -{ - uint32_t pos, start, end, offset, memoffset; - int ret; - uint32_t *data; - - /* - * Argument sanity checks ... - */ - if ((addr & 0x3) || (len & 0x3)) - return -EINVAL; - - data = kzalloc(MEMWIN0_APERTURE, GFP_KERNEL); - if (!data) - return -ENOMEM; - - /* Offset into the region of memory which is being accessed - * MEM_EDC0 = 0 - * MEM_EDC1 = 1 - * MEM_MC = 2 - */ - memoffset = (mtype * (5 * 1024 * 1024)); - - /* Determine the PCIE_MEM_ACCESS_OFFSET */ - addr = addr + memoffset; - - /* - * The underlaying EDC/MC read routines read MEMWIN0_APERTURE bytes - * at a time so we need to round down the start and round up the end. - * We'll start copying out of the first line at (addr - start) a word - * at a time. 
- */ - start = addr & ~(MEMWIN0_APERTURE-1); - end = (addr + len + MEMWIN0_APERTURE-1) & ~(MEMWIN0_APERTURE-1); - offset = (addr - start)/sizeof(__be32); - - for (pos = start; pos < end; pos += MEMWIN0_APERTURE, offset = 0) { - /* - * If we're writing, copy the data from the caller's memory - * buffer - */ - if (!dir) { - /* - * If we're doing a partial write, then we need to do - * a read-modify-write ... - */ - if (offset || len < MEMWIN0_APERTURE) { - ret = csio_mem_win_rw(hw, pos, data, 1); - if (ret) { - kfree(data); - return ret; - } - } - while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) && - len > 0) { - data[offset++] = *buf++; - len -= sizeof(__be32); - } - } - - /* - * Transfer a block of memory and bail if there's an error. - */ - ret = csio_mem_win_rw(hw, pos, data, dir); - if (ret) { - kfree(data); - return ret; - } - - /* - * If we're reading, copy the data into the caller's memory - * buffer. - */ - if (dir) - while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) && - len > 0) { - *buf++ = data[offset++]; - len -= sizeof(__be32); - } - } - - kfree(data); - - return 0; -} - static int csio_memory_write(struct csio_hw *hw, int mtype, u32 addr, u32 len, u32 *buf) { - return csio_memory_rw(hw, mtype, addr, len, buf, 0); + return hw->chip_ops->chip_memory_rw(hw, MEMWIN_CSIOSTOR, mtype, + addr, len, buf, 0); } /* * EEPROM reads take a few tens of us while writes can take a bit over 5 ms. 
*/ -#define EEPROM_MAX_RD_POLL 40 -#define EEPROM_MAX_WR_POLL 6 -#define EEPROM_STAT_ADDR 0x7bfc -#define VPD_BASE 0x400 -#define VPD_BASE_OLD 0 -#define VPD_LEN 512 +#define EEPROM_MAX_RD_POLL 40 +#define EEPROM_MAX_WR_POLL 6 +#define EEPROM_STAT_ADDR 0x7bfc +#define VPD_BASE 0x400 +#define VPD_BASE_OLD 0 +#define VPD_LEN 1024 #define VPD_INFO_FLD_HDR_SIZE 3 /* @@ -817,23 +646,6 @@ out: return 0; } -/* - * csio_hw_flash_cfg_addr - return the address of the flash - * configuration file - * @hw: the HW module - * - * Return the address within the flash where the Firmware Configuration - * File is stored. - */ -static unsigned int -csio_hw_flash_cfg_addr(struct csio_hw *hw) -{ - if (hw->params.sf_size == 0x100000) - return FPGA_FLASH_CFG_OFFSET; - else - return FLASH_CFG_OFFSET; -} - static void csio_hw_print_fw_version(struct csio_hw *hw, char *str) { @@ -898,13 +710,13 @@ csio_hw_check_fw_version(struct csio_hw *hw) minor = FW_HDR_FW_VER_MINOR_GET(hw->fwrev); micro = FW_HDR_FW_VER_MICRO_GET(hw->fwrev); - if (major != FW_VERSION_MAJOR) { /* major mismatch - fail */ + if (major != FW_VERSION_MAJOR(hw)) { /* major mismatch - fail */ csio_err(hw, "card FW has major version %u, driver wants %u\n", - major, FW_VERSION_MAJOR); + major, FW_VERSION_MAJOR(hw)); return -EINVAL; } - if (minor == FW_VERSION_MINOR && micro == FW_VERSION_MICRO) + if (minor == FW_VERSION_MINOR(hw) && micro == FW_VERSION_MICRO(hw)) return 0; /* perfect match */ /* Minor/micro version mismatch */ @@ -1044,7 +856,7 @@ static void csio_set_pcie_completion_timeout(struct csio_hw *hw, u8 range) { uint16_t val; - uint32_t pcie_cap; + int pcie_cap; if (!csio_pci_capability(hw->pdev, PCI_CAP_ID_EXP, &pcie_cap)) { pci_read_config_word(hw->pdev, @@ -1056,84 +868,6 @@ csio_set_pcie_completion_timeout(struct csio_hw *hw, u8 range) } } - -/* - * Return the specified PCI-E Configuration Space register from our Physical - * Function. 
We try first via a Firmware LDST Command since we prefer to let - * the firmware own all of these registers, but if that fails we go for it - * directly ourselves. - */ -static uint32_t -csio_read_pcie_cfg4(struct csio_hw *hw, int reg) -{ - u32 val = 0; - struct csio_mb *mbp; - int rv; - struct fw_ldst_cmd *ldst_cmd; - - mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC); - if (!mbp) { - CSIO_INC_STATS(hw, n_err_nomem); - pci_read_config_dword(hw->pdev, reg, &val); - return val; - } - - csio_mb_ldst(hw, mbp, CSIO_MB_DEFAULT_TMO, reg); - - rv = csio_mb_issue(hw, mbp); - - /* - * If the LDST Command suucceeded, exctract the returned register - * value. Otherwise read it directly ourself. - */ - if (rv == 0) { - ldst_cmd = (struct fw_ldst_cmd *)(mbp->mb); - val = ntohl(ldst_cmd->u.pcie.data[0]); - } else - pci_read_config_dword(hw->pdev, reg, &val); - - mempool_free(mbp, hw->mb_mempool); - - return val; -} /* csio_read_pcie_cfg4 */ - -static int -csio_hw_set_mem_win(struct csio_hw *hw) -{ - u32 bar0; - - /* - * Truncation intentional: we only read the bottom 32-bits of the - * 64-bit BAR0/BAR1 ... We use the hardware backdoor mechanism to - * read BAR0 instead of using pci_resource_start() because we could be - * operating from within a Virtual Machine which is trapping our - * accesses to our Configuration Space and we need to set up the PCI-E - * Memory Window decoders with the actual addresses which will be - * coming across the PCI-E link. - */ - bar0 = csio_read_pcie_cfg4(hw, PCI_BASE_ADDRESS_0); - bar0 &= PCI_BASE_ADDRESS_MEM_MASK; - - /* - * Set up memory window for accessing adapter memory ranges. (Read - * back MA register to ensure that changes propagate before we attempt - * to use the new values.) 
- */ - csio_wr_reg32(hw, (bar0 + MEMWIN0_BASE) | BIR(0) | - WINDOW(ilog2(MEMWIN0_APERTURE) - 10), - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0)); - csio_wr_reg32(hw, (bar0 + MEMWIN1_BASE) | BIR(0) | - WINDOW(ilog2(MEMWIN1_APERTURE) - 10), - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1)); - csio_wr_reg32(hw, (bar0 + MEMWIN2_BASE) | BIR(0) | - WINDOW(ilog2(MEMWIN2_APERTURE) - 10), - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2)); - csio_rd_reg32(hw, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2)); - return 0; -} /* csio_hw_set_mem_win */ - - - /*****************************************************************************/ /* HW State machine assists */ /*****************************************************************************/ @@ -1234,7 +968,9 @@ retry: for (;;) { uint32_t pcie_fw; + spin_unlock_irq(&hw->lock); msleep(50); + spin_lock_irq(&hw->lock); waiting -= 50; /* @@ -2121,9 +1857,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) uint32_t *cfg_data; int value_to_add = 0; - if (request_firmware(&cf, CSIO_CF_FNAME, dev) < 0) { - csio_err(hw, "could not find config file " CSIO_CF_FNAME - ",err: %d\n", ret); + if (request_firmware(&cf, CSIO_CF_FNAME(hw), dev) < 0) { + csio_err(hw, "could not find config file %s, err: %d\n", + CSIO_CF_FNAME(hw), ret); return -ENOENT; } @@ -2147,9 +1883,24 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) ret = csio_memory_write(hw, mtype, maddr, cf->size + value_to_add, cfg_data); + + if ((ret == 0) && (value_to_add != 0)) { + union { + u32 word; + char buf[4]; + } last; + size_t size = cf->size & ~0x3; + int i; + + last.word = cfg_data[size >> 2]; + for (i = value_to_add; i < 4; i++) + last.buf[i] = 0; + ret = csio_memory_write(hw, mtype, maddr + size, 4, &last.word); + } if (ret == 0) { - csio_info(hw, "config file upgraded to " CSIO_CF_FNAME "\n"); - strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64); + csio_info(hw, "config file upgraded to %s\n", + 
CSIO_CF_FNAME(hw)); + snprintf(path, 64, "%s%s", "/lib/firmware/", CSIO_CF_FNAME(hw)); } leave: @@ -2179,7 +1930,7 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) { unsigned int mtype, maddr; int rv; - uint32_t finiver, finicsum, cfcsum; + uint32_t finiver = 0, finicsum = 0, cfcsum = 0; int using_flash; char path[64]; @@ -2207,7 +1958,7 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) * config file from flash. */ mtype = FW_MEMTYPE_CF_FLASH; - maddr = csio_hw_flash_cfg_addr(hw); + maddr = hw->chip_ops->chip_flash_cfg_addr(hw); using_flash = 1; } else { /* @@ -2346,30 +2097,32 @@ csio_hw_flash_fw(struct csio_hw *hw) struct pci_dev *pci_dev = hw->pdev; struct device *dev = &pci_dev->dev ; - if (request_firmware(&fw, CSIO_FW_FNAME, dev) < 0) { - csio_err(hw, "could not find firmware image " CSIO_FW_FNAME - ",err: %d\n", ret); + if (request_firmware(&fw, CSIO_FW_FNAME(hw), dev) < 0) { + csio_err(hw, "could not find firmware image %s, err: %d\n", + CSIO_FW_FNAME(hw), ret); return -EINVAL; } hdr = (const struct fw_hdr *)fw->data; fw_ver = ntohl(hdr->fw_ver); - if (FW_HDR_FW_VER_MAJOR_GET(fw_ver) != FW_VERSION_MAJOR) + if (FW_HDR_FW_VER_MAJOR_GET(fw_ver) != FW_VERSION_MAJOR(hw)) return -EINVAL; /* wrong major version, won't do */ /* * If the flash FW is unusable or we found something newer, load it. */ - if (FW_HDR_FW_VER_MAJOR_GET(hw->fwrev) != FW_VERSION_MAJOR || + if (FW_HDR_FW_VER_MAJOR_GET(hw->fwrev) != FW_VERSION_MAJOR(hw) || fw_ver > hw->fwrev) { ret = csio_hw_fw_upgrade(hw, hw->pfn, fw->data, fw->size, /*force=*/false); if (!ret) - csio_info(hw, "firmware upgraded to version %pI4 from " - CSIO_FW_FNAME "\n", &hdr->fw_ver); + csio_info(hw, + "firmware upgraded to version %pI4 from %s\n", + &hdr->fw_ver, CSIO_FW_FNAME(hw)); else csio_err(hw, "firmware upgrade failed! 
err=%d\n", ret); - } + } else + ret = -EINVAL; release_firmware(fw); @@ -2410,7 +2163,7 @@ csio_hw_configure(struct csio_hw *hw) /* Set pci completion timeout value to 4 seconds. */ csio_set_pcie_completion_timeout(hw, 0xd); - csio_hw_set_mem_win(hw); + hw->chip_ops->chip_set_mem_win(hw, MEMWIN_CSIOSTOR); rv = csio_hw_get_fw_version(hw, &hw->fwrev); if (rv != 0) @@ -2478,6 +2231,8 @@ csio_hw_configure(struct csio_hw *hw) } else { if (hw->fw_state == CSIO_DEV_STATE_INIT) { + hw->flags |= CSIO_HWF_USING_SOFT_PARAMS; + /* device parameters */ rv = csio_get_device_params(hw); if (rv != 0) @@ -2651,7 +2406,7 @@ csio_hw_intr_disable(struct csio_hw *hw) } -static void +void csio_hw_fatal_err(struct csio_hw *hw) { csio_set_reg_field(hw, SGE_CONTROL, GLOBALENABLE, 0); @@ -2990,14 +2745,6 @@ csio_hws_pcierr(struct csio_hw *hw, enum csio_hw_ev evt) /* END: HW SM */ /*****************************************************************************/ -/* Slow path handlers */ -struct intr_info { - unsigned int mask; /* bits to check in interrupt status */ - const char *msg; /* message to print or NULL */ - short stat_idx; /* stat counter to increment or -1 */ - unsigned short fatal; /* whether the condition reported is fatal */ -}; - /* * csio_handle_intr_status - table driven interrupt handler * @hw: HW instance @@ -3011,7 +2758,7 @@ struct intr_info { * by an entry specifying mask 0. Returns the number of fatal interrupt * conditions. */ -static int +int csio_handle_intr_status(struct csio_hw *hw, unsigned int reg, const struct intr_info *acts) { @@ -3038,80 +2785,6 @@ csio_handle_intr_status(struct csio_hw *hw, unsigned int reg, } /* - * Interrupt handler for the PCIE module. 
- */ -static void -csio_pcie_intr_handler(struct csio_hw *hw) -{ - static struct intr_info sysbus_intr_info[] = { - { RNPP, "RXNP array parity error", -1, 1 }, - { RPCP, "RXPC array parity error", -1, 1 }, - { RCIP, "RXCIF array parity error", -1, 1 }, - { RCCP, "Rx completions control array parity error", -1, 1 }, - { RFTP, "RXFT array parity error", -1, 1 }, - { 0, NULL, 0, 0 } - }; - static struct intr_info pcie_port_intr_info[] = { - { TPCP, "TXPC array parity error", -1, 1 }, - { TNPP, "TXNP array parity error", -1, 1 }, - { TFTP, "TXFT array parity error", -1, 1 }, - { TCAP, "TXCA array parity error", -1, 1 }, - { TCIP, "TXCIF array parity error", -1, 1 }, - { RCAP, "RXCA array parity error", -1, 1 }, - { OTDD, "outbound request TLP discarded", -1, 1 }, - { RDPE, "Rx data parity error", -1, 1 }, - { TDUE, "Tx uncorrectable data error", -1, 1 }, - { 0, NULL, 0, 0 } - }; - static struct intr_info pcie_intr_info[] = { - { MSIADDRLPERR, "MSI AddrL parity error", -1, 1 }, - { MSIADDRHPERR, "MSI AddrH parity error", -1, 1 }, - { MSIDATAPERR, "MSI data parity error", -1, 1 }, - { MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, - { MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, - { MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, - { MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, - { PIOCPLPERR, "PCI PIO completion FIFO parity error", -1, 1 }, - { PIOREQPERR, "PCI PIO request FIFO parity error", -1, 1 }, - { TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, - { CCNTPERR, "PCI CMD channel count parity error", -1, 1 }, - { CREQPERR, "PCI CMD channel request parity error", -1, 1 }, - { CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, - { DCNTPERR, "PCI DMA channel count parity error", -1, 1 }, - { DREQPERR, "PCI DMA channel request parity error", -1, 1 }, - { DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, - { HCNTPERR, "PCI HMA channel count parity error", -1, 1 }, - { HREQPERR, "PCI HMA channel request parity error", -1, 1 }, 
- { HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, - { CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, - { FIDPERR, "PCI FID parity error", -1, 1 }, - { INTXCLRPERR, "PCI INTx clear parity error", -1, 1 }, - { MATAGPERR, "PCI MA tag parity error", -1, 1 }, - { PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, - { RXCPLPERR, "PCI Rx completion parity error", -1, 1 }, - { RXWRPERR, "PCI Rx write parity error", -1, 1 }, - { RPLPERR, "PCI replay buffer parity error", -1, 1 }, - { PCIESINT, "PCI core secondary fault", -1, 1 }, - { PCIEPINT, "PCI core primary fault", -1, 1 }, - { UNXSPLCPLERR, "PCI unexpected split completion error", -1, - 0 }, - { 0, NULL, 0, 0 } - }; - - int fat; - - fat = csio_handle_intr_status(hw, - PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, - sysbus_intr_info) + - csio_handle_intr_status(hw, - PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, - pcie_port_intr_info) + - csio_handle_intr_status(hw, PCIE_INT_CAUSE, pcie_intr_info); - if (fat) - csio_hw_fatal_err(hw); -} - -/* * TP interrupt handler. 
*/ static void csio_tp_intr_handler(struct csio_hw *hw) @@ -3517,7 +3190,7 @@ static void csio_ncsi_intr_handler(struct csio_hw *hw) */ static void csio_xgmac_intr_handler(struct csio_hw *hw, int port) { - uint32_t v = csio_rd_reg32(hw, PORT_REG(port, XGMAC_PORT_INT_CAUSE)); + uint32_t v = csio_rd_reg32(hw, CSIO_MAC_INT_CAUSE_REG(hw, port)); v &= TXFIFO_PRTY_ERR | RXFIFO_PRTY_ERR; if (!v) @@ -3527,7 +3200,7 @@ static void csio_xgmac_intr_handler(struct csio_hw *hw, int port) csio_fatal(hw, "XGMAC %d Tx FIFO parity error\n", port); if (v & RXFIFO_PRTY_ERR) csio_fatal(hw, "XGMAC %d Rx FIFO parity error\n", port); - csio_wr_reg32(hw, v, PORT_REG(port, XGMAC_PORT_INT_CAUSE)); + csio_wr_reg32(hw, v, CSIO_MAC_INT_CAUSE_REG(hw, port)); csio_hw_fatal_err(hw); } @@ -3596,7 +3269,7 @@ csio_hw_slow_intr_handler(struct csio_hw *hw) csio_xgmac_intr_handler(hw, 3); if (cause & PCIE) - csio_pcie_intr_handler(hw); + hw->chip_ops->chip_pcie_intr_handler(hw); if (cause & MC) csio_mem_intr_handler(hw, MEM_MC); @@ -4262,6 +3935,7 @@ csio_hw_get_device_id(struct csio_hw *hw) &hw->params.pci.device_id); csio_dev_id_cached(hw); + hw->chip_id = (hw->params.pci.device_id & CSIO_HW_CHIP_MASK); } /* csio_hw_get_device_id */ @@ -4280,19 +3954,21 @@ csio_hw_set_description(struct csio_hw *hw, uint16_t ven_id, uint16_t dev_id) prot_type = (dev_id & CSIO_ASIC_DEVID_PROTO_MASK); adap_type = (dev_id & CSIO_ASIC_DEVID_TYPE_MASK); - if (prot_type == CSIO_FPGA) { + if (prot_type == CSIO_T4_FCOE_ASIC) { + memcpy(hw->hw_ver, + csio_t4_fcoe_adapters[adap_type].model_no, 16); memcpy(hw->model_desc, - csio_fcoe_adapters[13].description, 32); - } else if (prot_type == CSIO_T4_FCOE_ASIC) { + csio_t4_fcoe_adapters[adap_type].description, + 32); + } else if (prot_type == CSIO_T5_FCOE_ASIC) { memcpy(hw->hw_ver, - csio_fcoe_adapters[adap_type].model_no, 16); + csio_t5_fcoe_adapters[adap_type].model_no, 16); memcpy(hw->model_desc, - csio_fcoe_adapters[adap_type].description, 32); + 
csio_t5_fcoe_adapters[adap_type].description, + 32); } else { char tempName[32] = "Chelsio FCoE Controller"; memcpy(hw->model_desc, tempName, 32); - - CSIO_DB_ASSERT(0); } } } /* csio_hw_set_description */ @@ -4321,6 +3997,9 @@ csio_hw_init(struct csio_hw *hw) strcpy(hw->name, CSIO_HW_NAME); + /* Initialize the HW chip ops with T4/T5 specific ops */ + hw->chip_ops = csio_is_t4(hw->chip_id) ? &t4_ops : &t5_ops; + /* Set the model & its description */ ven_id = hw->params.pci.vendor_id; diff --git a/drivers/scsi/csiostor/csio_hw.h b/drivers/scsi/csiostor/csio_hw.h index 9edcca4c71af..489fc095cb03 100644 --- a/drivers/scsi/csiostor/csio_hw.h +++ b/drivers/scsi/csiostor/csio_hw.h @@ -48,6 +48,7 @@ #include <scsi/scsi_device.h> #include <scsi/scsi_transport_fc.h> +#include "csio_hw_chip.h" #include "csio_wr.h" #include "csio_mb.h" #include "csio_scsi.h" @@ -60,13 +61,6 @@ */ #define FW_HOSTERROR 255 -#define CSIO_FW_FNAME "cxgb4/t4fw.bin" -#define CSIO_CF_FNAME "cxgb4/t4-config.txt" - -#define FW_VERSION_MAJOR 1 -#define FW_VERSION_MINOR 2 -#define FW_VERSION_MICRO 8 - #define CSIO_HW_NAME "Chelsio FCoE Adapter" #define CSIO_MAX_PFN 8 #define CSIO_MAX_PPORTS 4 @@ -123,8 +117,6 @@ extern int csio_msi; #define CSIO_VENDOR_ID 0x1425 #define CSIO_ASIC_DEVID_PROTO_MASK 0xFF00 #define CSIO_ASIC_DEVID_TYPE_MASK 0x00FF -#define CSIO_FPGA 0xA000 -#define CSIO_T4_FCOE_ASIC 0x4600 #define CSIO_GLBL_INTR_MASK (CIM | MPS | PL | PCIE | MC | EDC0 | \ EDC1 | LE | TP | MA | PM_TX | PM_RX | \ @@ -207,17 +199,6 @@ enum { SF_SIZE = SF_SEC_SIZE * 16, /* serial flash size */ }; -enum { MEM_EDC0, MEM_EDC1, MEM_MC }; - -enum { - MEMWIN0_APERTURE = 2048, - MEMWIN0_BASE = 0x1b800, - MEMWIN1_APERTURE = 32768, - MEMWIN1_BASE = 0x28000, - MEMWIN2_APERTURE = 65536, - MEMWIN2_BASE = 0x30000, -}; - /* serial flash and firmware constants */ enum { SF_ATTEMPTS = 10, /* max retries for SF operations */ @@ -239,9 +220,6 @@ enum { FLASH_CFG_MAX_SIZE = 0x10000 , /* max size of the flash config file*/ 
FLASH_CFG_OFFSET = 0x1f0000, FLASH_CFG_START_SEC = FLASH_CFG_OFFSET / SF_SEC_SIZE, - FPGA_FLASH_CFG_OFFSET = 0xf0000 , /* if FPGA mode, then cfg file is - * at 1MB - 64KB */ - FPGA_FLASH_CFG_START_SEC = FPGA_FLASH_CFG_OFFSET / SF_SEC_SIZE, }; /* @@ -259,6 +237,8 @@ enum { FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC), FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS), + /* Location of Firmware Configuration File in FLASH. */ + FLASH_CFG_START = FLASH_START(FLASH_CFG_START_SEC), }; #undef FLASH_START @@ -310,7 +290,7 @@ struct csio_adap_desc { struct pci_params { uint16_t vendor_id; uint16_t device_id; - uint32_t vpd_cap_addr; + int vpd_cap_addr; uint16_t speed; uint8_t width; }; @@ -513,6 +493,7 @@ struct csio_hw { uint32_t fwrev; uint32_t tp_vers; char chip_ver; + uint16_t chip_id; /* Tells T4/T5 chip */ uint32_t cfg_finiver; uint32_t cfg_finicsum; uint32_t cfg_cfcsum; @@ -556,6 +537,9 @@ struct csio_hw { */ struct csio_fcoe_res_info fres_info; /* Fcoe resource info */ + struct csio_hw_chip_ops *chip_ops; /* T4/T5 Chip specific + * Operations + */ /* MSIX vectors */ struct csio_msix_entries msix_entries[CSIO_MAX_MSIX_VECS]; @@ -636,9 +620,16 @@ csio_us_to_core_ticks(struct csio_hw *hw, uint32_t us) #define csio_dbg(__hw, __fmt, ...) 
#endif +int csio_hw_wait_op_done_val(struct csio_hw *, int, uint32_t, int, + int, int, uint32_t *); +void csio_hw_tp_wr_bits_indirect(struct csio_hw *, unsigned int, + unsigned int, unsigned int); int csio_mgmt_req_lookup(struct csio_mgmtm *, struct csio_ioreq *); void csio_hw_intr_disable(struct csio_hw *); -int csio_hw_slow_intr_handler(struct csio_hw *hw); +int csio_hw_slow_intr_handler(struct csio_hw *); +int csio_handle_intr_status(struct csio_hw *, unsigned int, + const struct intr_info *); + int csio_hw_start(struct csio_hw *); int csio_hw_stop(struct csio_hw *); int csio_hw_reset(struct csio_hw *); @@ -647,19 +638,17 @@ int csio_is_hw_removing(struct csio_hw *); int csio_fwevtq_handler(struct csio_hw *); void csio_evtq_worker(struct work_struct *); -int csio_enqueue_evt(struct csio_hw *hw, enum csio_evt type, - void *evt_msg, uint16_t len); +int csio_enqueue_evt(struct csio_hw *, enum csio_evt, void *, uint16_t); void csio_evtq_flush(struct csio_hw *hw); int csio_request_irqs(struct csio_hw *); void csio_intr_enable(struct csio_hw *); void csio_intr_disable(struct csio_hw *, bool); +void csio_hw_fatal_err(struct csio_hw *); struct csio_lnode *csio_lnode_alloc(struct csio_hw *); int csio_config_queues(struct csio_hw *); -int csio_hw_mc_read(struct csio_hw *, uint32_t, __be32 *, uint64_t *); -int csio_hw_edc_read(struct csio_hw *, int, uint32_t, __be32 *, uint64_t *); int csio_hw_init(struct csio_hw *); void csio_hw_exit(struct csio_hw *); #endif /* ifndef __CSIO_HW_H__ */ diff --git a/drivers/scsi/csiostor/csio_hw_chip.h b/drivers/scsi/csiostor/csio_hw_chip.h new file mode 100644 index 000000000000..bca0de61ae80 --- /dev/null +++ b/drivers/scsi/csiostor/csio_hw_chip.h @@ -0,0 +1,175 @@ +/* + * This file is part of the Chelsio FCoE driver for Linux. + * + * Copyright (c) 2008-2013 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __CSIO_HW_CHIP_H__ +#define __CSIO_HW_CHIP_H__ + +#include "csio_defs.h" + +/* FCoE device IDs for T4 */ +#define CSIO_DEVID_T440DBG_FCOE 0x4600 +#define CSIO_DEVID_T420CR_FCOE 0x4601 +#define CSIO_DEVID_T422CR_FCOE 0x4602 +#define CSIO_DEVID_T440CR_FCOE 0x4603 +#define CSIO_DEVID_T420BCH_FCOE 0x4604 +#define CSIO_DEVID_T440BCH_FCOE 0x4605 +#define CSIO_DEVID_T440CH_FCOE 0x4606 +#define CSIO_DEVID_T420SO_FCOE 0x4607 +#define CSIO_DEVID_T420CX_FCOE 0x4608 +#define CSIO_DEVID_T420BT_FCOE 0x4609 +#define CSIO_DEVID_T404BT_FCOE 0x460A +#define CSIO_DEVID_B420_FCOE 0x460B +#define CSIO_DEVID_B404_FCOE 0x460C +#define CSIO_DEVID_T480CR_FCOE 0x460D +#define CSIO_DEVID_T440LPCR_FCOE 0x460E +#define CSIO_DEVID_AMSTERDAM_T4_FCOE 0x460F +#define CSIO_DEVID_HUAWEI_T480_FCOE 0x4680 +#define CSIO_DEVID_HUAWEI_T440_FCOE 0x4681 +#define CSIO_DEVID_HUAWEI_STG310_FCOE 0x4682 +#define CSIO_DEVID_ACROMAG_XMC_XAUI 0x4683 +#define CSIO_DEVID_ACROMAG_XMC_SFP_FCOE 0x4684 +#define CSIO_DEVID_QUANTA_MEZZ_SFP_FCOE 0x4685 +#define CSIO_DEVID_HUAWEI_10GT_FCOE 0x4686 +#define CSIO_DEVID_HUAWEI_T440_TOE_FCOE 0x4687 + +/* FCoE device IDs for T5 */ +#define CSIO_DEVID_T580DBG_FCOE 0x5600 +#define CSIO_DEVID_T520CR_FCOE 0x5601 +#define CSIO_DEVID_T522CR_FCOE 0x5602 +#define CSIO_DEVID_T540CR_FCOE 0x5603 +#define CSIO_DEVID_T520BCH_FCOE 0x5604 +#define CSIO_DEVID_T540BCH_FCOE 0x5605 +#define CSIO_DEVID_T540CH_FCOE 0x5606 +#define CSIO_DEVID_T520SO_FCOE 0x5607 +#define CSIO_DEVID_T520CX_FCOE 0x5608 +#define CSIO_DEVID_T520BT_FCOE 0x5609 +#define CSIO_DEVID_T504BT_FCOE 0x560A +#define CSIO_DEVID_B520_FCOE 0x560B +#define CSIO_DEVID_B504_FCOE 0x560C +#define CSIO_DEVID_T580CR2_FCOE 0x560D +#define CSIO_DEVID_T540LPCR_FCOE 0x560E +#define CSIO_DEVID_AMSTERDAM_T5_FCOE 0x560F +#define CSIO_DEVID_T580LPCR_FCOE 0x5610 +#define CSIO_DEVID_T520LLCR_FCOE 0x5611 +#define CSIO_DEVID_T560CR_FCOE 0x5612 +#define CSIO_DEVID_T580CR_FCOE 0x5613 + +/* Define MACRO values */ +#define CSIO_HW_T4 0x4000 
+#define CSIO_T4_FCOE_ASIC 0x4600 +#define CSIO_HW_T5 0x5000 +#define CSIO_T5_FCOE_ASIC 0x5600 +#define CSIO_HW_CHIP_MASK 0xF000 +#define T4_REGMAP_SIZE (160 * 1024) +#define T5_REGMAP_SIZE (332 * 1024) +#define FW_FNAME_T4 "cxgb4/t4fw.bin" +#define FW_FNAME_T5 "cxgb4/t5fw.bin" +#define FW_CFG_NAME_T4 "cxgb4/t4-config.txt" +#define FW_CFG_NAME_T5 "cxgb4/t5-config.txt" + +/* Define static functions */ +static inline int csio_is_t4(uint16_t chip) +{ + return (chip == CSIO_HW_T4); +} + +static inline int csio_is_t5(uint16_t chip) +{ + return (chip == CSIO_HW_T5); +} + +/* Define MACRO DEFINITIONS */ +#define CSIO_DEVICE(devid, idx) \ + { PCI_VENDOR_ID_CHELSIO, (devid), PCI_ANY_ID, PCI_ANY_ID, 0, 0, (idx) } + +#define CSIO_HW_PIDX(hw, index) \ + (csio_is_t4(hw->chip_id) ? (PIDX(index)) : \ + (PIDX_T5(index) | DBTYPE(1U))) + +#define CSIO_HW_LP_INT_THRESH(hw, val) \ + (csio_is_t4(hw->chip_id) ? (LP_INT_THRESH(val)) : \ + (V_LP_INT_THRESH_T5(val))) + +#define CSIO_HW_M_LP_INT_THRESH(hw) \ + (csio_is_t4(hw->chip_id) ? (LP_INT_THRESH_MASK) : (M_LP_INT_THRESH_T5)) + +#define CSIO_MAC_INT_CAUSE_REG(hw, port) \ + (csio_is_t4(hw->chip_id) ? (PORT_REG(port, XGMAC_PORT_INT_CAUSE)) : \ + (T5_PORT_REG(port, MAC_PORT_INT_CAUSE))) + +#define FW_VERSION_MAJOR(hw) (csio_is_t4(hw->chip_id) ? 1 : 0) +#define FW_VERSION_MINOR(hw) (csio_is_t4(hw->chip_id) ? 2 : 0) +#define FW_VERSION_MICRO(hw) (csio_is_t4(hw->chip_id) ? 8 : 0) + +#define CSIO_FW_FNAME(hw) \ + (csio_is_t4(hw->chip_id) ? FW_FNAME_T4 : FW_FNAME_T5) + +#define CSIO_CF_FNAME(hw) \ + (csio_is_t4(hw->chip_id) ? 
FW_CFG_NAME_T4 : FW_CFG_NAME_T5) + +/* Declare ENUMS */ +enum { MEM_EDC0, MEM_EDC1, MEM_MC, MEM_MC0 = MEM_MC, MEM_MC1 }; + +enum { + MEMWIN_APERTURE = 2048, + MEMWIN_BASE = 0x1b800, + MEMWIN_CSIOSTOR = 6, /* PCI-e Memory Window access */ +}; + +/* Slow path handlers */ +struct intr_info { + unsigned int mask; /* bits to check in interrupt status */ + const char *msg; /* message to print or NULL */ + short stat_idx; /* stat counter to increment or -1 */ + unsigned short fatal; /* whether the condition reported is fatal */ +}; + +/* T4/T5 Chip specific ops */ +struct csio_hw; +struct csio_hw_chip_ops { + int (*chip_set_mem_win)(struct csio_hw *, uint32_t); + void (*chip_pcie_intr_handler)(struct csio_hw *); + uint32_t (*chip_flash_cfg_addr)(struct csio_hw *); + int (*chip_mc_read)(struct csio_hw *, int, uint32_t, + __be32 *, uint64_t *); + int (*chip_edc_read)(struct csio_hw *, int, uint32_t, + __be32 *, uint64_t *); + int (*chip_memory_rw)(struct csio_hw *, u32, int, u32, + u32, uint32_t *, int); + void (*chip_dfs_create_ext_mem)(struct csio_hw *); +}; + +extern struct csio_hw_chip_ops t4_ops; +extern struct csio_hw_chip_ops t5_ops; + +#endif /* #ifndef __CSIO_HW_CHIP_H__ */ diff --git a/drivers/scsi/csiostor/csio_hw_t4.c b/drivers/scsi/csiostor/csio_hw_t4.c new file mode 100644 index 000000000000..89ecbac5478f --- /dev/null +++ b/drivers/scsi/csiostor/csio_hw_t4.c @@ -0,0 +1,403 @@ +/* + * This file is part of the Chelsio FCoE driver for Linux. + * + * Copyright (c) 2008-2013 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "csio_hw.h" +#include "csio_init.h" + +/* + * Return the specified PCI-E Configuration Space register from our Physical + * Function. We try first via a Firmware LDST Command since we prefer to let + * the firmware own all of these registers, but if that fails we go for it + * directly ourselves. + */ +static uint32_t +csio_t4_read_pcie_cfg4(struct csio_hw *hw, int reg) +{ + u32 val = 0; + struct csio_mb *mbp; + int rv; + struct fw_ldst_cmd *ldst_cmd; + + mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC); + if (!mbp) { + CSIO_INC_STATS(hw, n_err_nomem); + pci_read_config_dword(hw->pdev, reg, &val); + return val; + } + + csio_mb_ldst(hw, mbp, CSIO_MB_DEFAULT_TMO, reg); + rv = csio_mb_issue(hw, mbp); + + /* + * If the LDST Command suucceeded, exctract the returned register + * value. 
Otherwise read it directly ourself. + */ + if (rv == 0) { + ldst_cmd = (struct fw_ldst_cmd *)(mbp->mb); + val = ntohl(ldst_cmd->u.pcie.data[0]); + } else + pci_read_config_dword(hw->pdev, reg, &val); + + mempool_free(mbp, hw->mb_mempool); + + return val; +} + +static int +csio_t4_set_mem_win(struct csio_hw *hw, uint32_t win) +{ + u32 bar0; + u32 mem_win_base; + + /* + * Truncation intentional: we only read the bottom 32-bits of the + * 64-bit BAR0/BAR1 ... We use the hardware backdoor mechanism to + * read BAR0 instead of using pci_resource_start() because we could be + * operating from within a Virtual Machine which is trapping our + * accesses to our Configuration Space and we need to set up the PCI-E + * Memory Window decoders with the actual addresses which will be + * coming across the PCI-E link. + */ + bar0 = csio_t4_read_pcie_cfg4(hw, PCI_BASE_ADDRESS_0); + bar0 &= PCI_BASE_ADDRESS_MEM_MASK; + + mem_win_base = bar0 + MEMWIN_BASE; + + /* + * Set up memory window for accessing adapter memory ranges. (Read + * back MA register to ensure that changes propagate before we attempt + * to use the new values.) + */ + csio_wr_reg32(hw, mem_win_base | BIR(0) | + WINDOW(ilog2(MEMWIN_APERTURE) - 10), + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, win)); + csio_rd_reg32(hw, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, win)); + return 0; +} + +/* + * Interrupt handler for the PCIE module. 
+ */ +static void +csio_t4_pcie_intr_handler(struct csio_hw *hw) +{ + static struct intr_info sysbus_intr_info[] = { + { RNPP, "RXNP array parity error", -1, 1 }, + { RPCP, "RXPC array parity error", -1, 1 }, + { RCIP, "RXCIF array parity error", -1, 1 }, + { RCCP, "Rx completions control array parity error", -1, 1 }, + { RFTP, "RXFT array parity error", -1, 1 }, + { 0, NULL, 0, 0 } + }; + static struct intr_info pcie_port_intr_info[] = { + { TPCP, "TXPC array parity error", -1, 1 }, + { TNPP, "TXNP array parity error", -1, 1 }, + { TFTP, "TXFT array parity error", -1, 1 }, + { TCAP, "TXCA array parity error", -1, 1 }, + { TCIP, "TXCIF array parity error", -1, 1 }, + { RCAP, "RXCA array parity error", -1, 1 }, + { OTDD, "outbound request TLP discarded", -1, 1 }, + { RDPE, "Rx data parity error", -1, 1 }, + { TDUE, "Tx uncorrectable data error", -1, 1 }, + { 0, NULL, 0, 0 } + }; + + static struct intr_info pcie_intr_info[] = { + { MSIADDRLPERR, "MSI AddrL parity error", -1, 1 }, + { MSIADDRHPERR, "MSI AddrH parity error", -1, 1 }, + { MSIDATAPERR, "MSI data parity error", -1, 1 }, + { MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, + { MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, + { MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, + { MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, + { PIOCPLPERR, "PCI PIO completion FIFO parity error", -1, 1 }, + { PIOREQPERR, "PCI PIO request FIFO parity error", -1, 1 }, + { TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, + { CCNTPERR, "PCI CMD channel count parity error", -1, 1 }, + { CREQPERR, "PCI CMD channel request parity error", -1, 1 }, + { CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, + { DCNTPERR, "PCI DMA channel count parity error", -1, 1 }, + { DREQPERR, "PCI DMA channel request parity error", -1, 1 }, + { DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, + { HCNTPERR, "PCI HMA channel count parity error", -1, 1 }, + { HREQPERR, "PCI HMA channel request parity error", -1, 
1 }, + { HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, + { CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, + { FIDPERR, "PCI FID parity error", -1, 1 }, + { INTXCLRPERR, "PCI INTx clear parity error", -1, 1 }, + { MATAGPERR, "PCI MA tag parity error", -1, 1 }, + { PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, + { RXCPLPERR, "PCI Rx completion parity error", -1, 1 }, + { RXWRPERR, "PCI Rx write parity error", -1, 1 }, + { RPLPERR, "PCI replay buffer parity error", -1, 1 }, + { PCIESINT, "PCI core secondary fault", -1, 1 }, + { PCIEPINT, "PCI core primary fault", -1, 1 }, + { UNXSPLCPLERR, "PCI unexpected split completion error", -1, + 0 }, + { 0, NULL, 0, 0 } + }; + + int fat; + fat = csio_handle_intr_status(hw, + PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, + sysbus_intr_info) + + csio_handle_intr_status(hw, + PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, + pcie_port_intr_info) + + csio_handle_intr_status(hw, PCIE_INT_CAUSE, pcie_intr_info); + if (fat) + csio_hw_fatal_err(hw); +} + +/* + * csio_t4_flash_cfg_addr - return the address of the flash configuration file + * @hw: the HW module + * + * Return the address within the flash where the Firmware Configuration + * File is stored. + */ +static unsigned int +csio_t4_flash_cfg_addr(struct csio_hw *hw) +{ + return FLASH_CFG_OFFSET; +} + +/* + * csio_t4_mc_read - read from MC through backdoor accesses + * @hw: the hw module + * @idx: not used for T4 adapter + * @addr: address of first byte requested + * @data: 64 bytes of data containing the requested address + * @ecc: where to store the corresponding 64-bit ECC word + * + * Read 64 bytes of data from MC starting at a 64-byte-aligned address + * that covers the requested address @addr. If @ecc is not %NULL it + * is assigned the 64-bit ECC word for the read data.
+ */ +static int +csio_t4_mc_read(struct csio_hw *hw, int idx, uint32_t addr, __be32 *data, + uint64_t *ecc) +{ + int i; + + if (csio_rd_reg32(hw, MC_BIST_CMD) & START_BIST) + return -EBUSY; + csio_wr_reg32(hw, addr & ~0x3fU, MC_BIST_CMD_ADDR); + csio_wr_reg32(hw, 64, MC_BIST_CMD_LEN); + csio_wr_reg32(hw, 0xc, MC_BIST_DATA_PATTERN); + csio_wr_reg32(hw, BIST_OPCODE(1) | START_BIST | BIST_CMD_GAP(1), + MC_BIST_CMD); + i = csio_hw_wait_op_done_val(hw, MC_BIST_CMD, START_BIST, + 0, 10, 1, NULL); + if (i) + return i; + +#define MC_DATA(i) MC_BIST_STATUS_REG(MC_BIST_STATUS_RDATA, i) + + for (i = 15; i >= 0; i--) + *data++ = htonl(csio_rd_reg32(hw, MC_DATA(i))); + if (ecc) + *ecc = csio_rd_reg64(hw, MC_DATA(16)); +#undef MC_DATA + return 0; +} + +/* + * csio_t4_edc_read - read from EDC through backdoor accesses + * @hw: the hw module + * @idx: which EDC to access + * @addr: address of first byte requested + * @data: 64 bytes of data containing the requested address + * @ecc: where to store the corresponding 64-bit ECC word + * + * Read 64 bytes of data from EDC starting at a 64-byte-aligned address + * that covers the requested address @addr. If @ecc is not %NULL it + * is assigned the 64-bit ECC word for the read data.
+ */ +static int +csio_t4_edc_read(struct csio_hw *hw, int idx, uint32_t addr, __be32 *data, + uint64_t *ecc) +{ + int i; + + idx *= EDC_STRIDE; + if (csio_rd_reg32(hw, EDC_BIST_CMD + idx) & START_BIST) + return -EBUSY; + csio_wr_reg32(hw, addr & ~0x3fU, EDC_BIST_CMD_ADDR + idx); + csio_wr_reg32(hw, 64, EDC_BIST_CMD_LEN + idx); + csio_wr_reg32(hw, 0xc, EDC_BIST_DATA_PATTERN + idx); + csio_wr_reg32(hw, BIST_OPCODE(1) | BIST_CMD_GAP(1) | START_BIST, + EDC_BIST_CMD + idx); + i = csio_hw_wait_op_done_val(hw, EDC_BIST_CMD + idx, START_BIST, + 0, 10, 1, NULL); + if (i) + return i; + +#define EDC_DATA(i) (EDC_BIST_STATUS_REG(EDC_BIST_STATUS_RDATA, i) + idx) + + for (i = 15; i >= 0; i--) + *data++ = htonl(csio_rd_reg32(hw, EDC_DATA(i))); + if (ecc) + *ecc = csio_rd_reg64(hw, EDC_DATA(16)); +#undef EDC_DATA + return 0; +} + +/* + * csio_t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window + * @hw: the csio_hw + * @win: PCI-E memory Window to use + * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_MC0 (or MEM_MC) or MEM_MC1 + * @addr: address within indicated memory type + * @len: amount of memory to transfer + * @buf: host memory buffer + * @dir: direction of transfer 1 => read, 0 => write + * + * Reads/writes an [almost] arbitrary memory region in the firmware: the + * firmware memory address, length and host buffer must be aligned on + * 32-bit boundaries. The memory is transferred as a raw byte sequence + * from/to the firmware's memory. If this memory contains data + * structures which contain multi-byte integers, it's the caller's + * responsibility to perform appropriate byte order conversions. + */ +static int +csio_t4_memory_rw(struct csio_hw *hw, u32 win, int mtype, u32 addr, + u32 len, uint32_t *buf, int dir) +{ + u32 pos, start, offset, memoffset, bar0; + u32 edc_size, mc_size, mem_reg, mem_aperture, mem_base; + + /* + * Argument sanity checks ...
+ */ + if ((addr & 0x3) || (len & 0x3)) + return -EINVAL; + + /* Offset into the region of memory which is being accessed + * MEM_EDC0 = 0 + * MEM_EDC1 = 1 + * MEM_MC = 2 -- T4 + */ + edc_size = EDRAM_SIZE_GET(csio_rd_reg32(hw, MA_EDRAM0_BAR)); + if (mtype != MEM_MC1) + memoffset = (mtype * (edc_size * 1024 * 1024)); + else { + mc_size = EXT_MEM_SIZE_GET(csio_rd_reg32(hw, + MA_EXT_MEMORY_BAR)); + memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; + } + + /* Determine the PCIE_MEM_ACCESS_OFFSET */ + addr = addr + memoffset; + + /* + * Each PCI-E Memory Window is programmed with a window size -- or + * "aperture" -- which controls the granularity of its mapping onto + * adapter memory. We need to grab that aperture in order to know + * how to use the specified window. The window is also programmed + * with the base address of the Memory Window in BAR0's address + * space. For T4 this is an absolute PCI-E Bus Address. For T5 + * the address is relative to BAR0. + */ + mem_reg = csio_rd_reg32(hw, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, win)); + mem_aperture = 1 << (WINDOW(mem_reg) + 10); + mem_base = GET_PCIEOFST(mem_reg) << 10; + + bar0 = csio_t4_read_pcie_cfg4(hw, PCI_BASE_ADDRESS_0); + bar0 &= PCI_BASE_ADDRESS_MEM_MASK; + mem_base -= bar0; + + start = addr & ~(mem_aperture-1); + offset = addr - start; + + csio_dbg(hw, "csio_t4_memory_rw: mem_reg: 0x%x, mem_aperture: 0x%x\n", + mem_reg, mem_aperture); + csio_dbg(hw, "csio_t4_memory_rw: mem_base: 0x%x, mem_offset: 0x%x\n", + mem_base, memoffset); + csio_dbg(hw, "csio_t4_memory_rw: bar0: 0x%x, start:0x%x, offset:0x%x\n", + bar0, start, offset); + csio_dbg(hw, "csio_t4_memory_rw: mtype: %d, addr: 0x%x, len: %d\n", + mtype, addr, len); + + for (pos = start; len > 0; pos += mem_aperture, offset = 0) { + /* + * Move PCI-E Memory Window to our current transfer + * position. Read it back to ensure that changes propagate + * before we attempt to use the new value. 
+ */ + csio_wr_reg32(hw, pos, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, win)); + csio_rd_reg32(hw, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, win)); + + while (offset < mem_aperture && len > 0) { + if (dir) + *buf++ = csio_rd_reg32(hw, mem_base + offset); + else + csio_wr_reg32(hw, *buf++, mem_base + offset); + + offset += sizeof(__be32); + len -= sizeof(__be32); + } + } + return 0; +} + +/* + * csio_t4_dfs_create_ext_mem - setup debugfs for MC to read the values + * @hw: the csio_hw + * + * This function creates files in the debugfs with external memory region MC. + */ +static void +csio_t4_dfs_create_ext_mem(struct csio_hw *hw) +{ + u32 size; + int i = csio_rd_reg32(hw, MA_TARGET_MEM_ENABLE); + if (i & EXT_MEM_ENABLE) { + size = csio_rd_reg32(hw, MA_EXT_MEMORY_BAR); + csio_add_debugfs_mem(hw, "mc", MEM_MC, + EXT_MEM_SIZE_GET(size)); + } +} + +/* T4 adapter specific function */ +struct csio_hw_chip_ops t4_ops = { + .chip_set_mem_win = csio_t4_set_mem_win, + .chip_pcie_intr_handler = csio_t4_pcie_intr_handler, + .chip_flash_cfg_addr = csio_t4_flash_cfg_addr, + .chip_mc_read = csio_t4_mc_read, + .chip_edc_read = csio_t4_edc_read, + .chip_memory_rw = csio_t4_memory_rw, + .chip_dfs_create_ext_mem = csio_t4_dfs_create_ext_mem, +}; diff --git a/drivers/scsi/csiostor/csio_hw_t5.c b/drivers/scsi/csiostor/csio_hw_t5.c new file mode 100644 index 000000000000..27745c170c24 --- /dev/null +++ b/drivers/scsi/csiostor/csio_hw_t5.c @@ -0,0 +1,397 @@ +/* + * This file is part of the Chelsio FCoE driver for Linux. + * + * Copyright (c) 2008-2013 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "csio_hw.h" +#include "csio_init.h" + +static int +csio_t5_set_mem_win(struct csio_hw *hw, uint32_t win) +{ + u32 mem_win_base; + /* + * Truncation intentional: we only read the bottom 32-bits of the + * 64-bit BAR0/BAR1 ... We use the hardware backdoor mechanism to + * read BAR0 instead of using pci_resource_start() because we could be + * operating from within a Virtual Machine which is trapping our + * accesses to our Configuration Space and we need to set up the PCI-E + * Memory Window decoders with the actual addresses which will be + * coming across the PCI-E link. + */ + + /* For T5, only relative offset inside the PCIe BAR is passed */ + mem_win_base = MEMWIN_BASE; + + /* + * Set up memory window for accessing adapter memory ranges. 
(Read + * back MA register to ensure that changes propagate before we attempt + * to use the new values.) + */ + csio_wr_reg32(hw, mem_win_base | BIR(0) | + WINDOW(ilog2(MEMWIN_APERTURE) - 10), + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, win)); + csio_rd_reg32(hw, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, win)); + + return 0; +} + +/* + * Interrupt handler for the PCIE module. + */ +static void +csio_t5_pcie_intr_handler(struct csio_hw *hw) +{ + static struct intr_info sysbus_intr_info[] = { + { RNPP, "RXNP array parity error", -1, 1 }, + { RPCP, "RXPC array parity error", -1, 1 }, + { RCIP, "RXCIF array parity error", -1, 1 }, + { RCCP, "Rx completions control array parity error", -1, 1 }, + { RFTP, "RXFT array parity error", -1, 1 }, + { 0, NULL, 0, 0 } + }; + static struct intr_info pcie_port_intr_info[] = { + { TPCP, "TXPC array parity error", -1, 1 }, + { TNPP, "TXNP array parity error", -1, 1 }, + { TFTP, "TXFT array parity error", -1, 1 }, + { TCAP, "TXCA array parity error", -1, 1 }, + { TCIP, "TXCIF array parity error", -1, 1 }, + { RCAP, "RXCA array parity error", -1, 1 }, + { OTDD, "outbound request TLP discarded", -1, 1 }, + { RDPE, "Rx data parity error", -1, 1 }, + { TDUE, "Tx uncorrectable data error", -1, 1 }, + { 0, NULL, 0, 0 } + }; + + static struct intr_info pcie_intr_info[] = { + { MSTGRPPERR, "Master Response Read Queue parity error", + -1, 1 }, + { MSTTIMEOUTPERR, "Master Timeout FIFO parity error", -1, 1 }, + { MSIXSTIPERR, "MSI-X STI SRAM parity error", -1, 1 }, + { MSIXADDRLPERR, "MSI-X AddrL parity error", -1, 1 }, + { MSIXADDRHPERR, "MSI-X AddrH parity error", -1, 1 }, + { MSIXDATAPERR, "MSI-X data parity error", -1, 1 }, + { MSIXDIPERR, "MSI-X DI parity error", -1, 1 }, + { PIOCPLGRPPERR, "PCI PIO completion Group FIFO parity error", + -1, 1 }, + { PIOREQGRPPERR, "PCI PIO request Group FIFO parity error", + -1, 1 }, + { TARTAGPERR, "PCI PCI target tag FIFO parity error", -1, 1 }, + { MSTTAGQPERR, "PCI master tag queue parity 
error", -1, 1 }, + { CREQPERR, "PCI CMD channel request parity error", -1, 1 }, + { CRSPPERR, "PCI CMD channel response parity error", -1, 1 }, + { DREQWRPERR, "PCI DMA channel write request parity error", + -1, 1 }, + { DREQPERR, "PCI DMA channel request parity error", -1, 1 }, + { DRSPPERR, "PCI DMA channel response parity error", -1, 1 }, + { HREQWRPERR, "PCI HMA channel count parity error", -1, 1 }, + { HREQPERR, "PCI HMA channel request parity error", -1, 1 }, + { HRSPPERR, "PCI HMA channel response parity error", -1, 1 }, + { CFGSNPPERR, "PCI config snoop FIFO parity error", -1, 1 }, + { FIDPERR, "PCI FID parity error", -1, 1 }, + { VFIDPERR, "PCI INTx clear parity error", -1, 1 }, + { MAGRPPERR, "PCI MA group FIFO parity error", -1, 1 }, + { PIOTAGPERR, "PCI PIO tag parity error", -1, 1 }, + { IPRXHDRGRPPERR, "PCI IP Rx header group parity error", + -1, 1 }, + { IPRXDATAGRPPERR, "PCI IP Rx data group parity error", + -1, 1 }, + { RPLPERR, "PCI IP replay buffer parity error", -1, 1 }, + { IPSOTPERR, "PCI IP SOT buffer parity error", -1, 1 }, + { TRGT1GRPPERR, "PCI TRGT1 group FIFOs parity error", -1, 1 }, + { READRSPERR, "Outbound read error", -1, 0 }, + { 0, NULL, 0, 0 } + }; + + int fat; + fat = csio_handle_intr_status(hw, + PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS, + sysbus_intr_info) + + csio_handle_intr_status(hw, + PCIE_CORE_UTL_PCI_EXPRESS_PORT_STATUS, + pcie_port_intr_info) + + csio_handle_intr_status(hw, PCIE_INT_CAUSE, pcie_intr_info); + if (fat) + csio_hw_fatal_err(hw); +} + +/* + * csio_t5_flash_cfg_addr - return the address of the flash configuration file + * @hw: the HW module + * + * Return the address within the flash where the Firmware Configuration + * File is stored. 
+ */
+static unsigned int
+csio_t5_flash_cfg_addr(struct csio_hw *hw)
+{
+	return FLASH_CFG_START;
+}
+
+/*
+ * csio_t5_mc_read - read from MC through backdoor accesses
+ * @hw: the hw module
+ * @idx: index passed to MC_REG() to select the MC register instance
+ * @addr: address of first byte requested
+ * @data: 64 bytes of data containing the requested address
+ * @ecc: where to store the corresponding 64-bit ECC word
+ *
+ * Read 64 bytes of data from MC starting at a 64-byte-aligned address
+ * that covers the requested address @addr. If @ecc is not %NULL it
+ * is assigned the 64-bit ECC word for the read data.
+ *
+ * Returns 0 on success, -EBUSY if a BIST command is already pending on
+ * the selected instance, or the non-zero result of
+ * csio_hw_wait_op_done_val() if waiting for START_BIST to clear fails.
+ */
+static int
+csio_t5_mc_read(struct csio_hw *hw, int idx, uint32_t addr, __be32 *data,
+		uint64_t *ecc)
+{
+	int i;
+	uint32_t mc_bist_cmd_reg, mc_bist_cmd_addr_reg, mc_bist_cmd_len_reg;
+	uint32_t mc_bist_status_rdata_reg, mc_bist_data_pattern_reg;
+
+	mc_bist_cmd_reg = MC_REG(MC_P_BIST_CMD, idx);
+	mc_bist_cmd_addr_reg = MC_REG(MC_P_BIST_CMD_ADDR, idx);
+	mc_bist_cmd_len_reg = MC_REG(MC_P_BIST_CMD_LEN, idx);
+	mc_bist_status_rdata_reg = MC_REG(MC_P_BIST_STATUS_RDATA, idx);
+	mc_bist_data_pattern_reg = MC_REG(MC_P_BIST_DATA_PATTERN, idx);
+
+	if (csio_rd_reg32(hw, mc_bist_cmd_reg) & START_BIST)
+		return -EBUSY;
+	/* Round the request down to the enclosing 64-byte boundary. */
+	csio_wr_reg32(hw, addr & ~0x3fU, mc_bist_cmd_addr_reg);
+	csio_wr_reg32(hw, 64, mc_bist_cmd_len_reg);
+	csio_wr_reg32(hw, 0xc, mc_bist_data_pattern_reg);
+	csio_wr_reg32(hw, BIST_OPCODE(1) | START_BIST | BIST_CMD_GAP(1),
+		      mc_bist_cmd_reg);
+	i = csio_hw_wait_op_done_val(hw, mc_bist_cmd_reg, START_BIST,
+				     0, 10, 1, NULL);
+	if (i)
+		return i;
+
+/*
+ * Read the result back through the status register computed for @idx
+ * above, so the read-back targets the same register instance that the
+ * command was issued to.
+ */
+#define MC_DATA(i) MC_BIST_STATUS_REG(mc_bist_status_rdata_reg, i)
+
+	/* Data words are stored from status index 15 down to 0. */
+	for (i = 15; i >= 0; i--)
+		*data++ = htonl(csio_rd_reg32(hw, MC_DATA(i)));
+	if (ecc)
+		*ecc = csio_rd_reg64(hw, MC_DATA(16));
+#undef MC_DATA
+	return 0;
+}
+
+/*
+ * csio_t5_edc_read - read from EDC through backdoor accesses
+ * @hw: the hw module
+ * @idx: which EDC to access
+ * @addr: address of first byte requested
+ * @data: 64 bytes of data containing the
requested address
+ * @ecc: where to store the corresponding 64-bit ECC word
+ *
+ * Read 64 bytes of data from EDC starting at a 64-byte-aligned address
+ * that covers the requested address @addr. If @ecc is not %NULL it
+ * is assigned the 64-bit ECC word for the read data.
+ *
+ * Returns 0 on success, -EBUSY if a BIST command is already pending on
+ * the selected EDC, or the non-zero result of
+ * csio_hw_wait_op_done_val() if waiting for START_BIST to clear fails.
+ */
+static int
+csio_t5_edc_read(struct csio_hw *hw, int idx, uint32_t addr, __be32 *data,
+		uint64_t *ecc)
+{
+	int i;
+	uint32_t edc_bist_cmd_reg, edc_bist_cmd_addr_reg, edc_bist_cmd_len_reg;
+	uint32_t edc_bist_cmd_data_pattern, edc_bist_status_rdata_reg;
+
+/*
+ * These macros are missing from the t4_regs.h file.
+ */
+#define EDC_STRIDE_T5 (EDC_T51_BASE_ADDR - EDC_T50_BASE_ADDR)
+#define EDC_REG_T5(reg, idx) ((reg) + EDC_STRIDE_T5 * (idx))
+
+	edc_bist_cmd_reg = EDC_REG_T5(EDC_H_BIST_CMD, idx);
+	edc_bist_cmd_addr_reg = EDC_REG_T5(EDC_H_BIST_CMD_ADDR, idx);
+	edc_bist_cmd_len_reg = EDC_REG_T5(EDC_H_BIST_CMD_LEN, idx);
+	edc_bist_cmd_data_pattern = EDC_REG_T5(EDC_H_BIST_DATA_PATTERN, idx);
+	edc_bist_status_rdata_reg = EDC_REG_T5(EDC_H_BIST_STATUS_RDATA, idx);
+#undef EDC_REG_T5
+#undef EDC_STRIDE_T5
+
+	if (csio_rd_reg32(hw, edc_bist_cmd_reg) & START_BIST)
+		return -EBUSY;
+	/* Round the request down to the enclosing 64-byte boundary. */
+	csio_wr_reg32(hw, addr & ~0x3fU, edc_bist_cmd_addr_reg);
+	csio_wr_reg32(hw, 64, edc_bist_cmd_len_reg);
+	csio_wr_reg32(hw, 0xc, edc_bist_cmd_data_pattern);
+	csio_wr_reg32(hw, BIST_OPCODE(1) | START_BIST | BIST_CMD_GAP(1),
+		      edc_bist_cmd_reg);
+	i = csio_hw_wait_op_done_val(hw, edc_bist_cmd_reg, START_BIST,
+				     0, 10, 1, NULL);
+	if (i)
+		return i;
+
+/*
+ * Read the result back through the status register computed for @idx
+ * above (which already includes the per-EDC stride), instead of adding
+ * the raw index to a fixed register address.
+ */
+#define EDC_DATA(i) EDC_BIST_STATUS_REG(edc_bist_status_rdata_reg, i)
+
+	/* Data words are stored from status index 15 down to 0. */
+	for (i = 15; i >= 0; i--)
+		*data++ = htonl(csio_rd_reg32(hw, EDC_DATA(i)));
+	if (ecc)
+		*ecc = csio_rd_reg64(hw, EDC_DATA(16));
+#undef EDC_DATA
+	return 0;
+}
+
+/*
+ * csio_t5_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
+ * @hw: the csio_hw
+ * @win: PCI-E memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_MC0 (or MEM_MC) or MEM_MC1
+ * @addr: address
within indicated memory type + * @len: amount of memory to transfer + * @buf: host memory buffer + * @dir: direction of transfer 1 => read, 0 => write + * + * Reads/writes an [almost] arbitrary memory region in the firmware: the + * firmware memory address, length and host buffer must be aligned on + * 32-bit boundaries. The memory is transferred as a raw byte sequence + * from/to the firmware's memory. If this memory contains data + * structures which contain multi-byte integers, it's the caller's + * responsibility to perform appropriate byte order conversions. + */ +static int +csio_t5_memory_rw(struct csio_hw *hw, u32 win, int mtype, u32 addr, + u32 len, uint32_t *buf, int dir) +{ + u32 pos, start, offset, memoffset; + u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; + + /* + * Argument sanity checks ... + */ + if ((addr & 0x3) || (len & 0x3)) + return -EINVAL; + + /* Offset into the region of memory which is being accessed + * MEM_EDC0 = 0 + * MEM_EDC1 = 1 + * MEM_MC = 2 -- T4 + * MEM_MC0 = 2 -- For T5 + * MEM_MC1 = 3 -- For T5 + */ + edc_size = EDRAM_SIZE_GET(csio_rd_reg32(hw, MA_EDRAM0_BAR)); + if (mtype != MEM_MC1) + memoffset = (mtype * (edc_size * 1024 * 1024)); + else { + mc_size = EXT_MEM_SIZE_GET(csio_rd_reg32(hw, + MA_EXT_MEMORY_BAR)); + memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; + } + + /* Determine the PCIE_MEM_ACCESS_OFFSET */ + addr = addr + memoffset; + + /* + * Each PCI-E Memory Window is programmed with a window size -- or + * "aperture" -- which controls the granularity of its mapping onto + * adapter memory. We need to grab that aperture in order to know + * how to use the specified window. The window is also programmed + * with the base address of the Memory Window in BAR0's address + * space. For T4 this is an absolute PCI-E Bus Address. For T5 + * the address is relative to BAR0.
+ */ + mem_reg = csio_rd_reg32(hw, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, win)); + mem_aperture = 1 << (WINDOW(mem_reg) + 10); + mem_base = GET_PCIEOFST(mem_reg) << 10; + + start = addr & ~(mem_aperture-1); + offset = addr - start; + win_pf = V_PFNUM(hw->pfn); + + csio_dbg(hw, "csio_t5_memory_rw: mem_reg: 0x%x, mem_aperture: 0x%x\n", + mem_reg, mem_aperture); + csio_dbg(hw, "csio_t5_memory_rw: mem_base: 0x%x, mem_offset: 0x%x\n", + mem_base, memoffset); + csio_dbg(hw, "csio_t5_memory_rw: start:0x%x, offset:0x%x, win_pf:%d\n", + start, offset, win_pf); + csio_dbg(hw, "csio_t5_memory_rw: mtype: %d, addr: 0x%x, len: %d\n", + mtype, addr, len); + + for (pos = start; len > 0; pos += mem_aperture, offset = 0) { + /* + * Move PCI-E Memory Window to our current transfer + * position. Read it back to ensure that changes propagate + * before we attempt to use the new value. + */ + csio_wr_reg32(hw, pos | win_pf, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, win)); + csio_rd_reg32(hw, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, win)); + + while (offset < mem_aperture && len > 0) { + if (dir) + *buf++ = csio_rd_reg32(hw, mem_base + offset); + else + csio_wr_reg32(hw, *buf++, mem_base + offset); + + offset += sizeof(__be32); + len -= sizeof(__be32); + } + } + return 0; +} + +/* + * csio_t5_dfs_create_ext_mem - setup debugfs for MC0 or MC1 to read the values + * @hw: the csio_hw + * + * This function creates files in the debugfs with external memory region + * MC0 & MC1. 
+ */ +static void +csio_t5_dfs_create_ext_mem(struct csio_hw *hw) +{ + u32 size; + int i = csio_rd_reg32(hw, MA_TARGET_MEM_ENABLE); + if (i & EXT_MEM_ENABLE) { + size = csio_rd_reg32(hw, MA_EXT_MEMORY_BAR); + csio_add_debugfs_mem(hw, "mc0", MEM_MC0, + EXT_MEM_SIZE_GET(size)); + } + if (i & EXT_MEM1_ENABLE) { + size = csio_rd_reg32(hw, MA_EXT_MEMORY1_BAR); + csio_add_debugfs_mem(hw, "mc1", MEM_MC1, + EXT_MEM_SIZE_GET(size)); + } +} + +/* T5 adapter specific function */ +struct csio_hw_chip_ops t5_ops = { + .chip_set_mem_win = csio_t5_set_mem_win, + .chip_pcie_intr_handler = csio_t5_pcie_intr_handler, + .chip_flash_cfg_addr = csio_t5_flash_cfg_addr, + .chip_mc_read = csio_t5_mc_read, + .chip_edc_read = csio_t5_edc_read, + .chip_memory_rw = csio_t5_memory_rw, + .chip_dfs_create_ext_mem = csio_t5_dfs_create_ext_mem, +}; diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index 0604b5ff3638..00346fe939d5 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -81,9 +81,11 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) __be32 data[16]; if (mem == MEM_MC) - ret = csio_hw_mc_read(hw, pos, data, NULL); + ret = hw->chip_ops->chip_mc_read(hw, 0, pos, + data, NULL); else - ret = csio_hw_edc_read(hw, mem, pos, data, NULL); + ret = hw->chip_ops->chip_edc_read(hw, mem, pos, + data, NULL); if (ret) return ret; @@ -108,7 +110,7 @@ static const struct file_operations csio_mem_debugfs_fops = { .llseek = default_llseek, }; -static void csio_add_debugfs_mem(struct csio_hw *hw, const char *name, +void csio_add_debugfs_mem(struct csio_hw *hw, const char *name, unsigned int idx, unsigned int size_mb) { struct dentry *de; @@ -131,9 +133,8 @@ static int csio_setup_debugfs(struct csio_hw *hw) csio_add_debugfs_mem(hw, "edc0", MEM_EDC0, 5); if (i & EDRAM1_ENABLE) csio_add_debugfs_mem(hw, "edc1", MEM_EDC1, 5); - if (i & EXT_MEM_ENABLE) - csio_add_debugfs_mem(hw, "mc", MEM_MC, - 
EXT_MEM_SIZE_GET(csio_rd_reg32(hw, MA_EXT_MEMORY_BAR))); + + hw->chip_ops->chip_dfs_create_ext_mem(hw); return 0; } @@ -1169,7 +1170,7 @@ static struct pci_error_handlers csio_err_handler = { }; static DEFINE_PCI_DEVICE_TABLE(csio_pci_tbl) = { - CSIO_DEVICE(CSIO_DEVID_T440DBG_FCOE, 0), /* T440DBG FCOE */ + CSIO_DEVICE(CSIO_DEVID_T440DBG_FCOE, 0), /* T4 DEBUG FCOE */ CSIO_DEVICE(CSIO_DEVID_T420CR_FCOE, 0), /* T420CR FCOE */ CSIO_DEVICE(CSIO_DEVID_T422CR_FCOE, 0), /* T422CR FCOE */ CSIO_DEVICE(CSIO_DEVID_T440CR_FCOE, 0), /* T440CR FCOE */ @@ -1184,8 +1185,34 @@ static DEFINE_PCI_DEVICE_TABLE(csio_pci_tbl) = { CSIO_DEVICE(CSIO_DEVID_B404_FCOE, 0), /* B404 FCOE */ CSIO_DEVICE(CSIO_DEVID_T480CR_FCOE, 0), /* T480 CR FCOE */ CSIO_DEVICE(CSIO_DEVID_T440LPCR_FCOE, 0), /* T440 LP-CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_PE10K, 0), /* PE10K FCOE */ - CSIO_DEVICE(CSIO_DEVID_PE10K_PF1, 0), /* PE10K FCOE on PF1 */ + CSIO_DEVICE(CSIO_DEVID_AMSTERDAM_T4_FCOE, 0), /* AMSTERDAM T4 FCOE */ + CSIO_DEVICE(CSIO_DEVID_HUAWEI_T480_FCOE, 0), /* HUAWEI T480 FCOE */ + CSIO_DEVICE(CSIO_DEVID_HUAWEI_T440_FCOE, 0), /* HUAWEI T440 FCOE */ + CSIO_DEVICE(CSIO_DEVID_HUAWEI_STG310_FCOE, 0), /* HUAWEI STG FCOE */ + CSIO_DEVICE(CSIO_DEVID_ACROMAG_XMC_XAUI, 0), /* ACROMAG XAUI FCOE */ + CSIO_DEVICE(CSIO_DEVID_QUANTA_MEZZ_SFP_FCOE, 0),/* QUANTA MEZZ FCOE */ + CSIO_DEVICE(CSIO_DEVID_HUAWEI_10GT_FCOE, 0), /* HUAWEI 10GT FCOE */ + CSIO_DEVICE(CSIO_DEVID_HUAWEI_T440_TOE_FCOE, 0),/* HUAWEI T4 TOE FCOE */ + CSIO_DEVICE(CSIO_DEVID_T580DBG_FCOE, 0), /* T5 DEBUG FCOE */ + CSIO_DEVICE(CSIO_DEVID_T520CR_FCOE, 0), /* T520CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_T522CR_FCOE, 0), /* T522CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_T540CR_FCOE, 0), /* T540CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_T520BCH_FCOE, 0), /* T520BCH FCOE */ + CSIO_DEVICE(CSIO_DEVID_T540BCH_FCOE, 0), /* T540BCH FCOE */ + CSIO_DEVICE(CSIO_DEVID_T540CH_FCOE, 0), /* T540CH FCOE */ + CSIO_DEVICE(CSIO_DEVID_T520SO_FCOE, 0), /* T520SO FCOE */ + 
CSIO_DEVICE(CSIO_DEVID_T520CX_FCOE, 0), /* T520CX FCOE */ + CSIO_DEVICE(CSIO_DEVID_T520BT_FCOE, 0), /* T520BT FCOE */ + CSIO_DEVICE(CSIO_DEVID_T504BT_FCOE, 0), /* T504BT FCOE */ + CSIO_DEVICE(CSIO_DEVID_B520_FCOE, 0), /* B520 FCOE */ + CSIO_DEVICE(CSIO_DEVID_B504_FCOE, 0), /* B504 FCOE */ + CSIO_DEVICE(CSIO_DEVID_T580CR2_FCOE, 0), /* T580 CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_T540LPCR_FCOE, 0), /* T540 LP-CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_AMSTERDAM_T5_FCOE, 0), /* AMSTERDAM T5 FCOE */ + CSIO_DEVICE(CSIO_DEVID_T580LPCR_FCOE, 0), /* T580 LP-CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_T520LLCR_FCOE, 0), /* T520 LL-CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_T560CR_FCOE, 0), /* T560 CR FCOE */ + CSIO_DEVICE(CSIO_DEVID_T580CR_FCOE, 0), /* T580 CR FCOE */ { 0, 0, 0, 0, 0, 0, 0 } }; @@ -1259,4 +1286,5 @@ MODULE_DESCRIPTION(CSIO_DRV_DESC); MODULE_LICENSE(CSIO_DRV_LICENSE); MODULE_DEVICE_TABLE(pci, csio_pci_tbl); MODULE_VERSION(CSIO_DRV_VERSION); -MODULE_FIRMWARE(CSIO_FW_FNAME); +MODULE_FIRMWARE(FW_FNAME_T4); +MODULE_FIRMWARE(FW_FNAME_T5); diff --git a/drivers/scsi/csiostor/csio_init.h b/drivers/scsi/csiostor/csio_init.h index 0838fd7ec9c7..5cc5d317a442 100644 --- a/drivers/scsi/csiostor/csio_init.h +++ b/drivers/scsi/csiostor/csio_init.h @@ -52,31 +52,6 @@ #define CSIO_DRV_DESC "Chelsio FCoE driver" #define CSIO_DRV_VERSION "1.0.0" -#define CSIO_DEVICE(devid, idx) \ -{ PCI_VENDOR_ID_CHELSIO, (devid), PCI_ANY_ID, PCI_ANY_ID, 0, 0, (idx) } - -#define CSIO_IS_T4_FPGA(_dev) (((_dev) == CSIO_DEVID_PE10K) ||\ - ((_dev) == CSIO_DEVID_PE10K_PF1)) - -/* FCoE device IDs */ -#define CSIO_DEVID_PE10K 0xA000 -#define CSIO_DEVID_PE10K_PF1 0xA001 -#define CSIO_DEVID_T440DBG_FCOE 0x4600 -#define CSIO_DEVID_T420CR_FCOE 0x4601 -#define CSIO_DEVID_T422CR_FCOE 0x4602 -#define CSIO_DEVID_T440CR_FCOE 0x4603 -#define CSIO_DEVID_T420BCH_FCOE 0x4604 -#define CSIO_DEVID_T440BCH_FCOE 0x4605 -#define CSIO_DEVID_T440CH_FCOE 0x4606 -#define CSIO_DEVID_T420SO_FCOE 0x4607 -#define CSIO_DEVID_T420CX_FCOE 0x4608 -#define 
CSIO_DEVID_T420BT_FCOE 0x4609 -#define CSIO_DEVID_T404BT_FCOE 0x460A -#define CSIO_DEVID_B420_FCOE 0x460B -#define CSIO_DEVID_B404_FCOE 0x460C -#define CSIO_DEVID_T480CR_FCOE 0x460D -#define CSIO_DEVID_T440LPCR_FCOE 0x460E - extern struct fc_function_template csio_fc_transport_funcs; extern struct fc_function_template csio_fc_transport_vport_funcs; @@ -100,6 +75,10 @@ struct csio_lnode *csio_shost_init(struct csio_hw *, struct device *, bool, void csio_shost_exit(struct csio_lnode *); void csio_lnodes_exit(struct csio_hw *, bool); +/* DebugFS helper routines */ +void csio_add_debugfs_mem(struct csio_hw *, const char *, + unsigned int, unsigned int); + static inline struct Scsi_Host * csio_ln_to_shost(struct csio_lnode *ln) { diff --git a/drivers/scsi/csiostor/csio_lnode.h b/drivers/scsi/csiostor/csio_lnode.h index 8d84988ab06d..0f9c04175b11 100644 --- a/drivers/scsi/csiostor/csio_lnode.h +++ b/drivers/scsi/csiostor/csio_lnode.h @@ -114,7 +114,7 @@ struct csio_lnode_stats { uint32_t n_rnode_match; /* matched rnode */ uint32_t n_dev_loss_tmo; /* Device loss timeout */ uint32_t n_fdmi_err; /* fdmi err */ - uint32_t n_evt_fw[RSCN_DEV_LOST]; /* fw events */ + uint32_t n_evt_fw[PROTO_ERR_IMPL_LOGO]; /* fw events */ enum csio_ln_ev n_evt_sm[CSIO_LNE_MAX_EVENT]; /* State m/c events */ uint32_t n_rnode_alloc; /* rnode allocated */ uint32_t n_rnode_free; /* rnode freed */ diff --git a/drivers/scsi/csiostor/csio_rnode.c b/drivers/scsi/csiostor/csio_rnode.c index 51c6a388de2b..e9c3b045f587 100644 --- a/drivers/scsi/csiostor/csio_rnode.c +++ b/drivers/scsi/csiostor/csio_rnode.c @@ -302,7 +302,7 @@ csio_confirm_rnode(struct csio_lnode *ln, uint32_t rdev_flowid, { uint8_t rport_type; struct csio_rnode *rn, *match_rn; - uint32_t vnp_flowid; + uint32_t vnp_flowid = 0; __be32 *port_id; port_id = (__be32 *)&rdevp->r_id[0]; @@ -350,6 +350,14 @@ csio_confirm_rnode(struct csio_lnode *ln, uint32_t rdev_flowid, * Else, go ahead and alloc a new rnode. 
*/ if (!memcmp(csio_rn_wwpn(match_rn), rdevp->wwpn, 8)) { + if (rn == match_rn) + goto found_rnode; + csio_ln_dbg(ln, + "nport_id:x%x and wwpn:%llx" + " match for ssni:x%x\n", + rn->nport_id, + wwn_to_u64(rdevp->wwpn), + rdev_flowid); if (csio_is_rnode_ready(rn)) { csio_ln_warn(ln, "rnode is already" diff --git a/drivers/scsi/csiostor/csio_rnode.h b/drivers/scsi/csiostor/csio_rnode.h index a3b434c801da..65940096a80d 100644 --- a/drivers/scsi/csiostor/csio_rnode.h +++ b/drivers/scsi/csiostor/csio_rnode.h @@ -63,7 +63,7 @@ struct csio_rnode_stats { uint32_t n_err_nomem; /* error nomem */ uint32_t n_evt_unexp; /* unexpected event */ uint32_t n_evt_drop; /* unexpected event */ - uint32_t n_evt_fw[RSCN_DEV_LOST]; /* fw events */ + uint32_t n_evt_fw[PROTO_ERR_IMPL_LOGO]; /* fw events */ enum csio_rn_ev n_evt_sm[CSIO_RNFE_MAX_EVENT]; /* State m/c events */ uint32_t n_lun_rst; /* Number of resets of * of LUNs under this diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c index c32df1bdaa97..4255ce264abf 100644 --- a/drivers/scsi/csiostor/csio_wr.c +++ b/drivers/scsi/csiostor/csio_wr.c @@ -85,8 +85,8 @@ csio_wr_ring_fldb(struct csio_hw *hw, struct csio_q *flq) */ if (flq->inc_idx >= 8) { csio_wr_reg32(hw, DBPRIO(1) | QID(flq->un.fl.flid) | - PIDX(flq->inc_idx / 8), - MYPF_REG(SGE_PF_KDOORBELL)); + CSIO_HW_PIDX(hw, flq->inc_idx / 8), + MYPF_REG(SGE_PF_KDOORBELL)); flq->inc_idx &= 7; } } @@ -989,7 +989,8 @@ csio_wr_issue(struct csio_hw *hw, int qidx, bool prio) wmb(); /* Ring SGE Doorbell writing q->pidx into it */ csio_wr_reg32(hw, DBPRIO(prio) | QID(q->un.eq.physeqid) | - PIDX(q->inc_idx), MYPF_REG(SGE_PF_KDOORBELL)); + CSIO_HW_PIDX(hw, q->inc_idx), + MYPF_REG(SGE_PF_KDOORBELL)); q->inc_idx = 0; return 0; @@ -1331,20 +1332,30 @@ csio_wr_fixup_host_params(struct csio_hw *hw) /* FL BUFFER SIZE#0 is Page size i,e already aligned to cache line */ csio_wr_reg32(hw, PAGE_SIZE, SGE_FL_BUFFER_SIZE0); - csio_wr_reg32(hw, - (csio_rd_reg32(hw, 
SGE_FL_BUFFER_SIZE2) + - sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1), - SGE_FL_BUFFER_SIZE2); - csio_wr_reg32(hw, - (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3) + - sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1), - SGE_FL_BUFFER_SIZE3); + + /* + * If using hard params, the following will get set correctly + * in csio_wr_set_sge(). + */ + if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS) { + csio_wr_reg32(hw, + (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE2) + + sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1), + SGE_FL_BUFFER_SIZE2); + csio_wr_reg32(hw, + (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3) + + sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1), + SGE_FL_BUFFER_SIZE3); + } csio_wr_reg32(hw, HPZ0(PAGE_SHIFT - 12), ULP_RX_TDDP_PSZ); /* default value of rx_dma_offset of the NIC driver */ csio_set_reg_field(hw, SGE_CONTROL, PKTSHIFT_MASK, PKTSHIFT(CSIO_SGE_RX_DMA_OFFSET)); + + csio_hw_tp_wr_bits_indirect(hw, TP_INGRESS_CONFIG, + CSUM_HAS_PSEUDO_HDR, 0); } static void @@ -1460,18 +1471,21 @@ csio_wr_set_sge(struct csio_hw *hw) * and generate an interrupt when this occurs so we can recover. */ csio_set_reg_field(hw, SGE_DBFIFO_STATUS, - HP_INT_THRESH(HP_INT_THRESH_MASK) | - LP_INT_THRESH(LP_INT_THRESH_MASK), - HP_INT_THRESH(CSIO_SGE_DBFIFO_INT_THRESH) | - LP_INT_THRESH(CSIO_SGE_DBFIFO_INT_THRESH)); + HP_INT_THRESH(HP_INT_THRESH_MASK) | + CSIO_HW_LP_INT_THRESH(hw, CSIO_HW_M_LP_INT_THRESH(hw)), + HP_INT_THRESH(CSIO_SGE_DBFIFO_INT_THRESH) | + CSIO_HW_LP_INT_THRESH(hw, CSIO_SGE_DBFIFO_INT_THRESH)); + csio_set_reg_field(hw, SGE_DOORBELL_CONTROL, ENABLE_DROP, ENABLE_DROP); /* SGE_FL_BUFFER_SIZE0 is set up by csio_wr_fixup_host_params(). 
*/ CSIO_SET_FLBUF_SIZE(hw, 1, CSIO_SGE_FLBUF_SIZE1); - CSIO_SET_FLBUF_SIZE(hw, 2, CSIO_SGE_FLBUF_SIZE2); - CSIO_SET_FLBUF_SIZE(hw, 3, CSIO_SGE_FLBUF_SIZE3); + csio_wr_reg32(hw, (CSIO_SGE_FLBUF_SIZE2 + sge->csio_fl_align - 1) + & ~(sge->csio_fl_align - 1), SGE_FL_BUFFER_SIZE2); + csio_wr_reg32(hw, (CSIO_SGE_FLBUF_SIZE3 + sge->csio_fl_align - 1) + & ~(sge->csio_fl_align - 1), SGE_FL_BUFFER_SIZE3); CSIO_SET_FLBUF_SIZE(hw, 4, CSIO_SGE_FLBUF_SIZE4); CSIO_SET_FLBUF_SIZE(hw, 5, CSIO_SGE_FLBUF_SIZE5); CSIO_SET_FLBUF_SIZE(hw, 6, CSIO_SGE_FLBUF_SIZE6); @@ -1522,22 +1536,24 @@ void csio_wr_sge_init(struct csio_hw *hw) { /* - * If we are master: + * If we are master and chip is not initialized: * - If we plan to use the config file, we need to fixup some * host specific registers, and read the rest of the SGE * configuration. * - If we dont plan to use the config file, we need to initialize * SGE entirely, including fixing the host specific registers. + * If we are master and chip is initialized, just read and work off of + * the already initialized SGE values. * If we arent the master, we are only allowed to read and work off of * the already initialized SGE values. * * Therefore, before calling this function, we assume that the master- - * ship of the card, and whether to use config file or not, have - * already been decided. In other words, CSIO_HWF_USING_SOFT_PARAMS and - * CSIO_HWF_MASTER should be set/unset. + * ship of the card, state and whether to use config file or not, have + * already been decided. 
*/ if (csio_is_hw_master(hw)) { - csio_wr_fixup_host_params(hw); + if (hw->fw_state != CSIO_DEV_STATE_INIT) + csio_wr_fixup_host_params(hw); if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS) csio_wr_get_sge(hw); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index b5d92fc93c70..292b24f9bf93 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -490,7 +490,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) { struct net_device *netdev = fcoe->netdev; struct fcoe_ctlr *fip = fcoe_to_ctlr(fcoe); - struct fcoe_ctlr_device *ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip); rtnl_lock(); if (!fcoe->removed) @@ -501,7 +500,6 @@ static void fcoe_interface_cleanup(struct fcoe_interface *fcoe) /* tear-down the FCoE controller */ fcoe_ctlr_destroy(fip); scsi_host_put(fip->lp->host); - fcoe_ctlr_device_delete(ctlr_dev); dev_put(netdev); module_put(THIS_MODULE); } @@ -1657,7 +1655,7 @@ static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp) skb->priority = fcoe->priority; if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN && - fcoe->realdev->features & NETIF_F_HW_VLAN_TX) { + fcoe->realdev->features & NETIF_F_HW_VLAN_CTAG_TX) { skb->vlan_tci = VLAN_TAG_PRESENT | vlan_dev_vlan_id(fcoe->netdev); skb->dev = fcoe->realdev; @@ -2194,6 +2192,8 @@ out_nodev: */ static void fcoe_destroy_work(struct work_struct *work) { + struct fcoe_ctlr_device *cdev; + struct fcoe_ctlr *ctlr; struct fcoe_port *port; struct fcoe_interface *fcoe; struct Scsi_Host *shost; @@ -2224,10 +2224,15 @@ static void fcoe_destroy_work(struct work_struct *work) mutex_lock(&fcoe_config_mutex); fcoe = port->priv; + ctlr = fcoe_to_ctlr(fcoe); + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); + fcoe_if_destroy(port->lport); fcoe_interface_cleanup(fcoe); mutex_unlock(&fcoe_config_mutex); + + fcoe_ctlr_device_delete(cdev); } /** @@ -2335,7 +2340,9 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, rc = -EIO; rtnl_unlock(); fcoe_interface_cleanup(fcoe); - goto 
out_nortnl; + mutex_unlock(&fcoe_config_mutex); + fcoe_ctlr_device_delete(ctlr_dev); + goto out; } /* Make this the "master" N_Port */ @@ -2375,8 +2382,8 @@ static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, out_nodev: rtnl_unlock(); -out_nortnl: mutex_unlock(&fcoe_config_mutex); +out: return rc; } diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 08c3bc398da2..a76247201be5 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -2815,6 +2815,47 @@ unlock: } /** + * fcoe_ctlr_mode_set() - Set or reset the ctlr's mode + * @lport: The local port to be (re)configured + * @fip: The FCoE controller whose mode is changing + * @fip_mode: The new fip mode + * + * Note that the we shouldn't be changing the libfc discovery settings + * (fc_disc_config) while an lport is going through the libfc state + * machine. The mode can only be changed when a fcoe_ctlr device is + * disabled, so that should ensure that this routine is only called + * when nothing is happening. 
+ */ +void fcoe_ctlr_mode_set(struct fc_lport *lport, struct fcoe_ctlr *fip, + enum fip_state fip_mode) +{ + void *priv; + + WARN_ON(lport->state != LPORT_ST_RESET && + lport->state != LPORT_ST_DISABLED); + + if (fip_mode == FIP_MODE_VN2VN) { + lport->rport_priv_size = sizeof(struct fcoe_rport); + lport->point_to_multipoint = 1; + lport->tt.disc_recv_req = fcoe_ctlr_disc_recv; + lport->tt.disc_start = fcoe_ctlr_disc_start; + lport->tt.disc_stop = fcoe_ctlr_disc_stop; + lport->tt.disc_stop_final = fcoe_ctlr_disc_stop_final; + priv = fip; + } else { + lport->rport_priv_size = 0; + lport->point_to_multipoint = 0; + lport->tt.disc_recv_req = NULL; + lport->tt.disc_start = NULL; + lport->tt.disc_stop = NULL; + lport->tt.disc_stop_final = NULL; + priv = lport; + } + + fc_disc_config(lport, priv); +} + +/** * fcoe_libfc_config() - Sets up libfc related properties for local port * @lport: The local port to configure libfc for * @fip: The FCoE controller in use by the local port @@ -2833,21 +2874,9 @@ int fcoe_libfc_config(struct fc_lport *lport, struct fcoe_ctlr *fip, fc_exch_init(lport); fc_elsct_init(lport); fc_lport_init(lport); - if (fip->mode == FIP_MODE_VN2VN) - lport->rport_priv_size = sizeof(struct fcoe_rport); fc_rport_init(lport); - if (fip->mode == FIP_MODE_VN2VN) { - lport->point_to_multipoint = 1; - lport->tt.disc_recv_req = fcoe_ctlr_disc_recv; - lport->tt.disc_start = fcoe_ctlr_disc_start; - lport->tt.disc_stop = fcoe_ctlr_disc_stop; - lport->tt.disc_stop_final = fcoe_ctlr_disc_stop_final; - mutex_init(&lport->disc.disc_mutex); - INIT_LIST_HEAD(&lport->disc.rports); - lport->disc.priv = fip; - } else { - fc_disc_init(lport); - } + fc_disc_init(lport); + fcoe_ctlr_mode_set(lport, fip, fip->mode); return 0; } EXPORT_SYMBOL_GPL(fcoe_libfc_config); @@ -2875,6 +2904,7 @@ EXPORT_SYMBOL(fcoe_fcf_get_selected); void fcoe_ctlr_set_fip_mode(struct fcoe_ctlr_device *ctlr_dev) { struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(ctlr_dev); + struct fc_lport *lport = 
ctlr->lp; mutex_lock(&ctlr->ctlr_mutex); switch (ctlr_dev->mode) { @@ -2888,5 +2918,7 @@ void fcoe_ctlr_set_fip_mode(struct fcoe_ctlr_device *ctlr_dev) } mutex_unlock(&ctlr->ctlr_mutex); + + fcoe_ctlr_mode_set(lport, ctlr, ctlr->mode); } EXPORT_SYMBOL(fcoe_ctlr_set_fip_mode); diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index a044f593e8b9..d0fa4b6c551f 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1899,8 +1899,8 @@ static int ibmvscsi_slave_configure(struct scsi_device *sdev) sdev->allow_restart = 1; blk_queue_rq_timeout(sdev->request_queue, 120 * HZ); } - scsi_adjust_queue_depth(sdev, 0, shost->cmd_per_lun); spin_unlock_irqrestore(shost->host_lock, lock_flags); + scsi_adjust_queue_depth(sdev, 0, shost->cmd_per_lun); return 0; } diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index f328089a1060..2197b57fb225 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -5148,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) ipr_trace; } - list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (!ipr_is_naca_model(res)) res->needs_sync_complete = 1; @@ -9349,7 +9349,10 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) int_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg); spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - rc = request_irq(pdev->irq, ipr_test_intr, 0, IPR_NAME, ioa_cfg); + if (ioa_cfg->intr_flag == IPR_USE_MSIX) + rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_test_intr, 0, IPR_NAME, ioa_cfg); + else + rc = request_irq(pdev->irq, ipr_test_intr, 0, IPR_NAME, ioa_cfg); if (rc) { dev_err(&pdev->dev, "Can not assign irq %d\n", pdev->irq); return rc; @@ -9371,7 +9374,10 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - free_irq(pdev->irq, ioa_cfg); + if 
(ioa_cfg->intr_flag == IPR_USE_MSIX) + free_irq(ioa_cfg->vectors_info[0].vec, ioa_cfg); + else + free_irq(pdev->irq, ioa_cfg); LEAVE; @@ -9722,6 +9728,7 @@ static void __ipr_remove(struct pci_dev *pdev) spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); flush_work(&ioa_cfg->work_q); + INIT_LIST_HEAD(&ioa_cfg->used_res_q); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); spin_lock(&ipr_driver_lock); diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 8e561e6a557c..880a9068ca12 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -712,12 +712,13 @@ static void fc_disc_stop_final(struct fc_lport *lport) } /** - * fc_disc_init() - Initialize the discovery layer for a local port - * @lport: The local port that needs the discovery layer to be initialized + * fc_disc_config() - Configure the discovery layer for a local port + * @lport: The local port that needs the discovery layer to be configured + * @priv: Private data structre for users of the discovery layer */ -int fc_disc_init(struct fc_lport *lport) +void fc_disc_config(struct fc_lport *lport, void *priv) { - struct fc_disc *disc; + struct fc_disc *disc = &lport->disc; if (!lport->tt.disc_start) lport->tt.disc_start = fc_disc_start; @@ -732,12 +733,21 @@ int fc_disc_init(struct fc_lport *lport) lport->tt.disc_recv_req = fc_disc_recv_req; disc = &lport->disc; + + disc->priv = priv; +} +EXPORT_SYMBOL(fc_disc_config); + +/** + * fc_disc_init() - Initialize the discovery layer for a local port + * @lport: The local port that needs the discovery layer to be initialized + */ +void fc_disc_init(struct fc_lport *lport) +{ + struct fc_disc *disc = &lport->disc; + INIT_DELAYED_WORK(&disc->disc_work, fc_disc_timeout); mutex_init(&disc->disc_mutex); INIT_LIST_HEAD(&disc->rports); - - disc->priv = lport; - - return 0; } EXPORT_SYMBOL(fc_disc_init); diff --git 
a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index aec2e0da5016..55cbd0180159 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -235,6 +235,17 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) linkrate = phy->linkrate; memcpy(sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE); + /* Handle vacant phy - rest of dr data is not valid so skip it */ + if (phy->phy_state == PHY_VACANT) { + memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + phy->attached_dev_type = NO_DEVICE; + if (!test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state)) { + phy->phy_id = phy_id; + goto skip; + } else + goto out; + } + phy->attached_dev_type = to_dev_type(dr); if (test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state)) goto out; @@ -272,6 +283,7 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) phy->phy->maximum_linkrate = dr->pmax_linkrate; phy->phy->negotiated_linkrate = phy->linkrate; + skip: if (new_phy) if (sas_phy_add(phy->phy)) { sas_phy_free(phy->phy); @@ -388,7 +400,7 @@ int sas_ex_phy_discover(struct domain_device *dev, int single) if (!disc_req) return -ENOMEM; - disc_resp = alloc_smp_req(DISCOVER_RESP_SIZE); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); if (!disc_resp) { kfree(disc_req); return -ENOMEM; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 74b67d98e952..d43faf34c1e2 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -438,11 +438,12 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, struct lpfc_rqe *temp_hrqe; struct lpfc_rqe *temp_drqe; struct lpfc_register doorbell; - int put_index = hq->host_index; + int put_index; /* sanity check on queue memory */ if (unlikely(!hq) || unlikely(!dq)) return -ENOMEM; + put_index = hq->host_index; temp_hrqe = hq->qe[hq->host_index].rqe; temp_drqe = dq->qe[dq->host_index].rqe; diff --git a/drivers/scsi/qla2xxx/qla_attr.c 
b/drivers/scsi/qla2xxx/qla_attr.c index 1d82eef4e1eb..b3db9dcc2619 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1938,11 +1938,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) "Timer for the VP[%d] has stopped\n", vha->vp_idx); } - /* No pending activities shall be there on the vha now */ - if (ql2xextended_error_logging & ql_dbg_user) - msleep(random32()%10); /* Just to see if something falls on - * the net we have placed below */ - BUG_ON(atomic_read(&vha->vref_count)); qla2x00_free_fcports(vha); diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 1626de52e32a..fbc305f1c87c 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -15,6 +15,7 @@ * | Mailbox commands | 0x115b | 0x111a-0x111b | * | | | 0x112c-0x112e | * | | | 0x113a | + * | | | 0x1155-0x1158 | * | Device Discovery | 0x2087 | 0x2020-0x2022, | * | | | 0x2016 | * | Queue Command and IO tracing | 0x3031 | 0x3006-0x300b | @@ -401,7 +402,7 @@ qla2xxx_copy_atioqueues(struct qla_hw_data *ha, void *ptr, void *ring; } aq, *aqp; - if (!ha->tgt.atio_q_length) + if (!ha->tgt.atio_ring) return ptr; num_queues = 1; diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index c6509911772b..65c5ff75936b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -863,7 +863,6 @@ typedef struct { #define MBX_1 BIT_1 #define MBX_0 BIT_0 -#define RNID_TYPE_SET_VERSION 0x9 #define RNID_TYPE_ASIC_TEMP 0xC /* diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index eb3ca21a7f17..b310fa97b545 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -358,9 +358,6 @@ extern int qla2x00_disable_fce_trace(scsi_qla_host_t *, uint64_t *, uint64_t *); extern int -qla2x00_set_driver_version(scsi_qla_host_t *, char *); - -extern int qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint8_t *, uint16_t, uint16_t, uint16_t, uint16_t); 
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index edf4d14a1335..b59203393cb2 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -619,8 +619,6 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)) qla24xx_read_fcp_prio_cfg(vha); - qla2x00_set_driver_version(vha, QLA2XXX_VERSION); - return (rval); } @@ -1399,7 +1397,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mq_size += ha->max_rsp_queues * (rsp->length * sizeof(response_t)); } - if (ha->tgt.atio_q_length) + if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); /* Allocate memory for Fibre Channel Event Buffer. */ if (!IS_QLA25XX(ha) && !IS_QLA81XX(ha) && !IS_QLA83XX(ha)) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 186dd59ce4fa..43345af56431 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3866,64 +3866,6 @@ qla81xx_restart_mpi_firmware(scsi_qla_host_t *vha) return rval; } -int -qla2x00_set_driver_version(scsi_qla_host_t *vha, char *version) -{ - int rval; - mbx_cmd_t mc; - mbx_cmd_t *mcp = &mc; - int len; - uint16_t dwlen; - uint8_t *str; - dma_addr_t str_dma; - struct qla_hw_data *ha = vha->hw; - - if (!IS_FWI2_CAPABLE(ha) || IS_QLA82XX(ha)) - return QLA_FUNCTION_FAILED; - - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1155, - "Entered %s.\n", __func__); - - str = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &str_dma); - if (!str) { - ql_log(ql_log_warn, vha, 0x1156, - "Failed to allocate driver version param.\n"); - return QLA_MEMORY_ALLOC_FAILED; - } - - memcpy(str, "\x7\x3\x11\x0", 4); - dwlen = str[0]; - len = dwlen * sizeof(uint32_t) - 4; - memset(str + 4, 0, len); - if (len > strlen(version)) - len = strlen(version); - memcpy(str + 4, version, len); - - mcp->mb[0] = MBC_SET_RNID_PARAMS; - mcp->mb[1] = RNID_TYPE_SET_VERSION << 8 | dwlen; - mcp->mb[2] = MSW(LSD(str_dma)); - mcp->mb[3] = LSW(LSD(str_dma)); 
- mcp->mb[6] = MSW(MSD(str_dma)); - mcp->mb[7] = LSW(MSD(str_dma)); - mcp->out_mb = MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0; - mcp->in_mb = MBX_0; - mcp->tov = MBX_TOV_SECONDS; - mcp->flags = 0; - rval = qla2x00_mailbox_command(vha, mcp); - - if (rval != QLA_SUCCESS) { - ql_dbg(ql_dbg_mbx, vha, 0x1157, - "Failed=%x mb[0]=%x.\n", rval, mcp->mb[0]); - } else { - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1158, - "Done %s.\n", __func__); - } - - dma_pool_free(ha->s_dma_pool, str, str_dma); - - return rval; -} - static int qla2x00_read_asic_temperature(scsi_qla_host_t *vha, uint16_t *temp) { diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 2b6e478d9e33..ec54036d1e12 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,7 +7,7 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.04.00.08-k" +#define QLA2XXX_VERSION "8.04.00.13-k" #define QLA_DRIVER_MAJOR_VER 8 #define QLA_DRIVER_MINOR_VER 4 diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 765398c063c7..c31187d79343 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -71,9 +71,14 @@ struct kmem_cache *scsi_sdb_cache; #ifdef CONFIG_ACPI #include <acpi/acpi_bus.h> +static bool acpi_scsi_bus_match(struct device *dev) +{ + return dev->bus == &scsi_bus_type; +} + int scsi_register_acpi_bus_type(struct acpi_bus_type *bus) { - bus->bus = &scsi_bus_type; + bus->match = acpi_scsi_bus_match; return register_acpi_bus_type(bus); } EXPORT_SYMBOL_GPL(scsi_register_acpi_bus_type); diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 65123a21b97e..fe30ea94ffe6 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -50,7 +50,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb) u32 rlen; int err, tport; - while (skb->len >= NLMSG_SPACE(0)) { + while (skb->len >= NLMSG_HDRLEN) { err = 0; nlh = nlmsg_hdr(skb); @@ -70,7 +70,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb) goto next_msg; } - hdr = 
NLMSG_DATA(nlh); + hdr = nlmsg_data(nlh); if ((hdr->version != SCSI_NL_VERSION) || (hdr->magic != SCSI_NL_MAGIC)) { err = -EPROTOTYPE; diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index e894ca7b54c0..e106c276aa00 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -35,7 +35,6 @@ #include <scsi/scsi_transport.h> #include <scsi/scsi_transport_fc.h> #include <scsi/scsi_cmnd.h> -#include <linux/netlink.h> #include <net/netlink.h> #include <scsi/scsi_netlink_fc.h> #include <scsi/scsi_bsg_fc.h> @@ -534,7 +533,7 @@ fc_host_post_event(struct Scsi_Host *shost, u32 event_number, struct nlmsghdr *nlh; struct fc_nl_event *event; const char *name; - u32 len, skblen; + u32 len; int err; if (!scsi_nl_sock) { @@ -543,21 +542,19 @@ fc_host_post_event(struct Scsi_Host *shost, u32 event_number, } len = FC_NL_MSGALIGN(sizeof(*event)); - skblen = NLMSG_SPACE(len); - skb = alloc_skb(skblen, GFP_KERNEL); + skb = nlmsg_new(len, GFP_KERNEL); if (!skb) { err = -ENOBUFS; goto send_fail; } - nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG, - skblen - sizeof(*nlh), 0); + nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG, len, 0); if (!nlh) { err = -ENOBUFS; goto send_fail_skb; } - event = NLMSG_DATA(nlh); + event = nlmsg_data(nlh); INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC, FC_NL_ASYNC_EVENT, len); @@ -604,7 +601,7 @@ fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, struct sk_buff *skb; struct nlmsghdr *nlh; struct fc_nl_event *event; - u32 len, skblen; + u32 len; int err; if (!scsi_nl_sock) { @@ -613,21 +610,19 @@ fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, } len = FC_NL_MSGALIGN(sizeof(*event) + data_len); - skblen = NLMSG_SPACE(len); - skb = alloc_skb(skblen, GFP_KERNEL); + skb = nlmsg_new(len, GFP_KERNEL); if (!skb) { err = -ENOBUFS; goto send_vendor_fail; } - nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG, - skblen - sizeof(*nlh), 0); + nlh = nlmsg_put(skb, 0, 0, 
SCSI_TRANSPORT_MSG, len, 0); if (!nlh) { err = -ENOBUFS; goto send_vendor_fail_skb; } - event = NLMSG_DATA(nlh); + event = nlmsg_data(nlh); INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC, FC_NL_ASYNC_EVENT, len); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0a74b975efdf..2e3816530bba 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1344,8 +1344,8 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, struct iscsi_uevent *ev; char *pdu; struct iscsi_internal *priv; - int len = NLMSG_SPACE(sizeof(*ev) + sizeof(struct iscsi_hdr) + - data_size); + int len = nlmsg_total_size(sizeof(*ev) + sizeof(struct iscsi_hdr) + + data_size); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -1360,7 +1360,7 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); memset(ev, 0, sizeof(*ev)); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_RECV_PDU; @@ -1381,7 +1381,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; - int len = NLMSG_SPACE(sizeof(*ev) + data_size); + int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { @@ -1390,7 +1390,7 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); memset(ev, 0, sizeof(*ev)); ev->type = type; ev->transport_handle = iscsi_handle(transport); @@ -1415,7 +1415,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; - int len = NLMSG_SPACE(sizeof(*ev)); + int len = nlmsg_total_size(sizeof(*ev)); priv = iscsi_if_transport_lookup(conn->transport); if 
(!priv) @@ -1429,7 +1429,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_ERROR; ev->r.connerror.error = error; @@ -1450,7 +1450,7 @@ void iscsi_conn_login_event(struct iscsi_cls_conn *conn, struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; - int len = NLMSG_SPACE(sizeof(*ev)); + int len = nlmsg_total_size(sizeof(*ev)); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -1464,7 +1464,7 @@ void iscsi_conn_login_event(struct iscsi_cls_conn *conn, } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_LOGIN_STATE; ev->r.conn_login.state = state; @@ -1484,7 +1484,7 @@ void iscsi_post_host_event(uint32_t host_no, struct iscsi_transport *transport, struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; - int len = NLMSG_SPACE(sizeof(*ev) + data_size); + int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_NOIO); if (!skb) { @@ -1494,7 +1494,7 @@ void iscsi_post_host_event(uint32_t host_no, struct iscsi_transport *transport, } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(transport); ev->type = ISCSI_KEVENT_HOST_EVENT; ev->r.host_event.host_no = host_no; @@ -1515,7 +1515,7 @@ void iscsi_ping_comp_event(uint32_t host_no, struct iscsi_transport *transport, struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; - int len = NLMSG_SPACE(sizeof(*ev) + data_size); + int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_NOIO); if (!skb) { @@ -1524,7 +1524,7 @@ void iscsi_ping_comp_event(uint32_t host_no, struct 
iscsi_transport *transport, } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(transport); ev->type = ISCSI_KEVENT_PING_COMP; ev->r.ping_comp.host_no = host_no; @@ -1543,7 +1543,7 @@ iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi, { struct sk_buff *skb; struct nlmsghdr *nlh; - int len = NLMSG_SPACE(size); + int len = nlmsg_total_size(size); int flags = multi ? NLM_F_MULTI : 0; int t = done ? NLMSG_DONE : type; @@ -1555,24 +1555,24 @@ iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi, nlh = __nlmsg_put(skb, 0, 0, t, (len - sizeof(*nlh)), 0); nlh->nlmsg_flags = flags; - memcpy(NLMSG_DATA(nlh), payload, size); + memcpy(nlmsg_data(nlh), payload, size); return iscsi_multicast_skb(skb, group, GFP_ATOMIC); } static int iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) { - struct iscsi_uevent *ev = NLMSG_DATA(nlh); + struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_stats *stats; struct sk_buff *skbstat; struct iscsi_cls_conn *conn; struct nlmsghdr *nlhstat; struct iscsi_uevent *evstat; struct iscsi_internal *priv; - int len = NLMSG_SPACE(sizeof(*ev) + - sizeof(struct iscsi_stats) + - sizeof(struct iscsi_stats_custom) * - ISCSI_STATS_CUSTOM_MAX); + int len = nlmsg_total_size(sizeof(*ev) + + sizeof(struct iscsi_stats) + + sizeof(struct iscsi_stats_custom) * + ISCSI_STATS_CUSTOM_MAX); int err = 0; priv = iscsi_if_transport_lookup(transport); @@ -1595,7 +1595,7 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) nlhstat = __nlmsg_put(skbstat, 0, 0, 0, (len - sizeof(*nlhstat)), 0); - evstat = NLMSG_DATA(nlhstat); + evstat = nlmsg_data(nlhstat); memset(evstat, 0, sizeof(*evstat)); evstat->transport_handle = iscsi_handle(conn->transport); evstat->type = nlh->nlmsg_type; @@ -1608,12 +1608,12 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) 
memset(stats, 0, sizeof(*stats)); transport->get_stats(conn, stats); - actual_size = NLMSG_SPACE(sizeof(struct iscsi_uevent) + - sizeof(struct iscsi_stats) + - sizeof(struct iscsi_stats_custom) * - stats->custom_length); + actual_size = nlmsg_total_size(sizeof(struct iscsi_uevent) + + sizeof(struct iscsi_stats) + + sizeof(struct iscsi_stats_custom) * + stats->custom_length); actual_size -= sizeof(*nlhstat); - actual_size = NLMSG_LENGTH(actual_size); + actual_size = nlmsg_msg_size(actual_size); skb_trim(skbstat, NLMSG_ALIGN(actual_size)); nlhstat->nlmsg_len = actual_size; @@ -1637,7 +1637,7 @@ int iscsi_session_event(struct iscsi_cls_session *session, struct iscsi_uevent *ev; struct sk_buff *skb; struct nlmsghdr *nlh; - int rc, len = NLMSG_SPACE(sizeof(*ev)); + int rc, len = nlmsg_total_size(sizeof(*ev)); priv = iscsi_if_transport_lookup(session->transport); if (!priv) @@ -1653,7 +1653,7 @@ int iscsi_session_event(struct iscsi_cls_session *session, } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(session->transport); ev->type = event; @@ -2005,7 +2005,7 @@ iscsi_send_ping(struct iscsi_transport *transport, struct iscsi_uevent *ev) static int iscsi_get_chap(struct iscsi_transport *transport, struct nlmsghdr *nlh) { - struct iscsi_uevent *ev = NLMSG_DATA(nlh); + struct iscsi_uevent *ev = nlmsg_data(nlh); struct Scsi_Host *shost = NULL; struct iscsi_chap_rec *chap_rec; struct iscsi_internal *priv; @@ -2024,7 +2024,7 @@ iscsi_get_chap(struct iscsi_transport *transport, struct nlmsghdr *nlh) return -EINVAL; chap_buf_size = (ev->u.get_chap.num_entries * sizeof(*chap_rec)); - len = NLMSG_SPACE(sizeof(*ev) + chap_buf_size); + len = nlmsg_total_size(sizeof(*ev) + chap_buf_size); shost = scsi_host_lookup(ev->u.get_chap.host_no); if (!shost) { @@ -2045,7 +2045,7 @@ iscsi_get_chap(struct iscsi_transport *transport, struct nlmsghdr *nlh) nlhchap = __nlmsg_put(skbchap, 0, 0, 0, (len - 
sizeof(*nlhchap)), 0); - evchap = NLMSG_DATA(nlhchap); + evchap = nlmsg_data(nlhchap); memset(evchap, 0, sizeof(*evchap)); evchap->transport_handle = iscsi_handle(transport); evchap->type = nlh->nlmsg_type; @@ -2058,7 +2058,7 @@ iscsi_get_chap(struct iscsi_transport *transport, struct nlmsghdr *nlh) err = transport->get_chap(shost, ev->u.get_chap.chap_tbl_idx, &evchap->u.get_chap.num_entries, buf); - actual_size = NLMSG_SPACE(sizeof(*ev) + chap_buf_size); + actual_size = nlmsg_total_size(sizeof(*ev) + chap_buf_size); skb_trim(skbchap, NLMSG_ALIGN(actual_size)); nlhchap->nlmsg_len = actual_size; @@ -2096,7 +2096,7 @@ static int iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; - struct iscsi_uevent *ev = NLMSG_DATA(nlh); + struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_transport *transport = NULL; struct iscsi_internal *priv; struct iscsi_cls_session *session; @@ -2263,7 +2263,7 @@ static void iscsi_if_rx(struct sk_buff *skb) { mutex_lock(&rx_queue_mutex); - while (skb->len >= NLMSG_SPACE(0)) { + while (skb->len >= NLMSG_HDRLEN) { int err; uint32_t rlen; struct nlmsghdr *nlh; @@ -2276,7 +2276,7 @@ iscsi_if_rx(struct sk_buff *skb) break; } - ev = NLMSG_DATA(nlh); + ev = nlmsg_data(nlh); rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) rlen = skb->len; diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 86974471af68..2a32036a9404 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4112,6 +4112,10 @@ static int st_probe(struct device *dev) tpnt->disk = disk; disk->private_data = &tpnt->driver; disk->queue = SDp->request_queue; + /* SCSI tape doesn't register this gendisk via add_disk(). Manually + * take queue reference that release_disk() expects. 
*/ + if (!blk_get_queue(disk->queue)) + goto out_put_disk; tpnt->driver = &st_template; tpnt->device = SDp; @@ -4185,7 +4189,7 @@ static int st_probe(struct device *dev) idr_preload_end(); if (error < 0) { pr_warn("st: idr allocation failed: %d\n", error); - goto out_put_disk; + goto out_put_queue; } tpnt->index = error; sprintf(disk->disk_name, "st%d", tpnt->index); @@ -4211,6 +4215,8 @@ out_remove_devs: spin_lock(&st_index_lock); idr_remove(&st_index_idr, tpnt->index); spin_unlock(&st_index_lock); +out_put_queue: + blk_put_queue(disk->queue); out_put_disk: put_disk(disk); kfree(tpnt); diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index f80eee74a311..2be0de920d67 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -55,6 +55,7 @@ comment "SPI Master Controller Drivers" config SPI_ALTERA tristate "Altera SPI Controller" + depends on GENERIC_HARDIRQS select SPI_BITBANG help This is the driver for the Altera SPI Controller. @@ -310,7 +311,7 @@ config SPI_PXA2XX_DMA config SPI_PXA2XX tristate "PXA2xx SSP SPI master" - depends on ARCH_PXA || PCI || ACPI + depends on (ARCH_PXA || PCI || ACPI) && GENERIC_HARDIRQS select PXA_SSP if ARCH_PXA help This enables using a PXA2xx or Sodaville SSP port as a SPI master diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index 9578af782a77..d7df435d962e 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -152,7 +152,6 @@ static void bcm63xx_spi_setup_transfer(struct spi_device *spi, static int bcm63xx_spi_setup(struct spi_device *spi) { struct bcm63xx_spi *bs; - int ret; bs = spi_master_get_devdata(spi->master); @@ -490,7 +489,7 @@ static int bcm63xx_spi_probe(struct platform_device *pdev) default: dev_err(dev, "unsupported MSG_CTL width: %d\n", bs->msg_ctl_width); - goto out_clk_disable; + goto out_err; } /* Initialize hardware */ diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index 89480b281d74..3e490ee7f275 100644 --- 
a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -164,7 +164,7 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, for (i = count; i > 0; i--) { data = tx_buf ? *tx_buf++ : 0; - if (len == EOFBYTE) + if (len == EOFBYTE && t->cs_change) setbits32(&fifo->txcmd, MPC512x_PSC_FIFO_EOF); out_8(&fifo->txdata_8, data); len--; diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 90b27a3508a6..810413883c79 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1168,7 +1168,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) master->dev.parent = &pdev->dev; master->dev.of_node = pdev->dev.of_node; - ACPI_HANDLE_SET(&master->dev, ACPI_HANDLE(&pdev->dev)); /* the spi->mode bits understood by this driver: */ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP; diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index e862ab8853aa..4188b2faac5c 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -994,25 +994,30 @@ static irqreturn_t s3c64xx_spi_irq(int irq, void *data) { struct s3c64xx_spi_driver_data *sdd = data; struct spi_master *spi = sdd->master; - unsigned int val; + unsigned int val, clr = 0; - val = readl(sdd->regs + S3C64XX_SPI_PENDING_CLR); + val = readl(sdd->regs + S3C64XX_SPI_STATUS); - val &= S3C64XX_SPI_PND_RX_OVERRUN_CLR | - S3C64XX_SPI_PND_RX_UNDERRUN_CLR | - S3C64XX_SPI_PND_TX_OVERRUN_CLR | - S3C64XX_SPI_PND_TX_UNDERRUN_CLR; - - writel(val, sdd->regs + S3C64XX_SPI_PENDING_CLR); - - if (val & S3C64XX_SPI_PND_RX_OVERRUN_CLR) + if (val & S3C64XX_SPI_ST_RX_OVERRUN_ERR) { + clr = S3C64XX_SPI_PND_RX_OVERRUN_CLR; dev_err(&spi->dev, "RX overrun\n"); - if (val & S3C64XX_SPI_PND_RX_UNDERRUN_CLR) + } + if (val & S3C64XX_SPI_ST_RX_UNDERRUN_ERR) { + clr |= S3C64XX_SPI_PND_RX_UNDERRUN_CLR; dev_err(&spi->dev, "RX underrun\n"); - if (val & S3C64XX_SPI_PND_TX_OVERRUN_CLR) + } + if (val & S3C64XX_SPI_ST_TX_OVERRUN_ERR) { + clr |= 
S3C64XX_SPI_PND_TX_OVERRUN_CLR; dev_err(&spi->dev, "TX overrun\n"); - if (val & S3C64XX_SPI_PND_TX_UNDERRUN_CLR) + } + if (val & S3C64XX_SPI_ST_TX_UNDERRUN_ERR) { + clr |= S3C64XX_SPI_PND_TX_UNDERRUN_CLR; dev_err(&spi->dev, "TX underrun\n"); + } + + /* Clear the pending irq by setting and then clearing it */ + writel(clr, sdd->regs + S3C64XX_SPI_PENDING_CLR); + writel(0, sdd->regs + S3C64XX_SPI_PENDING_CLR); return IRQ_HANDLED; } @@ -1036,9 +1041,13 @@ static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd, int channel) writel(0, regs + S3C64XX_SPI_MODE_CFG); writel(0, regs + S3C64XX_SPI_PACKET_CNT); - /* Clear any irq pending bits */ - writel(readl(regs + S3C64XX_SPI_PENDING_CLR), - regs + S3C64XX_SPI_PENDING_CLR); + /* Clear any irq pending bits, should set and clear the bits */ + val = S3C64XX_SPI_PND_RX_OVERRUN_CLR | + S3C64XX_SPI_PND_RX_UNDERRUN_CLR | + S3C64XX_SPI_PND_TX_OVERRUN_CLR | + S3C64XX_SPI_PND_TX_UNDERRUN_CLR; + writel(val, regs + S3C64XX_SPI_PENDING_CLR); + writel(0, regs + S3C64XX_SPI_PENDING_CLR); writel(0, regs + S3C64XX_SPI_SWAP_CFG); diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index b8698b389ef3..a829563f4713 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -858,21 +858,6 @@ static int tegra_slink_setup(struct spi_device *spi) return 0; } -static int tegra_slink_prepare_transfer(struct spi_master *master) -{ - struct tegra_slink_data *tspi = spi_master_get_devdata(master); - - return pm_runtime_get_sync(tspi->dev); -} - -static int tegra_slink_unprepare_transfer(struct spi_master *master) -{ - struct tegra_slink_data *tspi = spi_master_get_devdata(master); - - pm_runtime_put(tspi->dev); - return 0; -} - static int tegra_slink_transfer_one_message(struct spi_master *master, struct spi_message *msg) { @@ -885,6 +870,12 @@ static int tegra_slink_transfer_one_message(struct spi_master *master, msg->status = 0; msg->actual_length = 0; + ret = 
pm_runtime_get_sync(tspi->dev); + if (ret < 0) { + dev_err(tspi->dev, "runtime get failed: %d\n", ret); + goto done; + } + single_xfer = list_is_singular(&msg->transfers); list_for_each_entry(xfer, &msg->transfers, transfer_list) { INIT_COMPLETION(tspi->xfer_completion); @@ -921,6 +912,8 @@ static int tegra_slink_transfer_one_message(struct spi_master *master, exit: tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND); tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2); + pm_runtime_put(tspi->dev); +done: msg->status = ret; spi_finalize_current_message(master); return ret; @@ -1148,9 +1141,7 @@ static int tegra_slink_probe(struct platform_device *pdev) /* the spi->mode bits understood by this driver: */ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; master->setup = tegra_slink_setup; - master->prepare_transfer_hardware = tegra_slink_prepare_transfer; master->transfer_one_message = tegra_slink_transfer_one_message; - master->unprepare_transfer_hardware = tegra_slink_unprepare_transfer; master->num_chipselect = MAX_CHIP_SELECT; master->bus_num = -1; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f996c600eb8c..004b10f184d4 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -543,17 +543,16 @@ static void spi_pump_messages(struct kthread_work *work) /* Lock queue and check for queue work */ spin_lock_irqsave(&master->queue_lock, flags); if (list_empty(&master->queue) || !master->running) { - if (master->busy && master->unprepare_transfer_hardware) { - ret = master->unprepare_transfer_hardware(master); - if (ret) { - spin_unlock_irqrestore(&master->queue_lock, flags); - dev_err(&master->dev, - "failed to unprepare transfer hardware\n"); - return; - } + if (!master->busy) { + spin_unlock_irqrestore(&master->queue_lock, flags); + return; } master->busy = false; spin_unlock_irqrestore(&master->queue_lock, flags); + if (master->unprepare_transfer_hardware && + master->unprepare_transfer_hardware(master)) + 
dev_err(&master->dev, + "failed to unprepare transfer hardware\n"); return; } @@ -984,7 +983,7 @@ static void acpi_register_spi_devices(struct spi_master *master) acpi_status status; acpi_handle handle; - handle = ACPI_HANDLE(&master->dev); + handle = ACPI_HANDLE(master->dev.parent); if (!handle) return; diff --git a/drivers/staging/ccg/f_fs.c b/drivers/staging/ccg/f_fs.c index 8adc79d1b402..f6373dade7fb 100644 --- a/drivers/staging/ccg/f_fs.c +++ b/drivers/staging/ccg/f_fs.c @@ -1223,6 +1223,7 @@ static struct file_system_type ffs_fs_type = { .mount = ffs_fs_mount, .kill_sb = ffs_fs_kill_sb, }; +MODULE_ALIAS_FS("functionfs"); /* Driver's main init/cleanup functions *************************************/ diff --git a/drivers/staging/comedi/drivers/dt9812.c b/drivers/staging/comedi/drivers/dt9812.c index 192cf088f834..57b451904791 100644 --- a/drivers/staging/comedi/drivers/dt9812.c +++ b/drivers/staging/comedi/drivers/dt9812.c @@ -947,12 +947,13 @@ static int dt9812_di_rinsn(struct comedi_device *dev, unsigned int *data) { struct comedi_dt9812 *devpriv = dev->private; + unsigned int channel = CR_CHAN(insn->chanspec); int n; u8 bits = 0; dt9812_digital_in(devpriv->slot, &bits); for (n = 0; n < insn->n; n++) - data[n] = ((1 << insn->chanspec) & bits) != 0; + data[n] = ((1 << channel) & bits) != 0; return n; } @@ -961,12 +962,13 @@ static int dt9812_do_winsn(struct comedi_device *dev, unsigned int *data) { struct comedi_dt9812 *devpriv = dev->private; + unsigned int channel = CR_CHAN(insn->chanspec); int n; u8 bits = 0; dt9812_digital_out_shadow(devpriv->slot, &bits); for (n = 0; n < insn->n; n++) { - u8 mask = 1 << insn->chanspec; + u8 mask = 1 << channel; bits &= ~mask; if (data[n]) @@ -981,13 +983,13 @@ static int dt9812_ai_rinsn(struct comedi_device *dev, unsigned int *data) { struct comedi_dt9812 *devpriv = dev->private; + unsigned int channel = CR_CHAN(insn->chanspec); int n; for (n = 0; n < insn->n; n++) { u16 value = 0; - dt9812_analog_in(devpriv->slot, 
insn->chanspec, &value, - DT9812_GAIN_1); + dt9812_analog_in(devpriv->slot, channel, &value, DT9812_GAIN_1); data[n] = value; } return n; @@ -998,12 +1000,13 @@ static int dt9812_ao_rinsn(struct comedi_device *dev, unsigned int *data) { struct comedi_dt9812 *devpriv = dev->private; + unsigned int channel = CR_CHAN(insn->chanspec); int n; u16 value; for (n = 0; n < insn->n; n++) { value = 0; - dt9812_analog_out_shadow(devpriv->slot, insn->chanspec, &value); + dt9812_analog_out_shadow(devpriv->slot, channel, &value); data[n] = value; } return n; @@ -1014,10 +1017,11 @@ static int dt9812_ao_winsn(struct comedi_device *dev, unsigned int *data) { struct comedi_dt9812 *devpriv = dev->private; + unsigned int channel = CR_CHAN(insn->chanspec); int n; for (n = 0; n < insn->n; n++) - dt9812_analog_out(devpriv->slot, insn->chanspec, data[n]); + dt9812_analog_out(devpriv->slot, channel, data[n]); return n; } diff --git a/drivers/staging/comedi/drivers/s626.c b/drivers/staging/comedi/drivers/s626.c index 81a1fe661579..71a73ec5af8d 100644 --- a/drivers/staging/comedi/drivers/s626.c +++ b/drivers/staging/comedi/drivers/s626.c @@ -1483,7 +1483,7 @@ static int s626_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s) case TRIG_NONE: /* continous acquisition */ devpriv->ai_continous = 1; - devpriv->ai_sample_count = 0; + devpriv->ai_sample_count = 1; break; } diff --git a/drivers/staging/comedi/drivers/usbdux.c b/drivers/staging/comedi/drivers/usbdux.c index 1a0062a04456..6aac1f60bc42 100644 --- a/drivers/staging/comedi/drivers/usbdux.c +++ b/drivers/staging/comedi/drivers/usbdux.c @@ -730,10 +730,14 @@ static void usbduxsub_ao_IsocIrq(struct urb *urb) static int usbduxsub_start(struct usbduxsub *usbduxsub) { int errcode = 0; - uint8_t local_transfer_buffer[16]; + uint8_t *local_transfer_buffer; + + local_transfer_buffer = kmalloc(1, GFP_KERNEL); + if (!local_transfer_buffer) + return -ENOMEM; /* 7f92 to zero */ - local_transfer_buffer[0] = 0; + *local_transfer_buffer = 0; 
errcode = usb_control_msg(usbduxsub->usbdev, /* create a pipe for a control transfer */ usb_sndctrlpipe(usbduxsub->usbdev, 0), @@ -751,22 +755,25 @@ static int usbduxsub_start(struct usbduxsub *usbduxsub) 1, /* Timeout */ BULK_TIMEOUT); - if (errcode < 0) { + if (errcode < 0) dev_err(&usbduxsub->interface->dev, "comedi_: control msg failed (start)\n"); - return errcode; - } - return 0; + + kfree(local_transfer_buffer); + return errcode; } static int usbduxsub_stop(struct usbduxsub *usbduxsub) { int errcode = 0; + uint8_t *local_transfer_buffer; - uint8_t local_transfer_buffer[16]; + local_transfer_buffer = kmalloc(1, GFP_KERNEL); + if (!local_transfer_buffer) + return -ENOMEM; /* 7f92 to one */ - local_transfer_buffer[0] = 1; + *local_transfer_buffer = 1; errcode = usb_control_msg(usbduxsub->usbdev, usb_sndctrlpipe(usbduxsub->usbdev, 0), /* bRequest, "Firmware" */ @@ -781,12 +788,12 @@ static int usbduxsub_stop(struct usbduxsub *usbduxsub) 1, /* Timeout */ BULK_TIMEOUT); - if (errcode < 0) { + if (errcode < 0) dev_err(&usbduxsub->interface->dev, "comedi_: control msg failed (stop)\n"); - return errcode; - } - return 0; + + kfree(local_transfer_buffer); + return errcode; } static int usbduxsub_upload(struct usbduxsub *usbduxsub, diff --git a/drivers/staging/comedi/drivers/usbduxfast.c b/drivers/staging/comedi/drivers/usbduxfast.c index 4bf5dd094dc9..1ba0e3df492d 100644 --- a/drivers/staging/comedi/drivers/usbduxfast.c +++ b/drivers/staging/comedi/drivers/usbduxfast.c @@ -436,10 +436,14 @@ static void usbduxfastsub_ai_Irq(struct urb *urb) static int usbduxfastsub_start(struct usbduxfastsub_s *udfs) { int ret; - unsigned char local_transfer_buffer[16]; + unsigned char *local_transfer_buffer; + + local_transfer_buffer = kmalloc(1, GFP_KERNEL); + if (!local_transfer_buffer) + return -ENOMEM; /* 7f92 to zero */ - local_transfer_buffer[0] = 0; + *local_transfer_buffer = 0; /* bRequest, "Firmware" */ ret = usb_control_msg(udfs->usbdev, usb_sndctrlpipe(udfs->usbdev, 0), 
USBDUXFASTSUB_FIRMWARE, @@ -450,22 +454,25 @@ static int usbduxfastsub_start(struct usbduxfastsub_s *udfs) local_transfer_buffer, 1, /* Length */ EZTIMEOUT); /* Timeout */ - if (ret < 0) { + if (ret < 0) dev_err(&udfs->interface->dev, "control msg failed (start)\n"); - return ret; - } - return 0; + kfree(local_transfer_buffer); + return ret; } static int usbduxfastsub_stop(struct usbduxfastsub_s *udfs) { int ret; - unsigned char local_transfer_buffer[16]; + unsigned char *local_transfer_buffer; + + local_transfer_buffer = kmalloc(1, GFP_KERNEL); + if (!local_transfer_buffer) + return -ENOMEM; /* 7f92 to one */ - local_transfer_buffer[0] = 1; + *local_transfer_buffer = 1; /* bRequest, "Firmware" */ ret = usb_control_msg(udfs->usbdev, usb_sndctrlpipe(udfs->usbdev, 0), USBDUXFASTSUB_FIRMWARE, @@ -474,13 +481,12 @@ static int usbduxfastsub_stop(struct usbduxfastsub_s *udfs) 0x0000, /* Index */ local_transfer_buffer, 1, /* Length */ EZTIMEOUT); /* Timeout */ - if (ret < 0) { + if (ret < 0) dev_err(&udfs->interface->dev, "control msg failed (stop)\n"); - return ret; - } - return 0; + kfree(local_transfer_buffer); + return ret; } static int usbduxfastsub_upload(struct usbduxfastsub_s *udfs, diff --git a/drivers/staging/comedi/drivers/usbduxsigma.c b/drivers/staging/comedi/drivers/usbduxsigma.c index d066351a71b2..a728c8fc32a2 100644 --- a/drivers/staging/comedi/drivers/usbduxsigma.c +++ b/drivers/staging/comedi/drivers/usbduxsigma.c @@ -681,7 +681,11 @@ static void usbduxsub_ao_IsocIrq(struct urb *urb) static int usbduxsub_start(struct usbduxsub *usbduxsub) { int errcode = 0; - uint8_t local_transfer_buffer[16]; + uint8_t *local_transfer_buffer; + + local_transfer_buffer = kmalloc(16, GFP_KERNEL); + if (!local_transfer_buffer) + return -ENOMEM; /* 7f92 to zero */ local_transfer_buffer[0] = 0; @@ -702,19 +706,22 @@ static int usbduxsub_start(struct usbduxsub *usbduxsub) 1, /* Timeout */ BULK_TIMEOUT); - if (errcode < 0) { + if (errcode < 0) 
dev_err(&usbduxsub->interface->dev, "comedi_: control msg failed (start)\n"); - return errcode; - } - return 0; + + kfree(local_transfer_buffer); + return errcode; } static int usbduxsub_stop(struct usbduxsub *usbduxsub) { int errcode = 0; + uint8_t *local_transfer_buffer; - uint8_t local_transfer_buffer[16]; + local_transfer_buffer = kmalloc(16, GFP_KERNEL); + if (!local_transfer_buffer) + return -ENOMEM; /* 7f92 to one */ local_transfer_buffer[0] = 1; @@ -732,12 +739,12 @@ static int usbduxsub_stop(struct usbduxsub *usbduxsub) 1, /* Timeout */ BULK_TIMEOUT); - if (errcode < 0) { + if (errcode < 0) dev_err(&usbduxsub->interface->dev, "comedi_: control msg failed (stop)\n"); - return errcode; - } - return 0; + + kfree(local_transfer_buffer); + return errcode; } static int usbduxsub_upload(struct usbduxsub *usbduxsub, diff --git a/drivers/staging/gdm72xx/netlink_k.c b/drivers/staging/gdm72xx/netlink_k.c index 52c25ba5831d..c1239aaa6282 100644 --- a/drivers/staging/gdm72xx/netlink_k.c +++ b/drivers/staging/gdm72xx/netlink_k.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/etherdevice.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <asm/byteorder.h> #include <net/sock.h> @@ -25,12 +25,12 @@ #define ND_MAX_GROUP 30 #define ND_IFINDEX_LEN sizeof(int) -#define ND_NLMSG_SPACE(len) (NLMSG_SPACE(len) + ND_IFINDEX_LEN) +#define ND_NLMSG_SPACE(len) (nlmsg_total_size(len) + ND_IFINDEX_LEN) #define ND_NLMSG_DATA(nlh) \ - ((void *)((char *)NLMSG_DATA(nlh) + ND_IFINDEX_LEN)) + ((void *)((char *)nlmsg_data(nlh) + ND_IFINDEX_LEN)) #define ND_NLMSG_S_LEN(len) (len+ND_IFINDEX_LEN) #define ND_NLMSG_R_LEN(nlh) (nlh->nlmsg_len-ND_IFINDEX_LEN) -#define ND_NLMSG_IFIDX(nlh) NLMSG_DATA(nlh) +#define ND_NLMSG_IFIDX(nlh) nlmsg_data(nlh) #define ND_MAX_MSG_LEN 8096 #if defined(DEFINE_MUTEX) @@ -51,7 +51,7 @@ static void netlink_rcv_cb(struct sk_buff *skb) void *msg; int ifindex; - if (skb->len >= NLMSG_SPACE(0)) { + if (skb->len >= NLMSG_HDRLEN) { nlh = (struct 
nlmsghdr *)skb->data; if (skb->len < nlh->nlmsg_len || @@ -124,7 +124,7 @@ int netlink_send(struct sock *sock, int group, u16 type, void *msg, int len) return -EINVAL; } - skb = alloc_skb(NLMSG_SPACE(len), GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (!skb) { pr_err("netlink_broadcast ret=%d\n", ret); return -ENOMEM; diff --git a/drivers/staging/imx-drm/ipuv3-crtc.c b/drivers/staging/imx-drm/ipuv3-crtc.c index 4b3a019409b5..b028b0d1317b 100644 --- a/drivers/staging/imx-drm/ipuv3-crtc.c +++ b/drivers/staging/imx-drm/ipuv3-crtc.c @@ -483,17 +483,6 @@ static int ipu_get_resources(struct ipu_crtc *ipu_crtc, goto err_out; } - ipu_crtc->irq = ipu_idmac_channel_irq(ipu, ipu_crtc->ipu_ch, - IPU_IRQ_EOF); - ret = devm_request_irq(ipu_crtc->dev, ipu_crtc->irq, ipu_irq_handler, 0, - "imx_drm", ipu_crtc); - if (ret < 0) { - dev_err(ipu_crtc->dev, "irq request failed with %d.\n", ret); - goto err_out; - } - - disable_irq(ipu_crtc->irq); - return 0; err_out: ipu_put_resources(ipu_crtc); @@ -504,6 +493,7 @@ err_out: static int ipu_crtc_init(struct ipu_crtc *ipu_crtc, struct ipu_client_platformdata *pdata) { + struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent); int ret; ret = ipu_get_resources(ipu_crtc, pdata); @@ -522,6 +512,17 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc, goto err_put_resources; } + ipu_crtc->irq = ipu_idmac_channel_irq(ipu, ipu_crtc->ipu_ch, + IPU_IRQ_EOF); + ret = devm_request_irq(ipu_crtc->dev, ipu_crtc->irq, ipu_irq_handler, 0, + "imx_drm", ipu_crtc); + if (ret < 0) { + dev_err(ipu_crtc->dev, "irq request failed with %d.\n", ret); + goto err_put_resources; + } + + disable_irq(ipu_crtc->irq); + return 0; err_put_resources: diff --git a/drivers/staging/tidspbridge/rmgr/drv.c b/drivers/staging/tidspbridge/rmgr/drv.c index db1da28cecba..be26917a6896 100644 --- a/drivers/staging/tidspbridge/rmgr/drv.c +++ b/drivers/staging/tidspbridge/rmgr/drv.c @@ -76,37 +76,28 @@ int drv_insert_node_res_element(void *hnode, void *node_resource, struct 
node_res_object **node_res_obj = (struct node_res_object **)node_resource; struct process_context *ctxt = (struct process_context *)process_ctxt; - int status = 0; int retval; *node_res_obj = kzalloc(sizeof(struct node_res_object), GFP_KERNEL); - if (!*node_res_obj) { - status = -ENOMEM; - goto func_end; - } + if (!*node_res_obj) + return -ENOMEM; (*node_res_obj)->node = hnode; - retval = idr_get_new(ctxt->node_id, *node_res_obj, - &(*node_res_obj)->id); - if (retval == -EAGAIN) { - if (!idr_pre_get(ctxt->node_id, GFP_KERNEL)) { - pr_err("%s: OUT OF MEMORY\n", __func__); - status = -ENOMEM; - goto func_end; - } - - retval = idr_get_new(ctxt->node_id, *node_res_obj, - &(*node_res_obj)->id); + retval = idr_alloc(ctxt->node_id, *node_res_obj, 0, 0, GFP_KERNEL); + if (retval >= 0) { + (*node_res_obj)->id = retval; + return 0; } - if (retval) { + + kfree(*node_res_obj); + + if (retval == -ENOSPC) { pr_err("%s: FAILED, IDR is FULL\n", __func__); - status = -EFAULT; + return -EFAULT; + } else { + pr_err("%s: OUT OF MEMORY\n", __func__); + return -ENOMEM; } -func_end: - if (status) - kfree(*node_res_obj); - - return status; } /* Release all Node resources and its context @@ -201,35 +192,26 @@ int drv_proc_insert_strm_res_element(void *stream_obj, struct strm_res_object **pstrm_res = (struct strm_res_object **)strm_res; struct process_context *ctxt = (struct process_context *)process_ctxt; - int status = 0; int retval; *pstrm_res = kzalloc(sizeof(struct strm_res_object), GFP_KERNEL); - if (*pstrm_res == NULL) { - status = -EFAULT; - goto func_end; - } + if (*pstrm_res == NULL) + return -EFAULT; (*pstrm_res)->stream = stream_obj; - retval = idr_get_new(ctxt->stream_id, *pstrm_res, - &(*pstrm_res)->id); - if (retval == -EAGAIN) { - if (!idr_pre_get(ctxt->stream_id, GFP_KERNEL)) { - pr_err("%s: OUT OF MEMORY\n", __func__); - status = -ENOMEM; - goto func_end; - } - - retval = idr_get_new(ctxt->stream_id, *pstrm_res, - &(*pstrm_res)->id); + retval = idr_alloc(ctxt->stream_id, 
*pstrm_res, 0, 0, GFP_KERNEL); + if (retval >= 0) { + (*pstrm_res)->id = retval; + return 0; } - if (retval) { + + if (retval == -ENOSPC) { pr_err("%s: FAILED, IDR is FULL\n", __func__); - status = -EPERM; + return -EPERM; + } else { + pr_err("%s: OUT OF MEMORY\n", __func__); + return -ENOMEM; } - -func_end: - return status; } static int drv_proc_free_strm_res(int id, void *p, void *process_ctxt) diff --git a/drivers/staging/vt6656/card.c b/drivers/staging/vt6656/card.c index 22918a106d73..d2479b766450 100644 --- a/drivers/staging/vt6656/card.c +++ b/drivers/staging/vt6656/card.c @@ -790,7 +790,7 @@ u64 CARDqGetNextTBTT(u64 qwTSF, WORD wBeaconInterval) if ((~uLowNextTBTT) < uLowRemain) qwTSF = ((qwTSF >> 32) + 1) << 32; - qwTSF = (qwTSF & 0xffffffff00000000UL) | + qwTSF = (qwTSF & 0xffffffff00000000ULL) | (u64)(uLowNextTBTT + uLowRemain); return (qwTSF); diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index d5f53e1a74a2..a5063a6f64d9 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -669,8 +669,6 @@ static int vt6656_suspend(struct usb_interface *intf, pm_message_t message) if (device->flags & DEVICE_FLAGS_OPENED) device_close(device->dev); - usb_put_dev(interface_to_usbdev(intf)); - return 0; } @@ -681,8 +679,6 @@ static int vt6656_resume(struct usb_interface *intf) if (!device || !device->dev) return -ENODEV; - usb_get_dev(interface_to_usbdev(intf)); - if (!(device->flags & DEVICE_FLAGS_OPENED)) device_open(device->dev); diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig index 73582705e8c5..5c3714530961 100644 --- a/drivers/staging/zcache/Kconfig +++ b/drivers/staging/zcache/Kconfig @@ -15,7 +15,7 @@ config RAMSTER depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE=y depends on NET # must ensure struct page is 8-byte aligned - select HAVE_ALIGNED_STRUCT_PAGE if !64_BIT + select HAVE_ALIGNED_STRUCT_PAGE if !64BIT default n help RAMster allows RAM on other 
machines in a cluster to be utilized diff --git a/drivers/staging/zcache/ramster/tcp.c b/drivers/staging/zcache/ramster/tcp.c index aa2a1a763aa4..f6e1e5209d88 100644 --- a/drivers/staging/zcache/ramster/tcp.c +++ b/drivers/staging/zcache/ramster/tcp.c @@ -300,27 +300,22 @@ static u8 r2net_num_from_nn(struct r2net_node *nn) static int r2net_prep_nsw(struct r2net_node *nn, struct r2net_status_wait *nsw) { - int ret = 0; + int ret; - do { - if (!idr_pre_get(&nn->nn_status_idr, GFP_ATOMIC)) { - ret = -EAGAIN; - break; - } - spin_lock(&nn->nn_lock); - ret = idr_get_new(&nn->nn_status_idr, nsw, &nsw->ns_id); - if (ret == 0) - list_add_tail(&nsw->ns_node_item, - &nn->nn_status_list); - spin_unlock(&nn->nn_lock); - } while (ret == -EAGAIN); + spin_lock(&nn->nn_lock); + ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + nsw->ns_id = ret; + list_add_tail(&nsw->ns_node_item, &nn->nn_status_list); + } + spin_unlock(&nn->nn_lock); - if (ret == 0) { + if (ret >= 0) { init_waitqueue_head(&nsw->ns_wq); nsw->ns_sys_status = R2NET_ERR_NONE; nsw->ns_status = 0; + return 0; } - return ret; } diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index db0cf7c8adde..a0fc7b9eea65 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -166,6 +166,7 @@ static int chap_server_compute_md5( { char *endptr; unsigned long id; + unsigned char id_as_uchar; unsigned char digest[MD5_SIGNATURE_SIZE]; unsigned char type, response[MD5_SIGNATURE_SIZE * 2 + 2]; unsigned char identifier[10], *challenge = NULL; @@ -355,7 +356,9 @@ static int chap_server_compute_md5( goto out; } - sg_init_one(&sg, &id, 1); + /* To handle both endiannesses */ + id_as_uchar = id; + sg_init_one(&sg, &id_as_uchar, 1); ret = crypto_hash_update(&desc, &sg, 1); if (ret < 0) { pr_err("crypto_hash_update() failed for id\n"); diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 
ff1c5ee352cb..cbe48ab41745 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -409,6 +409,7 @@ static inline int core_alua_state_standby( case REPORT_LUNS: case RECEIVE_DIAGNOSTIC: case SEND_DIAGNOSTIC: + return 0; case MAINTENANCE_IN: switch (cdb[1] & 0x1f) { case MI_REPORT_TARGET_PGS: @@ -451,6 +452,7 @@ static inline int core_alua_state_unavailable( switch (cdb[0]) { case INQUIRY: case REPORT_LUNS: + return 0; case MAINTENANCE_IN: switch (cdb[1] & 0x1f) { case MI_REPORT_TARGET_PGS: @@ -491,6 +493,7 @@ static inline int core_alua_state_transition( switch (cdb[0]) { case INQUIRY: case REPORT_LUNS: + return 0; case MAINTENANCE_IN: switch (cdb[1] & 0x1f) { case MI_REPORT_TARGET_PGS: diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index bc02b018ae46..37ffc5bd2399 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -7,7 +7,7 @@ #define FD_DEVICE_QUEUE_DEPTH 32 #define FD_MAX_DEVICE_QUEUE_DEPTH 128 #define FD_BLOCKSIZE 512 -#define FD_MAX_SECTORS 1024 +#define FD_MAX_SECTORS 2048 #define RRF_EMULATE_CDB 0x01 #define RRF_GOT_LBA 0x02 diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 82e78d72fdb6..e992b27aa090 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -883,7 +883,14 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, pr_debug("PSCSI: i: %d page: %p len: %d off: %d\n", i, page, len, off); - while (len > 0 && data_len > 0) { + /* + * We only have one page of data in each sg element, + * we can not cross a page boundary. 
+ */ + if (off + len > PAGE_SIZE) + goto fail; + + if (len > 0 && data_len > 0) { bytes = min_t(unsigned int, len, PAGE_SIZE - off); bytes = min(bytes, data_len); @@ -940,9 +947,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, bio = NULL; } - len -= bytes; data_len -= bytes; - off = 0; } } diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 290230de2c53..60d4b5185f32 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -464,8 +464,11 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) break; case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: - if (!ops->execute_sync_cache) - return TCM_UNSUPPORTED_SCSI_OPCODE; + if (!ops->execute_sync_cache) { + size = 0; + cmd->execute_cmd = sbc_emulate_noop; + break; + } /* * Extract LBA and range to be flushed for emulated SYNCHRONIZE_CACHE diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 9169d6a5d7e4..aac9d2727e3c 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -711,7 +711,8 @@ int core_tpg_register( if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) { if (core_tpg_setup_virtual_lun0(se_tpg) < 0) { - kfree(se_tpg); + array_free(se_tpg->tpg_lun_list, + TRANSPORT_MAX_LUNS_PER_TPG); return -ENOMEM; } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2030b608136d..3243ea790eab 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1139,8 +1139,10 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) return ret; ret = target_check_reservation(cmd); - if (ret) + if (ret) { + cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT; return ret; + } ret = dev->transport->parse_cdb(cmd); if (ret) diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c index 7b0bfa0e7a9c..3078c403b42d 100644 --- a/drivers/thermal/dove_thermal.c +++ 
b/drivers/thermal/dove_thermal.c @@ -143,22 +143,18 @@ static int dove_thermal_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->sensor = devm_request_and_ioremap(&pdev->dev, res); - if (!priv->sensor) { - dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); - return -EADDRNOTAVAIL; - } + priv->sensor = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->sensor)) + return PTR_ERR(priv->sensor); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (!res) { dev_err(&pdev->dev, "Failed to get platform resource\n"); return -ENODEV; } - priv->control = devm_request_and_ioremap(&pdev->dev, res); - if (!priv->control) { - dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); - return -EADDRNOTAVAIL; - } + priv->control = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->control)) + return PTR_ERR(priv->control); ret = dove_init_sensor(priv); if (ret) { diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index e04ebd8671ac..46568c078dee 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -476,7 +476,7 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) if (IS_ERR(th_zone->therm_dev)) { pr_err("Failed to register thermal zone device\n"); - ret = -EINVAL; + ret = PTR_ERR(th_zone->therm_dev); goto err_unregister; } th_zone->mode = THERMAL_DEVICE_ENABLED; diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c index 65cb4f09e8f6..e5500edb5285 100644 --- a/drivers/thermal/kirkwood_thermal.c +++ b/drivers/thermal/kirkwood_thermal.c @@ -85,11 +85,9 @@ static int kirkwood_thermal_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->sensor = devm_request_and_ioremap(&pdev->dev, res); - if (!priv->sensor) { - dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); - return -EADDRNOTAVAIL; - } + priv->sensor = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->sensor)) + return 
PTR_ERR(priv->sensor); thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0, priv, &ops, NULL, 0, 0); diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 28f091994013..2cc5b6115e3e 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -145,6 +145,7 @@ static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) struct device *dev = rcar_priv_to_dev(priv); int i; int ctemp, old, new; + int ret = -EINVAL; mutex_lock(&priv->lock); @@ -174,7 +175,7 @@ static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) if (!ctemp) { dev_err(dev, "thermal sensor was broken\n"); - return -EINVAL; + goto err_out_unlock; } /* @@ -192,10 +193,10 @@ static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) dev_dbg(dev, "thermal%d %d -> %d\n", priv->id, priv->ctemp, ctemp); priv->ctemp = ctemp; - + ret = 0; +err_out_unlock: mutex_unlock(&priv->lock); - - return 0; + return ret; } static int rcar_thermal_get_temp(struct thermal_zone_device *zone, @@ -363,6 +364,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) struct resource *res, *irq; int mres = 0; int i; + int ret = -ENODEV; int idle = IDLE_INTERVAL; common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL); @@ -399,11 +401,9 @@ static int rcar_thermal_probe(struct platform_device *pdev) /* * rcar_has_irq_support() will be enabled */ - common->base = devm_request_and_ioremap(dev, res); - if (!common->base) { - dev_err(dev, "Unable to ioremap thermal register\n"); - return -ENOMEM; - } + common->base = devm_ioremap_resource(dev, res); + if (IS_ERR(common->base)) + return PTR_ERR(common->base); /* enable temperature comparation */ rcar_thermal_common_write(common, ENR, 0x00030303); @@ -422,11 +422,9 @@ static int rcar_thermal_probe(struct platform_device *pdev) return -ENOMEM; } - priv->base = devm_request_and_ioremap(dev, res); - if (!priv->base) { - dev_err(dev, "Unable to ioremap priv register\n"); - return -ENOMEM; - } + 
priv->base = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); priv->common = common; priv->id = i; @@ -441,6 +439,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) idle); if (IS_ERR(priv->zone)) { dev_err(dev, "can't register thermal zone\n"); + ret = PTR_ERR(priv->zone); goto error_unregister; } @@ -460,7 +459,7 @@ error_unregister: rcar_thermal_for_each_priv(priv, common) thermal_zone_device_unregister(priv->zone); - return -ENODEV; + return ret; } static int rcar_thermal_remove(struct platform_device *pdev) diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 484b6a3c9b03..302909ccf183 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -2643,9 +2643,9 @@ static int mxser_probe(struct pci_dev *pdev, mxvar_sdriver, brd->idx + i, &pdev->dev); if (IS_ERR(tty_dev)) { retval = PTR_ERR(tty_dev); - for (i--; i >= 0; i--) + for (; i > 0; i--) tty_unregister_device(mxvar_sdriver, - brd->idx + i); + brd->idx + i - 1); goto err_relbrd; } } @@ -2751,9 +2751,9 @@ static int __init mxser_module_init(void) tty_dev = tty_port_register_device(&brd->ports[i].port, mxvar_sdriver, brd->idx + i, NULL); if (IS_ERR(tty_dev)) { - for (i--; i >= 0; i--) + for (; i > 0; i--) tty_unregister_device(mxvar_sdriver, - brd->idx + i); + brd->idx + i - 1); for (i = 0; i < brd->info->nports; i++) tty_port_destroy(&brd->ports[i].port); free_irq(brd->irq, brd); diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250_core.c index 0efc815a4968..35f9c96aada9 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -301,7 +301,28 @@ static const struct serial8250_config uart_config[] = { }, [PORT_8250_CIR] = { .name = "CIR port" - } + }, + [PORT_ALTR_16550_F32] = { + .name = "Altera 16550 FIFO32", + .fifo_size = 32, + .tx_loadsz = 32, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_ALTR_16550_F64] = { + .name = "Altera 
16550 FIFO64", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_ALTR_16550_F128] = { + .name = "Altera 16550 FIFO128", + .fifo_size = 128, + .tx_loadsz = 128, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, }; /* Uart divisor latch read */ @@ -3396,3 +3417,34 @@ module_param_array(probe_rsa, ulong, &probe_rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA"); #endif MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR); + +#ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS +#ifndef MODULE +/* This module was renamed to 8250_core in 3.7. Keep the old "8250" name + * working as well for the module options so we don't break people. We + * need to keep the names identical and the convenient macros will happily + * refuse to let us do that by failing the build with redefinition errors + * of global variables. So we stick them inside a dummy function to avoid + * those conflicts. The options still get parsed, and the redefined + * MODULE_PARAM_PREFIX lets us keep the "8250." syntax alive. + * + * This is hacky. I'm sorry. + */ +static void __used s8250_options(void) +{ +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "8250_core." 
+ + module_param_cb(share_irqs, &param_ops_uint, &share_irqs, 0644); + module_param_cb(nr_uarts, &param_ops_uint, &nr_uarts, 0644); + module_param_cb(skip_txen_test, &param_ops_uint, &skip_txen_test, 0644); +#ifdef CONFIG_SERIAL_8250_RSA + __module_param_call(MODULE_PARAM_PREFIX, probe_rsa, + &param_array_ops, .arr = &__param_arr_probe_rsa, + 0444, -1); +#endif +} +#else +MODULE_ALIAS("8250_core"); +#endif +#endif diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 791c5a77ec61..26e3a97ab157 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1554,6 +1554,7 @@ pci_wch_ch353_setup(struct serial_private *priv, #define PCI_DEVICE_ID_PLX_CRONYX_OMEGA 0xc001 #define PCI_DEVICE_ID_INTEL_PATSBURG_KT 0x1d3d #define PCI_VENDOR_ID_WCH 0x4348 +#define PCI_DEVICE_ID_WCH_CH352_2S 0x3253 #define PCI_DEVICE_ID_WCH_CH353_4S 0x3453 #define PCI_DEVICE_ID_WCH_CH353_2S1PF 0x5046 #define PCI_DEVICE_ID_WCH_CH353_2S1P 0x7053 @@ -1571,6 +1572,7 @@ pci_wch_ch353_setup(struct serial_private *priv, /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584 +#define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588 /* * Master list of serial port init/setup/exit quirks.
@@ -1852,15 +1854,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { }, { .vendor = PCI_VENDOR_ID_PLX, - .device = PCI_DEVICE_ID_PLX_9050, - .subvendor = PCI_VENDOR_ID_PLX, - .subdevice = PCI_SUBDEVICE_ID_UNKNOWN_0x1584, - .init = pci_plx9050_init, - .setup = pci_default_setup, - .exit = pci_plx9050_exit, - }, - { - .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_ROMULUS, .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_ROMULUS, @@ -2180,6 +2173,14 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_wch_ch353_setup, }, + /* WCH CH352 2S card (16550 clone) */ + { + .vendor = PCI_VENDOR_ID_WCH, + .device = PCI_DEVICE_ID_WCH_CH352_2S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_wch_ch353_setup, + }, /* * ASIX devices with FIFO bug */ @@ -3733,7 +3734,12 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_PLX, PCI_SUBDEVICE_ID_UNKNOWN_0x1584, 0, 0, - pbn_b0_4_115200 }, + pbn_b2_4_115200 }, + /* Unknown card - subdevice 0x1588 */ + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, + PCI_VENDOR_ID_PLX, + PCI_SUBDEVICE_ID_UNKNOWN_0x1588, 0, 0, + pbn_b2_8_115200 }, { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_SUBVENDOR_ID_KEYSPAN, PCI_SUBDEVICE_ID_KEYSPAN_SX2, 0, 0, @@ -4791,6 +4797,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_VENDOR_ID_IBM, 0x0299, 0, 0, pbn_b0_bt_2_115200 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9835, + 0x1000, 0x0012, + 0, 0, pbn_b0_bt_2_115200 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901, 0xA000, 0x1000, 0, 0, pbn_b0_1_115200 }, @@ -4869,6 +4879,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_2_115200 }, + { PCI_VENDOR_ID_WCH, PCI_DEVICE_ID_WCH_CH352_2S, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b0_bt_2_115200 }, + /* * Commtech, Inc. 
Fastcom adapters */ diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 2ef9537bcb2c..80fe91e64a52 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -33,6 +33,23 @@ config SERIAL_8250 Most people will say Y or M here, so that they can use serial mice, modems and similar devices connecting to the standard serial ports. +config SERIAL_8250_DEPRECATED_OPTIONS + bool "Support 8250_core.* kernel options (DEPRECATED)" + depends on SERIAL_8250 + default y + ---help--- + In 3.7 we renamed 8250 to 8250_core by mistake, so now we have to + accept kernel parameters in both forms like 8250_core.nr_uarts=4 and + 8250.nr_uarts=4. We now renamed the module back to 8250, but if + anybody noticed in 3.7 and changed their userspace we still have to + keep the 8250_core.* options around until they revert the changes + they already did. + + If 8250 is built as a module, this adds 8250_core alias instead. + + If you did not notice yet and/or you have userspace from pre-3.7, it + is safe (and recommended) to say N here. + config SERIAL_8250_PNP bool "8250/16550 PNP device support" if EXPERT depends on SERIAL_8250 && PNP diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile index a23838a4d535..36d68d054307 100644 --- a/drivers/tty/serial/8250/Makefile +++ b/drivers/tty/serial/8250/Makefile @@ -2,10 +2,10 @@ # Makefile for the 8250 serial device drivers.
# -obj-$(CONFIG_SERIAL_8250) += 8250_core.o -8250_core-y := 8250.o -8250_core-$(CONFIG_SERIAL_8250_PNP) += 8250_pnp.o -8250_core-$(CONFIG_SERIAL_8250_DMA) += 8250_dma.o +obj-$(CONFIG_SERIAL_8250) += 8250.o +8250-y := 8250_core.o +8250-$(CONFIG_SERIAL_8250_PNP) += 8250_pnp.o +8250-$(CONFIG_SERIAL_8250_DMA) += 8250_dma.o obj-$(CONFIG_SERIAL_8250_GSC) += 8250_gsc.o obj-$(CONFIG_SERIAL_8250_PCI) += 8250_pci.o obj-$(CONFIG_SERIAL_8250_HP300) += 8250_hp300.o diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index cf9210db9fa9..7e7006fd404e 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -211,14 +211,14 @@ config SERIAL_SAMSUNG config SERIAL_SAMSUNG_UARTS_4 bool depends on PLAT_SAMSUNG - default y if !(CPU_S3C2410 || SERIAL_S3C2412 || CPU_S3C2440 || CPU_S3C2442) + default y if !(CPU_S3C2410 || CPU_S3C2412 || CPU_S3C2440 || CPU_S3C2442) help Internal node for the common case of 4 Samsung compatible UARTs config SERIAL_SAMSUNG_UARTS int depends on PLAT_SAMSUNG - default 6 if ARCH_S5P6450 + default 6 if CPU_S5P6450 default 4 if SERIAL_SAMSUNG_UARTS_4 || CPU_S3C2416 default 3 help diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index d4a7c241b751..3467462869ce 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -158,7 +158,7 @@ struct atmel_uart_port { }; static struct atmel_uart_port atmel_ports[ATMEL_MAX_UART]; -static unsigned long atmel_ports_in_use; +static DECLARE_BITMAP(atmel_ports_in_use, ATMEL_MAX_UART); #ifdef SUPPORT_SYSRQ static struct console atmel_console; @@ -1769,15 +1769,14 @@ static int atmel_serial_probe(struct platform_device *pdev) if (ret < 0) /* port id not found in platform data nor device-tree aliases: * auto-enumerate it */ - ret = find_first_zero_bit(&atmel_ports_in_use, - sizeof(atmel_ports_in_use)); + ret = find_first_zero_bit(atmel_ports_in_use, ATMEL_MAX_UART); - if (ret > ATMEL_MAX_UART) { + if (ret >= ATMEL_MAX_UART) { ret = 
-ENODEV; goto err; } - if (test_and_set_bit(ret, &atmel_ports_in_use)) { + if (test_and_set_bit(ret, atmel_ports_in_use)) { /* port already in use */ ret = -EBUSY; goto err; @@ -1857,7 +1856,7 @@ static int atmel_serial_remove(struct platform_device *pdev) /* "port" is allocated statically, so we shouldn't free it */ - clear_bit(port->line, &atmel_ports_in_use); + clear_bit(port->line, atmel_ports_in_use); clk_put(atmel_port->clk); diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index 719594e5fc21..52a3ecd40421 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -235,7 +235,7 @@ static const char *bcm_uart_type(struct uart_port *port) */ static void bcm_uart_do_rx(struct uart_port *port) { - struct tty_port *port = &port->state->port; + struct tty_port *tty_port = &port->state->port; unsigned int max_count; /* limit number of char read in interrupt, should not be @@ -260,7 +260,7 @@ static void bcm_uart_do_rx(struct uart_port *port) bcm_uart_writel(port, val, UART_CTL_REG); port->icount.overrun++; - tty_insert_flip_char(port, 0, TTY_OVERRUN); + tty_insert_flip_char(tty_port, 0, TTY_OVERRUN); } if (!(iestat & UART_IR_STAT(UART_IR_RXNOTEMPTY))) @@ -299,11 +299,11 @@ static void bcm_uart_do_rx(struct uart_port *port) if ((cstat & port->ignore_status_mask) == 0) - tty_insert_flip_char(port, c, flag); + tty_insert_flip_char(tty_port, c, flag); } while (--max_count); - tty_flip_buffer_push(port); + tty_flip_buffer_push(tty_port); } /* diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c index c0e1fad51be7..018bad922554 100644 --- a/drivers/tty/serial/mpc52xx_uart.c +++ b/drivers/tty/serial/mpc52xx_uart.c @@ -550,7 +550,7 @@ static int mpc512x_psc_clock(struct uart_port *port, int enable) return 0; psc_num = (port->mapbase & 0xf00) >> 8; - snprintf(clk_name, sizeof(clk_name), "psc%d_clk", psc_num); + snprintf(clk_name, sizeof(clk_name), "psc%d_mclk", psc_num); psc_clk = 
clk_get(port->dev, clk_name); if (IS_ERR(psc_clk)) { dev_err(port->dev, "Failed to get PSC clock entry!\n"); diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index d5874605682b..b025d5438275 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -241,6 +241,12 @@ static struct of_device_id of_platform_serial_table[] = { { .compatible = "ns16850", .data = (void *)PORT_16850, }, { .compatible = "nvidia,tegra20-uart", .data = (void *)PORT_TEGRA, }, { .compatible = "nxp,lpc3220-uart", .data = (void *)PORT_LPC3220, }, + { .compatible = "altr,16550-FIFO32", + .data = (void *)PORT_ALTR_16550_F32, }, + { .compatible = "altr,16550-FIFO64", + .data = (void *)PORT_ALTR_16550_F64, }, + { .compatible = "altr,16550-FIFO128", + .data = (void *)PORT_ALTR_16550_F128, }, #ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL { .compatible = "ibm,qpace-nwp-serial", .data = (void *)PORT_NWPSERIAL, }, diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 4dc41408ecb7..30d4f7a783cd 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -886,6 +886,17 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); /* FIFO ENABLE, DMA MODE */ + up->scr |= OMAP_UART_SCR_RX_TRIG_GRANU1_MASK; + /* + * NOTE: Setting OMAP_UART_SCR_RX_TRIG_GRANU1_MASK + * sets Enables the granularity of 1 for TRIGGER RX + * level. Along with setting RX FIFO trigger level + * to 1 (as noted below, 16 characters) and TLR[3:0] + * to zero this will result RX FIFO threshold level + * to 1 character, instead of 16 as noted in comment + * below. 
+ */ + /* Set receive FIFO threshold to 16 characters and * transmit FIFO threshold to 16 spaces */ diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index e343d6670854..451687cb9685 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -968,6 +968,7 @@ static struct uart_ops sunsu_pops = { #define UART_NR 4 static struct uart_sunsu_port sunsu_ports[UART_NR]; +static int nr_inst; /* Number of already registered ports */ #ifdef CONFIG_SERIO @@ -1337,13 +1338,8 @@ static int __init sunsu_console_setup(struct console *co, char *options) printk("Console: ttyS%d (SU)\n", (sunsu_reg.minor - 64) + co->index); - /* - * Check whether an invalid uart number has been specified, and - * if so, search for the first available port that does have - * console support. - */ - if (co->index >= UART_NR) - co->index = 0; + if (co->index > nr_inst) + return -ENODEV; port = &sunsu_ports[co->index].port; /* @@ -1408,7 +1404,6 @@ static enum su_type su_get_type(struct device_node *dp) static int su_probe(struct platform_device *op) { - static int inst; struct device_node *dp = op->dev.of_node; struct uart_sunsu_port *up; struct resource *rp; @@ -1418,16 +1413,16 @@ static int su_probe(struct platform_device *op) type = su_get_type(dp); if (type == SU_PORT_PORT) { - if (inst >= UART_NR) + if (nr_inst >= UART_NR) return -EINVAL; - up = &sunsu_ports[inst]; + up = &sunsu_ports[nr_inst]; } else { up = kzalloc(sizeof(*up), GFP_KERNEL); if (!up) return -ENOMEM; } - up->port.line = inst; + up->port.line = nr_inst; spin_lock_init(&up->port.lock); @@ -1461,6 +1456,8 @@ static int su_probe(struct platform_device *op) } dev_set_drvdata(&op->dev, up); + nr_inst++; + return 0; } @@ -1488,7 +1485,7 @@ static int su_probe(struct platform_device *op) dev_set_drvdata(&op->dev, up); - inst++; + nr_inst++; return 0; diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c index a3f9dd5c9dff..705240e6c4ec 100644 --- 
a/drivers/tty/serial/vt8500_serial.c +++ b/drivers/tty/serial/vt8500_serial.c @@ -611,14 +611,7 @@ static int vt8500_serial_probe(struct platform_device *pdev) vt8500_port->uart.dev = &pdev->dev; vt8500_port->uart.flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF; - vt8500_port->clk = of_clk_get(pdev->dev.of_node, 0); - if (!IS_ERR(vt8500_port->clk)) { - vt8500_port->uart.uartclk = clk_get_rate(vt8500_port->clk); - } else { - /* use the default of 24Mhz if not specified and warn */ - pr_warn("%s: serial clock source not specified\n", __func__); - vt8500_port->uart.uartclk = 24000000; - } + vt8500_port->uart.uartclk = clk_get_rate(vt8500_port->clk); snprintf(vt8500_port->name, sizeof(vt8500_port->name), "VT8500 UART%d", pdev->id); diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index ba451c7209fc..f36bbba1ac8b 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -578,6 +578,8 @@ static int xuartps_startup(struct uart_port *port) /* Receive Timeout register is enabled with value of 10 */ xuartps_writel(10, XUARTPS_RXTOUT_OFFSET); + /* Clear out any pending interrupts before enabling them */ + xuartps_writel(xuartps_readl(XUARTPS_ISR_OFFSET), XUARTPS_ISR_OFFSET); /* Set the Interrupt Registers with desired interrupts */ xuartps_writel(XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_PARITY | diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index bb119934e76c..578aa7594b11 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -425,7 +425,7 @@ static void flush_to_ldisc(struct work_struct *work) struct tty_ldisc *disc; tty = port->itty; - if (WARN_RATELIMIT(tty == NULL, "tty is NULL\n")) + if (tty == NULL) return; disc = tty_ldisc_ref(tty); diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index e4ca345873c3..d7799deacb21 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -93,7 +93,7 @@ vcs_poll_data_free(struct vcs_poll_data *poll) static 
struct vcs_poll_data * vcs_poll_data_get(struct file *file) { - struct vcs_poll_data *poll = file->private_data; + struct vcs_poll_data *poll = file->private_data, *kill = NULL; if (poll) return poll; @@ -122,10 +122,12 @@ vcs_poll_data_get(struct file *file) file->private_data = poll; } else { /* someone else raced ahead of us */ - vcs_poll_data_free(poll); + kill = poll; poll = file->private_data; } spin_unlock(&file->f_lock); + if (kill) + vcs_poll_data_free(kill); return poll; } diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index f5ed3d75fa5a..8f5ebced5df0 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -46,7 +46,7 @@ obj-$(CONFIG_USB_MICROTEK) += image/ obj-$(CONFIG_USB_SERIAL) += serial/ obj-$(CONFIG_USB) += misc/ -obj-$(CONFIG_USB_COMMON) += phy/ +obj-$(CONFIG_USB_OTG_UTILS) += phy/ obj-$(CONFIG_EARLY_PRINTK_DBGP) += early/ obj-$(CONFIG_USB_ATM) += atm/ diff --git a/drivers/usb/c67x00/c67x00-sched.c b/drivers/usb/c67x00/c67x00-sched.c index a03fbc15fa9c..aa491627a45b 100644 --- a/drivers/usb/c67x00/c67x00-sched.c +++ b/drivers/usb/c67x00/c67x00-sched.c @@ -100,7 +100,7 @@ struct c67x00_urb_priv { #define TD_PIDEP_OFFSET 0x04 #define TD_PIDEPMASK_PID 0xF0 #define TD_PIDEPMASK_EP 0x0F -#define TD_PORTLENMASK_DL 0x02FF +#define TD_PORTLENMASK_DL 0x03FF #define TD_PORTLENMASK_PN 0xC000 #define TD_STATUS_OFFSET 0x07 @@ -590,7 +590,7 @@ static int c67x00_create_td(struct c67x00_hcd *c67x00, struct urb *urb, { struct c67x00_td *td; struct c67x00_urb_priv *urbp = urb->hcpriv; - const __u8 active_flag = 1, retry_cnt = 1; + const __u8 active_flag = 1, retry_cnt = 3; __u8 cmd = 0; int tt = 0; diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 2f45bba8561d..f64fbea1cf20 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1767,7 +1767,7 @@ static int udc_start(struct ci13xxx *ci) goto put_transceiver; } - retval = dbg_create_files(&ci->gadget.dev); + retval = dbg_create_files(ci->dev); if 
(retval) goto unreg_device; @@ -1796,7 +1796,7 @@ remove_trans: dev_err(dev, "error = %i\n", retval); remove_dbg: - dbg_remove_files(&ci->gadget.dev); + dbg_remove_files(ci->dev); unreg_device: device_unregister(&ci->gadget.dev); put_transceiver: @@ -1836,7 +1836,7 @@ static void udc_stop(struct ci13xxx *ci) if (ci->global_phy) usb_put_phy(ci->transceiver); } - dbg_remove_files(&ci->gadget.dev); + dbg_remove_files(ci->dev); device_unregister(&ci->gadget.dev); /* my kobject is dynamic, I swear! */ memset(&ci->gadget, 0, sizeof(ci->gadget)); diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 8ac25adf31b4..387dc6c8ad25 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -593,7 +593,6 @@ static void acm_port_destruct(struct tty_port *port) dev_dbg(&acm->control->dev, "%s\n", __func__); - tty_unregister_device(acm_tty_driver, acm->minor); acm_release_minor(acm); usb_put_intf(acm->control); kfree(acm->country_codes); @@ -977,6 +976,8 @@ static int acm_probe(struct usb_interface *intf, int num_rx_buf; int i; int combined_interfaces = 0; + struct device *tty_dev; + int rv = -ENOMEM; /* normal quirks */ quirks = (unsigned long)id->driver_info; @@ -1339,11 +1340,24 @@ skip_countries: usb_set_intfdata(data_interface, acm); usb_get_intf(control_interface); - tty_port_register_device(&acm->port, acm_tty_driver, minor, + tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor, &control_interface->dev); + if (IS_ERR(tty_dev)) { + rv = PTR_ERR(tty_dev); + goto alloc_fail8; + } return 0; +alloc_fail8: + if (acm->country_codes) { + device_remove_file(&acm->control->dev, + &dev_attr_wCountryCodes); + device_remove_file(&acm->control->dev, + &dev_attr_iCountryCodeRelDate); + } + device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); alloc_fail7: + usb_set_intfdata(intf, NULL); for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); alloc_fail6: @@ -1359,7 +1373,7 @@ alloc_fail2: acm_release_minor(acm); 
kfree(acm); alloc_fail: - return -ENOMEM; + return rv; } static void stop_data_traffic(struct acm *acm) @@ -1411,6 +1425,8 @@ static void acm_disconnect(struct usb_interface *intf) stop_data_traffic(acm); + tty_unregister_device(acm_tty_driver, acm->minor); + usb_free_urb(acm->ctrlurb); for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 5f0cb417b736..122d056d96d5 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -56,6 +56,7 @@ MODULE_DEVICE_TABLE (usb, wdm_ids); #define WDM_RESPONDING 7 #define WDM_SUSPENDING 8 #define WDM_RESETTING 9 +#define WDM_OVERFLOW 10 #define WDM_MAX 16 @@ -155,6 +156,7 @@ static void wdm_in_callback(struct urb *urb) { struct wdm_device *desc = urb->context; int status = urb->status; + int length = urb->actual_length; spin_lock(&desc->iuspin); clear_bit(WDM_RESPONDING, &desc->flags); @@ -185,9 +187,17 @@ static void wdm_in_callback(struct urb *urb) } desc->rerr = status; - desc->reslength = urb->actual_length; - memmove(desc->ubuf + desc->length, desc->inbuf, desc->reslength); - desc->length += desc->reslength; + if (length + desc->length > desc->wMaxCommand) { + /* The buffer would overflow */ + set_bit(WDM_OVERFLOW, &desc->flags); + } else { + /* we may already be in overflow */ + if (!test_bit(WDM_OVERFLOW, &desc->flags)) { + memmove(desc->ubuf + desc->length, desc->inbuf, length); + desc->length += length; + desc->reslength = length; + } + } skip_error: wake_up(&desc->wait); @@ -435,6 +445,11 @@ retry: rv = -ENODEV; goto err; } + if (test_bit(WDM_OVERFLOW, &desc->flags)) { + clear_bit(WDM_OVERFLOW, &desc->flags); + rv = -ENOBUFS; + goto err; + } i++; if (file->f_flags & O_NONBLOCK) { if (!test_bit(WDM_READ, &desc->flags)) { @@ -478,6 +493,7 @@ retry: spin_unlock_irq(&desc->iuspin); goto retry; } + if (!desc->reslength) { /* zero length read */ dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__); 
clear_bit(WDM_READ, &desc->flags); @@ -1004,6 +1020,7 @@ static int wdm_post_reset(struct usb_interface *intf) struct wdm_device *desc = wdm_find_device(intf); int rv; + clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc); mutex_unlock(&desc->wlock); diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 622b4a48e732..2b487d4797bd 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -173,6 +173,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) struct hc_driver *driver; struct usb_hcd *hcd; int retval; + int hcd_irq = 0; if (usb_disabled()) return -ENODEV; @@ -187,15 +188,19 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) return -ENODEV; dev->current_state = PCI_D0; - /* The xHCI driver supports MSI and MSI-X, - * so don't fail if the BIOS doesn't provide a legacy IRQ. + /* + * The xHCI driver has its own irq management + * make sure irq setup is not touched for xhci in generic hcd code */ - if (!dev->irq && (driver->flags & HCD_MASK) != HCD_USB3) { - dev_err(&dev->dev, - "Found HC with no IRQ. Check BIOS/PCI %s setup!\n", - pci_name(dev)); - retval = -ENODEV; - goto disable_pci; + if ((driver->flags & HCD_MASK) != HCD_USB3) { + if (!dev->irq) { + dev_err(&dev->dev, + "Found HC with no IRQ. 
Check BIOS/PCI %s setup!\n", + pci_name(dev)); + retval = -ENODEV; + goto disable_pci; + } + hcd_irq = dev->irq; } hcd = usb_create_hcd(driver, &dev->dev, pci_name(dev)); @@ -245,7 +250,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) pci_set_master(dev); - retval = usb_add_hcd(hcd, dev->irq, IRQF_SHARED); + retval = usb_add_hcd(hcd, hcd_irq, IRQF_SHARED); if (retval != 0) goto unmap_registers; set_hs_companion(dev, hcd); diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 99b34a30354f..f9ec44cbb82f 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2412,6 +2412,14 @@ int usb_hcd_is_primary_hcd(struct usb_hcd *hcd) } EXPORT_SYMBOL_GPL(usb_hcd_is_primary_hcd); +int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1) +{ + if (!hcd->driver->find_raw_port_number) + return port1; + + return hcd->driver->find_raw_port_number(hcd, port1); +} + static int usb_hcd_request_irqs(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags) { diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 797f9d514732..65d4e55552c6 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -67,7 +67,6 @@ static void usb_port_device_release(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); - dev_pm_qos_hide_flags(dev); kfree(port_dev); } diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index cef4252bb31a..255c14464bf2 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/acpi.h> #include <linux/pci.h> +#include <linux/usb/hcd.h> #include <acpi/acpi_bus.h> #include "usb.h" @@ -188,8 +189,13 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle) * connected to. 
*/ if (!udev->parent) { - *handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev), + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + int raw_port_num; + + raw_port_num = usb_hcd_find_raw_port_number(hcd, port_num); + *handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev), + raw_port_num); if (!*handle) return -ENODEV; } else { @@ -210,9 +216,14 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle) return 0; } +static bool usb_acpi_bus_match(struct device *dev) +{ + return is_usb_device(dev) || is_usb_port(dev); +} + static struct acpi_bus_type usb_acpi_bus = { - .bus = &usb_bus_type, - .find_bridge = usb_acpi_find_device, + .name = "USB", + .match = usb_acpi_bus_match, .find_device = usb_acpi_find_device, }; diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 999909451e37..ffa6b004a84b 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -583,6 +583,7 @@ static int dwc3_remove(struct platform_device *pdev) break; } + dwc3_free_event_buffers(dwc); dwc3_core_exit(dwc); return 0; diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index b50da53e9a52..b082bec7343e 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -23,8 +23,6 @@ #include <linux/usb/nop-usb-xceiv.h> #include <linux/of.h> -#include "core.h" - struct dwc3_exynos { struct platform_device *dwc3; struct platform_device *usb2_phy; diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 22f337f57219..afa05e3c9cf4 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -54,8 +54,6 @@ #include <linux/usb/otg.h> #include <linux/usb/nop-usb-xceiv.h> -#include "core.h" - /* * All these registers belong to OMAP's Wrapper around the * DesignWare USB3 Core. 
@@ -465,20 +463,20 @@ static int dwc3_omap_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id of_dwc3_matach[] = { +static const struct of_device_id of_dwc3_match[] = { { "ti,dwc3", }, { }, }; -MODULE_DEVICE_TABLE(of, of_dwc3_matach); +MODULE_DEVICE_TABLE(of, of_dwc3_match); static struct platform_driver dwc3_omap_driver = { .probe = dwc3_omap_probe, .remove = dwc3_omap_remove, .driver = { .name = "omap-dwc3", - .of_match_table = of_dwc3_matach, + .of_match_table = of_dwc3_match, }, }; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 7d70f44567d2..e8d77689a322 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -45,8 +45,6 @@ #include <linux/usb/otg.h> #include <linux/usb/nop-usb-xceiv.h> -#include "core.h" - /* FIXME define these in <linux/pci_ids.h> */ #define PCI_VENDOR_ID_SYNOPSYS 0x16c3 #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index d7da073a23fe..1d139ca05ef1 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -891,7 +891,8 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, DWC3_TRBCTL_CONTROL_DATA); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) && (dep->number == 0)) { - u32 transfer_size; + u32 transfer_size; + u32 maxpacket; ret = usb_gadget_map_request(&dwc->gadget, &req->request, dep->number); @@ -902,8 +903,8 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, WARN_ON(req->request.length > DWC3_EP0_BOUNCE_SIZE); - transfer_size = roundup(req->request.length, - (u32) dep->endpoint.maxpacket); + maxpacket = dep->endpoint.maxpacket; + transfer_size = roundup(req->request.length, maxpacket); dwc->ep0_bounced = true; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index a04342f6cbfa..82e160e96fca 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2159,7 +2159,6 @@ static void dwc3_gadget_phy_suspend(struct dwc3 
*dwc, u8 speed) static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) { - struct dwc3_gadget_ep_cmd_params params; struct dwc3_ep *dep; int ret; u32 reg; @@ -2167,8 +2166,6 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) dev_vdbg(dwc->dev, "%s\n", __func__); - memset(&params, 0x00, sizeof(params)); - reg = dwc3_readl(dwc->regs, DWC3_DSTS); speed = reg & DWC3_DSTS_CONNECTSPD; dwc->speed = speed; diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 5a0c541daf89..c7525b1cad74 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -145,6 +145,7 @@ config USB_LPC32XX tristate "LPC32XX USB Peripheral Controller" depends on ARCH_LPC32XX select USB_ISP1301 + select USB_OTG_UTILS help This option selects the USB device controller in the LPC32xx SoC. diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 97a13c349cc5..82fb22511356 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -35,6 +35,12 @@ mv_udc-y := mv_udc_core.o obj-$(CONFIG_USB_FUSB300) += fusb300_udc.o obj-$(CONFIG_USB_MV_U3D) += mv_u3d_core.o +# USB Functions +obj-$(CONFIG_USB_F_ACM) += f_acm.o +f_ss_lb-y := f_loopback.o f_sourcesink.o +obj-$(CONFIG_USB_F_SS_LB) += f_ss_lb.o +obj-$(CONFIG_USB_U_SERIAL) += u_serial.o + # # USB gadget drivers # @@ -74,9 +80,3 @@ obj-$(CONFIG_USB_G_WEBCAM) += g_webcam.o obj-$(CONFIG_USB_G_NCM) += g_ncm.o obj-$(CONFIG_USB_G_ACM_MS) += g_acm_ms.o obj-$(CONFIG_USB_GADGET_TARGET) += tcm_usb_gadget.o - -# USB Functions -obj-$(CONFIG_USB_F_ACM) += f_acm.o -f_ss_lb-y := f_loopback.o f_sourcesink.o -obj-$(CONFIG_USB_F_SS_LB) += f_ss_lb.o -obj-$(CONFIG_USB_U_SERIAL) += u_serial.o diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 7c821de8ce3d..c0d62b278610 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1757,10 +1757,7 @@ static const struct usb_gadget_driver composite_driver_template = { /** * 
usb_composite_probe() - register a composite driver * @driver: the driver to register - * @bind: the callback used to allocate resources that are shared across the - * whole device, such as string IDs, and add its configurations using - * @usb_add_config(). This may fail by returning a negative errno - * value; it should return zero on successful initialization. + * * Context: single threaded during gadget setup * * This function is used to register drivers using the composite driver diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index 38388d7844fc..c377ff84bf2c 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1235,6 +1235,7 @@ static struct file_system_type ffs_fs_type = { .mount = ffs_fs_mount, .kill_sb = ffs_fs_kill_sb, }; +MODULE_ALIAS_FS("functionfs"); /* Driver's main init/cleanup functions *************************************/ diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 71beeb833558..cc9c49c57c80 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -447,14 +447,13 @@ static void rndis_response_complete(struct usb_ep *ep, struct usb_request *req) static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req) { struct f_rndis *rndis = req->context; - struct usb_composite_dev *cdev = rndis->port.func.config->cdev; int status; /* received RNDIS command from USB_CDC_SEND_ENCAPSULATED_COMMAND */ // spin_lock(&dev->lock); status = rndis_msg_parser(rndis->config, (u8 *) req->buf); if (status < 0) - ERROR(cdev, "RNDIS command error %d, %d/%d\n", + pr_err("RNDIS command error %d, %d/%d\n", status, req->actual, req->length); // spin_unlock(&dev->lock); } diff --git a/drivers/usb/gadget/f_uac1.c b/drivers/usb/gadget/f_uac1.c index f570e667a640..fa8ea4ea00c1 100644 --- a/drivers/usb/gadget/f_uac1.c +++ b/drivers/usb/gadget/f_uac1.c @@ -418,6 +418,7 @@ static int audio_get_intf_req(struct usb_function *f, req->context = audio; req->complete = 
f_audio_complete; + len = min_t(size_t, sizeof(value), len); memcpy(req->buf, &value, len); return len; diff --git a/drivers/usb/gadget/g_ffs.c b/drivers/usb/gadget/g_ffs.c index 3953dd4d7186..3b343b23e4b0 100644 --- a/drivers/usb/gadget/g_ffs.c +++ b/drivers/usb/gadget/g_ffs.c @@ -357,7 +357,7 @@ static int gfs_bind(struct usb_composite_dev *cdev) goto error; gfs_dev_desc.iProduct = gfs_strings[USB_GADGET_PRODUCT_IDX].id; - for (i = func_num; --i; ) { + for (i = func_num; i--; ) { ret = functionfs_bind(ffs_tab[i].ffs_data, cdev); if (unlikely(ret < 0)) { while (++i < func_num) @@ -413,7 +413,7 @@ static int gfs_unbind(struct usb_composite_dev *cdev) gether_cleanup(); gfs_ether_setup = false; - for (i = func_num; --i; ) + for (i = func_num; i--; ) if (ffs_tab[i].ffs_data) functionfs_unbind(ffs_tab[i].ffs_data); diff --git a/drivers/usb/gadget/imx_udc.c b/drivers/usb/gadget/imx_udc.c index 8efd7555fa21..5bd930d779b9 100644 --- a/drivers/usb/gadget/imx_udc.c +++ b/drivers/usb/gadget/imx_udc.c @@ -1334,27 +1334,18 @@ static int imx_udc_start(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { struct imx_udc_struct *imx_usb; - int retval; imx_usb = container_of(gadget, struct imx_udc_struct, gadget); /* first hook up the driver ... 
*/ imx_usb->driver = driver; imx_usb->gadget.dev.driver = &driver->driver; - retval = device_add(&imx_usb->gadget.dev); - if (retval) - goto fail; - D_INI(imx_usb->dev, "<%s> registered gadget driver '%s'\n", __func__, driver->driver.name); imx_udc_enable(imx_usb); return 0; -fail: - imx_usb->driver = NULL; - imx_usb->gadget.dev.driver = NULL; - return retval; } static int imx_udc_stop(struct usb_gadget *gadget, @@ -1370,8 +1361,6 @@ static int imx_udc_stop(struct usb_gadget *gadget, imx_usb->gadget.dev.driver = NULL; imx_usb->driver = NULL; - device_del(&imx_usb->gadget.dev); - D_INI(imx_usb->dev, "<%s> unregistered gadget driver '%s'\n", __func__, driver->driver.name); @@ -1477,6 +1466,10 @@ static int __init imx_udc_probe(struct platform_device *pdev) imx_usb->gadget.dev.parent = &pdev->dev; imx_usb->gadget.dev.dma_mask = pdev->dev.dma_mask; + ret = device_add(&imx_usb->gadget.dev); + if (retval) + goto fail4; + platform_set_drvdata(pdev, imx_usb); usb_init_data(imx_usb); @@ -1488,9 +1481,11 @@ static int __init imx_udc_probe(struct platform_device *pdev) ret = usb_add_gadget_udc(&pdev->dev, &imx_usb->gadget); if (ret) - goto fail4; + goto fail5; return 0; +fail5: + device_unregister(&imx_usb->gadget.dev); fail4: for (i = 0; i < IMX_USB_NB_EP + 1; i++) free_irq(imx_usb->usbd_int[i], imx_usb); @@ -1514,6 +1509,7 @@ static int __exit imx_udc_remove(struct platform_device *pdev) int i; usb_del_gadget_udc(&imx_usb->gadget); + device_unregister(&imx_usb->gadget.dev); imx_udc_disable(imx_usb); del_timer(&imx_usb->timer); diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 8ac840f25ba9..e2b2e9cf254a 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -2105,6 +2105,7 @@ static struct file_system_type gadgetfs_type = { .mount = gadgetfs_mount, .kill_sb = gadgetfs_kill_sb, }; +MODULE_ALIAS_FS("gadgetfs"); /*----------------------------------------------------------------------*/ diff --git a/drivers/usb/gadget/net2272.c 
b/drivers/usb/gadget/net2272.c index d226058e3b88..32524b631959 100644 --- a/drivers/usb/gadget/net2272.c +++ b/drivers/usb/gadget/net2272.c @@ -59,7 +59,7 @@ static const char * const ep_name[] = { }; #define DMA_ADDR_INVALID (~(dma_addr_t)0) -#ifdef CONFIG_USB_GADGET_NET2272_DMA +#ifdef CONFIG_USB_NET2272_DMA /* * use_dma: the NET2272 can use an external DMA controller. * Note that since there is no generic DMA api, some functions, @@ -1495,6 +1495,13 @@ stop_activity(struct net2272 *dev, struct usb_gadget_driver *driver) for (i = 0; i < 4; ++i) net2272_dequeue_all(&dev->ep[i]); + /* report disconnect; the driver is already quiesced */ + if (driver) { + spin_unlock(&dev->lock); + driver->disconnect(&dev->gadget); + spin_lock(&dev->lock); + } + net2272_usb_reinit(dev); } diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index a1b650e11339..3bd0f992fb49 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -1924,7 +1924,6 @@ static int net2280_start(struct usb_gadget *_gadget, err_func: device_remove_file (&dev->pdev->dev, &dev_attr_function); err_unbind: - driver->unbind (&dev->gadget); dev->gadget.dev.driver = NULL; dev->driver = NULL; return retval; @@ -1946,6 +1945,13 @@ stop_activity (struct net2280 *dev, struct usb_gadget_driver *driver) for (i = 0; i < 7; i++) nuke (&dev->ep [i]); + /* report disconnect; the driver is already quiesced */ + if (driver) { + spin_unlock(&dev->lock); + driver->disconnect(&dev->gadget); + spin_lock(&dev->lock); + } + usb_reinit (dev); } diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 06be85c2b233..f8445653577f 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -62,6 +62,7 @@ #define DRIVER_VERSION "4 October 2004" #define OMAP_DMA_USB_W2FC_TX0 29 +#define OMAP_DMA_USB_W2FC_RX0 26 /* * The OMAP UDC needs _very_ early endpoint setup: before enabling the @@ -1310,7 +1311,7 @@ static int omap_pullup(struct usb_gadget 
*gadget, int is_on) } static int omap_udc_start(struct usb_gadget *g, - struct usb_gadget_driver *driver) + struct usb_gadget_driver *driver); static int omap_udc_stop(struct usb_gadget *g, struct usb_gadget_driver *driver); diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c index 2bbcdce942dc..d0f37484b6b0 100644 --- a/drivers/usb/gadget/pxa25x_udc.c +++ b/drivers/usb/gadget/pxa25x_udc.c @@ -1266,13 +1266,6 @@ static int pxa25x_udc_start(struct usb_gadget *g, dev->gadget.dev.driver = &driver->driver; dev->pullup = 1; - retval = device_add (&dev->gadget.dev); - if (retval) { - dev->driver = NULL; - dev->gadget.dev.driver = NULL; - return retval; - } - /* ... then enable host detection and ep0; and we're ready * for set_configuration as well as eventual disconnect. */ @@ -1310,6 +1303,10 @@ stop_activity(struct pxa25x_udc *dev, struct usb_gadget_driver *driver) } del_timer_sync(&dev->timer); + /* report disconnect; the driver is already quiesced */ + if (driver) + driver->disconnect(&dev->gadget); + /* re-init driver-visible data structures */ udc_reinit(dev); } @@ -1331,7 +1328,6 @@ static int pxa25x_udc_stop(struct usb_gadget*g, dev->gadget.dev.driver = NULL; dev->driver = NULL; - device_del (&dev->gadget.dev); dump_state(dev); return 0; @@ -2146,6 +2142,13 @@ static int __init pxa25x_udc_probe(struct platform_device *pdev) dev->gadget.dev.parent = &pdev->dev; dev->gadget.dev.dma_mask = pdev->dev.dma_mask; + retval = device_add(&dev->gadget.dev); + if (retval) { + dev->driver = NULL; + dev->gadget.dev.driver = NULL; + goto err_device_add; + } + the_controller = dev; platform_set_drvdata(pdev, dev); @@ -2196,6 +2199,8 @@ lubbock_fail0: free_irq(irq, dev); #endif err_irq1: + device_unregister(&dev->gadget.dev); + err_device_add: if (gpio_is_valid(dev->mach->gpio_pullup)) gpio_free(dev->mach->gpio_pullup); err_gpio_pullup: @@ -2217,10 +2222,11 @@ static int __exit pxa25x_udc_remove(struct platform_device *pdev) { struct pxa25x_udc *dev = 
platform_get_drvdata(pdev); - usb_del_gadget_udc(&dev->gadget); if (dev->driver) return -EBUSY; + usb_del_gadget_udc(&dev->gadget); + device_unregister(&dev->gadget.dev); dev->pullup = 0; pullup(dev); diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c index f7d25795821a..2fc867652ef5 100644 --- a/drivers/usb/gadget/pxa27x_udc.c +++ b/drivers/usb/gadget/pxa27x_udc.c @@ -1814,11 +1814,6 @@ static int pxa27x_udc_start(struct usb_gadget *g, udc->gadget.dev.driver = &driver->driver; dplus_pullup(udc, 1); - retval = device_add(&udc->gadget.dev); - if (retval) { - dev_err(udc->dev, "device_add error %d\n", retval); - goto fail; - } if (!IS_ERR_OR_NULL(udc->transceiver)) { retval = otg_set_peripheral(udc->transceiver->otg, &udc->gadget); @@ -1876,7 +1871,6 @@ static int pxa27x_udc_stop(struct usb_gadget *g, udc->driver = NULL; - device_del(&udc->gadget.dev); if (!IS_ERR_OR_NULL(udc->transceiver)) return otg_set_peripheral(udc->transceiver->otg, NULL); @@ -2480,13 +2474,24 @@ static int __init pxa_udc_probe(struct platform_device *pdev) driver_name, udc->irq, retval); goto err_irq; } + + retval = device_add(&udc->gadget.dev); + if (retval) { + dev_err(udc->dev, "device_add error %d\n", retval); + goto err_dev_add; + } + retval = usb_add_gadget_udc(&pdev->dev, &udc->gadget); if (retval) goto err_add_udc; pxa_init_debugfs(udc); + return 0; + err_add_udc: + device_unregister(&udc->gadget.dev); +err_dev_add: free_irq(udc->irq, udc); err_irq: iounmap(udc->regs); @@ -2507,6 +2512,7 @@ static int __exit pxa_udc_remove(struct platform_device *_dev) int gpio = udc->mach->gpio_pullup; usb_del_gadget_udc(&udc->gadget); + device_del(&udc->gadget.dev); usb_gadget_unregister_driver(udc->driver); free_irq(udc->irq, udc); pxa_cleanup_debugfs(udc); diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index fc07b4381286..08f89652533b 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -1668,8 +1668,7 
@@ static void s3c2410_udc_enable(struct s3c2410_udc *dev) static int s3c2410_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { - struct s3c2410_udc *udc = to_s3c2410(g) - int retval; + struct s3c2410_udc *udc = to_s3c2410(g); dprintk(DEBUG_NORMAL, "%s() '%s'\n", __func__, driver->driver.name); @@ -1677,22 +1676,10 @@ static int s3c2410_udc_start(struct usb_gadget *g, udc->driver = driver; udc->gadget.dev.driver = &driver->driver; - /* Bind the driver */ - retval = device_add(&udc->gadget.dev); - if (retval) { - dev_err(&udc->gadget.dev, "Error in device_add() : %d\n", retval); - goto register_error; - } - /* Enable udc */ s3c2410_udc_enable(udc); return 0; - -register_error: - udc->driver = NULL; - udc->gadget.dev.driver = NULL; - return retval; } static int s3c2410_udc_stop(struct usb_gadget *g, @@ -1700,7 +1687,6 @@ static int s3c2410_udc_stop(struct usb_gadget *g, { struct s3c2410_udc *udc = to_s3c2410(g); - device_del(&udc->gadget.dev); udc->driver = NULL; /* Disable udc */ @@ -1842,6 +1828,13 @@ static int s3c2410_udc_probe(struct platform_device *pdev) udc->gadget.dev.parent = &pdev->dev; udc->gadget.dev.dma_mask = pdev->dev.dma_mask; + /* Bind the driver */ + retval = device_add(&udc->gadget.dev); + if (retval) { + dev_err(&udc->gadget.dev, "Error in device_add() : %d\n", retval); + goto err_device_add; + } + the_controller = udc; platform_set_drvdata(pdev, udc); @@ -1930,6 +1923,8 @@ err_gpio_claim: err_int: free_irq(IRQ_USBD, udc); err_map: + device_unregister(&udc->gadget.dev); +err_device_add: iounmap(base_addr); err_mem: release_mem_region(rsrc_start, rsrc_len); @@ -1947,10 +1942,11 @@ static int s3c2410_udc_remove(struct platform_device *pdev) dev_dbg(&pdev->dev, "%s()\n", __func__); - usb_del_gadget_udc(&udc->gadget); if (udc->driver) return -EBUSY; + usb_del_gadget_udc(&udc->gadget); + device_unregister(&udc->gadget.dev); debugfs_remove(udc->regs_info); if (udc_info && !udc_info->udc_command && diff --git 
a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c index c5034d9c946b..b369292d4b90 100644 --- a/drivers/usb/gadget/u_serial.c +++ b/drivers/usb/gadget/u_serial.c @@ -136,7 +136,7 @@ static struct portmaster { pr_debug(fmt, ##arg) #endif /* pr_vdebug */ #else -#ifndef pr_vdebig +#ifndef pr_vdebug #define pr_vdebug(fmt, arg...) \ ({ if (0) pr_debug(fmt, ##arg); }) #endif /* pr_vdebug */ diff --git a/drivers/usb/gadget/u_uac1.c b/drivers/usb/gadget/u_uac1.c index e0c5e88e03ed..c7d460f43390 100644 --- a/drivers/usb/gadget/u_uac1.c +++ b/drivers/usb/gadget/u_uac1.c @@ -240,8 +240,11 @@ static int gaudio_open_snd_dev(struct gaudio *card) snd = &card->playback; snd->filp = filp_open(fn_play, O_WRONLY, 0); if (IS_ERR(snd->filp)) { + int ret = PTR_ERR(snd->filp); + ERROR(card, "No such PCM playback device: %s\n", fn_play); snd->filp = NULL; + return ret; } pcm_file = snd->filp->private_data; snd->substream = pcm_file->substream; diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index 2a9cd369f71c..f8f62c3ed65e 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -216,7 +216,7 @@ static void usb_gadget_remove_driver(struct usb_udc *udc) usb_gadget_disconnect(udc->gadget); udc->driver->disconnect(udc->gadget); udc->driver->unbind(udc->gadget); - usb_gadget_udc_stop(udc->gadget, udc->driver); + usb_gadget_udc_stop(udc->gadget, NULL); udc->driver = NULL; udc->dev.driver = NULL; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index b416a3fc9959..416a6dce5e11 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -302,6 +302,7 @@ static void ehci_quiesce (struct ehci_hcd *ehci) static void end_unlink_async(struct ehci_hcd *ehci); static void unlink_empty_async(struct ehci_hcd *ehci); +static void unlink_empty_async_suspended(struct ehci_hcd *ehci); static void ehci_work(struct ehci_hcd *ehci); static void start_unlink_intr(struct ehci_hcd *ehci, struct ehci_qh *qh); 
static void end_unlink_intr(struct ehci_hcd *ehci, struct ehci_qh *qh); @@ -748,11 +749,9 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd) /* guard against (alleged) silicon errata */ if (cmd & CMD_IAAD) ehci_dbg(ehci, "IAA with IAAD still set?\n"); - if (ehci->async_iaa) { + if (ehci->async_iaa) COUNT(ehci->stats.iaa); - end_unlink_async(ehci); - } else - ehci_dbg(ehci, "IAA with nothing unlinked?\n"); + end_unlink_async(ehci); } /* remote wakeup [4.3.1] */ diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 4d3b294f203e..7d06e77f6c4f 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -328,7 +328,7 @@ static int ehci_bus_suspend (struct usb_hcd *hcd) ehci->rh_state = EHCI_RH_SUSPENDED; end_unlink_async(ehci); - unlink_empty_async(ehci); + unlink_empty_async_suspended(ehci); ehci_handle_intr_unlinks(ehci); end_free_itds(ehci); diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index fd252f0cfb3a..23d136904285 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -135,7 +135,7 @@ qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh) * qtd is updated in qh_completions(). Update the QH * overlay here. */ - if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current) { + if (qh->hw->hw_token & ACTIVE_BIT(ehci)) { qh->hw->hw_qtd_next = qtd->hw_next; qtd = NULL; } @@ -449,11 +449,19 @@ qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh) else if (last_status == -EINPROGRESS && !urb->unlinked) continue; - /* qh unlinked; token in overlay may be most current */ - if (state == QH_STATE_IDLE - && cpu_to_hc32(ehci, qtd->qtd_dma) - == hw->hw_current) { + /* + * If this was the active qtd when the qh was unlinked + * and the overlay's token is active, then the overlay + * hasn't been written back to the qtd yet so use its + * token instead of the qtd's. After the qtd is + * processed and removed, the overlay won't be valid + * any more. 
+ */ + if (state == QH_STATE_IDLE && + qh->qtd_list.next == &qtd->qtd_list && + (hw->hw_token & ACTIVE_BIT(ehci))) { token = hc32_to_cpu(ehci, hw->hw_token); + hw->hw_token &= ~ACTIVE_BIT(ehci); /* An unlink may leave an incomplete * async transaction in the TT buffer. @@ -1170,7 +1178,7 @@ static void single_unlink_async(struct ehci_hcd *ehci, struct ehci_qh *qh) struct ehci_qh *prev; /* Add to the end of the list of QHs waiting for the next IAAD */ - qh->qh_state = QH_STATE_UNLINK; + qh->qh_state = QH_STATE_UNLINK_WAIT; if (ehci->async_unlink) ehci->async_unlink_last->unlink_next = qh; else @@ -1213,9 +1221,19 @@ static void start_iaa_cycle(struct ehci_hcd *ehci, bool nested) /* Do only the first waiting QH (nVidia bug?) */ qh = ehci->async_unlink; - ehci->async_iaa = qh; - ehci->async_unlink = qh->unlink_next; - qh->unlink_next = NULL; + + /* + * Intel (?) bug: The HC can write back the overlay region + * even after the IAA interrupt occurs. In self-defense, + * always go through two IAA cycles for each QH. 
+ */ + if (qh->qh_state == QH_STATE_UNLINK_WAIT) { + qh->qh_state = QH_STATE_UNLINK; + } else { + ehci->async_iaa = qh; + ehci->async_unlink = qh->unlink_next; + qh->unlink_next = NULL; + } /* Make sure the unlinks are all visible to the hardware */ wmb(); @@ -1298,6 +1316,19 @@ static void unlink_empty_async(struct ehci_hcd *ehci) } } +/* The root hub is suspended; unlink all the async QHs */ +static void unlink_empty_async_suspended(struct ehci_hcd *ehci) +{ + struct ehci_qh *qh; + + while (ehci->async->qh_next.qh) { + qh = ehci->async->qh_next.qh; + WARN_ON(!list_empty(&qh->qtd_list)); + single_unlink_async(ehci, qh); + } + start_iaa_cycle(ehci, false); +} + /* makes sure the async qh will become idle */ /* caller must own ehci->lock */ diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index b476daf49f6f..010f686d8881 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -1214,6 +1214,7 @@ itd_urb_transaction ( memset (itd, 0, sizeof *itd); itd->itd_dma = itd_dma; + itd->frame = 9999; /* an invalid value */ list_add (&itd->itd_list, &sched->td_list); } spin_unlock_irqrestore (&ehci->lock, flags); @@ -1915,6 +1916,7 @@ sitd_urb_transaction ( memset (sitd, 0, sizeof *sitd); sitd->sitd_dma = sitd_dma; + sitd->frame = 9999; /* an invalid value */ list_add (&sitd->sitd_list, &iso_sched->td_list); } diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c index 20dbdcbe9b0f..c3fa1305f830 100644 --- a/drivers/usb/host/ehci-timer.c +++ b/drivers/usb/host/ehci-timer.c @@ -304,7 +304,7 @@ static void ehci_iaa_watchdog(struct ehci_hcd *ehci) * (a) SMP races against real IAA firing and retriggering, and * (b) clean HC shutdown, when IAA watchdog was pending. */ - if (ehci->async_iaa) { + if (1) { u32 cmd, status; /* If we get here, IAA is *REALLY* late. 
It's barely diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 35616ffbe3ae..6dc238c592bc 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1022,44 +1022,24 @@ void xhci_copy_ep0_dequeue_into_input_ctx(struct xhci_hcd *xhci, * is attached to (or the roothub port its ancestor hub is attached to). All we * know is the index of that port under either the USB 2.0 or the USB 3.0 * roothub, but that doesn't give us the real index into the HW port status - * registers. Scan through the xHCI roothub port array, looking for the Nth - * entry of the correct port speed. Return the port number of that entry. + * registers. Call xhci_find_raw_port_number() to get real index. */ static u32 xhci_find_real_port_number(struct xhci_hcd *xhci, struct usb_device *udev) { struct usb_device *top_dev; - unsigned int num_similar_speed_ports; - unsigned int faked_port_num; - int i; + struct usb_hcd *hcd; + + if (udev->speed == USB_SPEED_SUPER) + hcd = xhci->shared_hcd; + else + hcd = xhci->main_hcd; for (top_dev = udev; top_dev->parent && top_dev->parent->parent; top_dev = top_dev->parent) /* Found device below root hub */; - faked_port_num = top_dev->portnum; - for (i = 0, num_similar_speed_ports = 0; - i < HCS_MAX_PORTS(xhci->hcs_params1); i++) { - u8 port_speed = xhci->port_array[i]; - - /* - * Skip ports that don't have known speeds, or have duplicate - * Extended Capabilities port speed entries. - */ - if (port_speed == 0 || port_speed == DUPLICATE_ENTRY) - continue; - /* - * USB 3.0 ports are always under a USB 3.0 hub. USB 2.0 and - * 1.1 ports are under the USB 2.0 hub. If the port speed - * matches the device speed, it's a similar speed port. 
- */ - if ((port_speed == 0x03) == (udev->speed == USB_SPEED_SUPER)) - num_similar_speed_ports++; - if (num_similar_speed_ports == faked_port_num) - /* Roothub ports are numbered from 1 to N */ - return i+1; - } - return 0; + return xhci_find_raw_port_number(hcd, top_dev->portnum); } /* Setup an xHCI virtual device for a Set Address command */ diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index af259e0ec172..1a30c380043c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -313,6 +313,7 @@ static const struct hc_driver xhci_pci_hc_driver = { .set_usb2_hw_lpm = xhci_set_usb2_hardware_lpm, .enable_usb3_lpm_timeout = xhci_enable_usb3_lpm_timeout, .disable_usb3_lpm_timeout = xhci_disable_usb3_lpm_timeout, + .find_raw_port_number = xhci_find_raw_port_number, }; /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 882875465301..1969c001b3f9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1599,14 +1599,20 @@ static void handle_port_status(struct xhci_hcd *xhci, max_ports = HCS_MAX_PORTS(xhci->hcs_params1); if ((port_id <= 0) || (port_id > max_ports)) { xhci_warn(xhci, "Invalid port id %d\n", port_id); - bogus_port_status = true; - goto cleanup; + inc_deq(xhci, xhci->event_ring); + return; } /* Figure out which usb_hcd this port is attached to: * is it a USB 3.0 port or a USB 2.0/1.1 port? */ major_revision = xhci->port_array[port_id - 1]; + + /* Find the right roothub. */ + hcd = xhci_to_hcd(xhci); + if ((major_revision == 0x03) != (hcd->speed == HCD_USB3)) + hcd = xhci->shared_hcd; + if (major_revision == 0) { xhci_warn(xhci, "Event for port %u not in " "Extended Capabilities, ignoring.\n", @@ -1629,10 +1635,6 @@ static void handle_port_status(struct xhci_hcd *xhci, * into the index into the ports on the correct split roothub, and the * correct bus_state structure. 
*/ - /* Find the right roothub. */ - hcd = xhci_to_hcd(xhci); - if ((major_revision == 0x03) != (hcd->speed == HCD_USB3)) - hcd = xhci->shared_hcd; bus_state = &xhci->bus_state[hcd_index(hcd)]; if (hcd->speed == HCD_USB3) port_array = xhci->usb3_ports; @@ -2027,8 +2029,8 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, if (event_trb != ep_ring->dequeue && event_trb != td->last_trb) td->urb->actual_length = - td->urb->transfer_buffer_length - - TRB_LEN(le32_to_cpu(event->transfer_len)); + td->urb->transfer_buffer_length - + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); else td->urb->actual_length = 0; @@ -2060,7 +2062,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, /* Maybe the event was for the data stage? */ td->urb->actual_length = td->urb->transfer_buffer_length - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); xhci_dbg(xhci, "Waiting for status " "stage event\n"); return 0; @@ -2096,7 +2098,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: - if (TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) { + if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) { frame->status = 0; break; } @@ -2141,7 +2143,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, len += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])); } len += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); if (trb_comp_code != COMP_STOP_INVAL) { frame->actual_length = len; @@ -2199,7 +2201,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, case COMP_SUCCESS: /* Double check that the HW transferred everything. 
*/ if (event_trb != td->last_trb || - TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { xhci_warn(xhci, "WARN Successful completion " "on short TX\n"); if (td->urb->transfer_flags & URB_SHORT_NOT_OK) @@ -2227,18 +2229,18 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, "%d bytes untransferred\n", td->urb->ep->desc.bEndpointAddress, td->urb->transfer_buffer_length, - TRB_LEN(le32_to_cpu(event->transfer_len))); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len))); /* Fast path - was this the last TRB in the TD for this URB? */ if (event_trb == td->last_trb) { - if (TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { + if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { td->urb->actual_length = td->urb->transfer_buffer_length - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); if (td->urb->transfer_buffer_length < td->urb->actual_length) { xhci_warn(xhci, "HC gave bad length " "of %d bytes left\n", - TRB_LEN(le32_to_cpu(event->transfer_len))); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len))); td->urb->actual_length = 0; if (td->urb->transfer_flags & URB_SHORT_NOT_OK) *status = -EREMOTEIO; @@ -2280,7 +2282,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, if (trb_comp_code != COMP_STOP_INVAL) td->urb->actual_length += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); } return finish_td(xhci, td, event_trb, event, ep, status, false); @@ -2368,7 +2370,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, * transfer type */ case COMP_SUCCESS: - if (TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) + if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) break; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) trb_comp_code = COMP_SHORT_TX; @@ -2461,14 +2463,21 @@ static int handle_tx_event(struct xhci_hcd *xhci, * TD 
list. */ if (list_empty(&ep_ring->td_list)) { - xhci_warn(xhci, "WARN Event TRB for slot %d ep %d " - "with no TDs queued?\n", - TRB_TO_SLOT_ID(le32_to_cpu(event->flags)), - ep_index); - xhci_dbg(xhci, "Event TRB with TRB type ID %u\n", - (le32_to_cpu(event->flags) & - TRB_TYPE_BITMASK)>>10); - xhci_print_trb_offsets(xhci, (union xhci_trb *) event); + /* + * A stopped endpoint may generate an extra completion + * event if the device was suspended. Don't print + * warnings. + */ + if (!(trb_comp_code == COMP_STOP || + trb_comp_code == COMP_STOP_INVAL)) { + xhci_warn(xhci, "WARN Event TRB for slot %d ep %d with no TDs queued?\n", + TRB_TO_SLOT_ID(le32_to_cpu(event->flags)), + ep_index); + xhci_dbg(xhci, "Event TRB with TRB type ID %u\n", + (le32_to_cpu(event->flags) & + TRB_TYPE_BITMASK)>>10); + xhci_print_trb_offsets(xhci, (union xhci_trb *) event); + } if (ep->skip) { ep->skip = false; xhci_dbg(xhci, "td_list is empty while skip " diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f1f01a834ba7..53b8f89a0b1c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -350,7 +350,7 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) * generate interrupts. Don't even try to enable MSI. */ if (xhci->quirks & XHCI_BROKEN_MSI) - return 0; + goto legacy_irq; /* unregister the legacy interrupt */ if (hcd->irq) @@ -371,6 +371,7 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) return -EINVAL; } + legacy_irq: /* fall back to legacy interrupt*/ ret = request_irq(pdev->irq, &usb_hcd_irq, IRQF_SHARED, hcd->irq_descr, hcd); @@ -3778,6 +3779,28 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) return 0; } +/* + * Transfer the port index into real index in the HW port status + * registers. Caculate offset between the port's PORTSC register + * and port status base. Divide the number of per port register + * to get the real index. The raw port number bases 1. 
+ */ +int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + __le32 __iomem *base_addr = &xhci->op_regs->port_status_base; + __le32 __iomem *addr; + int raw_port; + + if (hcd->speed != HCD_USB3) + addr = xhci->usb2_ports[port1 - 1]; + else + addr = xhci->usb3_ports[port1 - 1]; + + raw_port = (addr - base_addr)/NUM_PORT_REGS + 1; + return raw_port; +} + #ifdef CONFIG_USB_SUSPEND /* BESL to HIRD Encoding array for USB2 LPM */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index f791bd0aee6c..63582719e0fb 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -206,8 +206,8 @@ struct xhci_op_regs { /* bits 12:31 are reserved (and should be preserved on writes). */ /* IMAN - Interrupt Management Register */ -#define IMAN_IP (1 << 1) -#define IMAN_IE (1 << 0) +#define IMAN_IE (1 << 1) +#define IMAN_IP (1 << 0) /* USBSTS - USB status - status bitmasks */ /* HC not running - set to 1 when run/stop bit is cleared. 
*/ @@ -972,6 +972,10 @@ struct xhci_transfer_event { __le32 flags; }; +/* Transfer event TRB length bit mask */ +/* bits 0:23 */ +#define EVENT_TRB_LEN(p) ((p) & 0xffffff) + /** Transfer Event bit fields **/ #define TRB_TO_EP_ID(p) (((p) >> 16) & 0x1f) @@ -1829,6 +1833,7 @@ void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array, int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength); int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); +int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); #ifdef CONFIG_PM int xhci_bus_suspend(struct usb_hcd *hcd); diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 45b19e2c60ba..05e51432dd2f 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -7,11 +7,6 @@ config USB_MUSB_HDRC tristate 'Inventra Highspeed Dual Role Controller (TI, ADI, ...)' depends on USB && USB_GADGET - select NOP_USB_XCEIV if (ARCH_DAVINCI || MACH_OMAP3EVM || BLACKFIN) - select NOP_USB_XCEIV if (SOC_TI81XX || SOC_AM33XX) - select TWL4030_USB if MACH_OMAP_3430SDP - select TWL6030_USB if MACH_OMAP_4430SDP || MACH_OMAP4_PANDA - select OMAP_CONTROL_USB if MACH_OMAP_4430SDP || MACH_OMAP4_PANDA select USB_OTG_UTILS help Say Y here if your system has a dual role high speed USB diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 7c71769d71ff..41613a2b35e8 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -327,7 +327,7 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) u8 devctl = musb_readb(mregs, MUSB_DEVCTL); int err; - err = musb->int_usb & USB_INTR_VBUSERROR; + err = musb->int_usb & MUSB_INTR_VBUSERROR; if (err) { /* * The Mentor core doesn't debounce VBUS as needed diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 60b41cc28da4..daec6e0f7e38 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1624,8 +1624,6 @@ 
EXPORT_SYMBOL_GPL(musb_dma_completion); /*-------------------------------------------------------------------------*/ -#ifdef CONFIG_SYSFS - static ssize_t musb_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1742,8 +1740,6 @@ static const struct attribute_group musb_attr_group = { .attrs = musb_attributes, }; -#endif /* sysfs */ - /* Only used to provide driver mode change events */ static void musb_irq_work(struct work_struct *data) { @@ -1968,11 +1964,9 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) if (status < 0) goto fail4; -#ifdef CONFIG_SYSFS status = sysfs_create_group(&musb->controller->kobj, &musb_attr_group); if (status) goto fail5; -#endif pm_runtime_put(musb->controller); diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index be18537c5f14..83eddedcd9be 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -141,7 +141,9 @@ static inline void map_dma_buffer(struct musb_request *request, static inline void unmap_dma_buffer(struct musb_request *request, struct musb *musb) { - if (!is_buffer_mapped(request)) + struct musb_ep *musb_ep = request->ep; + + if (!is_buffer_mapped(request) || !musb_ep->dma) return; if (request->request.dma == DMA_ADDR_INVALID) { @@ -195,7 +197,10 @@ __acquires(ep->musb->lock) ep->busy = 1; spin_unlock(&musb->lock); - unmap_dma_buffer(req, musb); + + if (!dma_mapping_error(&musb->g.dev, request->dma)) + unmap_dma_buffer(req, musb); + if (request->status == 0) dev_dbg(musb->controller, "%s done request %p, %d/%d\n", ep->end_point.name, request, diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 1762354fe793..1a42a458f2c4 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -51,7 +51,7 @@ struct omap2430_glue { }; #define glue_to_musb(g) platform_get_drvdata(g->musb) -struct omap2430_glue *_glue; +static struct omap2430_glue *_glue; static struct timer_list 
musb_idle_timer; @@ -237,9 +237,13 @@ void omap_musb_mailbox(enum omap_musb_vbus_id_status status) { struct omap2430_glue *glue = _glue; - if (glue && glue_to_musb(glue)) { - glue->status = status; - } else { + if (!glue) { + pr_err("%s: musb core is not yet initialized\n", __func__); + return; + } + glue->status = status; + + if (!glue_to_musb(glue)) { pr_err("%s: musb core is not yet ready\n", __func__); return; } diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c index e1814397ca3a..2bd03d261a50 100644 --- a/drivers/usb/otg/otg.c +++ b/drivers/usb/otg/otg.c @@ -130,7 +130,7 @@ struct usb_phy *usb_get_phy(enum usb_phy_type type) spin_lock_irqsave(&phy_lock, flags); phy = __usb_find_phy(&phy_list, type); - if (IS_ERR(phy)) { + if (IS_ERR(phy) || !try_module_get(phy->dev->driver->owner)) { pr_err("unable to find transceiver of type %s\n", usb_phy_type_string(type)); goto err0; @@ -228,7 +228,7 @@ struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index) spin_lock_irqsave(&phy_lock, flags); phy = __usb_find_phy_dev(dev, &phy_bind_list, index); - if (IS_ERR(phy)) { + if (IS_ERR(phy) || !try_module_get(phy->dev->driver->owner)) { pr_err("unable to find transceiver\n"); goto err0; } @@ -301,8 +301,12 @@ EXPORT_SYMBOL(devm_usb_put_phy); */ void usb_put_phy(struct usb_phy *x) { - if (x) + if (x) { + struct module *owner = x->dev->driver->owner; + put_device(x->dev); + module_put(owner); + } } EXPORT_SYMBOL(usb_put_phy); diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 65217a590068..90549382eba5 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -38,6 +38,7 @@ config USB_ISP1301 tristate "NXP ISP1301 USB transceiver support" depends on USB || USB_GADGET depends on I2C + select USB_OTG_UTILS help Say Y here to add support for the NXP ISP1301 USB transceiver driver. 
This chip is typically used as USB transceiver for USB host, gadget diff --git a/drivers/usb/phy/omap-control-usb.c b/drivers/usb/phy/omap-control-usb.c index 5323b71c3521..1419ceda9759 100644 --- a/drivers/usb/phy/omap-control-usb.c +++ b/drivers/usb/phy/omap-control-usb.c @@ -219,32 +219,26 @@ static int omap_control_usb_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "control_dev_conf"); - control_usb->dev_conf = devm_request_and_ioremap(&pdev->dev, res); - if (!control_usb->dev_conf) { - dev_err(&pdev->dev, "Failed to obtain io memory\n"); - return -EADDRNOTAVAIL; - } + control_usb->dev_conf = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(control_usb->dev_conf)) + return PTR_ERR(control_usb->dev_conf); if (control_usb->type == OMAP_CTRL_DEV_TYPE1) { res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "otghs_control"); - control_usb->otghs_control = devm_request_and_ioremap( + control_usb->otghs_control = devm_ioremap_resource( &pdev->dev, res); - if (!control_usb->otghs_control) { - dev_err(&pdev->dev, "Failed to obtain io memory\n"); - return -EADDRNOTAVAIL; - } + if (IS_ERR(control_usb->otghs_control)) + return PTR_ERR(control_usb->otghs_control); } if (control_usb->type == OMAP_CTRL_DEV_TYPE2) { res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "phy_power_usb"); - control_usb->phy_power = devm_request_and_ioremap( + control_usb->phy_power = devm_ioremap_resource( &pdev->dev, res); - if (!control_usb->phy_power) { - dev_dbg(&pdev->dev, "Failed to obtain io memory\n"); - return -EADDRNOTAVAIL; - } + if (IS_ERR(control_usb->phy_power)) + return PTR_ERR(control_usb->phy_power); control_usb->sys_clk = devm_clk_get(control_usb->dev, "sys_clkin"); diff --git a/drivers/usb/phy/omap-usb3.c b/drivers/usb/phy/omap-usb3.c index fadc0c2b65bb..a6e60b1e102e 100644 --- a/drivers/usb/phy/omap-usb3.c +++ b/drivers/usb/phy/omap-usb3.c @@ -212,11 +212,9 @@ static int omap_usb3_probe(struct platform_device *pdev) } 
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pll_ctrl"); - phy->pll_ctrl_base = devm_request_and_ioremap(&pdev->dev, res); - if (!phy->pll_ctrl_base) { - dev_err(&pdev->dev, "ioremap of pll_ctrl failed\n"); - return -ENOMEM; - } + phy->pll_ctrl_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(phy->pll_ctrl_base)) + return PTR_ERR(phy->pll_ctrl_base); phy->dev = &pdev->dev; diff --git a/drivers/usb/phy/samsung-usbphy.c b/drivers/usb/phy/samsung-usbphy.c index 6ea553733832..967101ec15fd 100644 --- a/drivers/usb/phy/samsung-usbphy.c +++ b/drivers/usb/phy/samsung-usbphy.c @@ -787,11 +787,9 @@ static int samsung_usbphy_probe(struct platform_device *pdev) return -ENODEV; } - phy_base = devm_request_and_ioremap(dev, phy_mem); - if (!phy_base) { - dev_err(dev, "%s: register mapping failed\n", __func__); - return -ENXIO; - } + phy_base = devm_ioremap_resource(dev, phy_mem); + if (IS_ERR(phy_base)) + return PTR_ERR(phy_base); sphy = devm_kzalloc(dev, sizeof(*sphy), GFP_KERNEL); if (!sphy) diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c index cbd904b8fba5..4775f8209e55 100644 --- a/drivers/usb/serial/ark3116.c +++ b/drivers/usb/serial/ark3116.c @@ -62,7 +62,6 @@ static int is_irda(struct usb_serial *serial) } struct ark3116_private { - wait_queue_head_t delta_msr_wait; struct async_icount icount; int irda; /* 1 for irda device */ @@ -146,7 +145,6 @@ static int ark3116_port_probe(struct usb_serial_port *port) if (!priv) return -ENOMEM; - init_waitqueue_head(&priv->delta_msr_wait); mutex_init(&priv->hw_lock); spin_lock_init(&priv->status_lock); @@ -456,10 +454,14 @@ static int ark3116_ioctl(struct tty_struct *tty, case TIOCMIWAIT: for (;;) { struct async_icount prev = priv->icount; - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + if ((prev.rng == 
priv->icount.rng) && (prev.dsr == priv->icount.dsr) && (prev.dcd == priv->icount.dcd) && @@ -580,7 +582,7 @@ static void ark3116_update_msr(struct usb_serial_port *port, __u8 msr) priv->icount.dcd++; if (msr & UART_MSR_TERI) priv->icount.rng++; - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } } diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index d255f66e708e..07d4650a32ab 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -80,7 +80,6 @@ MODULE_DEVICE_TABLE(usb, id_table); struct ch341_private { spinlock_t lock; /* access lock */ - wait_queue_head_t delta_msr_wait; /* wait queue for modem status */ unsigned baud_rate; /* set baud rate */ u8 line_control; /* set line control value RTS/DTR */ u8 line_status; /* active status of modem control inputs */ @@ -252,7 +251,6 @@ static int ch341_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); priv->baud_rate = DEFAULT_BAUD_RATE; priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; @@ -298,7 +296,7 @@ static void ch341_dtr_rts(struct usb_serial_port *port, int on) priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR); spin_unlock_irqrestore(&priv->lock, flags); ch341_set_handshake(port->serial->dev, priv->line_control); - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } static void ch341_close(struct usb_serial_port *port) @@ -491,7 +489,7 @@ static void ch341_read_int_callback(struct urb *urb) tty_kref_put(tty); } - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } exit: @@ -517,11 +515,14 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (!multi_change) { - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ 
if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; multi_change = priv->multi_status_change; diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index edc0f0dcad83..4747d1c328ff 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -85,6 +85,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x813F) }, /* Tams Master Easy Control */ { USB_DEVICE(0x10C4, 0x814A) }, /* West Mountain Radio RIGblaster P&P */ { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */ + { USB_DEVICE(0x2405, 0x0003) }, /* West Mountain Radio RIGblaster Advantage */ { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */ { USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */ @@ -150,6 +151,25 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ + { USB_DEVICE(0x1FB9, 0x0100) }, /* Lake Shore Model 121 Current Source */ + { USB_DEVICE(0x1FB9, 0x0200) }, /* Lake Shore Model 218A Temperature Monitor */ + { USB_DEVICE(0x1FB9, 0x0201) }, /* Lake Shore Model 219 Temperature Monitor */ + { USB_DEVICE(0x1FB9, 0x0202) }, /* Lake Shore Model 233 Temperature Transmitter */ + { USB_DEVICE(0x1FB9, 0x0203) }, /* Lake Shore Model 235 Temperature Transmitter */ + { USB_DEVICE(0x1FB9, 0x0300) }, /* Lake Shore Model 335 Temperature Controller */ + { USB_DEVICE(0x1FB9, 0x0301) }, /* Lake Shore Model 336 Temperature Controller */ + { USB_DEVICE(0x1FB9, 0x0302) }, /* Lake Shore Model 350 Temperature Controller */ + { USB_DEVICE(0x1FB9, 0x0303) }, /* Lake Shore Model 371 AC Bridge */ + { USB_DEVICE(0x1FB9, 0x0400) }, /* Lake Shore Model 411 Handheld Gaussmeter */ + { USB_DEVICE(0x1FB9, 
0x0401) }, /* Lake Shore Model 425 Gaussmeter */ + { USB_DEVICE(0x1FB9, 0x0402) }, /* Lake Shore Model 455A Gaussmeter */ + { USB_DEVICE(0x1FB9, 0x0403) }, /* Lake Shore Model 475A Gaussmeter */ + { USB_DEVICE(0x1FB9, 0x0404) }, /* Lake Shore Model 465 Three Axis Gaussmeter */ + { USB_DEVICE(0x1FB9, 0x0600) }, /* Lake Shore Model 625A Superconducting MPS */ + { USB_DEVICE(0x1FB9, 0x0601) }, /* Lake Shore Model 642A Magnet Power Supply */ + { USB_DEVICE(0x1FB9, 0x0602) }, /* Lake Shore Model 648 Magnet Power Supply */ + { USB_DEVICE(0x1FB9, 0x0700) }, /* Lake Shore Model 737 VSM Controller */ + { USB_DEVICE(0x1FB9, 0x0701) }, /* Lake Shore Model 776 Hall Matrix */ { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */ diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 8efa19d0e9fb..ba7352e4187e 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -111,7 +111,6 @@ struct cypress_private { int baud_rate; /* stores current baud rate in integer form */ int isthrottled; /* if throttled, discard reads */ - wait_queue_head_t delta_msr_wait; /* used for TIOCMIWAIT */ char prev_status, diff_status; /* used for TIOCMIWAIT */ /* we pass a pointer to this as the argument sent to cypress_set_termios old_termios */ @@ -449,7 +448,6 @@ static int cypress_generic_port_probe(struct usb_serial_port *port) kfree(priv); return -ENOMEM; } - init_waitqueue_head(&priv->delta_msr_wait); usb_reset_configuration(serial->dev); @@ -868,12 +866,16 @@ static int cypress_ioctl(struct tty_struct *tty, switch (cmd) { /* This code comes from drivers/char/serial.c and ftdi_sio.c */ case TIOCMIWAIT: - while (priv != NULL) { - interruptible_sleep_on(&priv->delta_msr_wait); + for (;;) { + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return 
-ERESTARTSYS; - else { + + if (port->serial->disconnected) + return -EIO; + + { char diff = priv->diff_status; if (diff == 0) return -EIO; /* no change => error */ @@ -1187,7 +1189,7 @@ static void cypress_read_int_callback(struct urb *urb) if (priv->current_status != priv->prev_status) { priv->diff_status |= priv->current_status ^ priv->prev_status; - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); priv->prev_status = priv->current_status; } spin_unlock_irqrestore(&priv->lock, flags); diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c index b1b2dc64b50b..a172ad5c5ce8 100644 --- a/drivers/usb/serial/f81232.c +++ b/drivers/usb/serial/f81232.c @@ -47,7 +47,6 @@ MODULE_DEVICE_TABLE(usb, id_table); struct f81232_private { spinlock_t lock; - wait_queue_head_t delta_msr_wait; u8 line_control; u8 line_status; }; @@ -111,7 +110,7 @@ static void f81232_process_read_urb(struct urb *urb) line_status = priv->line_status; priv->line_status &= ~UART_STATE_TRANSIENT_MASK; spin_unlock_irqrestore(&priv->lock, flags); - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); if (!urb->actual_length) return; @@ -256,11 +255,14 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; spin_unlock_irqrestore(&priv->lock, flags); @@ -322,7 +324,6 @@ static int f81232_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); usb_set_serial_port_data(port, priv); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 
edd162df49ca..9886180e45f1 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -69,9 +69,7 @@ struct ftdi_private { int flags; /* some ASYNC_xxxx flags are supported */ unsigned long last_dtr_rts; /* saved modem control outputs */ struct async_icount icount; - wait_queue_head_t delta_msr_wait; /* Used for TIOCMIWAIT */ char prev_status; /* Used for TIOCMIWAIT */ - bool dev_gone; /* Used to abort TIOCMIWAIT */ char transmit_empty; /* If transmitter is empty or not */ __u16 interface; /* FT2232C, FT2232H or FT4232H port interface (0 for FT232/245) */ @@ -642,6 +640,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_RM_CANVIEW_PID) }, { USB_DEVICE(ACTON_VID, ACTON_SPECTRAPRO_PID) }, { USB_DEVICE(CONTEC_VID, CONTEC_COM1USBH_PID) }, + { USB_DEVICE(MITSUBISHI_VID, MITSUBISHI_FXUSB_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2_PID) }, @@ -1691,10 +1690,8 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port) kref_init(&priv->kref); mutex_init(&priv->cfg_lock); - init_waitqueue_head(&priv->delta_msr_wait); priv->flags = ASYNC_LOW_LATENCY; - priv->dev_gone = false; if (quirk && quirk->port_probe) quirk->port_probe(priv); @@ -1840,8 +1837,7 @@ static int ftdi_sio_port_remove(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); - priv->dev_gone = true; - wake_up_interruptible_all(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); remove_sysfs_attrs(port); @@ -1989,7 +1985,7 @@ static int ftdi_process_packet(struct usb_serial_port *port, if (diff_status & FTDI_RS0_RLSD) priv->icount.dcd++; - wake_up_interruptible_all(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); priv->prev_status = status; } @@ -2440,11 +2436,15 @@ static int ftdi_ioctl(struct tty_struct *tty, */ case TIOCMIWAIT: cprev = priv->icount; - while (!priv->dev_gone) { - 
interruptible_sleep_on(&priv->delta_msr_wait); + for (;;) { + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = priv->icount; if (((arg & TIOCM_RNG) && (cnow.rng != cprev.rng)) || ((arg & TIOCM_DSR) && (cnow.dsr != cprev.dsr)) || @@ -2454,8 +2454,6 @@ static int ftdi_ioctl(struct tty_struct *tty, } cprev = cnow; } - return -EIO; - break; case TIOCSERGETLSR: return get_lsr_info(port, (struct serial_struct __user *)arg); break; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 9d359e189a64..e79861eeed4c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -584,6 +584,13 @@ #define CONTEC_COM1USBH_PID 0x8311 /* COM-1(USB)H */ /* + * Mitsubishi Electric Corp. (http://www.meau.com) + * Submitted by Konstantin Holoborodko + */ +#define MITSUBISHI_VID 0x06D3 +#define MITSUBISHI_FXUSB_PID 0x0284 /* USB/RS422 converters: FX-USB-AW/-BD */ + +/* * Definitions for B&B Electronics products. 
*/ #define BANDB_VID 0x0856 /* B&B Electronics Vendor ID */ diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 1a07b12ef341..81caf5623ee2 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -956,10 +956,7 @@ static void garmin_close(struct usb_serial_port *port) if (!serial) return; - mutex_lock(&port->serial->disc_mutex); - - if (!port->serial->disconnected) - garmin_clear(garmin_data_p); + garmin_clear(garmin_data_p); /* shutdown our urbs */ usb_kill_urb(port->read_urb); @@ -968,8 +965,6 @@ static void garmin_close(struct usb_serial_port *port) /* keep reset state so we know that we must start a new session */ if (garmin_data_p->state != STATE_RESET) garmin_data_p->state = STATE_DISCONNECTED; - - mutex_unlock(&port->serial->disc_mutex); } diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index b00e5cbf741f..efd8b978128c 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -110,7 +110,6 @@ struct edgeport_port { wait_queue_head_t wait_chase; /* for handling sleeping while waiting for chase to finish */ wait_queue_head_t wait_open; /* for handling sleeping while waiting for open to finish */ wait_queue_head_t wait_command; /* for handling sleeping while waiting for command to finish */ - wait_queue_head_t delta_msr_wait; /* for handling sleeping while waiting for msr change to happen */ struct async_icount icount; struct usb_serial_port *port; /* loop back to the owner of this object */ @@ -884,7 +883,6 @@ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port) /* initialize our wait queues */ init_waitqueue_head(&edge_port->wait_open); init_waitqueue_head(&edge_port->wait_chase); - init_waitqueue_head(&edge_port->delta_msr_wait); init_waitqueue_head(&edge_port->wait_command); /* initialize our icount structure */ @@ -1669,13 +1667,17 @@ static int edge_ioctl(struct tty_struct *tty, dev_dbg(&port->dev, "%s (%d) 
TIOCMIWAIT\n", __func__, port->number); cprev = edge_port->icount; while (1) { - prepare_to_wait(&edge_port->delta_msr_wait, + prepare_to_wait(&port->delta_msr_wait, &wait, TASK_INTERRUPTIBLE); schedule(); - finish_wait(&edge_port->delta_msr_wait, &wait); + finish_wait(&port->delta_msr_wait, &wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = edge_port->icount; if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) @@ -2051,7 +2053,7 @@ static void handle_new_msr(struct edgeport_port *edge_port, __u8 newMsr) icount->dcd++; if (newMsr & EDGEPORT_MSR_DELTA_RI) icount->rng++; - wake_up_interruptible(&edge_port->delta_msr_wait); + wake_up_interruptible(&edge_port->port->delta_msr_wait); } /* Save the new modem status */ diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index c23776679f70..7777172206de 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -87,9 +87,6 @@ struct edgeport_port { int close_pending; int lsr_event; struct async_icount icount; - wait_queue_head_t delta_msr_wait; /* for handling sleeping while - waiting for msr change to - happen */ struct edgeport_serial *edge_serial; struct usb_serial_port *port; __u8 bUartMode; /* Port type, 0: RS232, etc. 
*/ @@ -1459,7 +1456,7 @@ static void handle_new_msr(struct edgeport_port *edge_port, __u8 msr) icount->dcd++; if (msr & EDGEPORT_MSR_DELTA_RI) icount->rng++; - wake_up_interruptible(&edge_port->delta_msr_wait); + wake_up_interruptible(&edge_port->port->delta_msr_wait); } /* Save the new modem status */ @@ -1754,7 +1751,6 @@ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port) dev = port->serial->dev; memset(&(edge_port->icount), 0x00, sizeof(edge_port->icount)); - init_waitqueue_head(&edge_port->delta_msr_wait); /* turn off loopback */ status = ti_do_config(edge_port, UMPC_SET_CLR_LOOPBACK, 0); @@ -2434,10 +2430,14 @@ static int edge_ioctl(struct tty_struct *tty, dev_dbg(&port->dev, "%s - TIOCMIWAIT\n", __func__); cprev = edge_port->icount; while (1) { - interruptible_sleep_on(&edge_port->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = edge_port->icount; if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) @@ -2649,6 +2649,7 @@ static struct usb_serial_driver edgeport_2port_device = { .set_termios = edge_set_termios, .tiocmget = edge_tiocmget, .tiocmset = edge_tiocmset, + .get_icount = edge_get_icount, .write = edge_write, .write_room = edge_write_room, .chars_in_buffer = edge_chars_in_buffer, diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index a64d420f687b..06d5a60be2c4 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -114,8 +114,6 @@ struct mct_u232_private { unsigned char last_msr; /* Modem Status Register */ unsigned int rx_flags; /* Throttling flags */ struct async_icount icount; - wait_queue_head_t msr_wait; /* for handling sleeping while waiting - for msr change to happen */ }; #define THROTTLED 0x01 @@ -409,7 +407,6 @@ static int mct_u232_port_probe(struct usb_serial_port 
*port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->msr_wait); usb_set_serial_port_data(port, priv); @@ -601,7 +598,7 @@ static void mct_u232_read_int_callback(struct urb *urb) tty_kref_put(tty); } #endif - wake_up_interruptible(&priv->msr_wait); + wake_up_interruptible(&port->delta_msr_wait); spin_unlock_irqrestore(&priv->lock, flags); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); @@ -810,13 +807,17 @@ static int mct_u232_ioctl(struct tty_struct *tty, cprev = mct_u232_port->icount; spin_unlock_irqrestore(&mct_u232_port->lock, flags); for ( ; ; ) { - prepare_to_wait(&mct_u232_port->msr_wait, + prepare_to_wait(&port->delta_msr_wait, &wait, TASK_INTERRUPTIBLE); schedule(); - finish_wait(&mct_u232_port->msr_wait, &wait); + finish_wait(&port->delta_msr_wait, &wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&mct_u232_port->lock, flags); cnow = mct_u232_port->icount; spin_unlock_irqrestore(&mct_u232_port->lock, flags); diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 809fb329eca5..b8051fa61911 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -219,7 +219,6 @@ struct moschip_port { char open; char open_ports; wait_queue_head_t wait_chase; /* for handling sleeping while waiting for chase to finish */ - wait_queue_head_t delta_msr_wait; /* for handling sleeping while waiting for msr change to happen */ int delta_msr_cond; struct async_icount icount; struct usb_serial_port *port; /* loop back to the owner of this object */ @@ -423,6 +422,9 @@ static void mos7840_handle_new_msr(struct moschip_port *port, __u8 new_msr) icount->rng++; smp_wmb(); } + + mos7840_port->delta_msr_cond = 1; + wake_up_interruptible(&port->port->delta_msr_wait); } } @@ -1127,7 +1129,6 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) /* initialize our wait queues */ 
init_waitqueue_head(&mos7840_port->wait_chase); - init_waitqueue_head(&mos7840_port->delta_msr_wait); /* initialize our icount structure */ memset(&(mos7840_port->icount), 0x00, sizeof(mos7840_port->icount)); @@ -2017,8 +2018,6 @@ static void mos7840_change_port_settings(struct tty_struct *tty, mos7840_port->read_urb_busy = false; } } - wake_up(&mos7840_port->delta_msr_wait); - mos7840_port->delta_msr_cond = 1; dev_dbg(&port->dev, "%s - mos7840_port->shadowLCR is End %x\n", __func__, mos7840_port->shadowLCR); } @@ -2219,13 +2218,18 @@ static int mos7840_ioctl(struct tty_struct *tty, while (1) { /* interruptible_sleep_on(&mos7840_port->delta_msr_wait); */ mos7840_port->delta_msr_cond = 0; - wait_event_interruptible(mos7840_port->delta_msr_wait, - (mos7840_port-> + wait_event_interruptible(port->delta_msr_wait, + (port->serial->disconnected || + mos7840_port-> delta_msr_cond == 1)); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = mos7840_port->icount; smp_rmb(); if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f7d339d8187b..558adfc05007 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -341,6 +341,8 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_EU3_E 0x0051 #define CINTERION_PRODUCT_EU3_P 0x0052 #define CINTERION_PRODUCT_PH8 0x0053 +#define CINTERION_PRODUCT_AH6 0x0055 +#define CINTERION_PRODUCT_PLS8 0x0060 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -579,6 +581,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUANTA_VENDOR_ID, 0xea42), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c05, USB_CLASS_COMM, 0x02, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c1f, USB_CLASS_COMM, 0x02, 0xff) }, { 
USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c23, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E173, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t) &net_intf1_blacklist }, @@ -1260,6 +1263,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_E) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8) }, + { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AH6) }, + { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLS8) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) }, diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index a958fd41b5b3..87c71ccfee87 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -188,7 +188,6 @@ struct oti6858_private { u8 setup_done; struct delayed_work delayed_setup_work; - wait_queue_head_t intr_wait; struct usb_serial_port *port; /* USB port with which associated */ }; @@ -339,7 +338,6 @@ static int oti6858_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->intr_wait); priv->port = port; INIT_DELAYED_WORK(&priv->delayed_setup_work, setup_line); INIT_DELAYED_WORK(&priv->delayed_write_work, send_data); @@ -664,11 +662,15 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - wait_event_interruptible(priv->intr_wait, + wait_event_interruptible(port->delta_msr_wait, + port->serial->disconnected || priv->status.pin_state != prev); if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->status.pin_state & PIN_MASK; 
spin_unlock_irqrestore(&priv->lock, flags); @@ -763,7 +765,7 @@ static void oti6858_read_int_callback(struct urb *urb) if (!priv->transient) { if (xs->pin_state != priv->status.pin_state) - wake_up_interruptible(&priv->intr_wait); + wake_up_interruptible(&port->delta_msr_wait); memcpy(&priv->status, xs, OTI6858_CTRL_PKT_SIZE); } diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 54adc9125e5c..3b10018d89a3 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -139,7 +139,6 @@ struct pl2303_serial_private { struct pl2303_private { spinlock_t lock; - wait_queue_head_t delta_msr_wait; u8 line_control; u8 line_status; }; @@ -233,7 +232,6 @@ static int pl2303_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); usb_set_serial_port_data(port, priv); @@ -607,11 +605,14 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; spin_unlock_irqrestore(&priv->lock, flags); @@ -719,7 +720,7 @@ static void pl2303_update_line_status(struct usb_serial_port *port, spin_unlock_irqrestore(&priv->lock, flags); if (priv->line_status & UART_BREAK_ERROR) usb_serial_handle_break(port); - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); tty = tty_port_tty_get(&port->port); if (!tty) @@ -783,7 +784,7 @@ static void pl2303_process_read_urb(struct urb *urb) line_status = priv->line_status; priv->line_status &= ~UART_STATE_TRANSIENT_MASK; spin_unlock_irqrestore(&priv->lock, flags); - wake_up_interruptible(&priv->delta_msr_wait); + 
wake_up_interruptible(&port->delta_msr_wait); if (!urb->actual_length) return; diff --git a/drivers/usb/serial/qcaux.c b/drivers/usb/serial/qcaux.c index 9b1b96f2d095..31f81c3c15eb 100644 --- a/drivers/usb/serial/qcaux.c +++ b/drivers/usb/serial/qcaux.c @@ -69,6 +69,7 @@ static struct usb_device_id id_table[] = { { USB_VENDOR_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, 0xff, 0xfd, 0xff) }, /* NMEA */ { USB_VENDOR_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, 0xff, 0xfe, 0xff) }, /* WMC */ { USB_VENDOR_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, 0xff, 0xff, 0xff) }, /* DIAG */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1fac, 0x0151, 0xff, 0xff, 0xff) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 24662547dc5b..59b32b782126 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -197,12 +197,15 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) if (is_gobi1k) { /* Gobi 1K USB layout: - * 0: serial port (doesn't respond) + * 0: DM/DIAG (use libqcdm from ModemManager for communication) * 1: serial port (doesn't respond) * 2: AT-capable modem port * 3: QMI/net */ - if (ifnum == 2) + if (ifnum == 0) { + dev_dbg(dev, "Gobi 1K DM/DIAG interface found\n"); + altsetting = 1; + } else if (ifnum == 2) dev_dbg(dev, "Modem port found\n"); else altsetting = -1; diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 00e6c9bac8a3..75f125ddb0c9 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -128,7 +128,6 @@ struct qt2_port_private { u8 shadowLSR; u8 shadowMSR; - wait_queue_head_t delta_msr_wait; /* Used for TIOCMIWAIT */ struct async_icount icount; struct usb_serial_port *port; @@ -506,8 +505,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - wait_event_interruptible(priv->delta_msr_wait, - ((priv->icount.rng != prev.rng) 
|| + wait_event_interruptible(port->delta_msr_wait, + (port->serial->disconnected || + (priv->icount.rng != prev.rng) || (priv->icount.dsr != prev.dsr) || (priv->icount.dcd != prev.dcd) || (priv->icount.cts != prev.cts))); @@ -515,6 +515,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); cur = priv->icount; spin_unlock_irqrestore(&priv->lock, flags); @@ -661,7 +664,9 @@ void qt2_process_read_urb(struct urb *urb) __func__); break; } - tty_flip_buffer_push(&port->port); + + if (port_priv->is_open) + tty_flip_buffer_push(&port->port); newport = *(ch + 3); @@ -704,7 +709,8 @@ void qt2_process_read_urb(struct urb *urb) tty_insert_flip_string(&port->port, ch, 1); } - tty_flip_buffer_push(&port->port); + if (port_priv->is_open) + tty_flip_buffer_push(&port->port); } static void qt2_write_bulk_callback(struct urb *urb) @@ -824,7 +830,6 @@ static int qt2_port_probe(struct usb_serial_port *port) spin_lock_init(&port_priv->lock); spin_lock_init(&port_priv->urb_lock); - init_waitqueue_head(&port_priv->delta_msr_wait); port_priv->port = port; port_priv->write_urb = usb_alloc_urb(0, GFP_KERNEL); @@ -967,7 +972,7 @@ static void qt2_update_msr(struct usb_serial_port *port, unsigned char *ch) if (newMSR & UART_MSR_TERI) port_priv->icount.rng++; - wake_up_interruptible(&port_priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } } diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 91ff8e3bddbd..549ef68ff5fa 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -149,7 +149,6 @@ enum spcp8x5_type { struct spcp8x5_private { spinlock_t lock; enum spcp8x5_type type; - wait_queue_head_t delta_msr_wait; u8 line_control; u8 line_status; }; @@ -179,7 +178,6 @@ static int spcp8x5_port_probe(struct usb_serial_port *port) return -ENOMEM; 
spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); priv->type = type; usb_set_serial_port_data(port , priv); @@ -475,7 +473,7 @@ static void spcp8x5_process_read_urb(struct urb *urb) priv->line_status &= ~UART_STATE_TRANSIENT_MASK; spin_unlock_irqrestore(&priv->lock, flags); /* wake up the wait for termios */ - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); if (!urb->actual_length) return; @@ -526,12 +524,15 @@ static int spcp8x5_wait_modem_info(struct usb_serial_port *port, while (1) { /* wake up in bulk read */ - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; spin_unlock_irqrestore(&priv->lock, flags); diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index b57cf841c5b6..4b2a19757b4d 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -61,7 +61,6 @@ struct ssu100_port_private { spinlock_t status_lock; u8 shadowLSR; u8 shadowMSR; - wait_queue_head_t delta_msr_wait; /* Used for TIOCMIWAIT */ struct async_icount icount; }; @@ -355,8 +354,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->status_lock, flags); while (1) { - wait_event_interruptible(priv->delta_msr_wait, - ((priv->icount.rng != prev.rng) || + wait_event_interruptible(port->delta_msr_wait, + (port->serial->disconnected || + (priv->icount.rng != prev.rng) || (priv->icount.dsr != prev.dsr) || (priv->icount.dcd != prev.dcd) || (priv->icount.cts != prev.cts))); @@ -364,6 +364,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->status_lock, 
flags); cur = priv->icount; spin_unlock_irqrestore(&priv->status_lock, flags); @@ -445,7 +448,6 @@ static int ssu100_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->status_lock); - init_waitqueue_head(&priv->delta_msr_wait); usb_set_serial_port_data(port, priv); @@ -537,7 +539,7 @@ static void ssu100_update_msr(struct usb_serial_port *port, u8 msr) priv->icount.dcd++; if (msr & UART_MSR_TERI) priv->icount.rng++; - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } } diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 39cb9b807c3c..73deb029fc05 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -74,7 +74,6 @@ struct ti_port { int tp_flags; int tp_closing_wait;/* in .01 secs */ struct async_icount tp_icount; - wait_queue_head_t tp_msr_wait; /* wait for msr change */ wait_queue_head_t tp_write_wait; struct ti_device *tp_tdev; struct usb_serial_port *tp_port; @@ -432,7 +431,6 @@ static int ti_port_probe(struct usb_serial_port *port) else tport->tp_uart_base_addr = TI_UART2_BASE_ADDR; tport->tp_closing_wait = closing_wait; - init_waitqueue_head(&tport->tp_msr_wait); init_waitqueue_head(&tport->tp_write_wait); if (kfifo_alloc(&tport->write_fifo, TI_WRITE_BUF_SIZE, GFP_KERNEL)) { kfree(tport); @@ -784,9 +782,13 @@ static int ti_ioctl(struct tty_struct *tty, dev_dbg(&port->dev, "%s - TIOCMIWAIT\n", __func__); cprev = tport->tp_icount; while (1) { - interruptible_sleep_on(&tport->tp_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = tport->tp_icount; if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) @@ -1392,7 +1394,7 @@ static void ti_handle_new_msr(struct ti_port *tport, __u8 msr) icount->dcd++; if (msr & TI_MSR_DELTA_RI) icount->rng++; - 
wake_up_interruptible(&tport->tp_msr_wait); + wake_up_interruptible(&tport->tp_port->delta_msr_wait); spin_unlock_irqrestore(&tport->tp_lock, flags); } diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a19ed74d770d..5d9b178484fd 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -151,6 +151,7 @@ static void destroy_serial(struct kref *kref) } } + usb_put_intf(serial->interface); usb_put_dev(serial->dev); kfree(serial); } @@ -620,7 +621,7 @@ static struct usb_serial *create_serial(struct usb_device *dev, } serial->dev = usb_get_dev(dev); serial->type = driver; - serial->interface = interface; + serial->interface = usb_get_intf(interface); kref_init(&serial->kref); mutex_init(&serial->disc_mutex); serial->minor = SERIAL_TTY_NO_MINOR; @@ -902,6 +903,7 @@ static int usb_serial_probe(struct usb_interface *interface, port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); + init_waitqueue_head(&port->delta_msr_wait); /* Keep this for private driver use for the moment but should probably go away */ INIT_WORK(&port->work, usb_serial_port_work); diff --git a/drivers/usb/storage/initializers.c b/drivers/usb/storage/initializers.c index 7ab9046ae0ec..105d900150c1 100644 --- a/drivers/usb/storage/initializers.c +++ b/drivers/usb/storage/initializers.c @@ -92,8 +92,8 @@ int usb_stor_ucr61s2b_init(struct us_data *us) return 0; } -/* This places the HUAWEI usb dongles in multi-port mode */ -static int usb_stor_huawei_feature_init(struct us_data *us) +/* This places the HUAWEI E220 devices in multi-port mode */ +int usb_stor_huawei_e220_init(struct us_data *us) { int result; @@ -104,75 +104,3 @@ static int usb_stor_huawei_feature_init(struct us_data *us) US_DEBUGP("Huawei mode set result is %d\n", result); return 0; } - -/* - * It will send a scsi switch command called rewind' to huawei dongle. 
- * When the dongle receives this command at the first time, - * it will reboot immediately. After rebooted, it will ignore this command. - * So it is unnecessary to read its response. - */ -static int usb_stor_huawei_scsi_init(struct us_data *us) -{ - int result = 0; - int act_len = 0; - struct bulk_cb_wrap *bcbw = (struct bulk_cb_wrap *) us->iobuf; - char rewind_cmd[] = {0x11, 0x06, 0x20, 0x00, 0x00, 0x01, 0x01, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - - bcbw->Signature = cpu_to_le32(US_BULK_CB_SIGN); - bcbw->Tag = 0; - bcbw->DataTransferLength = 0; - bcbw->Flags = bcbw->Lun = 0; - bcbw->Length = sizeof(rewind_cmd); - memset(bcbw->CDB, 0, sizeof(bcbw->CDB)); - memcpy(bcbw->CDB, rewind_cmd, sizeof(rewind_cmd)); - - result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcbw, - US_BULK_CB_WRAP_LEN, &act_len); - US_DEBUGP("transfer actual length=%d, result=%d\n", act_len, result); - return result; -} - -/* - * It tries to find the supported Huawei USB dongles. - * In Huawei, they assign the following product IDs - * for all of their mobile broadband dongles, - * including the new dongles in the future. - * So if the product ID is not included in this list, - * it means it is not Huawei's mobile broadband dongles. - */ -static int usb_stor_huawei_dongles_pid(struct us_data *us) -{ - struct usb_interface_descriptor *idesc; - int idProduct; - - idesc = &us->pusb_intf->cur_altsetting->desc; - idProduct = le16_to_cpu(us->pusb_dev->descriptor.idProduct); - /* The first port is CDROM, - * means the dongle in the single port mode, - * and a switch command is required to be sent. 
*/ - if (idesc && idesc->bInterfaceNumber == 0) { - if ((idProduct == 0x1001) - || (idProduct == 0x1003) - || (idProduct == 0x1004) - || (idProduct >= 0x1401 && idProduct <= 0x1500) - || (idProduct >= 0x1505 && idProduct <= 0x1600) - || (idProduct >= 0x1c02 && idProduct <= 0x2202)) { - return 1; - } - } - return 0; -} - -int usb_stor_huawei_init(struct us_data *us) -{ - int result = 0; - - if (usb_stor_huawei_dongles_pid(us)) { - if (le16_to_cpu(us->pusb_dev->descriptor.idProduct) >= 0x1446) - result = usb_stor_huawei_scsi_init(us); - else - result = usb_stor_huawei_feature_init(us); - } - return result; -} diff --git a/drivers/usb/storage/initializers.h b/drivers/usb/storage/initializers.h index 5376d4fc76f0..529327fbb06b 100644 --- a/drivers/usb/storage/initializers.h +++ b/drivers/usb/storage/initializers.h @@ -46,5 +46,5 @@ int usb_stor_euscsi_init(struct us_data *us); * flash reader */ int usb_stor_ucr61s2b_init(struct us_data *us); -/* This places the HUAWEI usb dongles in multi-port mode */ -int usb_stor_huawei_init(struct us_data *us); +/* This places the HUAWEI E220 devices in multi-port mode */ +int usb_stor_huawei_e220_init(struct us_data *us); diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 72923b56bbf6..1799335288bd 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -53,6 +53,14 @@ * as opposed to devices that do something strangely or wrongly. */ +/* In-kernel mode switching is deprecated. Do not add new devices to + * this list for the sole purpose of switching them to a different + * mode. Existing userspace solutions are superior. 
+ * + * New mode switching devices should instead be added to the database + * maintained at http://www.draisberghof.de/usb_modeswitch/ + */ + #if !defined(CONFIG_USB_STORAGE_SDDR09) && \ !defined(CONFIG_USB_STORAGE_SDDR09_MODULE) #define NO_SDDR09 @@ -488,6 +496,13 @@ UNUSUAL_DEV( 0x04e8, 0x5122, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_MAX_SECTORS_64 | US_FL_BULK_IGNORE_TAG), +/* Added by Dmitry Artamonow <mad_soft@inbox.ru> */ +UNUSUAL_DEV( 0x04e8, 0x5136, 0x0000, 0x9999, + "Samsung", + "YP-Z3", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* Entry and supporting patch by Theodore Kilgore <kilgota@auburn.edu>. * Device uses standards-violating 32-byte Bulk Command Block Wrappers and * reports itself as "Proprietary SCSI Bulk." Cf. device entry 0x084d:0x0011. @@ -1527,10 +1542,335 @@ UNUSUAL_DEV( 0x1210, 0x0003, 0x0100, 0x0100, /* Reported by fangxiaozhi <huananhu@huawei.com> * This brings the HUAWEI data card devices into multi-port mode */ -UNUSUAL_VENDOR_INTF(0x12d1, 0x08, 0x06, 0x50, +UNUSUAL_DEV( 0x12d1, 0x1001, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1003, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1004, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1401, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1402, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1403, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1404, 0x0000, 0x0000, + "HUAWEI MOBILE", + 
"Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1405, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1406, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1407, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1408, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1409, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x140A, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x140B, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x140C, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x140D, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x140E, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x140F, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1410, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1411, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, 
+ 0), +UNUSUAL_DEV( 0x12d1, 0x1412, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1413, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1414, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1415, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1416, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1417, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1418, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1419, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x141A, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x141B, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x141C, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x141D, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x141E, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x141F, 0x0000, 0x0000, + "HUAWEI MOBILE", + 
"Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1420, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1421, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1422, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1423, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1424, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1425, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1426, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1427, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1428, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1429, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x142A, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x142B, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x142C, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, 
+ 0), +UNUSUAL_DEV( 0x12d1, 0x142D, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x142E, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x142F, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1430, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1431, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1432, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1433, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1434, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1435, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1436, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1437, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1438, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x1439, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x143A, 0x0000, 0x0000, + "HUAWEI MOBILE", + 
"Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x143B, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x143C, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x143D, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x143E, 0x0000, 0x0000, + "HUAWEI MOBILE", + "Mass Storage", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, + 0), +UNUSUAL_DEV( 0x12d1, 0x143F, 0x0000, 0x0000, "HUAWEI MOBILE", "Mass Storage", - USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_init, + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_huawei_e220_init, 0), /* Reported by Vilius Bilinkevicius <vilisas AT xxx DOT lt) */ diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 8189cb6a86af..7abc5c81af2c 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -346,6 +346,7 @@ static long vfio_pci_ioctl(void *device_data, if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) { size_t size; + int max = vfio_pci_get_irq_count(vdev, hdr.index); if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL) size = sizeof(uint8_t); @@ -355,7 +356,7 @@ static long vfio_pci_ioctl(void *device_data, return -EINVAL; if (hdr.argsz - minsz < hdr.count * size || - hdr.count > vfio_pci_get_irq_count(vdev, hdr.index)) + hdr.start >= max || hdr.start + hdr.count > max) return -EINVAL; data = memdup_user((void __user *)(arg + minsz), diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 964ff22bf281..aeb00fc2d3be 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -27,6 +27,7 @@ #include <linux/pci.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include <linux/slab.h> #include 
"vfio_pci_private.h" diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3639371fa697..a96509187deb 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -22,6 +22,7 @@ #include <linux/vfio.h> #include <linux/wait.h> #include <linux/workqueue.h> +#include <linux/slab.h> #include "vfio_pci_private.h" diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 959b1cd89e6a..87c216c1e54e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); - net->tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static int tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - int ret; - - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) - return 0; - ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); - if (!ret) - net->tx_poll_state = VHOST_NET_POLL_STARTED; - return ret; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. 
Once lower device DMA done contiguously, we will signal KVM @@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net) .msg_flags = MSG_DONTWAIT, }; size_t len, total_len = 0; - int err, wmem; + int err; size_t hdr_size; struct socket *sock; struct vhost_ubuf_ref *uninitialized_var(ubufs); @@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net) if (!sock) return; - wmem = atomic_read(&sock->sk->sk_wmem_alloc); - if (wmem >= sock->sk->sk_sndbuf) { - mutex_lock(&vq->mutex); - tx_poll_start(net, sock); - mutex_unlock(&vq->mutex); - return; - } - mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); - if (wmem < sock->sk->sk_sndbuf / 2) - tx_poll_stop(net); hdr_size = vq->vhost_hlen; zcopy = vq->ubufs; @@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net) if (head == vq->num) { int num_pends; - wmem = atomic_read(&sock->sk->sk_wmem_alloc); - if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); - break; - } /* If more outstanding DMAs, queue the work. * Handle upend_idx wrap around */ num_pends = likely(vq->upend_idx >= vq->done_idx) ? 
(vq->upend_idx - vq->done_idx) : (vq->upend_idx + UIO_MAXIOV - vq->done_idx); - if (unlikely(num_pends > VHOST_MAX_PEND)) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + if (unlikely(num_pends > VHOST_MAX_PEND)) break; - } if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; @@ -339,7 +288,8 @@ static void handle_tx(struct vhost_net *net) msg.msg_controllen = 0; ubufs = NULL; } else { - struct ubuf_info *ubuf = &vq->ubuf_info[head]; + struct ubuf_info *ubuf; + ubuf = vq->ubuf_info + vq->upend_idx; vq->heads[vq->upend_idx].len = VHOST_DMA_IN_PROGRESS; @@ -363,8 +313,6 @@ static void handle_tx(struct vhost_net *net) UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); - if (err == -EAGAIN || err == -ENOBUFS) - tx_poll_start(net, sock); break; } if (err != len) @@ -627,7 +575,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); - n->tx_poll_state = VHOST_NET_POLL_DISABLED; f->private_data = n; @@ -637,32 +584,24 @@ static int vhost_net_open(struct inode *inode, struct file *f) static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { + struct vhost_poll *poll = n->poll + (vq - n->vqs); if (!vq->private_data) return; - if (vq == n->vqs + VHOST_NET_VQ_TX) { - tx_poll_stop(n); - n->tx_poll_state = VHOST_NET_POLL_DISABLED; - } else - vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); + vhost_poll_stop(poll); } static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { + struct vhost_poll *poll = n->poll + (vq - n->vqs); struct socket *sock; - int ret; sock = rcu_dereference_protected(vq->private_data, lockdep_is_held(&vq->mutex)); if (!sock) return 0; - if (vq == n->vqs + VHOST_NET_VQ_TX) { - n->tx_poll_state = VHOST_NET_POLL_STOPPED; - ret = tx_poll_start(n, sock); - } else - ret = vhost_poll_start(n->poll + 
VHOST_NET_VQ_RX, sock->file); - return ret; + return vhost_poll_start(poll, sock->file); } static struct socket *vhost_net_stop_vq(struct vhost_net *n, diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 9951297b2427..957a0b98a5d9 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -60,14 +60,22 @@ enum { VHOST_SCSI_VQ_IO = 2, }; +/* + * VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in + * kernel but disabling it helps. + * TODO: debug and remove the workaround. + */ +enum { + VHOST_SCSI_FEATURES = VHOST_FEATURES & (~VIRTIO_RING_F_EVENT_IDX) +}; + #define VHOST_SCSI_MAX_TARGET 256 #define VHOST_SCSI_MAX_VQ 128 struct vhost_scsi { /* Protected by vhost_scsi->dev.mutex */ - struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET]; + struct tcm_vhost_tpg **vs_tpg; char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; - bool vs_endpoint; struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ]; @@ -570,9 +578,27 @@ static void tcm_vhost_submission_work(struct work_struct *work) } } +static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, int head, unsigned out) +{ + struct virtio_scsi_cmd_resp __user *resp; + struct virtio_scsi_cmd_resp rsp; + int ret; + + memset(&rsp, 0, sizeof(rsp)); + rsp.response = VIRTIO_SCSI_S_BAD_TARGET; + resp = vq->iov[out].iov_base; + ret = __copy_to_user(resp, &rsp, sizeof(rsp)); + if (!ret) + vhost_add_used_and_signal(&vs->dev, vq, head, 0); + else + pr_err("Faulted on virtio_scsi_cmd_resp\n"); +} + static void vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { + struct tcm_vhost_tpg **vs_tpg; struct virtio_scsi_cmd_req v_req; struct tcm_vhost_tpg *tv_tpg; struct tcm_vhost_cmd *tv_cmd; @@ -581,8 +607,16 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, int head, ret; u8 target; - /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */ - if (unlikely(!vs->vs_endpoint)) + /* + * We can handle the vq only after the endpoint is setup by 
calling the + * VHOST_SCSI_SET_ENDPOINT ioctl. + * + * TODO: Check that we are running from vhost_worker which acts + * as read-side critical section for vhost kind of RCU. + * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h + */ + vs_tpg = rcu_dereference_check(vq->private_data, 1); + if (!vs_tpg) return; mutex_lock(&vq->mutex); @@ -652,23 +686,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, /* Extract the tpgt */ target = v_req.lun[1]; - tv_tpg = vs->vs_tpg[target]; + tv_tpg = ACCESS_ONCE(vs_tpg[target]); /* Target does not exist, fail the request */ if (unlikely(!tv_tpg)) { - struct virtio_scsi_cmd_resp __user *resp; - struct virtio_scsi_cmd_resp rsp; - - memset(&rsp, 0, sizeof(rsp)); - rsp.response = VIRTIO_SCSI_S_BAD_TARGET; - resp = vq->iov[out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(&vs->dev, - vq, head, 0); - else - pr_err("Faulted on virtio_scsi_cmd_resp\n"); - + vhost_scsi_send_bad_target(vs, vq, head, out); continue; } @@ -681,22 +703,13 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, if (IS_ERR(tv_cmd)) { vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", PTR_ERR(tv_cmd)); - break; + goto err_cmd; } pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction" ": %d\n", tv_cmd, exp_data_len, data_direction); tv_cmd->tvc_vhost = vs; tv_cmd->tvc_vq = vq; - - if (unlikely(vq->iov[out].iov_len != - sizeof(struct virtio_scsi_cmd_resp))) { - vq_err(vq, "Expecting virtio_scsi_cmd_resp, got %zu" - " bytes, out: %d, in: %d\n", - vq->iov[out].iov_len, out, in); - break; - } - tv_cmd->tvc_resp = vq->iov[out].iov_base; /* @@ -716,7 +729,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", scsi_command_size(tv_cmd->tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - break; /* TODO */ + goto err_free; } tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; @@ -729,7 +742,7 @@ static void vhost_scsi_handle_vq(struct 
vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); - break; /* TODO */ + goto err_free; } } @@ -750,6 +763,13 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, } mutex_unlock(&vq->mutex); + return; + +err_free: + vhost_scsi_free_cmd(tv_cmd); +err_cmd: + vhost_scsi_send_bad_target(vs, vq, head, out); + mutex_unlock(&vq->mutex); } static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) @@ -771,6 +791,20 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) vhost_scsi_handle_vq(vs, vq); } +static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) +{ + vhost_poll_flush(&vs->dev.vqs[index].poll); +} + +static void vhost_scsi_flush(struct vhost_scsi *vs) +{ + int i; + + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) + vhost_scsi_flush_vq(vs, i); + vhost_work_flush(&vs->dev, &vs->vs_completion_work); +} + /* * Called from vhost_scsi_ioctl() context to walk the list of available * tcm_vhost_tpg with an active struct tcm_vhost_nexus @@ -781,8 +815,10 @@ static int vhost_scsi_set_endpoint( { struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tv_tpg; + struct tcm_vhost_tpg **vs_tpg; + struct vhost_virtqueue *vq; + int index, ret, i, len; bool match = false; - int index, ret; mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. 
*/ @@ -794,6 +830,15 @@ static int vhost_scsi_set_endpoint( } } + len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET; + vs_tpg = kzalloc(len, GFP_KERNEL); + if (!vs_tpg) { + mutex_unlock(&vs->dev.mutex); + return -ENOMEM; + } + if (vs->vs_tpg) + memcpy(vs_tpg, vs->vs_tpg, len); + mutex_lock(&tcm_vhost_mutex); list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) { mutex_lock(&tv_tpg->tv_tpg_mutex); @@ -808,14 +853,15 @@ static int vhost_scsi_set_endpoint( tv_tport = tv_tpg->tport; if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { - if (vs->vs_tpg[tv_tpg->tport_tpgt]) { + if (vs->vs_tpg && vs->vs_tpg[tv_tpg->tport_tpgt]) { mutex_unlock(&tv_tpg->tv_tpg_mutex); mutex_unlock(&tcm_vhost_mutex); mutex_unlock(&vs->dev.mutex); + kfree(vs_tpg); return -EEXIST; } tv_tpg->tv_tpg_vhost_count++; - vs->vs_tpg[tv_tpg->tport_tpgt] = tv_tpg; + vs_tpg[tv_tpg->tport_tpgt] = tv_tpg; smp_mb__after_atomic_inc(); match = true; } @@ -826,12 +872,27 @@ static int vhost_scsi_set_endpoint( if (match) { memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, sizeof(vs->vs_vhost_wwpn)); - vs->vs_endpoint = true; + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + vq = &vs->vqs[i]; + /* Flushing the vhost_work acts as synchronize_rcu */ + mutex_lock(&vq->mutex); + rcu_assign_pointer(vq->private_data, vs_tpg); + vhost_init_used(vq); + mutex_unlock(&vq->mutex); + } ret = 0; } else { ret = -EEXIST; } + /* + * Act as synchronize_rcu to make sure access to + * old vs->vs_tpg is finished. 
+ */ + vhost_scsi_flush(vs); + kfree(vs->vs_tpg); + vs->vs_tpg = vs_tpg; + mutex_unlock(&vs->dev.mutex); return ret; } @@ -842,6 +903,8 @@ static int vhost_scsi_clear_endpoint( { struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tv_tpg; + struct vhost_virtqueue *vq; + bool match = false; int index, ret, i; u8 target; @@ -850,20 +913,26 @@ static int vhost_scsi_clear_endpoint( for (index = 0; index < vs->dev.nvqs; ++index) { if (!vhost_vq_access_ok(&vs->vqs[index])) { ret = -EFAULT; - goto err; + goto err_dev; } } + + if (!vs->vs_tpg) { + mutex_unlock(&vs->dev.mutex); + return 0; + } + for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { target = i; - tv_tpg = vs->vs_tpg[target]; if (!tv_tpg) continue; + mutex_lock(&tv_tpg->tv_tpg_mutex); tv_tport = tv_tpg->tport; if (!tv_tport) { ret = -ENODEV; - goto err; + goto err_tpg; } if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) { @@ -872,20 +941,58 @@ static int vhost_scsi_clear_endpoint( tv_tport->tport_name, tv_tpg->tport_tpgt, t->vhost_wwpn, t->vhost_tpgt); ret = -EINVAL; - goto err; + goto err_tpg; } tv_tpg->tv_tpg_vhost_count--; vs->vs_tpg[target] = NULL; - vs->vs_endpoint = false; + match = true; + mutex_unlock(&tv_tpg->tv_tpg_mutex); } + if (match) { + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + vq = &vs->vqs[i]; + /* Flushing the vhost_work acts as synchronize_rcu */ + mutex_lock(&vq->mutex); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + } + } + /* + * Act as synchronize_rcu to make sure access to + * old vs->vs_tpg is finished. 
+ */ + vhost_scsi_flush(vs); + kfree(vs->vs_tpg); + vs->vs_tpg = NULL; mutex_unlock(&vs->dev.mutex); + return 0; -err: +err_tpg: + mutex_unlock(&tv_tpg->tv_tpg_mutex); +err_dev: mutex_unlock(&vs->dev.mutex); return ret; } +static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) +{ + if (features & ~VHOST_SCSI_FEATURES) + return -EOPNOTSUPP; + + mutex_lock(&vs->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&vs->dev)) { + mutex_unlock(&vs->dev.mutex); + return -EFAULT; + } + vs->dev.acked_features = features; + smp_wmb(); + vhost_scsi_flush(vs); + mutex_unlock(&vs->dev.mutex); + return 0; +} + static int vhost_scsi_open(struct inode *inode, struct file *f) { struct vhost_scsi *s; @@ -926,37 +1033,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) return 0; } -static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) -{ - vhost_poll_flush(&vs->dev.vqs[index].poll); -} - -static void vhost_scsi_flush(struct vhost_scsi *vs) -{ - int i; - - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) - vhost_scsi_flush_vq(vs, i); -} - -static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) -{ - if (features & ~VHOST_FEATURES) - return -EOPNOTSUPP; - - mutex_lock(&vs->dev.mutex); - if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&vs->dev)) { - mutex_unlock(&vs->dev.mutex); - return -EFAULT; - } - vs->dev.acked_features = features; - smp_wmb(); - vhost_scsi_flush(vs); - mutex_unlock(&vs->dev.mutex); - return 0; -} - static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { @@ -987,7 +1063,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return 0; case VHOST_GET_FEATURES: - features = VHOST_FEATURES; + features = VHOST_SCSI_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9759249e6d90..4eecdb867d53 100644 --- 
a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -89,6 +89,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) unsigned long mask; int ret = 0; + if (poll->wqh) + return 0; + mask = file->f_op->poll(file, &poll->table); if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c index 12cf5f31ee8f..025428e04c33 100644 --- a/drivers/video/atmel_lcdfb.c +++ b/drivers/video/atmel_lcdfb.c @@ -422,17 +422,22 @@ static int atmel_lcdfb_check_var(struct fb_var_screeninfo *var, = var->bits_per_pixel; break; case 16: + /* Older SOCs use IBGR:555 rather than BGR:565. */ + if (sinfo->have_intensity_bit) + var->green.length = 5; + else + var->green.length = 6; + if (sinfo->lcd_wiring_mode == ATMEL_LCDC_WIRING_RGB) { - /* RGB:565 mode */ - var->red.offset = 11; + /* RGB:5X5 mode */ + var->red.offset = var->green.length + 5; var->blue.offset = 0; } else { - /* BGR:565 mode */ + /* BGR:5X5 mode */ var->red.offset = 0; - var->blue.offset = 11; + var->blue.offset = var->green.length + 5; } var->green.offset = 5; - var->green.length = 6; var->red.length = var->blue.length = 5; break; case 32: @@ -679,8 +684,7 @@ static int atmel_lcdfb_setcolreg(unsigned int regno, unsigned int red, case FB_VISUAL_PSEUDOCOLOR: if (regno < 256) { - if (cpu_is_at91sam9261() || cpu_is_at91sam9263() - || cpu_is_at91sam9rl()) { + if (sinfo->have_intensity_bit) { /* old style I+BGR:555 */ val = ((red >> 11) & 0x001f); val |= ((green >> 6) & 0x03e0); @@ -870,6 +874,10 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev) } sinfo->info = info; sinfo->pdev = pdev; + if (cpu_is_at91sam9261() || cpu_is_at91sam9263() || + cpu_is_at91sam9rl()) { + sinfo->have_intensity_bit = true; + } strcpy(info->fix.id, sinfo->pdev->name); info->flags = ATMEL_LCDFB_FBINFO_DEFAULT; diff --git a/drivers/video/ep93xx-fb.c b/drivers/video/ep93xx-fb.c index 3f2519d30715..e06cd5d90c97 100644 --- a/drivers/video/ep93xx-fb.c 
+++ b/drivers/video/ep93xx-fb.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/clk.h> #include <linux/fb.h> +#include <linux/io.h> #include <linux/platform_data/video-ep93xx.h> diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 7c254084b6a0..86291dcd964a 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1373,15 +1373,12 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) { struct fb_info *info = file_fb_info(file); struct fb_ops *fb; - unsigned long off; + unsigned long mmio_pgoff; unsigned long start; u32 len; if (!info) return -ENODEV; - if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) - return -EINVAL; - off = vma->vm_pgoff << PAGE_SHIFT; fb = info->fbops; if (!fb) return -ENODEV; @@ -1393,32 +1390,24 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) return res; } - /* frame buffer memory */ + /* + * Ugh. This can be either the frame buffer mapping, or + * if pgoff points past it, the mmio mapping. + */ start = info->fix.smem_start; - len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.smem_len); - if (off >= len) { - /* memory mapped io */ - off -= len; - if (info->var.accel_flags) { - mutex_unlock(&info->mm_lock); - return -EINVAL; - } + len = info->fix.smem_len; + mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT; + if (vma->vm_pgoff >= mmio_pgoff) { + vma->vm_pgoff -= mmio_pgoff; start = info->fix.mmio_start; - len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len); + len = info->fix.mmio_len; } mutex_unlock(&info->mm_lock); - start &= PAGE_MASK; - if ((vma->vm_end - vma->vm_start + off) > len) - return -EINVAL; - off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; - /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by io_remap_pfn_range()*/ + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - fb_pgprotect(file, vma, off); - if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - return 0; + fb_pgprotect(file, 
vma, start); + + return vm_iomap_memory(vma, start, len); } static int diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c index 94ad0f71383c..7f6709991a5c 100644 --- a/drivers/video/fbmon.c +++ b/drivers/video/fbmon.c @@ -1400,7 +1400,7 @@ int fb_videomode_from_videomode(const struct videomode *vm, fbmode->vmode = 0; if (vm->dmt_flags & VESA_DMT_HSYNC_HIGH) fbmode->sync |= FB_SYNC_HOR_HIGH_ACT; - if (vm->dmt_flags & VESA_DMT_HSYNC_HIGH) + if (vm->dmt_flags & VESA_DMT_VSYNC_HIGH) fbmode->sync |= FB_SYNC_VERT_HIGH_ACT; if (vm->data_flags & DISPLAY_FLAGS_INTERLACED) fbmode->vmode |= FB_VMODE_INTERLACED; diff --git a/drivers/video/mmp/core.c b/drivers/video/mmp/core.c index 9ed83419038b..84de2632857a 100644 --- a/drivers/video/mmp/core.c +++ b/drivers/video/mmp/core.c @@ -252,7 +252,5 @@ void mmp_unregister_path(struct mmp_path *path) kfree(path); mutex_unlock(&disp_lock); - - dev_info(path->dev, "de-register %s\n", path->name); } EXPORT_SYMBOL_GPL(mmp_unregister_path); diff --git a/drivers/video/mxsfb.c b/drivers/video/mxsfb.c index 755556ca5b2d..45169cbaba6e 100644 --- a/drivers/video/mxsfb.c +++ b/drivers/video/mxsfb.c @@ -169,6 +169,7 @@ struct mxsfb_info { unsigned dotclk_delay; const struct mxsfb_devdata *devdata; int mapped; + u32 sync; }; #define mxsfb_is_v3(host) (host->devdata->ipversion == 3) @@ -456,9 +457,9 @@ static int mxsfb_set_par(struct fb_info *fb_info) vdctrl0 |= VDCTRL0_HSYNC_ACT_HIGH; if (fb_info->var.sync & FB_SYNC_VERT_HIGH_ACT) vdctrl0 |= VDCTRL0_VSYNC_ACT_HIGH; - if (fb_info->var.sync & FB_SYNC_DATA_ENABLE_HIGH_ACT) + if (host->sync & MXSFB_SYNC_DATA_ENABLE_HIGH_ACT) vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH; - if (fb_info->var.sync & FB_SYNC_DOTCLK_FAILING_ACT) + if (host->sync & MXSFB_SYNC_DOTCLK_FAILING_ACT) vdctrl0 |= VDCTRL0_DOTCLK_ACT_FAILING; writel(vdctrl0, host->base + LCDC_VDCTRL0); @@ -861,6 +862,8 @@ static int mxsfb_probe(struct platform_device *pdev) INIT_LIST_HEAD(&fb_info->modelist); + host->sync = pdata->sync; + ret = 
mxsfb_init_fbinfo(host); if (ret != 0) goto error_init_fb; diff --git a/drivers/video/omap/lcd_ams_delta.c b/drivers/video/omap/lcd_ams_delta.c index ed4cad87fbcd..4a5f2cd3d3bf 100644 --- a/drivers/video/omap/lcd_ams_delta.c +++ b/drivers/video/omap/lcd_ams_delta.c @@ -27,6 +27,7 @@ #include <linux/lcd.h> #include <linux/gpio.h> +#include <mach/hardware.h> #include <mach/board-ams-delta.h> #include "omapfb.h" diff --git a/drivers/video/omap/lcd_osk.c b/drivers/video/omap/lcd_osk.c index 3aa62da89195..7fbe04bce0ed 100644 --- a/drivers/video/omap/lcd_osk.c +++ b/drivers/video/omap/lcd_osk.c @@ -24,7 +24,10 @@ #include <linux/platform_device.h> #include <asm/gpio.h> + +#include <mach/hardware.h> #include <mach/mux.h> + #include "omapfb.h" static int osk_panel_init(struct lcd_panel *panel, struct omapfb_device *fbdev) diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c index e31f5b33b501..d40612c31a98 100644 --- a/drivers/video/omap/omapfb_main.c +++ b/drivers/video/omap/omapfb_main.c @@ -32,6 +32,8 @@ #include <linux/omap-dma.h> +#include <mach/hardware.h> + #include "omapfb.h" #include "lcdc.h" diff --git a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c index 6b6643911d29..048c98381ef6 100644 --- a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c @@ -63,6 +63,9 @@ struct tpo_td043_device { u32 power_on_resume:1; }; +/* used to pass spi_device from SPI to DSS portion of the driver */ +static struct tpo_td043_device *g_tpo_td043; + static int tpo_td043_write(struct spi_device *spi, u8 addr, u8 data) { struct spi_message m; @@ -403,7 +406,7 @@ static void tpo_td043_disable(struct omap_dss_device *dssdev) static int tpo_td043_probe(struct omap_dss_device *dssdev) { - struct tpo_td043_device *tpo_td043 = dev_get_drvdata(&dssdev->dev); + struct tpo_td043_device *tpo_td043 = g_tpo_td043; int nreset_gpio = dssdev->reset_gpio; 
int ret = 0; @@ -440,6 +443,8 @@ static int tpo_td043_probe(struct omap_dss_device *dssdev) if (ret) dev_warn(&dssdev->dev, "failed to create sysfs files\n"); + dev_set_drvdata(&dssdev->dev, tpo_td043); + return 0; fail_gpio_req: @@ -505,6 +510,9 @@ static int tpo_td043_spi_probe(struct spi_device *spi) return -ENODEV; } + if (g_tpo_td043 != NULL) + return -EBUSY; + spi->bits_per_word = 16; spi->mode = SPI_MODE_0; @@ -521,7 +529,7 @@ static int tpo_td043_spi_probe(struct spi_device *spi) tpo_td043->spi = spi; tpo_td043->nreset_gpio = dssdev->reset_gpio; dev_set_drvdata(&spi->dev, tpo_td043); - dev_set_drvdata(&dssdev->dev, tpo_td043); + g_tpo_td043 = tpo_td043; omap_dss_register_driver(&tpo_td043_driver); @@ -534,6 +542,7 @@ static int tpo_td043_spi_remove(struct spi_device *spi) omap_dss_unregister_driver(&tpo_td043_driver); kfree(tpo_td043); + g_tpo_td043 = NULL; return 0; } diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c index d7d66ef5cb58..7f791aeda4d2 100644 --- a/drivers/video/omap2/dss/dss_features.c +++ b/drivers/video/omap2/dss/dss_features.c @@ -202,12 +202,10 @@ static const enum omap_dss_output_id omap3630_dss_supported_outputs[] = { static const enum omap_dss_output_id omap4_dss_supported_outputs[] = { /* OMAP_DSS_CHANNEL_LCD */ - OMAP_DSS_OUTPUT_DPI | OMAP_DSS_OUTPUT_DBI | - OMAP_DSS_OUTPUT_DSI1, + OMAP_DSS_OUTPUT_DBI | OMAP_DSS_OUTPUT_DSI1, /* OMAP_DSS_CHANNEL_DIGIT */ - OMAP_DSS_OUTPUT_VENC | OMAP_DSS_OUTPUT_HDMI | - OMAP_DSS_OUTPUT_DPI, + OMAP_DSS_OUTPUT_VENC | OMAP_DSS_OUTPUT_HDMI, /* OMAP_DSS_CHANNEL_LCD2 */ OMAP_DSS_OUTPUT_DPI | OMAP_DSS_OUTPUT_DBI | diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index 63203acef812..0264704a52be 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -858,6 +858,7 @@ static void sh_mobile_lcdc_geometry(struct sh_mobile_lcdc_chan *ch) tmp = ((mode->xres & 7) << 24) | ((display_h_total & 7) << 16) | 
((mode->hsync_len & 7) << 8) | (hsync_pos & 7); lcdc_write_chan(ch, LDHAJR, tmp); + lcdc_write_chan_mirror(ch, LDHAJR, tmp); } static void sh_mobile_lcdc_overlay_setup(struct sh_mobile_lcdc_overlay *ovl) diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index b75db0186488..d4284458377e 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -1973,7 +1973,8 @@ static int uvesafb_init(void) err = -ENOMEM; if (err) { - platform_device_put(uvesafb_device); + if (uvesafb_device) + platform_device_put(uvesafb_device); platform_driver_unregister(&uvesafb_driver); cn_del_callback(&uvesafb_cn_id); return err; diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c index d39dfa4cc235..46d97014342e 100644 --- a/drivers/w1/masters/w1-gpio.c +++ b/drivers/w1/masters/w1-gpio.c @@ -47,11 +47,13 @@ static u8 w1_gpio_read_bit(void *data) return gpio_get_value(pdata->pin) ? 1 : 0; } +#if defined(CONFIG_OF) static struct of_device_id w1_gpio_dt_ids[] = { { .compatible = "w1-gpio" }, {} }; MODULE_DEVICE_TABLE(of, w1_gpio_dt_ids); +#endif static int w1_gpio_probe_dt(struct platform_device *pdev) { @@ -158,7 +160,7 @@ static int w1_gpio_probe(struct platform_device *pdev) return err; } -static int __exit w1_gpio_remove(struct platform_device *pdev) +static int w1_gpio_remove(struct platform_device *pdev) { struct w1_bus_master *master = platform_get_drvdata(pdev); struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; @@ -210,7 +212,7 @@ static struct platform_driver w1_gpio_driver = { .of_match_table = of_match_ptr(w1_gpio_dt_ids), }, .probe = w1_gpio_probe, - .remove = __exit_p(w1_gpio_remove), + .remove = w1_gpio_remove, .suspend = w1_gpio_suspend, .resume = w1_gpio_resume, }; diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 7994d933f040..7ce277d2bb67 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -924,7 +924,8 @@ void w1_search(struct w1_master *dev, u8 search_type, w1_slave_found_callback cb tmp64 = (triplet_ret >> 2); 
rn |= (tmp64 << i); - if (kthread_should_stop()) { + /* ensure we're called from kthread and not by netlink callback */ + if (!dev->priv && kthread_should_stop()) { mutex_unlock(&dev->bus_mutex); dev_dbg(&dev->dev, "Abort w1_search\n"); return; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 9fcc70c11cea..e89fc3133972 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -117,7 +117,7 @@ config ARM_SP805_WATCHDOG config AT91RM9200_WATCHDOG tristate "AT91RM9200 watchdog" - depends on ARCH_AT91 + depends on ARCH_AT91RM9200 help Watchdog timer embedded into AT91RM9200 chips. This will reboot your system when the timeout is reached. diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index e3b8f757d2d3..0e9d8c479c35 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -40,13 +40,12 @@ #include "sp5100_tco.h" /* Module and version information */ -#define TCO_VERSION "0.03" +#define TCO_VERSION "0.05" #define TCO_MODULE_NAME "SP5100 TCO timer" #define TCO_DRIVER_NAME TCO_MODULE_NAME ", v" TCO_VERSION /* internal variables */ static u32 tcobase_phys; -static u32 resbase_phys; static u32 tco_wdt_fired; static void __iomem *tcobase; static unsigned int pm_iobase; @@ -54,10 +53,6 @@ static DEFINE_SPINLOCK(tco_lock); /* Guards the hardware */ static unsigned long timer_alive; static char tco_expect_close; static struct pci_dev *sp5100_tco_pci; -static struct resource wdt_res = { - .name = "Watchdog Timer", - .flags = IORESOURCE_MEM, -}; /* the watchdog platform device */ static struct platform_device *sp5100_tco_platform_device; @@ -75,12 +70,6 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started." " (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -static unsigned int force_addr; -module_param(force_addr, uint, 0); -MODULE_PARM_DESC(force_addr, "Force the use of specified MMIO address." 
- " ONLY USE THIS PARAMETER IF YOU REALLY KNOW" - " WHAT YOU ARE DOING (default=none)"); - /* * Some TCO specific functions */ @@ -176,39 +165,6 @@ static void tco_timer_enable(void) } } -static void tco_timer_disable(void) -{ - int val; - - if (sp5100_tco_pci->revision >= 0x40) { - /* For SB800 or later */ - /* Enable watchdog decode bit and Disable watchdog timer */ - outb(SB800_PM_WATCHDOG_CONTROL, SB800_IO_PM_INDEX_REG); - val = inb(SB800_IO_PM_DATA_REG); - val |= SB800_PCI_WATCHDOG_DECODE_EN; - val |= SB800_PM_WATCHDOG_DISABLE; - outb(val, SB800_IO_PM_DATA_REG); - } else { - /* For SP5100 or SB7x0 */ - /* Enable watchdog decode bit */ - pci_read_config_dword(sp5100_tco_pci, - SP5100_PCI_WATCHDOG_MISC_REG, - &val); - - val |= SP5100_PCI_WATCHDOG_DECODE_EN; - - pci_write_config_dword(sp5100_tco_pci, - SP5100_PCI_WATCHDOG_MISC_REG, - val); - - /* Disable Watchdog timer */ - outb(SP5100_PM_WATCHDOG_CONTROL, SP5100_IO_PM_INDEX_REG); - val = inb(SP5100_IO_PM_DATA_REG); - val |= SP5100_PM_WATCHDOG_DISABLE; - outb(val, SP5100_IO_PM_DATA_REG); - } -} - /* * /dev/watchdog handling */ @@ -361,7 +317,7 @@ static unsigned char sp5100_tco_setupdevice(void) { struct pci_dev *dev = NULL; const char *dev_name = NULL; - u32 val, tmp_val; + u32 val; u32 index_reg, data_reg, base_addr; /* Match the PCI device */ @@ -459,63 +415,8 @@ static unsigned char sp5100_tco_setupdevice(void) } else pr_debug("SBResource_MMIO is disabled(0x%04x)\n", val); - /* - * Lastly re-programming the watchdog timer MMIO address, - * This method is a last resort... - * - * Before re-programming, to ensure that the watchdog timer - * is disabled, disable the watchdog timer. - */ - tco_timer_disable(); - - if (force_addr) { - /* - * Force the use of watchdog timer MMIO address, and aligned to - * 8byte boundary. - */ - force_addr &= ~0x7; - val = force_addr; - - pr_info("Force the use of 0x%04x as MMIO address\n", val); - } else { - /* - * Get empty slot into the resource tree for watchdog timer. 
- */ - if (allocate_resource(&iomem_resource, - &wdt_res, - SP5100_WDT_MEM_MAP_SIZE, - 0xf0000000, - 0xfffffff8, - 0x8, - NULL, - NULL)) { - pr_err("MMIO allocation failed\n"); - goto unreg_region; - } - - val = resbase_phys = wdt_res.start; - pr_debug("Got 0x%04x from resource tree\n", val); - } - - /* Restore to the low three bits */ - outb(base_addr+0, index_reg); - tmp_val = val | (inb(data_reg) & 0x7); - - /* Re-programming the watchdog timer base address */ - outb(base_addr+0, index_reg); - outb((tmp_val >> 0) & 0xff, data_reg); - outb(base_addr+1, index_reg); - outb((tmp_val >> 8) & 0xff, data_reg); - outb(base_addr+2, index_reg); - outb((tmp_val >> 16) & 0xff, data_reg); - outb(base_addr+3, index_reg); - outb((tmp_val >> 24) & 0xff, data_reg); - - if (!request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE, - dev_name)) { - pr_err("MMIO address 0x%04x already in use\n", val); - goto unreg_resource; - } + pr_notice("failed to find MMIO address, giving up.\n"); + goto unreg_region; setup_wdt: tcobase_phys = val; @@ -555,9 +456,6 @@ setup_wdt: unreg_mem_region: release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); -unreg_resource: - if (resbase_phys) - release_resource(&wdt_res); unreg_region: release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); exit: @@ -567,7 +465,6 @@ exit: static int sp5100_tco_init(struct platform_device *dev) { int ret; - char addr_str[16]; /* * Check whether or not the hardware watchdog is there. If found, then @@ -599,23 +496,14 @@ static int sp5100_tco_init(struct platform_device *dev) clear_bit(0, &timer_alive); /* Show module parameters */ - if (force_addr == tcobase_phys) - /* The force_addr is vaild */ - sprintf(addr_str, "0x%04x", force_addr); - else - strcpy(addr_str, "none"); - - pr_info("initialized (0x%p). heartbeat=%d sec (nowayout=%d, " - "force_addr=%s)\n", - tcobase, heartbeat, nowayout, addr_str); + pr_info("initialized (0x%p). 
heartbeat=%d sec (nowayout=%d)\n", + tcobase, heartbeat, nowayout); return 0; exit: iounmap(tcobase); release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); - if (resbase_phys) - release_resource(&wdt_res); release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); return ret; } @@ -630,8 +518,6 @@ static void sp5100_tco_cleanup(void) misc_deregister(&sp5100_tco_miscdev); iounmap(tcobase); release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); - if (resbase_phys) - release_resource(&wdt_res); release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); } diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h index 71594a0c14b7..2b28c00da0df 100644 --- a/drivers/watchdog/sp5100_tco.h +++ b/drivers/watchdog/sp5100_tco.h @@ -57,7 +57,7 @@ #define SB800_PM_WATCHDOG_DISABLE (1 << 2) #define SB800_PM_WATCHDOG_SECOND_RES (3 << 0) #define SB800_ACPI_MMIO_DECODE_EN (1 << 0) -#define SB800_ACPI_MMIO_SEL (1 << 2) +#define SB800_ACPI_MMIO_SEL (1 << 1) #define SB800_PM_WDT_MMIO_OFFSET 0xB00 diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 5a32232cf7c1..67af155cf602 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -182,7 +182,7 @@ config XEN_PRIVCMD config XEN_STUB bool "Xen stub drivers" - depends on XEN && X86_64 + depends on XEN && X86_64 && BROKEN default n help Allow kernel to install stub drivers, to reserve space for Xen drivers, diff --git a/drivers/xen/events.c b/drivers/xen/events.c index d17aa41a9041..2647ad8e1f19 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -403,11 +403,23 @@ static void unmask_evtchn(int port) if (unlikely((cpu != cpu_from_evtchn(port)))) do_hypercall = 1; - else + else { + /* + * Need to clear the mask before checking pending to + * avoid a race with an event becoming pending. + * + * EVTCHNOP_unmask will only trigger an upcall if the + * mask bit was set, so if a hypercall is needed + * remask the event. 
+ */ + sync_clear_bit(port, BM(&s->evtchn_mask[0])); evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0])); - if (unlikely(evtchn_pending && xen_hvm_domain())) - do_hypercall = 1; + if (unlikely(evtchn_pending && xen_hvm_domain())) { + sync_set_bit(port, BM(&s->evtchn_mask[0])); + do_hypercall = 1; + } + } /* Slow path (hypercall) if this is a non-local port or if this is * an hvm domain and an event is pending (hvm domains don't have @@ -418,8 +430,6 @@ static void unmask_evtchn(int port) } else { struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); - sync_clear_bit(port, BM(&s->evtchn_mask[0])); - /* * The following is basically the equivalent of * 'hw_resend_irq'. Just like a real IO-APIC we 'lose @@ -1306,7 +1316,7 @@ static void __xen_evtchn_do_upcall(void) { int start_word_idx, start_bit_idx; int word_idx, bit_idx; - int i; + int i, irq; int cpu = get_cpu(); struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); @@ -1314,6 +1324,8 @@ static void __xen_evtchn_do_upcall(void) do { xen_ulong_t pending_words; + xen_ulong_t pending_bits; + struct irq_desc *desc; vcpu_info->evtchn_upcall_pending = 0; @@ -1325,6 +1337,17 @@ static void __xen_evtchn_do_upcall(void) * selector flag. xchg_xen_ulong must contain an * appropriate barrier. 
*/ + if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) { + int evtchn = evtchn_from_irq(irq); + word_idx = evtchn / BITS_PER_LONG; + pending_bits = evtchn % BITS_PER_LONG; + if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) { + desc = irq_to_desc(irq); + if (desc) + generic_handle_irq_desc(irq, desc); + } + } + pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0); start_word_idx = __this_cpu_read(current_word_idx); @@ -1333,7 +1356,6 @@ static void __xen_evtchn_do_upcall(void) word_idx = start_word_idx; for (i = 0; pending_words != 0; i++) { - xen_ulong_t pending_bits; xen_ulong_t words; words = MASK_LSBS(pending_words, word_idx); @@ -1362,8 +1384,7 @@ static void __xen_evtchn_do_upcall(void) do { xen_ulong_t bits; - int port, irq; - struct irq_desc *desc; + int port; bits = MASK_LSBS(pending_bits, bit_idx); diff --git a/drivers/xen/fallback.c b/drivers/xen/fallback.c index 0ef7c4d40f86..b04fb64c5a91 100644 --- a/drivers/xen/fallback.c +++ b/drivers/xen/fallback.c @@ -44,7 +44,7 @@ int xen_event_channel_op_compat(int cmd, void *arg) } EXPORT_SYMBOL_GPL(xen_event_channel_op_compat); -int HYPERVISOR_physdev_op_compat(int cmd, void *arg) +int xen_physdev_op_compat(int cmd, void *arg) { struct physdev_op op; int rc; @@ -78,3 +78,4 @@ int HYPERVISOR_physdev_op_compat(int cmd, void *arg) return rc; } +EXPORT_SYMBOL_GPL(xen_physdev_op_compat); diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 316df65163cf..90e34ac7e522 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -500,16 +500,19 @@ static int __init xen_acpi_processor_init(void) (void)acpi_processor_preregister_performance(acpi_perf_data); for_each_possible_cpu(i) { + struct acpi_processor *pr; struct acpi_processor_performance *perf; + pr = per_cpu(processors, i); perf = per_cpu_ptr(acpi_perf_data, i); - rc = acpi_processor_register_performance(perf, i); + if (!pr) + continue; + + pr->performance = perf; + rc 
= acpi_processor_get_performance_info(pr); if (rc) goto err_out; } - rc = acpi_processor_notify_smm(THIS_MODULE); - if (rc) - goto err_unregister; for_each_possible_cpu(i) { struct acpi_processor *_pr; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 9204126f1560..a2278ba7fb27 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -17,6 +17,7 @@ #include <xen/events.h> #include <asm/xen/pci.h> #include <asm/xen/hypervisor.h> +#include <xen/interface/physdev.h> #include "pciback.h" #include "conf_space.h" #include "conf_space_quirks.h" @@ -85,37 +86,52 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) static void pcistub_device_release(struct kref *kref) { struct pcistub_device *psdev; + struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data; psdev = container_of(kref, struct pcistub_device, kref); - dev_data = pci_get_drvdata(psdev->dev); + dev = psdev->dev; + dev_data = pci_get_drvdata(dev); - dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); + dev_dbg(&dev->dev, "pcistub_device_release\n"); - xen_unregister_device_domain_owner(psdev->dev); + xen_unregister_device_domain_owner(dev); /* Call the reset function which does not take lock as this * is called from "unbind" which takes a device_lock mutex. 
*/ - __pci_reset_function_locked(psdev->dev); - if (pci_load_and_free_saved_state(psdev->dev, - &dev_data->pci_saved_state)) { - dev_dbg(&psdev->dev->dev, "Could not reload PCI state\n"); - } else - pci_restore_state(psdev->dev); + __pci_reset_function_locked(dev); + if (pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) + dev_dbg(&dev->dev, "Could not reload PCI state\n"); + else + pci_restore_state(dev); + + if (pci_find_capability(dev, PCI_CAP_ID_MSIX)) { + struct physdev_pci_device ppdev = { + .seg = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn + }; + int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix, + &ppdev); + + if (err) + dev_warn(&dev->dev, "MSI-X release failed (%d)\n", + err); + } /* Disable the device */ - xen_pcibk_reset_device(psdev->dev); + xen_pcibk_reset_device(dev); kfree(dev_data); - pci_set_drvdata(psdev->dev, NULL); + pci_set_drvdata(dev, NULL); /* Clean-up the device */ - xen_pcibk_config_free_dyn_fields(psdev->dev); - xen_pcibk_config_free_dev(psdev->dev); + xen_pcibk_config_free_dyn_fields(dev); + xen_pcibk_config_free_dev(dev); - psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; - pci_dev_put(psdev->dev); + dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + pci_dev_put(dev); kfree(psdev); } @@ -355,6 +371,19 @@ static int pcistub_init_device(struct pci_dev *dev) if (err) goto config_release; + if (pci_find_capability(dev, PCI_CAP_ID_MSIX)) { + struct physdev_pci_device ppdev = { + .seg = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn + }; + + err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev); + if (err) + dev_err(&dev->dev, "MSI-X preparation failed (%d)\n", + err); + } + /* We need the device active to save the state. 
*/ dev_dbg(&dev->dev, "save state of device\n"); pci_save_state(dev); diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 37c1f825f513..b98cf0c35725 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -113,7 +113,8 @@ void xen_pcibk_reset_device(struct pci_dev *dev) if (dev->msi_enabled) pci_disable_msi(dev); #endif - pci_disable_device(dev); + if (pci_is_enabled(dev)) + pci_disable_device(dev); pci_write_config_word(dev, PCI_COMMAND, 0); diff --git a/drivers/xen/xen-stub.c b/drivers/xen/xen-stub.c index d85e411cbf89..bbef194c5b01 100644 --- a/drivers/xen/xen-stub.c +++ b/drivers/xen/xen-stub.c @@ -25,7 +25,6 @@ #include <linux/export.h> #include <linux/types.h> #include <linux/acpi.h> -#include <acpi/acpi_drivers.h> #include <xen/acpi.h> #ifdef CONFIG_ACPI diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index ec0abb6df3c3..71679875f056 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -75,6 +75,7 @@ static struct file_system_type xenfs_type = { .mount = xenfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("xenfs"); static int __init xenfs_init(void) { diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 91dad63e5a2d..2756dcd5de6e 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -365,3 +365,4 @@ struct file_system_type v9fs_fs_type = { .owner = THIS_MODULE, .fs_flags = FS_RENAME_DOES_D_MOVE, }; +MODULE_ALIAS_FS("9p"); diff --git a/fs/adfs/super.c b/fs/adfs/super.c index d57122935793..0ff4bae2c2a2 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -524,6 +524,7 @@ static struct file_system_type adfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("adfs"); static int __init init_adfs_fs(void) { diff --git a/fs/affs/super.c b/fs/affs/super.c index b84dc7352502..45161a832bbc 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -622,6 +622,7 @@ static struct 
file_system_type affs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("affs"); static int __init init_affs_fs(void) { diff --git a/fs/afs/super.c b/fs/afs/super.c index 7c31ec399575..c4861557e385 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -45,6 +45,7 @@ struct file_system_type afs_fs_type = { .kill_sb = afs_kill_super, .fs_flags = 0, }; +MODULE_ALIAS_FS("afs"); static const struct super_operations afs_super_ops = { .statfs = afs_statfs, diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index cddc74b9cdb2..b3db517e89ec 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -26,6 +26,7 @@ static struct file_system_type autofs_fs_type = { .mount = autofs_mount, .kill_sb = autofs4_kill_sb, }; +MODULE_ALIAS_FS("autofs"); static int __init init_autofs4_fs(void) { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index c8f4e25eb9e2..8615ee89ab55 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -951,6 +951,7 @@ static struct file_system_type befs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("befs"); static int __init init_befs_fs(void) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 737aaa3f7090..5e376bb93419 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -473,6 +473,7 @@ static struct file_system_type bfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("bfs"); static int __init init_bfs_fs(void) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3939829f6c5c..86af964c2425 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1137,6 +1137,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, goto whole; if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) goto whole; + return 0; } /* Do not dump I/O mapped devices or special mappings */ diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index fecbbf3f8ff2..751df5e4f61a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -720,6 +720,7 
@@ static struct file_system_type bm_fs_type = { .mount = bm_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("binfmt_misc"); static int __init init_misc_binfmt(void) { @@ -1428,8 +1428,6 @@ void bio_endio(struct bio *bio, int error) else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; - trace_block_bio_complete(bio, error); - if (bio->bi_end_io) bio->bi_end_io(bio, error); } diff --git a/fs/block_dev.c b/fs/block_dev.c index aea605c98ba6..aae187a7f94a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -551,6 +551,7 @@ struct block_device *bdgrab(struct block_device *bdev) ihold(bdev->bd_inode); return bdev; } +EXPORT_SYMBOL(bdgrab); long nr_blockdev_pages(void) { diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ecd25a1b4e51..ca9d8f1a3bb6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (tree_mod_dont_log(fs_info, NULL)) return 0; + __tree_mod_log_free_eb(fs_info, old_root); + ret = tree_mod_alloc(fs_info, flags, &tm); if (ret < 0) goto out; @@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) static noinline void tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, - unsigned long src_offset, int nr_items) + unsigned long src_offset, int nr_items, int log_removal) { int ret; int i; @@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, } for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, src, - i + src_offset, - MOD_LOG_KEY_REMOVE); - BUG_ON(ret < 0); + if (log_removal) { + ret = tree_mod_log_insert_key_locked(fs_info, src, + i + src_offset, + MOD_LOG_KEY_REMOVE); + BUG_ON(ret < 0); + } ret = tree_mod_log_insert_key_locked(fs_info, dst, i + dst_offset, MOD_LOG_KEY_ADD); @@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, 
ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); /* -ENOMEM */ } - tree_mod_log_free_eb(root->fs_info, buf); clean_tree_block(trans, root, buf); *last_ref = 1; } @@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); + tree_mod_log_free_eb(root->fs_info, buf); btrfs_free_tree_block(trans, root, buf, parent_start, last_ref); } @@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } - tree_mod_log_free_eb(root->fs_info, root->node); tree_mod_log_set_root_pointer(root, child); rcu_assign_pointer(root->node, child); @@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, push_items = min(src_nritems - 8, push_items); tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, - push_items); + push_items, 1); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), @@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, sizeof(struct btrfs_key_ptr)); tree_mod_log_eb_copy(root->fs_info, dst, src, 0, - src_nritems - push_items, push_items); + src_nritems - push_items, push_items, 1); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), @@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans, int mid; int ret; u32 c_nritems; + int tree_mod_log_removal = 1; c = path->nodes[level]; WARN_ON(btrfs_header_generation(c) != trans->transid); if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); + /* + * removal of root nodes has been logged by + * tree_mod_log_set_root_pointer due to locking + */ + tree_mod_log_removal = 0; if (ret) return ret; } else { @@ -3261,7 +3270,8 @@ static noinline int split_node(struct 
btrfs_trans_handle *trans, (unsigned long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); - tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); + tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid, + tree_mod_log_removal); copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(mid), diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0b278b117cbe..14fce27b4780 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -22,8 +22,9 @@ #include "disk-io.h" #include "transaction.h" -#define BTRFS_DELAYED_WRITEBACK 400 -#define BTRFS_DELAYED_BACKGROUND 100 +#define BTRFS_DELAYED_WRITEBACK 512 +#define BTRFS_DELAYED_BACKGROUND 128 +#define BTRFS_DELAYED_BATCH 16 static struct kmem_cache *delayed_node_cache; @@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node, BTRFS_DELAYED_DELETION_ITEM); } +static void finish_one_item(struct btrfs_delayed_root *delayed_root) +{ + int seq = atomic_inc_return(&delayed_root->items_seq); + if ((atomic_dec_return(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) && + waitqueue_active(&delayed_root->wait)) + wake_up(&delayed_root->wait); +} + static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) { struct rb_root *root; @@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) rb_erase(&delayed_item->rb_node, root); delayed_item->delayed_node->count--; - if (atomic_dec_return(&delayed_root->items) < - BTRFS_DELAYED_BACKGROUND && - waitqueue_active(&delayed_root->wait)) - wake_up(&delayed_root->wait); + + finish_one_item(delayed_root); } static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) @@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) delayed_node->count--; delayed_root = 
delayed_node->root->fs_info->delayed_root; - if (atomic_dec_return(&delayed_root->items) < - BTRFS_DELAYED_BACKGROUND && - waitqueue_active(&delayed_root->wait)) - wake_up(&delayed_root->wait); + finish_one_item(delayed_root); } } @@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode) btrfs_release_delayed_node(delayed_node); } -struct btrfs_async_delayed_node { - struct btrfs_root *root; - struct btrfs_delayed_node *delayed_node; +struct btrfs_async_delayed_work { + struct btrfs_delayed_root *delayed_root; + int nr; struct btrfs_work work; }; -static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) +static void btrfs_async_run_delayed_root(struct btrfs_work *work) { - struct btrfs_async_delayed_node *async_node; + struct btrfs_async_delayed_work *async_work; + struct btrfs_delayed_root *delayed_root; struct btrfs_trans_handle *trans; struct btrfs_path *path; struct btrfs_delayed_node *delayed_node = NULL; struct btrfs_root *root; struct btrfs_block_rsv *block_rsv; - int need_requeue = 0; + int total_done = 0; - async_node = container_of(work, struct btrfs_async_delayed_node, work); + async_work = container_of(work, struct btrfs_async_delayed_work, work); + delayed_root = async_work->delayed_root; path = btrfs_alloc_path(); if (!path) goto out; - path->leave_spinning = 1; - delayed_node = async_node->delayed_node; +again: + if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2) + goto free_path; + + delayed_node = btrfs_first_prepared_delayed_node(delayed_root); + if (!delayed_node) + goto free_path; + + path->leave_spinning = 1; root = delayed_node->root; trans = btrfs_join_transaction(root); if (IS_ERR(trans)) - goto free_path; + goto release_path; block_rsv = trans->block_rsv; trans->block_rsv = &root->fs_info->delayed_block_rsv; @@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) * Task1 will sleep until the transaction is commited. 
*/ mutex_lock(&delayed_node->mutex); - if (delayed_node->count) - need_requeue = 1; - else - btrfs_dequeue_delayed_node(root->fs_info->delayed_root, - delayed_node); + btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node); mutex_unlock(&delayed_node->mutex); trans->block_rsv = block_rsv; btrfs_end_transaction_dmeta(trans, root); btrfs_btree_balance_dirty_nodelay(root); + +release_path: + btrfs_release_path(path); + total_done++; + + btrfs_release_prepared_delayed_node(delayed_node); + if (async_work->nr == 0 || total_done < async_work->nr) + goto again; + free_path: btrfs_free_path(path); out: - if (need_requeue) - btrfs_requeue_work(&async_node->work); - else { - btrfs_release_prepared_delayed_node(delayed_node); - kfree(async_node); - } + wake_up(&delayed_root->wait); + kfree(async_work); } + static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, - struct btrfs_root *root, int all) + struct btrfs_root *root, int nr) { - struct btrfs_async_delayed_node *async_node; - struct btrfs_delayed_node *curr; - int count = 0; + struct btrfs_async_delayed_work *async_work; -again: - curr = btrfs_first_prepared_delayed_node(delayed_root); - if (!curr) + if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) return 0; - async_node = kmalloc(sizeof(*async_node), GFP_NOFS); - if (!async_node) { - btrfs_release_prepared_delayed_node(curr); + async_work = kmalloc(sizeof(*async_work), GFP_NOFS); + if (!async_work) return -ENOMEM; - } - - async_node->root = root; - async_node->delayed_node = curr; - - async_node->work.func = btrfs_async_run_delayed_node_done; - async_node->work.flags = 0; - btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work); - count++; - - if (all || count < 4) - goto again; + async_work->delayed_root = delayed_root; + async_work->work.func = btrfs_async_run_delayed_root; + async_work->work.flags = 0; + async_work->nr = nr; + btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work); 
return 0; } @@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root) WARN_ON(btrfs_first_delayed_node(delayed_root)); } +static int refs_newer(struct btrfs_delayed_root *delayed_root, + int seq, int count) +{ + int val = atomic_read(&delayed_root->items_seq); + + if (val < seq || val >= seq + count) + return 1; + return 0; +} + void btrfs_balance_delayed_items(struct btrfs_root *root) { struct btrfs_delayed_root *delayed_root; + int seq; delayed_root = btrfs_get_delayed_root(root); if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) return; + seq = atomic_read(&delayed_root->items_seq); + if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { int ret; - ret = btrfs_wq_run_delayed_node(delayed_root, root, 1); + DEFINE_WAIT(__wait); + + ret = btrfs_wq_run_delayed_node(delayed_root, root, 0); if (ret) return; - wait_event_interruptible_timeout( - delayed_root->wait, - (atomic_read(&delayed_root->items) < - BTRFS_DELAYED_BACKGROUND), - HZ); - return; + while (1) { + prepare_to_wait(&delayed_root->wait, &__wait, + TASK_INTERRUPTIBLE); + + if (refs_newer(delayed_root, seq, + BTRFS_DELAYED_BATCH) || + atomic_read(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND) { + break; + } + if (!signal_pending(current)) + schedule(); + else + break; + } + finish_wait(&delayed_root->wait, &__wait); } - btrfs_wq_run_delayed_node(delayed_root, root, 0); + btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH); } /* Will return 0 or -ENOMEM */ diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 78b6ad0fc669..1d5c5f7abe3e 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -43,6 +43,7 @@ struct btrfs_delayed_root { */ struct list_head prepare_list; atomic_t items; /* for delayed items */ + atomic_t items_seq; /* for delayed items */ int nodes; /* for delayed nodes */ wait_queue_head_t wait; }; @@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root( struct btrfs_delayed_root 
*delayed_root) { atomic_set(&delayed_root->items, 0); + atomic_set(&delayed_root->items_seq, 0); delayed_root->nodes = 0; spin_lock_init(&delayed_root->lock); init_waitqueue_head(&delayed_root->wait); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 02369a3c162e..6d19a0a554aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -62,7 +62,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); -static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t); static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); static int btrfs_destroy_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, @@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); + leaf = NULL; goto fail; } @@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_tree_unlock(leaf); + return root; + fail: - if (ret) - return ERR_PTR(ret); + if (leaf) { + btrfs_tree_unlock(leaf); + free_extent_buffer(leaf); + } + kfree(root); - return root; + return ERR_PTR(ret); } static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, @@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log(NULL, root); btrfs_free_log_root_tree(NULL, fs_info); } @@ -3687,7 +3693,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, return ret; } -static void 
btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t) { struct btrfs_pending_snapshot *snapshot; struct list_head splice; @@ -3700,10 +3706,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) snapshot = list_entry(splice.next, struct btrfs_pending_snapshot, list); - + snapshot->error = -ECANCELED; list_del_init(&snapshot->list); - - kfree(snapshot); } } @@ -3840,6 +3844,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, cur_trans->blocked = 1; wake_up(&root->fs_info->transaction_blocked_wait); + btrfs_evict_pending_snapshots(cur_trans); + cur_trans->blocked = 0; wake_up(&root->fs_info->transaction_wait); @@ -3849,8 +3855,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); - btrfs_destroy_pending_snapshots(cur_trans); - btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, EXTENT_DIRTY); btrfs_destroy_pinned_extent(root, @@ -3894,6 +3898,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) wake_up(&root->fs_info->transaction_blocked_wait); + btrfs_evict_pending_snapshots(t); + t->blocked = 0; smp_mb(); if (waitqueue_active(&root->fs_info->transaction_wait)) @@ -3907,8 +3913,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); - btrfs_destroy_pending_snapshots(t); - btrfs_destroy_delalloc_inodes(root); spin_lock(&root->fs_info->trans_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e074dab2d57..3d551231caba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root, cache->bytes_super += stripe_len; ret = add_excluded_extent(root, cache->key.objectid, stripe_len); - 
BUG_ON(ret); /* -ENOMEM */ + if (ret) + return ret; } for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { @@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root, ret = btrfs_rmap_block(&root->fs_info->mapping_tree, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + return ret; while (nr--) { cache->bytes_super += stripe_len; ret = add_excluded_extent(root, logical[nr], stripe_len); - BUG_ON(ret); /* -ENOMEM */ + if (ret) { + kfree(logical); + return ret; + } } kfree(logical); @@ -1467,8 +1472,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, if (ret && !insert) { err = -ENOENT; goto out; + } else if (ret) { + err = -EIO; + WARN_ON(1); + goto out; } - BUG_ON(ret); /* Corruption */ leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -4435,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) spin_lock(&sinfo->lock); spin_lock(&block_rsv->lock); - block_rsv->size = num_bytes; + block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved + sinfo->bytes_readonly + @@ -4790,14 +4798,49 @@ out_fail: * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers * so we can just reduce our inodes csum bytes and carry on. - * Otherwise we have to do the normal free thing to account for - * the case that the free side didn't free up its reserve - * because of this outstanding reservation. 
*/ - if (BTRFS_I(inode)->csum_bytes == csum_bytes) + if (BTRFS_I(inode)->csum_bytes == csum_bytes) { calc_csum_metadata_size(inode, num_bytes, 0); - else - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + } else { + u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; + u64 bytes; + + /* + * This is tricky, but first we need to figure out how much we + * free'd from any free-ers that occured during this + * reservation, so we reset ->csum_bytes to the csum_bytes + * before we dropped our lock, and then call the free for the + * number of bytes that were freed while we were trying our + * reservation. + */ + bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; + BTRFS_I(inode)->csum_bytes = csum_bytes; + to_free = calc_csum_metadata_size(inode, bytes, 0); + + + /* + * Now we need to see how much we would have freed had we not + * been making this reservation and our ->csum_bytes were not + * artificially inflated. + */ + BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; + bytes = csum_bytes - orig_csum_bytes; + bytes = calc_csum_metadata_size(inode, bytes, 0); + + /* + * Now reset ->csum_bytes to what it should be. If bytes is + * more than to_free then we would have free'd more space had we + * not had an artificially high ->csum_bytes, so we need to free + * the remainder. If bytes is the same or less then we don't + * need to do anything, the other free-ers did the correct + * thing. + */ + BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; + if (bytes > to_free) + to_free = bytes - to_free; + else + to_free = 0; + } spin_unlock(&BTRFS_I(inode)->lock); if (dropped) to_free += btrfs_calc_trans_metadata_size(root, dropped); @@ -7944,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root) * info has super bytes accounted for, otherwise we'll think * we have more space than we actually do. 
*/ - exclude_super_stripes(root, cache); + ret = exclude_super_stripes(root, cache); + if (ret) { + /* + * We may have excluded something, so call this just in + * case. + */ + free_excluded_extents(root, cache); + kfree(cache->free_space_ctl); + kfree(cache); + goto error; + } /* * check for two cases, either we are full, and therefore @@ -8086,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; - exclude_super_stripes(root, cache); + ret = exclude_super_stripes(root, cache); + if (ret) { + /* + * We may have excluded something, so call this just in + * case. + */ + free_excluded_extents(root, cache); + kfree(cache->free_space_ctl); + kfree(cache); + return ret; + } add_new_free_space(cache, root->fs_info, chunk_offset, chunk_offset + size); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f173c5af6461..cdee391fc7bf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) GFP_NOFS); } +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + clear_page_dirty_for_io(page); + page_cache_release(page); + index++; + } + return 0; +} + +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + account_page_redirty(page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + 
} + return 0; +} + /* * helper function to set both pages and extents in the tree writeback */ diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6068a1985560..258c92156857 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ec160202be3e..c4628a201cb3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); csums_in_item /= csum_size; - if (csum_offset >= csums_in_item) { + if (csum_offset == csums_in_item) { ret = -EFBIG; goto fail; + } else if (csum_offset > csums_in_item) { + goto fail; } } item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); @@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, return -ENOMEM; sector_sum = sums->sums; - trans->adding_csums = 1; again: next_offset = (u64)-1; found_next = 0; @@ -899,7 +900,6 @@ next_sector: goto again; } out: - trans->adding_csums = 0; btrfs_free_path(path); return ret; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index af1d0605a5c1..ade03e6f7bd2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -591,6 +591,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, } compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); clear_bit(EXTENT_FLAG_PINNED, &em->flags); + clear_bit(EXTENT_FLAG_LOGGING, &flags); remove_extent_mapping(em_tree, 
em); if (no_splits) goto next; @@ -2141,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode, { struct inode *inode = file_inode(file); struct extent_state *cached_state = NULL; + struct btrfs_root *root = BTRFS_I(inode)->root; u64 cur_offset; u64 last_byte; u64 alloc_start; @@ -2168,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode, ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); if (ret) return ret; + if (root->fs_info->quota_enabled) { + ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start); + if (ret) + goto out_reserve_fail; + } /* * wait for ordered IO before we have any locks. We'll loop again @@ -2271,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode, &cached_state, GFP_NOFS); out: mutex_unlock(&inode->i_mutex); + if (root->fs_info->quota_enabled) + btrfs_qgroup_free(root, alloc_end - alloc_start); +out_reserve_fail: /* Let go of our reservation. */ btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c226daefd65d..09c58a35b429 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; int compress_type = root->fs_info->compress_type; + int redirty = 0; /* if this is a small write inside eof, kick off a defrag */ if ((end - start + 1) < 16 * 1024 && @@ -415,6 +416,17 @@ again: if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; + /* + * we need to call clear_page_dirty_for_io on each + * page in the range. Otherwise applications with the file + * mmap'd can wander in and change the page contents while + * we are compressing them. + * + * If the compression fails for any reason, we set the pages + * dirty again later on. 
+ */ + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, total_compressed, pages, @@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } + if (redirty) + extent_range_redirty_for_io(inode, start, end); add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; @@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sum; list_for_each_entry(sum, list, list) { + trans->adding_csums = 1; btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); + trans->adding_csums = 0; } return 0; } @@ -2312,6 +2328,7 @@ again: key.type = BTRFS_EXTENT_DATA_KEY; key.offset = start; + path->leave_spinning = 1; if (merge) { struct btrfs_file_extent_item *fi; u64 extent_len; @@ -2368,6 +2385,7 @@ again: btrfs_mark_buffer_dirty(leaf); inode_add_bytes(inode, len); + btrfs_release_path(path); ret = btrfs_inc_extent_ref(trans, root, new->bytenr, new->disk_len, 0, @@ -2381,6 +2399,7 @@ again: ret = 1; out_free_path: btrfs_release_path(path); + path->leave_spinning = 0; btrfs_end_transaction(trans, root); out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, @@ -3676,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, * 1 for the dir item * 1 for the dir index * 1 for the inode ref - * 1 for the inode ref in the tree log - * 2 for the dir entries in the log * 1 for the inode */ - trans = btrfs_start_transaction(root, 8); + trans = btrfs_start_transaction(root, 5); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; @@ -8124,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * inodes. 
So 5 * 2 is 10, plus 1 for the new link, so 11 total items * should cover the worst case number of items we'll modify. */ - trans = btrfs_start_transaction(root, 20); + trans = btrfs_start_transaction(root, 11); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_notrans; @@ -8502,6 +8519,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_key ins; u64 cur_offset = start; u64 i_size; + u64 cur_bytes; int ret = 0; bool own_trans = true; @@ -8516,8 +8534,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, } } - ret = btrfs_reserve_extent(trans, root, - min(num_bytes, 256ULL * 1024 * 1024), + cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); + cur_bytes = max(cur_bytes, min_size); + ret = btrfs_reserve_extent(trans, root, cur_bytes, min_size, 0, *alloc_hint, &ins, 1); if (ret) { if (own_trans) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c83086fdda05..2c02310ff2d9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -527,6 +527,8 @@ fail: if (async_transid) { *async_transid = trans->transid; err = btrfs_commit_transaction_async(trans, root, 1); + if (err) + err = btrfs_commit_transaction(trans, root); } else { err = btrfs_commit_transaction(trans, root); } @@ -592,16 +594,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, *async_transid = trans->transid; ret = btrfs_commit_transaction_async(trans, root->fs_info->extent_root, 1); + if (ret) + ret = btrfs_commit_transaction(trans, root); } else { ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); } - if (ret) { - /* cleanup_transaction has freed this for us */ - if (trans->aborted) - pending_snapshot = NULL; + if (ret) goto fail; - } ret = pending_snapshot->error; if (ret) @@ -2245,13 +2245,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) if (ret) return ret; - if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, - 1)) { - pr_info("btrfs: dev 
add/delete/balance/replace/resize operation in progress\n"); - mnt_drop_write_file(file); - return -EINVAL; - } - if (btrfs_root_readonly(root)) { ret = -EROFS; goto out; @@ -2306,7 +2299,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EINVAL; } out: - atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); mnt_drop_write_file(file); return ret; } diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index ca52681e5f40..b81e0e9a4894 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -26,7 +26,6 @@ void btrfs_tree_lock(struct extent_buffer *eb); void btrfs_tree_unlock(struct extent_buffer *eb); -int btrfs_try_spin_lock(struct extent_buffer *eb); void btrfs_tree_read_lock(struct extent_buffer *eb); void btrfs_tree_read_unlock(struct extent_buffer *eb); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dc08d77b717e..005c45db699e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&works); + mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->fs_info->ordered_extent_lock); list_splice_init(&root->fs_info->ordered_extents, &splice); while (!list_empty(&splice)) { @@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) cond_resched(); } + mutex_unlock(&root->fs_info->ordered_operations_mutex); } /* diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index aee4b1cc3d98..b44124dd2370 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, sgn > 0 ? 
node->seq - 1 : node->seq, &roots); if (ret < 0) - goto out; + return ret; spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; @@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, ret = 0; unlock: spin_unlock(&fs_info->qgroup_lock); -out: ulist_free(roots); ulist_free(tmp); @@ -1525,21 +1524,23 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && qg->reserved + qg->rfer + num_bytes > - qg->max_rfer) + qg->max_rfer) { ret = -EDQUOT; + goto out; + } if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && qg->reserved + qg->excl + num_bytes > - qg->max_excl) + qg->max_excl) { ret = -EDQUOT; + goto out; + } list_for_each_entry(glist, &qg->groups, next_group) { ulist_add(ulist, glist->group->qgroupid, (uintptr_t)glist->group, GFP_ATOMIC); } } - if (ret) - goto out; /* * no limits exceeded, now record the reservation into all qgroups diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 50695dc5e2ab..b67171e6d688 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1269,6 +1269,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) } spin_unlock(&rc->reloc_root_tree.lock); + if (!node) + return 0; BUG_ON((struct btrfs_root *)node->data != root); if (!del) { @@ -2238,13 +2240,28 @@ again: } static noinline_for_stack +void free_reloc_roots(struct list_head *list) +{ + struct btrfs_root *reloc_root; + + while (!list_empty(list)) { + reloc_root = list_entry(list->next, struct btrfs_root, + root_list); + __update_reloc_root(reloc_root, 1); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + kfree(reloc_root); + } +} + +static noinline_for_stack int merge_reloc_roots(struct reloc_control *rc) { struct btrfs_root *root; struct btrfs_root *reloc_root; LIST_HEAD(reloc_roots); int found = 0; - int ret; + int ret = 0; again: root = rc->extent_root; @@ -2270,20 +2287,33 @@ again: 
BUG_ON(root->reloc_root != reloc_root); ret = merge_reloc_root(rc, root); - BUG_ON(ret); + if (ret) + goto out; } else { list_del_init(&reloc_root->root_list); } ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); - BUG_ON(ret < 0); + if (ret < 0) { + if (list_empty(&reloc_root->root_list)) + list_add_tail(&reloc_root->root_list, + &reloc_roots); + goto out; + } } if (found) { found = 0; goto again; } +out: + if (ret) { + btrfs_std_error(root->fs_info, ret); + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + } + BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); - return 0; + return ret; } static void free_block_list(struct rb_root *blocks) @@ -2818,8 +2848,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, int err = 0; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out_path; + } rb_node = rb_first(blocks); while (rb_node) { @@ -2858,10 +2890,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, rb_node = rb_next(rb_node); } out: - free_block_list(blocks); err = finish_pending_nodes(trans, rc, path, err); btrfs_free_path(path); +out_path: + free_block_list(blocks); return err; } @@ -3698,7 +3731,15 @@ int prepare_to_relocate(struct reloc_control *rc) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + /* + * extent tree is not a ref_cow tree and has no reloc_root to + * cleanup. And callers are responsible to free the above + * block rsv. 
+ */ + return PTR_ERR(trans); + } btrfs_commit_transaction(trans, rc->extent_root); return 0; } @@ -3730,7 +3771,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) while (1) { progress++; trans = btrfs_start_transaction(rc->extent_root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + break; + } restart: if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans, rc->extent_root); @@ -4264,14 +4309,9 @@ int btrfs_recover_relocation(struct btrfs_root *root) out_free: kfree(rc); out: - while (!list_empty(&reloc_roots)) { - reloc_root = list_entry(reloc_roots.next, - struct btrfs_root, root_list); - list_del(&reloc_root->root_list); - free_extent_buffer(reloc_root->node); - free_extent_buffer(reloc_root->commit_root); - kfree(reloc_root); - } + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + btrfs_free_path(path); if (err == 0) { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 53c3501fa4ca..85e072b956d5 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) eb = path->nodes[0]; ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); item_size = btrfs_item_size_nr(eb, path->slots[0]); - btrfs_release_path(path); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { do { @@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) ret < 0 ? -1 : ref_level, ret < 0 ? 
-1 : ref_root); } while (ret != 1); + btrfs_release_path(path); } else { + btrfs_release_path(path); swarn.path = path; swarn.dev = dev; iterate_extent_inodes(fs_info, found_key.objectid, diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f7a8b861058b..c85e7c6b4598 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx, found_key.type != key.type) { key.offset += right_len; break; - } else { - if (found_key.offset != key.offset + right_len) { - /* Should really not happen */ - ret = -EIO; - goto out; - } + } + if (found_key.offset != key.offset + right_len) { + ret = 0; + goto out; } key = found_key; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 68a29a1ea068..f6b88595f858 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1558,6 +1558,7 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("btrfs"); /* * used by btrfsctl to scan devices when no FS is mounted diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e52da6fb1165..50767bbaad6c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -625,14 +625,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; - /* - * the same root has to be passed to start_transaction and - * end_transaction. Subvolume quota depends on this. - */ - WARN_ON(trans->root != root); if (trans->qgroup_reserved) { - btrfs_qgroup_free(root, trans->qgroup_reserved); + /* + * the same root has to be passed here between start_transaction + * and end_transaction. Subvolume quota depends on this. + */ + btrfs_qgroup_free(trans->root, trans->qgroup_reserved); trans->qgroup_reserved = 0; } @@ -1052,7 +1051,12 @@ int btrfs_defrag_root(struct btrfs_root *root) /* * new snapshots need to be created at a very specific time in the - * transaction commit. 
This does the actual creation + * transaction commit. This does the actual creation. + * + * Note: + * If the error which may affect the commitment of the current transaction + * happens, we should return the error number. If the error which just affect + * the creation of the pending snapshots, just return 0. */ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -1071,7 +1075,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct extent_buffer *tmp; struct extent_buffer *old; struct timespec cur_time = CURRENT_TIME; - int ret; + int ret = 0; u64 to_reserve = 0; u64 index = 0; u64 objectid; @@ -1080,40 +1084,36 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { - ret = pending->error = -ENOMEM; - return ret; + pending->error = -ENOMEM; + return 0; } new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { - ret = pending->error = -ENOMEM; + pending->error = -ENOMEM; goto root_item_alloc_fail; } - ret = btrfs_find_free_objectid(tree_root, &objectid); - if (ret) { - pending->error = ret; + pending->error = btrfs_find_free_objectid(tree_root, &objectid); + if (pending->error) goto no_free_objectid; - } btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); if (to_reserve > 0) { - ret = btrfs_block_rsv_add(root, &pending->block_rsv, - to_reserve, - BTRFS_RESERVE_NO_FLUSH); - if (ret) { - pending->error = ret; + pending->error = btrfs_block_rsv_add(root, + &pending->block_rsv, + to_reserve, + BTRFS_RESERVE_NO_FLUSH); + if (pending->error) goto no_free_objectid; - } } - ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, - objectid, pending->inherit); - if (ret) { - pending->error = ret; + pending->error = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (pending->error) goto no_free_objectid; - } key.objectid = 
objectid; key.offset = (u64)-1; @@ -1141,7 +1141,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, dentry->d_name.len, 0); if (dir_item != NULL && !IS_ERR(dir_item)) { pending->error = -EEXIST; - goto fail; + goto dir_item_existed; } else if (IS_ERR(dir_item)) { ret = PTR_ERR(dir_item); btrfs_abort_transaction(trans, root, ret); @@ -1272,6 +1272,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) btrfs_abort_transaction(trans, root, ret); fail: + pending->error = ret; +dir_item_existed: trans->block_rsv = rsv; trans->bytes_reserved = 0; no_free_objectid: @@ -1287,12 +1289,17 @@ root_item_alloc_fail: static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { - struct btrfs_pending_snapshot *pending; + struct btrfs_pending_snapshot *pending, *next; struct list_head *head = &trans->transaction->pending_snapshots; + int ret = 0; - list_for_each_entry(pending, head, list) - create_pending_snapshot(trans, fs_info, pending); - return 0; + list_for_each_entry_safe(pending, next, head, list) { + list_del(&pending->list); + ret = create_pending_snapshot(trans, fs_info, pending); + if (ret) + break; + } + return ret; } static void update_super_roots(struct btrfs_root *root) @@ -1448,6 +1455,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, root, err); spin_lock(&root->fs_info->trans_lock); + + if (list_empty(&cur_trans->list)) { + spin_unlock(&root->fs_info->trans_lock); + btrfs_end_transaction(trans, root); + return; + } + list_del_init(&cur_trans->list); if (cur_trans == root->fs_info->running_transaction) { root->fs_info->trans_no_join = 1; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c7ef569eb22a..ef96381569a4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -317,6 +317,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, unsigned long src_ptr; 
unsigned long dst_ptr; int overwrite_root = 0; + bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) overwrite_root = 1; @@ -326,6 +327,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, /* look for the key in the destination tree */ ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + if (ret == 0) { char *src_copy; char *dst_copy; @@ -367,6 +371,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } + /* + * We need to load the old nbytes into the inode so when we + * replay the extents we've logged we get the right nbytes. + */ + if (inode_item) { + struct btrfs_inode_item *item; + u64 nbytes; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nbytes = btrfs_inode_nbytes(path->nodes[0], item); + item = btrfs_item_ptr(eb, slot, + struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, nbytes); + } + } else if (inode_item) { + struct btrfs_inode_item *item; + + /* + * New inode, set nbytes to 0 so that the nbytes comes out + * properly when we replay the extents. 
+ */ + item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, 0); } insert: btrfs_release_path(path); @@ -486,7 +514,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, int found_type; u64 extent_end; u64 start = key->offset; - u64 saved_nbytes; + u64 nbytes = 0; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -496,10 +524,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, found_type = btrfs_file_extent_type(eb, item); if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) - extent_end = start + btrfs_file_extent_num_bytes(eb, item); - else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + nbytes = btrfs_file_extent_num_bytes(eb, item); + extent_end = start + nbytes; + + /* + * We don't add to the inodes nbytes if we are prealloc or a + * hole. + */ + if (btrfs_file_extent_disk_bytenr(eb, item) == 0) + nbytes = 0; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); + nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = ALIGN(start + size, root->sectorsize); } else { ret = 0; @@ -548,7 +585,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); BUG_ON(ret); @@ -635,7 +671,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - inode_set_bytes(inode, saved_nbytes); + inode_add_bytes(inode, nbytes); ret = btrfs_update_inode(trans, root, inode); out: if (inode) @@ -1382,7 +1418,10 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { - btrfs_inc_nlink(inode); + if (!inode->i_nlink) + set_nlink(inode, 1); + else + 
btrfs_inc_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 35bb2d4ed29f..2854c824ab64 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -684,6 +684,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) __btrfs_close_devices(fs_devices); free_fs_devices(fs_devices); } + /* + * Wait for rcu kworkers under __btrfs_close_devices + * to finish all blkdev_puts so device is really + * free when umount is done. + */ + rcu_barrier(); return ret; } @@ -2379,7 +2385,11 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, return ret; trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + btrfs_std_error(root->fs_info, ret); + return ret; + } lock_chunks(root); @@ -3050,7 +3060,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) unset_balance_control(fs_info); ret = del_balance_item(fs_info->tree_root); - BUG_ON(ret); + if (ret) + btrfs_std_error(fs_info, ret); atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } @@ -3230,6 +3241,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl, update_ioctl_balance_args(fs_info, 0, bargs); } + if ((ret && ret != -ECANCELED && ret != -ENOSPC) || + balance_need_close(fs_info)) { + __cancel_balance(fs_info); + } + wake_up(&fs_info->balance_wait_q); return ret; @@ -4919,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, em = lookup_extent_mapping(em_tree, chunk_start, 1); read_unlock(&em_tree->lock); - BUG_ON(!em || em->start != chunk_start); + if (!em) { + printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", + chunk_start); + return -EIO; + } + + if (em->start != chunk_start) { + printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", + em->start, chunk_start); + free_extent_map(em); + return -EIO; + } map = (struct map_lookup *)em->bdev; length = em->len; diff --git 
a/fs/ceph/super.c b/fs/ceph/super.c index 9fe17c6c2876..6ddc0bca56b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -952,6 +952,7 @@ static struct file_system_type ceph_fs_type = { .kill_sb = ceph_kill_sb, .fs_flags = FS_RENAME_DOES_D_MOVE, }; +MODULE_ALIAS_FS("ceph"); #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce34e0bc..1d36db114772 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } - /* mechlistMIC */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - /* Check if we have reached the end of the blob, but with - no mechListMic (e.g. NTLMSSP instead of KRB5) */ - if (ctx.error == ASN1_ERR_DEC_EMPTY) - goto decode_negtoken_exit; - cFYI(1, "Error decoding last part negTokenInit exit3"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - /* tag = 3 indicating mechListMIC */ - cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* sequence */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit5"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - } - - /* sequence of */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit 7"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* general string */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit9"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) - || (tag != ASN1_GENSTR)) { - cFYI(1, "Exit10 cls = %d con = %d tag = 
%d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - cFYI(1, "Need to call asn1_octets_decode() function for %s", - ctx.pointer); /* is this UTF-8 or ASCII? */ -decode_negtoken_exit: + /* + * We currently ignore anything at the end of the SPNEGO blob after + * the mechTypes have been parsed, since none of that info is + * used at the moment. + */ return 1; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 1a052c0eee8e..345fc89c4286 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -91,6 +91,30 @@ struct workqueue_struct *cifsiod_wq; __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; #endif +/* + * Bumps refcount for cifs super block. + * Note that it should be only called if a referece to VFS super block is + * already held, e.g. in open-type syscalls context. Otherwise it can race with + * atomic_dec_and_test in deactivate_locked_super. + */ +void +cifs_sb_active(struct super_block *sb) +{ + struct cifs_sb_info *server = CIFS_SB(sb); + + if (atomic_inc_return(&server->active) == 1) + atomic_inc(&sb->s_active); +} + +void +cifs_sb_deactive(struct super_block *sb) +{ + struct cifs_sb_info *server = CIFS_SB(sb); + + if (atomic_dec_and_test(&server->active)) + deactivate_super(sb); +} + static int cifs_read_super(struct super_block *sb) { @@ -777,6 +801,7 @@ struct file_system_type cifs_fs_type = { .kill_sb = cifs_kill_sb, /* .fs_flags */ }; +MODULE_ALIAS_FS("cifs"); const struct inode_operations cifs_dir_inode_ops = { .create = cifs_create, .atomic_open = cifs_atomic_open, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 7163419cecd9..0e32c3446ce9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -41,6 +41,10 @@ extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; +/* Functions related to super block operations */ +extern void cifs_sb_active(struct super_block *sb); +extern void cifs_sb_deactive(struct super_block 
*sb); + /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 7353bc5d73d7..8e2e799e7a24 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1909,12 +1909,12 @@ cifs_writev_requeue(struct cifs_writedata *wdata) } while (rc == -EAGAIN); for (i = 0; i < wdata->nr_pages; i++) { + unlock_page(wdata->pages[i]); if (rc != 0) { SetPageError(wdata->pages[i]); end_page_writeback(wdata->pages[i]); page_cache_release(wdata->pages[i]); } - unlock_page(wdata->pages[i]); } mapping_set_error(inode->i_mapping, rc); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 54125e04fd0c..21b3a291c327 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -97,7 +97,7 @@ enum { Opt_user, Opt_pass, Opt_ip, Opt_unc, Opt_domain, Opt_srcaddr, Opt_prefixpath, - Opt_iocharset, Opt_sockopt, + Opt_iocharset, Opt_netbiosname, Opt_servern, Opt_ver, Opt_vers, Opt_sec, Opt_cache, @@ -202,7 +202,6 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_srcaddr, "srcaddr=%s" }, { Opt_prefixpath, "prefixpath=%s" }, { Opt_iocharset, "iocharset=%s" }, - { Opt_sockopt, "sockopt=%s" }, { Opt_netbiosname, "netbiosname=%s" }, { Opt_servern, "servern=%s" }, { Opt_ver, "ver=%s" }, @@ -1576,14 +1575,24 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } break; case Opt_blank_pass: - vol->password = NULL; - break; - case Opt_pass: /* passwords have to be handled differently * to allow the character used for deliminator * to be passed within them */ + /* + * Check if this is a case where the password + * starts with a delimiter + */ + tmp_end = strchr(data, '='); + tmp_end++; + if (!(tmp_end < end && tmp_end[1] == delim)) { + /* No it is not. Set the password to NULL */ + vol->password = NULL; + break; + } + /* Yes it is. 
Drop down to Opt_pass below.*/ + case Opt_pass: /* Obtain the value string */ value = strchr(data, '='); value++; @@ -1752,19 +1761,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, */ cFYI(1, "iocharset set to %s", string); break; - case Opt_sockopt: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - - if (strnicmp(string, "TCP_NODELAY", 11) == 0) { - printk(KERN_WARNING "CIFS: the " - "sockopt=TCP_NODELAY option has been " - "deprecated and will be removed " - "in 3.9\n"); - vol->sockopt_tcp_nodelay = 1; - } - break; case Opt_netbiosname: string = match_strdup(args); if (string == NULL) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8c0d85577314..7a0dd99e4507 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -300,6 +300,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_WORK(&cfile->oplock_break, cifs_oplock_break); mutex_init(&cfile->fh_mutex); + cifs_sb_active(inode->i_sb); + /* * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. 
@@ -349,7 +351,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); struct TCP_Server_Info *server = tcon->ses->server; struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct super_block *sb = inode->i_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsLockInfo *li, *tmp; struct cifs_fid fid; struct cifs_pending_open open; @@ -414,6 +417,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) cifs_put_tlink(cifs_file->tlink); dput(cifs_file->dentry); + cifs_sb_deactive(sb); kfree(cifs_file); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 83f2606c76d0..20887bf63121 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -995,6 +995,15 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, return PTR_ERR(tlink); tcon = tlink_tcon(tlink); + /* + * We cannot rename the file if the server doesn't support + * CAP_INFOLEVEL_PASSTHRU + */ + if (!(tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)) { + rc = -EBUSY; + goto out; + } + rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, &netfid, &oplock, NULL, cifs_sb->local_nls, @@ -1023,7 +1032,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, current->tgid); /* although we would like to mark the file hidden if that fails we will still try to rename it */ - if (rc != 0) + if (!rc) cifsInode->cifsAttrs = dosattr; else dosattr = origattr; /* since not able to change them */ @@ -1034,7 +1043,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc != 0) { - rc = -ETXTBSY; + rc = -EBUSY; goto undo_setattr; } @@ -1053,7 +1062,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, if (rc == -ENOENT) rc = 0; else if (rc != 0) { - rc = -ETXTBSY; + rc = -EBUSY; goto undo_rename; } cifsInode->delete_pending = 
true; @@ -1160,15 +1169,13 @@ psx_del_no_retry: cifs_drop_nlink(inode); } else if (rc == -ENOENT) { d_drop(dentry); - } else if (rc == -ETXTBSY) { + } else if (rc == -EBUSY) { if (server->ops->rename_pending_delete) { rc = server->ops->rename_pending_delete(full_path, dentry, xid); if (rc == 0) cifs_drop_nlink(inode); } - if (rc == -ETXTBSY) - rc = -EBUSY; } else if ((rc == -EACCES) && (dosattr == 0) && inode) { attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (attrs == NULL) { @@ -1509,7 +1516,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, * source. Note that cross directory moves do not work with * rename by filehandle to various Windows servers. */ - if (rc == 0 || rc != -ETXTBSY) + if (rc == 0 || rc != -EBUSY) goto do_rename_exit; /* open-file renames don't work across directories */ diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index a82bc51fdc82..c0b25b28be6c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -62,7 +62,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRdiffdevice, -EXDEV}, {ERRnofiles, -ENOENT}, {ERRwriteprot, -EROFS}, - {ERRbadshare, -ETXTBSY}, + {ERRbadshare, -EBUSY}, {ERRlock, -EACCES}, {ERRunsup, -EINVAL}, {ERRnosuchshare, -ENXIO}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c9c7aa7ed966..bceffe7b8f8d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -744,4 +744,5 @@ struct smb_version_values smb30_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, + .oplock_read = SMB2_OPLOCK_LEVEL_II, }; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index dada9d0abede..4dcc0d81a7aa 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -329,4 +329,5 @@ struct file_system_type coda_fs_type = { .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("coda"); diff --git a/fs/compat.c b/fs/compat.c index fe40fde29111..d487985dd0ea 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -558,6 +558,10 @@ ssize_t 
compat_rw_copy_check_uvector(int type, } *ret_pointer = iov; + ret = -EFAULT; + if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + goto out; + /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an @@ -1080,17 +1084,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (!file->f_op) goto out; - ret = -EFAULT; - if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) - goto out; - - tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, + ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, UIO_FASTIOV, iovstack, &iov); - if (tot_len == 0) { - ret = 0; + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index aee0a7ebbd8e..7f26c3cf75ae 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -114,6 +114,7 @@ static struct file_system_type configfs_fs_type = { .mount = configfs_do_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("configfs"); struct dentry *configfs_pin_fs(void) { diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 3ceb9ec976e1..35b1c7bd18b7 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -573,6 +573,7 @@ static struct file_system_type cramfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("cramfs"); static int __init init_cramfs_fs(void) { diff --git a/fs/dcache.c b/fs/dcache.c index fbfae008ba44..e8bc3420d63e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path, bool slash = false; int error = 0; - br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path, if (!error && !slash) error = prepend(buffer, buflen, "/", 1); -out: - br_read_unlock(&vfsmount_lock); return error; 
global_root: @@ -2590,7 +2587,7 @@ global_root: error = prepend(buffer, buflen, "/", 1); if (!error) error = is_mounted(vfsmnt) ? 1 : 2; - goto out; + return error; } /** @@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error < 0) return ERR_PTR(error); @@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = prepend_path(path, &root, &res, &buflen); write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error > 1) error = -EINVAL; @@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = path_with_deleted(path, &root, &res, &buflen); + write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error < 0) res = ERR_PTR(error); - write_sequnlock(&rename_lock); path_put(&root); return res; } @@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd(current->fs, &root, &pwd); error = -ENOENT; + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; @@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error < 0) goto out; @@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); } out: diff --git 
a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 0c4f80b447fb..4888cb3fdef7 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -299,6 +299,7 @@ static struct file_system_type debug_fs_type = { .mount = debug_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("debugfs"); static struct dentry *__create_file(const char *name, umode_t mode, struct dentry *parent, void *data, diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 4f5ad246582f..d0ccd2fd79eb 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -52,8 +52,8 @@ #include <linux/mutex.h> #include <linux/sctp.h> #include <linux/slab.h> +#include <linux/sctp.h> #include <net/sctp/sctp.h> -#include <net/sctp/user.h> #include <net/ipv6.h> #include "dlm_internal.h" diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index e15ef38c24fa..434aa313f077 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -12,3 +12,11 @@ config ECRYPT_FS To compile this file system support as a module, choose M here: the module will be called ecryptfs. + +config ECRYPT_FS_MESSAGING + bool "Enable notifications for userspace key wrap/unwrap" + depends on ECRYPT_FS + help + Enables the /dev/ecryptfs entry for use by ecryptfsd. This allows + for userspace to wrap/unwrap file encryption keys by other + backends, like OpenSSL. 
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 2cc9ee4ad2eb..49678a69947d 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -1,7 +1,10 @@ # -# Makefile for the Linux 2.6 eCryptfs +# Makefile for the Linux eCryptfs # obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o +ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \ + crypto.o keystore.o kthread.o debug.o + +ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index a7b0c2dfb3db..d5c25db4398f 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -301,17 +301,14 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, while (size > 0 && i < sg_size) { pg = virt_to_page(addr); offset = offset_in_page(addr); - if (sg) - sg_set_page(&sg[i], pg, 0, offset); + sg_set_page(&sg[i], pg, 0, offset); remainder_of_page = PAGE_CACHE_SIZE - offset; if (size >= remainder_of_page) { - if (sg) - sg[i].length = remainder_of_page; + sg[i].length = remainder_of_page; addr += remainder_of_page; size -= remainder_of_page; } else { - if (sg) - sg[i].length = size; + sg[i].length = size; addr += size; size = 0; } diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 1b5d9af937df..bf12ba5dd223 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -45,14 +45,12 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry; - struct vfsmount *lower_mnt; int rc = 1; if (flags & LOOKUP_RCU) return -ECHILD; lower_dentry = ecryptfs_dentry_to_lower(dentry); - lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); diff --git a/fs/ecryptfs/ecryptfs_kernel.h 
b/fs/ecryptfs/ecryptfs_kernel.h index 7e2c6f5d7985..dd299b389d4e 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -172,6 +172,19 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24 #define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32) +#ifdef CONFIG_ECRYPT_FS_MESSAGING +# define ECRYPTFS_VERSIONING_MASK_MESSAGING (ECRYPTFS_VERSIONING_DEVMISC \ + | ECRYPTFS_VERSIONING_PUBKEY) +#else +# define ECRYPTFS_VERSIONING_MASK_MESSAGING 0 +#endif + +#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ + | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ + | ECRYPTFS_VERSIONING_XATTR \ + | ECRYPTFS_VERSIONING_MULTKEY \ + | ECRYPTFS_VERSIONING_MASK_MESSAGING \ + | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) struct ecryptfs_key_sig { struct list_head crypt_stat_list; char keysig[ECRYPTFS_SIG_SIZE_HEX + 1]; @@ -399,7 +412,9 @@ struct ecryptfs_daemon { struct hlist_node euid_chain; }; +#ifdef CONFIG_ECRYPT_FS_MESSAGING extern struct mutex ecryptfs_daemon_hash_mux; +#endif static inline size_t ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) @@ -610,6 +625,7 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); +#ifdef CONFIG_ECRYPT_FS_MESSAGING int ecryptfs_process_response(struct ecryptfs_daemon *daemon, struct ecryptfs_message *msg, u32 seq); int ecryptfs_send_message(char *data, int data_len, @@ -618,6 +634,24 @@ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, struct ecryptfs_message **emsg); int ecryptfs_init_messaging(void); void ecryptfs_release_messaging(void); +#else +static inline int ecryptfs_init_messaging(void) +{ + return 0; +} +static inline void ecryptfs_release_messaging(void) +{ } +static inline int ecryptfs_send_message(char *data, int data_len, + struct ecryptfs_msg_ctx **msg_ctx) +{ + return 
-ENOTCONN; +} +static inline int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, + struct ecryptfs_message **emsg) +{ + return -ENOMSG; +} +#endif void ecryptfs_write_header_metadata(char *virt, @@ -655,12 +689,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); -int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); -int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, size_t *length_size); int ecryptfs_write_packet_length(char *dest, size_t size, size_t *packet_size_length); +#ifdef CONFIG_ECRYPT_FS_MESSAGING int ecryptfs_init_ecryptfs_miscdev(void); void ecryptfs_destroy_ecryptfs_miscdev(void); int ecryptfs_send_miscdev(char *data, size_t data_size, @@ -669,6 +702,9 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); int ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file); +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon); +#endif int ecryptfs_init_kthread(void); void ecryptfs_destroy_kthread(void); int ecryptfs_privileged_open(struct file **lower_file, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 53acc9d0c138..63b1f54b6a1f 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -199,7 +199,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file) struct dentry *ecryptfs_dentry = file->f_path.dentry; /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ - struct dentry *lower_dentry; struct ecryptfs_file_info *file_info; mount_crypt_stat = &ecryptfs_superblock_to_private( @@ -222,7 +221,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file) rc = -ENOMEM; goto 
out; } - lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; mutex_lock(&crypt_stat->cs_mutex); if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e0f07fb6d56b..5eab400e2590 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -999,8 +999,8 @@ out: return rc; } -int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) { struct ecryptfs_mount_crypt_stat *mount_crypt_stat; int rc = 0; @@ -1021,8 +1021,8 @@ int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, return rc; } -int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) { struct kstat lower_stat; int rc; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 2333203a120b..7d52806c2119 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1150,7 +1150,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_message *msg = NULL; char *auth_tok_sig; char *payload; - size_t payload_len; + size_t payload_len = 0; int rc; rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); @@ -1168,7 +1168,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { ecryptfs_printk(KERN_ERR, "Error sending message to " - "ecryptfsd\n"); + "ecryptfsd: %d\n", rc); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); @@ -1202,8 +1202,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, crypt_stat->key_size); } out: - if (msg) - kfree(msg); + kfree(msg); return rc; } @@ -1989,7 +1988,7 @@ pki_encrypt_session_key(struct key *auth_tok_key, rc = 
ecryptfs_send_message(payload, payload_len, &msg_ctx); if (rc) { ecryptfs_printk(KERN_ERR, "Error sending message to " - "ecryptfsd\n"); + "ecryptfsd: %d\n", rc); goto out; } rc = ecryptfs_wait_for_response(msg_ctx, &msg); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 4e0886c9e5c4..e924cf45aad9 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -629,6 +629,7 @@ static struct file_system_type ecryptfs_fs_type = { .kill_sb = ecryptfs_kill_block_super, .fs_flags = 0 }; +MODULE_ALIAS_FS("ecryptfs"); /** * inode_info_init_once diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 8d7a577ae497..49ff8ea08f1c 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -97,8 +97,7 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) { list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); - if (msg_ctx->msg) - kfree(msg_ctx->msg); + kfree(msg_ctx->msg); msg_ctx->msg = NULL; msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; } @@ -283,7 +282,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, int rc; rc = ecryptfs_find_daemon_by_euid(&daemon); - if (rc || !daemon) { + if (rc) { rc = -ENOTCONN; goto out; } diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 412e6eda25f8..e4141f257495 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -80,13 +80,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = try_module_get(THIS_MODULE); - if (rc == 0) { - rc = -EIO; - printk(KERN_ERR "%s: Error attempting to increment module use " - "count; rc = [%d]\n", __func__, rc); - goto out_unlock_daemon_list; - } rc = ecryptfs_find_daemon_by_euid(&daemon); if (!rc) { rc = -EINVAL; @@ -96,7 +89,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) if (rc) { printk(KERN_ERR "%s: Error attempting to spawn daemon; " "rc = [%d]\n", 
__func__, rc); - goto out_module_put_unlock_daemon_list; + goto out_unlock_daemon_list; } mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { @@ -108,9 +101,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) atomic_inc(&ecryptfs_num_miscdev_opens); out_unlock_daemon: mutex_unlock(&daemon->mux); -out_module_put_unlock_daemon_list: - if (rc) - module_put(THIS_MODULE); out_unlock_daemon_list: mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; @@ -147,7 +137,6 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) "bug.\n", __func__, rc); BUG(); } - module_put(THIS_MODULE); return rc; } @@ -471,6 +460,7 @@ out_free: static const struct file_operations ecryptfs_miscdev_fops = { + .owner = THIS_MODULE, .open = ecryptfs_miscdev_open, .poll = ecryptfs_miscdev_poll, .read = ecryptfs_miscdev_read, diff --git a/fs/efs/super.c b/fs/efs/super.c index 2002431ef9a0..c6f57a74a559 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -33,6 +33,7 @@ static struct file_system_type efs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("efs"); static struct pt_types sgi_pt_types[] = { {0x00, "SGI vh"}, diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 5e59280d42d7..9d9763328734 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -1010,6 +1010,7 @@ static struct file_system_type exofs_type = { .mount = exofs_mount, .kill_sb = generic_shutdown_super, }; +MODULE_ALIAS_FS("exofs"); static int __init init_exofs(void) { diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 8f370e012e61..7cadd823bb31 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -118,7 +118,6 @@ void ext2_free_inode (struct inode * inode) * as writing the quota to disk may need the lock as well. 
*/ /* Quota is already initialized in iput() */ - ext2_xattr_delete_inode(inode); dquot_free_inode(inode); dquot_drop(inode); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c3881e56662e..fe60cc1117d8 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -34,6 +34,7 @@ #include "ext2.h" #include "acl.h" #include "xip.h" +#include "xattr.h" static int __ext2_write_inode(struct inode *inode, int do_sync); @@ -88,6 +89,7 @@ void ext2_evict_inode(struct inode * inode) inode->i_size = 0; if (inode->i_blocks) ext2_truncate_blocks(inode, 0); + ext2_xattr_delete_inode(inode); } invalidate_inode_buffers(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7f68c8114026..288534920fe5 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1536,6 +1536,7 @@ static struct file_system_type ext2_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext2"); static int __init init_ext2_fs(void) { diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5546ca225ffe..fb5120a5505c 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -353,7 +353,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) return bdev; fail: - ext3_msg(sb, "error: failed to open journal device %s: %ld", + ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; @@ -887,7 +887,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) /*todo: use simple_strtoll with >32bit ext3 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - ext3_msg(sb, "error: invalid sb specification: %s", + ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s", (char *) *data); return 1; } @@ -3068,6 +3068,7 @@ static struct file_system_type ext3_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext3"); static int __init init_ext3_fs(void) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 
4a01ba315262..3b83cd604796 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -335,9 +335,9 @@ struct ext4_group_desc */ struct flex_groups { - atomic_t free_inodes; - atomic_t free_clusters; - atomic_t used_dirs; + atomic64_t free_clusters; + atomic_t free_inodes; + atomic_t used_dirs; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ @@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, extern int __init ext4_init_pageio(void); extern void ext4_add_complete_io(ext4_io_end_t *io_end); extern void ext4_exit_pageio(void); -extern void ext4_ioend_wait(struct inode *); +extern void ext4_ioend_shutdown(struct inode *); extern void ext4_free_io_end(ext4_io_end_t *io); extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); extern void ext4_end_io_work(struct work_struct *work); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 28dd8eeea6a9..9c6d06dcef8b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, unsigned short ext1_ee_len, ext2_ee_len, max_len; /* - * Make sure that either both extents are uninitialized, or - * both are _not_. + * Make sure that both extents are initialized. We don't merge + * uninitialized extents so that we can be sure that end_io code has + * the extent that was written properly split out and conversion to + * initialized is trivial. 
*/ - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) return 0; if (ext4_ext_is_uninitialized(ex1)) @@ -2923,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle, { ext4_fsblk_t newblock; ext4_lblk_t ee_block; - struct ext4_extent *ex, newex, orig_ex; + struct ext4_extent *ex, newex, orig_ex, zero_ex; struct ext4_extent *ex2 = NULL; unsigned int ee_len, depth; int err = 0; @@ -2943,6 +2945,10 @@ static int ext4_split_extent_at(handle_t *handle, newblock = split - ee_block + ext4_ext_pblock(ex); BUG_ON(split < ee_block || split >= (ee_block + ee_len)); + BUG_ON(!ext4_ext_is_uninitialized(ex) && + split_flag & (EXT4_EXT_MAY_ZEROOUT | + EXT4_EXT_MARK_UNINIT1 | + EXT4_EXT_MARK_UNINIT2)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -2990,12 +2996,29 @@ static int ext4_split_extent_at(handle_t *handle, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { - if (split_flag & EXT4_EXT_DATA_VALID1) + if (split_flag & EXT4_EXT_DATA_VALID1) { err = ext4_ext_zeroout(inode, ex2); - else + zero_ex.ee_block = ex2->ee_block; + zero_ex.ee_len = cpu_to_le16( + ext4_ext_get_actual_len(ex2)); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex2)); + } else { err = ext4_ext_zeroout(inode, ex); - } else + zero_ex.ee_block = ex->ee_block; + zero_ex.ee_len = cpu_to_le16( + ext4_ext_get_actual_len(ex)); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex)); + } + } else { err = ext4_ext_zeroout(inode, &orig_ex); + zero_ex.ee_block = orig_ex.ee_block; + zero_ex.ee_len = cpu_to_le16( + ext4_ext_get_actual_len(&orig_ex)); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(&orig_ex)); + } if (err) goto fix_extent_len; @@ -3003,6 +3026,12 @@ static int ext4_split_extent_at(handle_t *handle, ex->ee_len = cpu_to_le16(ee_len); 
ext4_ext_try_to_merge(handle, inode, path, ex); err = ext4_ext_dirty(handle, inode, path + path->p_depth); + if (err) + goto fix_extent_len; + + /* update extent status tree */ + err = ext4_es_zeroout(inode, &zero_ex); + goto out; } else if (err) goto fix_extent_len; @@ -3041,6 +3070,7 @@ static int ext4_split_extent(handle_t *handle, int err = 0; int uninitialized; int split_flag1, flags1; + int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3060,20 +3090,29 @@ static int ext4_split_extent(handle_t *handle, map->m_lblk + map->m_len, split_flag1, flags1); if (err) goto out; + } else { + allocated = ee_len - (map->m_lblk - ee_block); } - + /* + * Update path is required because previous ext4_split_extent_at() may + * result in split of original leaf or extent zeroout. + */ ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, map->m_lblk, path); if (IS_ERR(path)) return PTR_ERR(path); + depth = ext_depth(inode); + ex = path[depth].p_ext; + uninitialized = ext4_ext_is_uninitialized(ex); + split_flag1 = 0; if (map->m_lblk >= ee_block) { - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_DATA_VALID2); - if (uninitialized) + split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; + if (uninitialized) { split_flag1 |= EXT4_EXT_MARK_UNINIT1; - if (split_flag & EXT4_EXT_MARK_UNINIT2) - split_flag1 |= EXT4_EXT_MARK_UNINIT2; + split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | + EXT4_EXT_MARK_UNINIT2); + } err = ext4_split_extent_at(handle, inode, path, map->m_lblk, split_flag1, flags); if (err) @@ -3082,7 +3121,7 @@ static int ext4_split_extent(handle_t *handle, ext4_ext_show_leaf(inode, path); out: - return err ? err : map->m_len; + return err ? 
err : allocated; } /* @@ -3137,6 +3176,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (map->m_lblk - ee_block); + zero_ex.ee_len = 0; trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); @@ -3227,13 +3267,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (EXT4_EXT_MAY_ZEROOUT & split_flag) max_zeroout = sbi->s_extent_max_zeroout_kb >> - inode->i_sb->s_blocksize_bits; + (inode->i_sb->s_blocksize_bits - 10); /* If extent is less than s_max_zeroout_kb, zeroout directly */ if (max_zeroout && (ee_len <= max_zeroout)) { err = ext4_ext_zeroout(inode, ex); if (err) goto out; + zero_ex.ee_block = ex->ee_block; + zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)); + ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3292,6 +3335,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = allocated; out: + /* If we have gotten a failure, don't zero out status tree */ + if (!err) + err = ext4_es_zeroout(inode, &zero_ex); return err ? err : allocated; } @@ -3374,8 +3420,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, "block %llu, max_blocks %u\n", inode->i_ino, (unsigned long long)ee_block, ee_len); - /* If extent is larger than requested then split is required */ + /* If extent is larger than requested it is a clear sign that we still + * have some extent state machine issues left. So extent_split is still + * required. + * TODO: Once all related issues will be fixed this situation should be + * illegal. 
+ */ if (ee_block != map->m_lblk || ee_len > map->m_len) { +#ifdef EXT4_DEBUG + ext4_warning("Inode (%ld) finished: extent logical block %llu," + " len %u; IO logical block %llu, len %u\n", + inode->i_ino, (unsigned long long)ee_block, ee_len, + (unsigned long long)map->m_lblk, map->m_len); +#endif err = ext4_split_unwritten_extents(handle, inode, map, path, EXT4_GET_BLOCKS_CONVERT); if (err < 0) @@ -3626,6 +3683,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, path, map->m_len); } else err = ret; + map->m_flags |= EXT4_MAP_MAPPED; + if (allocated > map->m_len) + allocated = map->m_len; + map->m_len = allocated; goto out2; } /* buffered IO case */ @@ -3675,6 +3736,7 @@ out: allocated - map->m_len); allocated = map->m_len; } + map->m_len = allocated; /* * If we have done fallocate with the offset that is already @@ -4106,9 +4168,6 @@ got_allocated_blocks: } } else { BUG_ON(allocated_clusters < reserved_clusters); - /* We will claim quota for all newly allocated blocks.*/ - ext4_da_update_reserve_space(inode, allocated_clusters, - 1); if (reserved_clusters < allocated_clusters) { struct ext4_inode_info *ei = EXT4_I(inode); int reservation = allocated_clusters - @@ -4159,6 +4218,15 @@ got_allocated_blocks: ei->i_reserved_data_blocks += reservation; spin_unlock(&ei->i_block_reservation_lock); } + /* + * We will claim quota for all newly allocated blocks. + * We're updating the reserved space *after* the + * correction above so we do not accidentally free + * all the metadata reservation because we might + * actually need it later on. 
+ */ + ext4_da_update_reserve_space(inode, allocated_clusters, + 1); } } @@ -4368,8 +4436,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (len <= EXT_UNINIT_MAX_LEN << blkbits) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; - /* Prevent race condition between unwritten */ - ext4_flush_unwritten_io(inode); retry: while (ret >= 0 && ret < max_blocks) { map.m_lblk = map.m_lblk + ret; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 95796a1b7522..fe3337a85ede 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) static int ext4_es_can_be_merged(struct extent_status *es1, struct extent_status *es2) { - if (es1->es_lblk + es1->es_len != es2->es_lblk) + if (ext4_es_status(es1) != ext4_es_status(es2)) return 0; - if (ext4_es_status(es1) != ext4_es_status(es2)) + if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) return 0; - if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && - (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2))) + if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) return 0; - return 1; + if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && + (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2))) + return 1; + + if (ext4_es_is_hole(es1)) + return 1; + + /* we need to check delayed extent is without unwritten status */ + if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1)) + return 1; + + return 0; } static struct extent_status * @@ -389,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) return es; } +#ifdef ES_AGGRESSIVE_TEST +static void ext4_es_insert_extent_ext_check(struct inode *inode, + struct extent_status *es) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent *ex; + ext4_lblk_t ee_block; + ext4_fsblk_t ee_start; + unsigned short ee_len; + int depth, ee_status, es_status; + + path = 
ext4_ext_find_extent(inode, es->es_lblk, NULL); + if (IS_ERR(path)) + return; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + + if (ex) { + + ee_block = le32_to_cpu(ex->ee_block); + ee_start = ext4_ext_pblock(ex); + ee_len = ext4_ext_get_actual_len(ex); + + ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; + es_status = ext4_es_is_unwritten(es) ? 1 : 0; + + /* + * Make sure ex and es are not overlap when we try to insert + * a delayed/hole extent. + */ + if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { + if (in_range(es->es_lblk, ee_block, ee_len)) { + pr_warn("ES insert assertation failed for " + "inode: %lu we can find an extent " + "at block [%d/%d/%llu/%c], but we " + "want to add an delayed/hole extent " + "[%d/%d/%llu/%llx]\n", + inode->i_ino, ee_block, ee_len, + ee_start, ee_status ? 'u' : 'w', + es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + } + goto out; + } + + /* + * We don't check ee_block == es->es_lblk, etc. because es + * might be a part of whole extent, vice versa. + */ + if (es->es_lblk < ee_block || + ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { + pr_warn("ES insert assertation failed for inode: %lu " + "ex_status [%d/%d/%llu/%c] != " + "es_status [%d/%d/%llu/%c]\n", inode->i_ino, + ee_block, ee_len, ee_start, + ee_status ? 'u' : 'w', es->es_lblk, es->es_len, + ext4_es_pblock(es), es_status ? 'u' : 'w'); + goto out; + } + + if (ee_status ^ es_status) { + pr_warn("ES insert assertation failed for inode: %lu " + "ex_status [%d/%d/%llu/%c] != " + "es_status [%d/%d/%llu/%c]\n", inode->i_ino, + ee_block, ee_len, ee_start, + ee_status ? 'u' : 'w', es->es_lblk, es->es_len, + ext4_es_pblock(es), es_status ? 'u' : 'w'); + } + } else { + /* + * We can't find an extent on disk. So we need to make sure + * that we don't want to add an written/unwritten extent. 
+ */ + if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { + pr_warn("ES insert assertation failed for inode: %lu " + "can't find an extent at block %d but we want " + "to add an written/unwritten extent " + "[%d/%d/%llu/%llx]\n", inode->i_ino, + es->es_lblk, es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + } + } +out: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } +} + +static void ext4_es_insert_extent_ind_check(struct inode *inode, + struct extent_status *es) +{ + struct ext4_map_blocks map; + int retval; + + /* + * Here we call ext4_ind_map_blocks to lookup a block mapping because + * 'Indirect' structure is defined in indirect.c. So we couldn't + * access direct/indirect tree from outside. It is too dirty to define + * this function in indirect.c file. + */ + + map.m_lblk = es->es_lblk; + map.m_len = es->es_len; + + retval = ext4_ind_map_blocks(NULL, inode, &map, 0); + if (retval > 0) { + if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) { + /* + * We want to add a delayed/hole extent but this + * block has been allocated. + */ + pr_warn("ES insert assertation failed for inode: %lu " + "We can find blocks but we want to add a " + "delayed/hole extent [%d/%d/%llu/%llx]\n", + inode->i_ino, es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + return; + } else if (ext4_es_is_written(es)) { + if (retval != es->es_len) { + pr_warn("ES insert assertation failed for " + "inode: %lu retval %d != es_len %d\n", + inode->i_ino, retval, es->es_len); + return; + } + if (map.m_pblk != ext4_es_pblock(es)) { + pr_warn("ES insert assertation failed for " + "inode: %lu m_pblk %llu != " + "es_pblk %llu\n", + inode->i_ino, map.m_pblk, + ext4_es_pblock(es)); + return; + } + } else { + /* + * We don't need to check unwritten extent because + * indirect-based file doesn't have it. 
+ */ + BUG_ON(1); + } + } else if (retval == 0) { + if (ext4_es_is_written(es)) { + pr_warn("ES insert assertation failed for inode: %lu " + "We can't find the block but we want to add " + "an written extent [%d/%d/%llu/%llx]\n", + inode->i_ino, es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + return; + } + } +} + +static inline void ext4_es_insert_extent_check(struct inode *inode, + struct extent_status *es) +{ + /* + * We don't need to worry about the race condition because + * caller takes i_data_sem locking. + */ + BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + ext4_es_insert_extent_ext_check(inode, es); + else + ext4_es_insert_extent_ind_check(inode, es); +} +#else +static inline void ext4_es_insert_extent_check(struct inode *inode, + struct extent_status *es) +{ +} +#endif + static int __es_insert_extent(struct inode *inode, struct extent_status *newes) { struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; @@ -471,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ext4_es_store_status(&newes, status); trace_ext4_es_insert_extent(inode, &newes); + ext4_es_insert_extent_check(inode, &newes); + write_lock(&EXT4_I(inode)->i_es_lock); err = __es_remove_extent(inode, lblk, end); if (err != 0) @@ -669,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, return err; } +int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) +{ + ext4_lblk_t ee_block; + ext4_fsblk_t ee_pblock; + unsigned int ee_len; + + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + ee_pblock = ext4_ext_pblock(ex); + + if (ee_len == 0) + return 0; + + return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, + EXTENT_STATUS_WRITTEN); +} + static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct ext4_sb_info *sbi = container_of(shrink, diff --git a/fs/ext4/extents_status.h 
b/fs/ext4/extents_status.h index f190dfe969da..d8e2d4dc311e 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -21,6 +21,12 @@ #endif /* + * With ES_AGGRESSIVE_TEST defined, the result of es caching will be + * checked with old map_block's result. + */ +#define ES_AGGRESSIVE_TEST__ + +/* * These flags live in the high bits of extent_status.es_pblk */ #define EXTENT_STATUS_WRITTEN (1ULL << 63) @@ -33,6 +39,8 @@ EXTENT_STATUS_DELAYED | \ EXTENT_STATUS_HOLE) +struct ext4_extent; + struct extent_status { struct rb_node rb_node; ext4_lblk_t es_lblk; /* first logical block extent covers */ @@ -58,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status *es); extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status *es); +extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex); static inline int ext4_es_is_written(struct extent_status *es) { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 32fd2b9075dd..6c5bb8d993fe 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -324,8 +324,8 @@ error_return: } struct orlov_stats { + __u64 free_clusters; __u32 free_inodes; - __u32 free_clusters; __u32 used_dirs; }; @@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, if (flex_size > 1) { stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic_read(&flex_group[g].free_clusters); + stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); stats->used_dirs = atomic_read(&flex_group[g].used_dirs); return; } diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b505a145a593..a04183127ef0 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1539,9 +1539,9 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode, blk = *i_data; if (level > 0) { ext4_lblk_t first2; - bh = sb_bread(inode->i_sb, blk); + bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); if 
(!bh) { - EXT4_ERROR_INODE_BLOCK(inode, blk, + EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), "Read failure"); return -EIO; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9ea0cde3fa9e..b3a5213bc73e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); - ext4_ioend_wait(inode); - if (inode->i_nlink) { /* * When journalling data dirty buffers are tracked only in the @@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode) * don't use page cache. */ if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && + inode->i_ino != EXT4_JOURNAL_INO) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; @@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode) filemap_write_and_wait(&inode->i_data); } truncate_inode_pages(&inode->i_data, 0); + ext4_ioend_shutdown(inode); goto no_delete; } @@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode) if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages(&inode->i_data, 0); + ext4_ioend_shutdown(inode); if (is_bad_inode(inode)) goto no_delete; @@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, return num; } +#ifdef ES_AGGRESSIVE_TEST +static void ext4_map_blocks_es_recheck(handle_t *handle, + struct inode *inode, + struct ext4_map_blocks *es_map, + struct ext4_map_blocks *map, + int flags) +{ + int retval; + + map->m_flags = 0; + /* + * There is a race window that the result is not the same. + * e.g. xfstests #223 when dioread_nolock enables. The reason + * is that we lookup a block mapping in extent status tree with + * out taking i_data_sem. So at the time the unwritten extent + * could be converted. 
+ */ + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + down_read((&EXT4_I(inode)->i_data_sem)); + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + retval = ext4_ext_map_blocks(handle, inode, map, flags & + EXT4_GET_BLOCKS_KEEP_SIZE); + } else { + retval = ext4_ind_map_blocks(handle, inode, map, flags & + EXT4_GET_BLOCKS_KEEP_SIZE); + } + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + up_read((&EXT4_I(inode)->i_data_sem)); + /* + * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag + * because it shouldn't be marked in es_map->m_flags. + */ + map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY); + + /* + * We don't check m_len because extent will be collpased in status + * tree. So the m_len might not equal. + */ + if (es_map->m_lblk != map->m_lblk || + es_map->m_flags != map->m_flags || + es_map->m_pblk != map->m_pblk) { + printk("ES cache assertation failed for inode: %lu " + "es_cached ex [%d/%d/%llu/%x] != " + "found ex [%d/%d/%llu/%x] retval %d flags %x\n", + inode->i_ino, es_map->m_lblk, es_map->m_len, + es_map->m_pblk, es_map->m_flags, map->m_lblk, + map->m_len, map->m_pblk, map->m_flags, + retval, flags); + } +} +#endif /* ES_AGGRESSIVE_TEST */ + /* * The ext4_map_blocks() function tries to look up the requested blocks, * and returns if the blocks are already mapped. 
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, { struct extent_status es; int retval; +#ifdef ES_AGGRESSIVE_TEST + struct ext4_map_blocks orig_map; + + memcpy(&orig_map, map, sizeof(*map)); +#endif map->m_flags = 0; ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," @@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, } else { BUG_ON(1); } +#ifdef ES_AGGRESSIVE_TEST + ext4_map_blocks_es_recheck(handle, inode, map, + &orig_map, flags); +#endif goto found; } @@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, int ret; unsigned long long status; +#ifdef ES_AGGRESSIVE_TEST + if (retval != map->m_len) { + printk("ES len assertation failed for inode: %lu " + "retval %d != map->m_len %d " + "in %s (lookup)\n", inode->i_ino, retval, + map->m_len, __func__); + } +#endif + status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && @@ -643,6 +714,24 @@ found: int ret; unsigned long long status; +#ifdef ES_AGGRESSIVE_TEST + if (retval != map->m_len) { + printk("ES len assertation failed for inode: %lu " + "retval %d != map->m_len %d " + "in %s (allocation)\n", inode->i_ino, retval, + map->m_len, __func__); + } +#endif + + /* + * If the extent has been zeroed out, we don't need to update + * extent status tree. + */ + if ((flags & EXT4_GET_BLOCKS_PRE_IO) && + ext4_es_lookup_extent(inode, map->m_lblk, &es)) { + if (ext4_es_is_written(&es)) + goto has_zeroout; + } status = map->m_flags & EXT4_MAP_UNWRITTEN ? 
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && @@ -655,6 +744,7 @@ found: retval = ret; } +has_zeroout: up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { int ret = check_block_validity(inode, map); @@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file, } /* + * Reserve a metadata for a single block located at lblock + */ +static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) +{ + int retries = 0; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int md_needed; + ext4_lblk_t save_last_lblock; + int save_len; + + /* + * recalculate the amount of metadata blocks to reserve + * in order to allocate nrblocks + * worse case is one extent per block + */ +repeat: + spin_lock(&ei->i_block_reservation_lock); + /* + * ext4_calc_metadata_amount() has side effects, which we have + * to be prepared undo if we fail to claim space. + */ + save_len = ei->i_da_metadata_calc_len; + save_last_lblock = ei->i_da_metadata_calc_last_lblock; + md_needed = EXT4_NUM_B2C(sbi, + ext4_calc_metadata_amount(inode, lblock)); + trace_ext4_da_reserve_space(inode, md_needed); + + /* + * We do still charge estimated metadata to the sb though; + * we cannot afford to run out of free blocks. 
+ */ + if (ext4_claim_free_clusters(sbi, md_needed, 0)) { + ei->i_da_metadata_calc_len = save_len; + ei->i_da_metadata_calc_last_lblock = save_last_lblock; + spin_unlock(&ei->i_block_reservation_lock); + if (ext4_should_retry_alloc(inode->i_sb, &retries)) { + cond_resched(); + goto repeat; + } + return -ENOSPC; + } + ei->i_reserved_meta_blocks += md_needed; + spin_unlock(&ei->i_block_reservation_lock); + + return 0; /* success */ +} + +/* * Reserve a single cluster located at lblock */ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) @@ -1263,7 +1402,7 @@ repeat: ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - yield(); + cond_resched(); goto repeat; } dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); @@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, struct extent_status es; int retval; sector_t invalid_block = ~((sector_t) 0xffff); +#ifdef ES_AGGRESSIVE_TEST + struct ext4_map_blocks orig_map; + + memcpy(&orig_map, map, sizeof(*map)); +#endif if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) invalid_block = ~0; @@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, else BUG_ON(1); +#ifdef ES_AGGRESSIVE_TEST + ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); +#endif return retval; } @@ -1843,8 +1990,11 @@ add_delayed: * XXX: __block_prepare_write() unmaps passed block, * is it OK? */ - /* If the block was allocated from previously allocated cluster, - * then we dont need to reserve it again. */ + /* + * If the block was allocated from previously allocated cluster, + * then we don't need to reserve it again. However we still need + * to reserve metadata for every block we're going to write. 
+ */ if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { ret = ext4_da_reserve_space(inode, iblock); if (ret) { @@ -1852,6 +2002,13 @@ add_delayed: retval = ret; goto out_unlock; } + } else { + ret = ext4_da_reserve_metadata(inode, iblock); + if (ret) { + /* not enough space to reserve */ + retval = ret; + goto out_unlock; + } } ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, @@ -1873,6 +2030,15 @@ add_delayed: int ret; unsigned long long status; +#ifdef ES_AGGRESSIVE_TEST + if (retval != map->m_len) { + printk("ES len assertation failed for inode: %lu " + "retval %d != map->m_len %d " + "in %s (lookup)\n", inode->i_ino, retval, + map->m_len, __func__); + } +#endif + status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, @@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) trace_ext4_releasepage(page); - WARN_ON(PageChecked(page)); - if (!page_has_buffers(page)) + /* Page has dirty journalled data -> cannot release */ + if (PageChecked(page)) return 0; if (journal) return jbd2_journal_try_to_free_buffers(journal, page, wait); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7bb713a46fe4..ee6614bdb639 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - atomic_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -3692,11 +3692,7 @@ repeat: if (free < needed && busy) { busy = 0; ext4_unlock_group(sb, group); - /* - * Yield the CPU here so that we don't get soft lockup - * in non preempt case. 
- */ - yield(); + cond_resched(); goto repeat; } @@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { /* let others to free the space */ - yield(); + cond_resched(); ar->len = ar->len >> 1; } if (!ar->len) { @@ -4464,7 +4460,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; struct ext4_group_desc *gdp; - unsigned long freed = 0; unsigned int overflow; ext4_grpblk_t bit; struct buffer_head *gd_bh; @@ -4666,14 +4661,12 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(count_clusters, + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); - freed += count; - if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); @@ -4811,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 4e81d47aa8cb..33e1c086858b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -32,16 +32,18 @@ */ static inline int get_ext_path(struct inode *inode, ext4_lblk_t lblock, - struct ext4_ext_path **path) + struct ext4_ext_path **orig_path) { int ret = 0; + struct ext4_ext_path *path; - *path = ext4_ext_find_extent(inode, lblock, *path); - if (IS_ERR(*path)) { - ret = PTR_ERR(*path); - *path = NULL; - } else if ((*path)[ext_depth(inode)].p_ext == NULL) + path = ext4_ext_find_extent(inode, lblock, 
*orig_path); + if (IS_ERR(path)) + ret = PTR_ERR(path); + else if (path[ext_depth(inode)].p_ext == NULL) ret = -ENODATA; + else + *orig_path = path; return ret; } @@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, { struct ext4_ext_path *path = NULL; struct ext4_extent *ext; + int ret = 0; ext4_lblk_t last = from + count; while (from < last) { *err = get_ext_path(inode, from, &path); if (*err) - return 0; + goto out; ext = path[ext_depth(inode)].p_ext; - if (!ext) { - ext4_ext_drop_refs(path); - return 0; - } - if (uninit != ext4_ext_is_uninitialized(ext)) { - ext4_ext_drop_refs(path); - return 0; - } + if (uninit != ext4_ext_is_uninitialized(ext)) + goto out; from += ext4_ext_get_actual_len(ext); ext4_ext_drop_refs(path); } - return 1; + ret = 1; +out: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + return ret; } /** @@ -666,6 +669,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, int replaced_count = 0; int dext_alen; + *err = ext4_es_remove_extent(orig_inode, from, count); + if (*err) + goto out; + + *err = ext4_es_remove_extent(donor_inode, from, count); + if (*err) + goto out; + /* Get the original extent for the block "orig_off" */ *err = get_ext_path(orig_inode, orig_off, &orig_path); if (*err) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 809b31003ecc..047a6de04a0a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -50,11 +50,21 @@ void ext4_exit_pageio(void) kmem_cache_destroy(io_page_cachep); } -void ext4_ioend_wait(struct inode *inode) +/* + * This function is called by ext4_evict_inode() to make sure there is + * no more pending I/O completion work left to do. + */ +void ext4_ioend_shutdown(struct inode *inode) { wait_queue_head_t *wq = ext4_ioend_wq(inode); wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); + /* + * We need to make sure the work structure is finished being + * used before we let the inode get destroyed. 
+ */ + if (work_pending(&EXT4_I(inode)->i_unwritten_work)) + cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); } static void put_io_page(struct ext4_io_page *io_page) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b2c8ee56eb98..c169477a62c9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb, sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, group_data[0].group); - atomic_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, &sbi->s_flex_groups[flex_group].free_inodes); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5e6c87836193..5d6d53578124 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -90,6 +90,8 @@ static struct file_system_type ext2_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext2"); +MODULE_ALIAS("ext2"); #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) #else #define IS_EXT2_SB(sb) (0) @@ -104,6 +106,8 @@ static struct file_system_type ext3_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext3"); +MODULE_ALIAS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) #else #define IS_EXT3_SB(sb) (0) @@ -1923,8 +1927,8 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); atomic_add(ext4_free_inodes_count(sb, gdp), &sbi->s_flex_groups[flex_group].free_inodes); - atomic_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(ext4_free_group_clusters(sb, gdp), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(ext4_used_dirs_count(sb, gdp), &sbi->s_flex_groups[flex_group].used_dirs); } @@ -5152,7 +5156,6 @@ static inline int 
ext2_feature_set_ok(struct super_block *sb) return 0; return 1; } -MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } @@ -5185,7 +5188,6 @@ static inline int ext3_feature_set_ok(struct super_block *sb) return 0; return 1; } -MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } @@ -5199,6 +5201,7 @@ static struct file_system_type ext4_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext4"); static int __init ext4_init_feat_adverts(void) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c117649a035..fea6e582a2ed 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -687,6 +687,7 @@ static struct file_system_type f2fs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("f2fs"); static int __init init_inodecache(void) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e2cfda94a28d..081b759cff83 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -668,6 +668,7 @@ static struct file_system_type msdos_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("msdos"); static int __init init_msdos_fs(void) { diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index ac959d655e7d..2da952036a3d 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1073,6 +1073,7 @@ static struct file_system_type vfat_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("vfat"); static int __init init_vfat_fs(void) { diff --git a/fs/filesystems.c b/fs/filesystems.c index da165f6adcbf..92567d95ba6a 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -273,7 +273,7 @@ struct file_system_type *get_fs_type(const char *name) int len = dot ? 
dot - name : strlen(name); fs = __get_fs_type(name, len); - if (!fs && (request_module("%.*s", len, name) == 0)) + if (!fs && (request_module("fs-%.*s", len, name) == 0)) fs = __get_fs_type(name, len); if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index fed2c8afb3a9..e37eb274e492 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -52,7 +52,6 @@ MODULE_AUTHOR("Christoph Hellwig"); MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */ static void vxfs_put_super(struct super_block *); @@ -258,6 +257,8 @@ static struct file_system_type vxfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("vxfs"); /* makes mount -t vxfs autoload the module */ +MODULE_ALIAS("vxfs"); static int __init vxfs_init(void) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index b7978b9f75ef..a0b0855d00a9 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -341,6 +341,7 @@ static struct file_system_type fuse_ctl_fs_type = { .mount = fuse_ctl_mount, .kill_sb = fuse_ctl_kill_sb, }; +MODULE_ALIAS_FS("fusectl"); int __init fuse_ctl_init(void) { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index df00993ed108..137185c3884f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1117,6 +1117,7 @@ static struct file_system_type fuse_fs_type = { .mount = fuse_mount, .kill_sb = fuse_kill_sb_anon, }; +MODULE_ALIAS_FS("fuse"); #ifdef CONFIG_BLOCK static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, @@ -1146,6 +1147,7 @@ static struct file_system_type fuseblk_fs_type = { .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; +MODULE_ALIAS_FS("fuseblk"); static inline int register_fuseblk(void) { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 019f45e45097..d79c2dadc536 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ 
-923,8 +923,11 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) cmd = F_SETLK; fl->fl_type = F_UNLCK; } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (fl->fl_type == F_UNLCK) + posix_lock_file_wait(file, fl); return -EIO; + } if (IS_GETLK(cmd)) return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (fl->fl_type == F_UNLCK) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 156e42ec84ea..5c29216e9cc1 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -588,6 +588,7 @@ struct lm_lockstruct { struct dlm_lksb ls_control_lksb; /* control_lock */ char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ + char *ls_lvb_bits; spinlock_t ls_recover_spin; /* protects following fields */ unsigned long ls_recover_flags; /* DFL_ */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 9802de0f85e6..c8423d6de6c3 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -483,12 +483,8 @@ static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, static int all_jid_bits_clear(char *lvb) { - int i; - for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { - if (lvb[i]) - return 0; - } - return 1; + return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0, + GDLM_LVB_SIZE - JID_BITMAP_OFFSET); } static void sync_wait_cb(void *arg) @@ -580,7 +576,6 @@ static void gfs2_control_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); struct lm_lockstruct *ls = &sdp->sd_lockstruct; - char lvb_bits[GDLM_LVB_SIZE]; uint32_t block_gen, start_gen, lvb_gen, flags; int recover_set = 0; int write_lvb = 0; @@ -634,7 +629,7 @@ static void gfs2_control_func(struct work_struct *work) return; } - control_lvb_read(ls, &lvb_gen, lvb_bits); + control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits); spin_lock(&ls->ls_recover_spin); if (block_gen != 
ls->ls_recover_block || @@ -664,10 +659,10 @@ static void gfs2_control_func(struct work_struct *work) ls->ls_recover_result[i] = 0; - if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) + if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) continue; - __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + __clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); write_lvb = 1; } } @@ -691,7 +686,7 @@ static void gfs2_control_func(struct work_struct *work) continue; if (ls->ls_recover_submit[i] < start_gen) { ls->ls_recover_submit[i] = 0; - __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + __set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); } } /* even if there are no bits to set, we need to write the @@ -705,7 +700,7 @@ static void gfs2_control_func(struct work_struct *work) spin_unlock(&ls->ls_recover_spin); if (write_lvb) { - control_lvb_write(ls, start_gen, lvb_bits); + control_lvb_write(ls, start_gen, ls->ls_lvb_bits); flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; } else { flags = DLM_LKF_CONVERT; @@ -725,7 +720,7 @@ static void gfs2_control_func(struct work_struct *work) */ for (i = 0; i < recover_size; i++) { - if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { + if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) { fs_info(sdp, "recover generation %u jid %d\n", start_gen, i); gfs2_recover_set(sdp, i); @@ -758,7 +753,6 @@ static void gfs2_control_func(struct work_struct *work) static int control_mount(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - char lvb_bits[GDLM_LVB_SIZE]; uint32_t start_gen, block_gen, mount_gen, lvb_gen; int mounted_mode; int retries = 0; @@ -857,7 +851,7 @@ locks_done: * lvb_gen will be non-zero. */ - control_lvb_read(ls, &lvb_gen, lvb_bits); + control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits); if (lvb_gen == 0xFFFFFFFF) { /* special value to force mount attempts to fail */ @@ -887,7 +881,7 @@ locks_done: * and all lvb bits to be clear (no pending journal recoveries.) 
*/ - if (!all_jid_bits_clear(lvb_bits)) { + if (!all_jid_bits_clear(ls->ls_lvb_bits)) { /* journals need recovery, wait until all are clear */ fs_info(sdp, "control_mount wait for journal recovery\n"); goto restart; @@ -949,7 +943,6 @@ static int dlm_recovery_wait(void *word) static int control_first_done(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - char lvb_bits[GDLM_LVB_SIZE]; uint32_t start_gen, block_gen; int error; @@ -991,8 +984,8 @@ restart: memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); spin_unlock(&ls->ls_recover_spin); - memset(lvb_bits, 0, sizeof(lvb_bits)); - control_lvb_write(ls, start_gen, lvb_bits); + memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE); + control_lvb_write(ls, start_gen, ls->ls_lvb_bits); error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); if (error) @@ -1022,6 +1015,12 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, uint32_t old_size, new_size; int i, max_jid; + if (!ls->ls_lvb_bits) { + ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); + if (!ls->ls_lvb_bits) + return -ENOMEM; + } + max_jid = 0; for (i = 0; i < num_slots; i++) { if (max_jid < slots[i].slot - 1) @@ -1057,6 +1056,7 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, static void free_recover_size(struct lm_lockstruct *ls) { + kfree(ls->ls_lvb_bits); kfree(ls->ls_recover_submit); kfree(ls->ls_recover_result); ls->ls_recover_submit = NULL; @@ -1205,6 +1205,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) ls->ls_recover_size = 0; ls->ls_recover_submit = NULL; ls->ls_recover_result = NULL; + ls->ls_lvb_bits = NULL; error = set_recover_size(sdp, NULL, 0); if (error) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1b612be4b873..60ede2a0f43f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -20,6 +20,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/quotaops.h> #include <linux/lockdep.h> +#include <linux/module.h> #include 
"gfs2.h" #include "incore.h" @@ -1425,6 +1426,7 @@ struct file_system_type gfs2_fs_type = { .kill_sb = gfs2_kill_sb, .owner = THIS_MODULE, }; +MODULE_ALIAS_FS("gfs2"); struct file_system_type gfs2meta_fs_type = { .name = "gfs2meta", @@ -1432,4 +1434,4 @@ struct file_system_type gfs2meta_fs_type = { .mount = gfs2_mount_meta, .owner = THIS_MODULE, }; - +MODULE_ALIAS_FS("gfs2meta"); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index d1f51fd73f86..5a51265a4341 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -576,7 +576,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) RB_CLEAR_NODE(&ip->i_res->rs_node); out: up_write(&ip->i_rw_mutex); - return 0; + return error; } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) @@ -1181,12 +1181,9 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) { struct super_block *sb = sdp->sd_vfs; - struct block_device *bdev = sb->s_bdev; - const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / - bdev_logical_block_size(sb->s_bdev); u64 blk; sector_t start = 0; - sector_t nr_sects = 0; + sector_t nr_blks = 0; int rv; unsigned int x; u32 trimmed = 0; @@ -1206,35 +1203,34 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, if (diff == 0) continue; blk = offset + ((bi->bi_start + x) * GFS2_NBBY); - blk *= sects_per_blk; /* convert to sectors */ while(diff) { if (diff & 1) { - if (nr_sects == 0) + if (nr_blks == 0) goto start_new_extent; - if ((start + nr_sects) != blk) { - if (nr_sects >= minlen) { - rv = blkdev_issue_discard(bdev, - start, nr_sects, + if ((start + nr_blks) != blk) { + if (nr_blks >= minlen) { + rv = sb_issue_discard(sb, + start, nr_blks, GFP_NOFS, 0); if (rv) goto fail; - trimmed += nr_sects; + trimmed += nr_blks; } - nr_sects = 0; + nr_blks = 0; start_new_extent: start = blk; } - nr_sects += sects_per_blk; + nr_blks++; } diff >>= 2; - blk += sects_per_blk; + blk++; } } - if (nr_sects >= minlen) { - rv = 
blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); + if (nr_blks >= minlen) { + rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0); if (rv) goto fail; - trimmed += nr_sects; + trimmed += nr_blks; } if (ptrimmed) *ptrimmed = trimmed; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index e93ddaadfd1e..bbaaa8a4ee64 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -466,6 +466,7 @@ static struct file_system_type hfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("hfs"); static void hfs_init_once(void *p) { diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index a94f0f779d5e..fe0a76213d9e 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -533,7 +533,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 974c26f96fae..7b87284e46dc 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -654,6 +654,7 @@ static struct file_system_type hfsplus_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("hfsplus"); static void hfsplus_init_once(void *p) { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fbabb906066f..0f6e52d22b84 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -845,15 +845,8 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - int error; - - error = inode_newsize_ok(inode, attr->ia_size); - if (error) - return error; - + attr->ia_size != i_size_read(inode)) truncate_setsize(inode, attr->ia_size); - } setattr_copy(inode, attr); mark_inode_dirty(inode); @@ -993,6 +986,7 @@ static struct file_system_type hostfs_type = { .kill_sb = 
hostfs_kill_sb, .fs_flags = 0, }; +MODULE_ALIAS_FS("hostfs"); static int __init init_hostfs(void) { diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a3076228523d..a0617e706957 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -688,6 +688,7 @@ static struct file_system_type hpfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("hpfs"); static int __init init_hpfs_fs(void) { diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 74f55703be49..126d3c2e2dee 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -748,6 +748,7 @@ static struct file_system_type hppfs_type = { .kill_sb = kill_anon_super, .fs_flags = 0, }; +MODULE_ALIAS_FS("hppfs"); static int __init init_hppfs(void) { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7f94e0cbc69c..523464e62849 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) @@ -896,6 +896,7 @@ static struct file_system_type hugetlbfs_fs_type = { .mount = hugetlbfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("hugetlbfs"); static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; diff --git a/fs/inode.c b/fs/inode.c index f5f7c06c36fb..a898b3d43ccf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -725,7 +725,7 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) * inode to the back of the list so we don't spin on it. 
*/ if (!spin_trylock(&inode->i_lock)) { - list_move_tail(&inode->i_lru, &sb->s_inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); continue; } diff --git a/fs/internal.h b/fs/internal.h index 507141fceb99..4be78237d896 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool); * dcache.c */ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); + +/* + * read_write.c + */ +extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 67ce52507d7d..d9b8aebdeb22 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1556,6 +1556,8 @@ static struct file_system_type iso9660_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("iso9660"); +MODULE_ALIAS("iso9660"); static int __init init_iso9660_fs(void) { @@ -1593,5 +1595,3 @@ static void __exit exit_iso9660_fs(void) module_init(init_iso9660_fs) module_exit(exit_iso9660_fs) MODULE_LICENSE("GPL"); -/* Actual filesystem name is iso9660, as requested in filesystems.c */ -MODULE_ALIAS("iso9660"); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d6ee5aed56b1..325bc019ed88 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1065,9 +1065,12 @@ out: void jbd2_journal_set_triggers(struct buffer_head *bh, struct jbd2_buffer_trigger_type *type) { - struct journal_head *jh = bh2jh(bh); + struct journal_head *jh = jbd2_journal_grab_journal_head(bh); + if (WARN_ON(!jh)) + return; jh->b_triggers = type; + jbd2_journal_put_journal_head(jh); } void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, @@ -1119,17 +1122,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); + struct journal_head *jh; int ret = 0; - jbd_debug(5, 
"journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); if (is_handle_aborted(handle)) goto out; - if (!buffer_jbd(bh)) { + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) { ret = -EUCLEAN; goto out; } + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); jbd_lock_bh_state(bh); @@ -1220,6 +1224,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); + jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); WARN_ON(ret); /* All errors are bugs, so dump the stack */ diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d3d8799e2187..0defb1cc2a35 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -356,6 +356,7 @@ static struct file_system_type jffs2_fs_type = { .mount = jffs2_mount, .kill_sb = jffs2_kill_sb, }; +MODULE_ALIAS_FS("jffs2"); static int __init init_jffs2_fs(void) { diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 060ba638becb..2003e830ed1c 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -833,6 +833,7 @@ static struct file_system_type jfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("jfs"); static void init_once(void *foo) { diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 345c24b8a6f8..54360293bcb5 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -608,6 +608,7 @@ static struct file_system_type logfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("logfs"); static int __init logfs_init(void) { diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 99541cceb584..df122496f328 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -660,6 +660,7 @@ static struct file_system_type minix_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("minix"); static int __init init_minix_fs(void) { diff --git a/fs/namei.c b/fs/namei.c index 961bc1268366..57ae9c8c66bf 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -689,8 +689,6 @@ 
void nd_jump_link(struct nameidata *nd, struct path *path) nd->path = *path; nd->inode = nd->path.dentry->d_inode; nd->flags |= LOOKUP_JUMPED; - - BUG_ON(nd->inode->i_op->follow_link); } static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) diff --git a/fs/namespace.c b/fs/namespace.c index 50ca17d3cb45..341d3f564082 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + /* Don't allow unprivileged users to change mount flags */ + if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); @@ -1686,7 +1690,7 @@ static int do_loopback(struct path *path, const char *old_name, if (IS_ERR(mnt)) { err = PTR_ERR(mnt); - goto out; + goto out2; } err = graft_tree(mnt, path); @@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; + if (mnt->mnt_flags & MNT_LOCK_READONLY) + return -EPERM; + if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else @@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, /* First pass: copy the tree topology */ copy_flags = CL_COPY_ALL | CL_EXPIRE; if (user_ns != mnt_ns->user_ns) - copy_flags |= CL_SHARED_TO_SLAVE; + copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { up_write(&namespace_sem); @@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt) return check_mnt(real_mount(mnt)); } +bool current_chrooted(void) +{ + /* Does the current process have a non-standard root */ + struct path ns_root; + struct path fs_root; + bool chrooted; + + /* Find the namespace root */ + ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; + 
ns_root.dentry = ns_root.mnt->mnt_root; + path_get(&ns_root); + while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) + ; + + get_fs_root(current->fs, &fs_root); + + chrooted = !path_equal(&fs_root, &ns_root); + + path_put(&fs_root); + path_put(&ns_root); + + return chrooted; +} + +void update_mnt_policy(struct user_namespace *userns) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mount *mnt; + + down_read(&namespace_sem); + list_for_each_entry(mnt, &ns->list, mnt_list) { + switch (mnt->mnt.mnt_sb->s_magic) { + case SYSFS_MAGIC: + userns->may_mount_sysfs = true; + break; + case PROC_SUPER_MAGIC: + userns->may_mount_proc = true; + break; + } + if (userns->may_mount_sysfs && userns->may_mount_proc) + break; + } + up_read(&namespace_sem); +} + static void *mntns_get(struct task_struct *task) { struct mnt_namespace *ns = NULL; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7dafd6899a62..26910c8154da 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -1051,6 +1051,7 @@ static struct file_system_type ncp_fs_type = { .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("ncpfs"); static int __init init_ncp_fs(void) { diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 737d839bc17b..6fc7b5cae92b 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev) bl_pipe_msg.bl_wq = &nn->bl_wq; memset(msg, 0, sizeof(*msg)); - msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + msg->len = sizeof(bl_msg) + bl_msg.totallen; + msg->data = kzalloc(msg->len, GFP_NOFS); if (!msg->data) goto out; @@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev) memcpy(msg->data, &bl_msg, sizeof(bl_msg)); dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg->len = sizeof(bl_msg) + bl_msg.totallen; 
add_wait_queue(&nn->bl_wq, &wq); if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index dc0f98dfa717..c516da5873fd 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -726,9 +726,9 @@ out1: return ret; } -static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) +static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen) { - return key_instantiate_and_link(key, data, strlen(data) + 1, + return key_instantiate_and_link(key, data, datalen, id_resolver_cache->thread_keyring, authkey); } @@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, struct key *key, struct key *authkey) { char id_str[NFS_UINT_MAXLEN]; + size_t len; int ret = -ENOKEY; /* ret = -ENOKEY */ @@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, case IDMAP_CONV_NAMETOID: if (strcmp(upcall->im_name, im->im_name) != 0) break; - sprintf(id_str, "%d", im->im_id); - ret = nfs_idmap_instantiate(key, authkey, id_str); + /* Note: here we store the NUL terminator too */ + len = sprintf(id_str, "%d", im->im_id) + 1; + ret = nfs_idmap_instantiate(key, authkey, id_str, len); break; case IDMAP_CONV_IDTONAME: if (upcall->im_id != im->im_id) break; - ret = nfs_idmap_instantiate(key, authkey, im->im_name); + len = strlen(im->im_name); + ret = nfs_idmap_instantiate(key, authkey, im->im_name, len); break; default: ret = -EINVAL; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ac4fc9a8fdbc..66b6664dcd4c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -300,7 +300,7 @@ int nfs40_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; + struct nfs_client *pos, *prev = NULL; struct nfs4_setclientid_res clid = { .clientid = new->cl_clientid, .confirm = new->cl_confirm, @@ -308,10 +308,23 @@ int 
nfs40_walk_client_list(struct nfs_client *new, int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ - if (pos->cl_cons_state < NFS_CS_READY) + if (pos->cl_cons_state > NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + status = nfs_wait_client_init_complete(pos); + spin_lock(&nn->nfs_client_lock); + if (status < 0) + continue; + } + if (pos->cl_cons_state != NFS_CS_READY) continue; if (pos->rpc_ops != new->rpc_ops) @@ -423,16 +436,16 @@ int nfs41_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; + struct nfs_client *pos, *prev = NULL; int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos", especially the client * ID and serverowner fields. Wait for CREATE_SESSION * to finish. 
*/ - if (pos->cl_cons_state < NFS_CS_READY) { + if (pos->cl_cons_state > NFS_CS_READY) { atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -440,18 +453,17 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; - nfs4_schedule_lease_recovery(pos); status = nfs_wait_client_init_complete(pos); - if (status < 0) { - nfs_put_client(pos); - spin_lock(&nn->nfs_client_lock); - continue; + if (status == 0) { + nfs4_schedule_lease_recovery(pos); + status = nfs4_wait_clnt_recover(pos); } - status = pos->cl_cons_state; spin_lock(&nn->nfs_client_lock); if (status < 0) continue; } + if (pos->cl_cons_state != NFS_CS_READY) + continue; if (pos->rpc_ops != new->rpc_ops) continue; @@ -469,17 +481,18 @@ int nfs41_walk_client_list(struct nfs_client *new, continue; atomic_inc(&pos->cl_count); - spin_unlock(&nn->nfs_client_lock); + *result = pos; + status = 0; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - - *result = pos; - return 0; + break; } /* No matching nfs_client found. 
*/ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); + if (prev) + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 49eeb044c109..4fb234d3aefb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) { if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return; - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); pnfs_return_layout(inode); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2671cb0f901..0ad025eb523b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1046,6 +1046,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* Save the delegation */ nfs4_stateid_copy(&stateid, &delegation->stateid); rcu_read_unlock(); + nfs_release_seqid(opendata->o_arg.seqid); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); if (ret != 0) goto out; @@ -2632,7 +2633,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, int status; if (pnfs_ld_layoutret_on_setattr(inode)) - pnfs_return_layout(inode); + pnfs_commit_and_return_layout(inode); nfs_fattr_init(fattr); @@ -6416,22 +6417,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) static void nfs4_layoutcommit_release(void *calldata) { struct nfs4_layoutcommit_data *data = calldata; - struct pnfs_layout_segment *lseg, *tmp; - unsigned long *bitlock = &NFS_I(data->args.inode)->flags; pnfs_cleanup_layoutcommit(data); - /* Matched by references in pnfs_set_layoutcommit */ - list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { - list_del_init(&lseg->pls_lc_list); - if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, - &lseg->pls_flags)) - pnfs_put_lseg(lseg); - } - - clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); - smp_mb__after_clear_bit(); - wake_up_bit(bitlock, 
NFS_INO_LAYOUTCOMMITTING); - put_rpccred(data->cred); kfree(data); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ace365c6334..d41a3518509f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1886,7 +1886,13 @@ again: status = PTR_ERR(clnt); break; } - clp->cl_rpcclient = clnt; + /* Note: this is safe because we haven't yet marked the + * client as ready, so we are the only user of + * clp->cl_rpcclient + */ + clnt = xchg(&clp->cl_rpcclient, clnt); + rpc_shutdown_client(clnt); + clnt = clp->cl_rpcclient; goto again; case -NFS4ERR_MINOR_VERS_MISMATCH: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 48ac5aad6258..4bdffe0ba025 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range, lo_seg_intersecting(lseg_range, recall_range); } +static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) +{ + if (!atomic_dec_and_test(&lseg->pls_refcount)) + return false; + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); + list_add(&lseg->pls_list, tmp_list); + return true; +} + /* Returns 1 if lseg is removed from list, 0 otherwise */ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) @@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); - if (atomic_dec_and_test(&lseg->pls_refcount)) { - pnfs_layout_remove_lseg(lseg->pls_layout, lseg); - list_add(&lseg->pls_list, tmp_list); + if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) rv = 1; - } } return rv; } @@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, return lseg; } +static void pnfs_clear_layoutcommit(struct inode *inode, + struct list_head *head) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct pnfs_layout_segment *lseg, *tmp; + + if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) + return; + 
list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { + if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + continue; + pnfs_lseg_dec_and_remove_zero(lseg, head); + } +} + /* * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr * when the layout segment list is empty. @@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino) /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); + pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { @@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; @@ -845,6 +866,33 @@ out: } EXPORT_SYMBOL_GPL(_pnfs_return_layout); +int +pnfs_commit_and_return_layout(struct inode *inode) +{ + struct pnfs_layout_hdr *lo; + int ret; + + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo == NULL) { + spin_unlock(&inode->i_lock); + return 0; + } + pnfs_get_layout_hdr(lo); + /* Block new layoutgets and read/write to ds */ + lo->plh_block_lgets++; + spin_unlock(&inode->i_lock); + filemap_fdatawait(inode->i_mapping); + ret = pnfs_layoutcommit_inode(inode, true); + if (ret == 0) + ret = _pnfs_return_layout(inode); + spin_lock(&inode->i_lock); + lo->plh_block_lgets--; + spin_unlock(&inode->i_lock); + pnfs_put_layout_hdr(lo); + return ret; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; @@ -1458,7 +1506,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); 
} if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -1613,7 +1660,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -1746,11 +1792,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { if (lseg->pls_range.iomode == IOMODE_RW && - test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) list_add(&lseg->pls_lc_list, listp); } } +static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) +{ + struct pnfs_layout_segment *lseg, *tmp; + unsigned long *bitlock = &NFS_I(inode)->flags; + + /* Matched by references in pnfs_set_layoutcommit */ + list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { + list_del_init(&lseg->pls_lc_list); + pnfs_put_lseg(lseg); + } + + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); +} + void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); @@ -1795,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) if (nfss->pnfs_curr_ld->cleanup_layoutcommit) nfss->pnfs_curr_ld->cleanup_layoutcommit(data); + pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94ba80417748..f5f8a470a647 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool 
sync); int _pnfs_return_layout(struct inode *); +int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_write_data *); void pnfs_ld_read_done(struct nfs_read_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, @@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline int pnfs_commit_and_return_layout(struct inode *inode) +{ + return 0; +} + static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 17b32b722457..2f8a29db0f1b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -294,6 +294,7 @@ struct file_system_type nfs_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { @@ -333,6 +334,8 @@ struct file_system_type nfs4_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs4"); +MODULE_ALIAS("nfs4"); EXPORT_SYMBOL_GPL(nfs4_fs_type); static int __init register_nfs4_fs(void) @@ -2717,6 +2720,5 @@ module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); -MODULE_ALIAS("nfs4"); #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 16d39c6c4fbb..2e27430b9070 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -230,37 +230,6 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) __nfs4_file_put_access(fp, oflag); } -static inline int get_new_stid(struct nfs4_stid *stid) -{ - static int min_stateid = 0; - struct idr *stateids = &stid->sc_client->cl_stateids; - int new_stid; - int error; - - error = idr_get_new_above(stateids, stid, min_stateid, &new_stid); - /* - * Note: the necessary 
preallocation was done in - * nfs4_alloc_stateid(). The idr code caps the number of - * preallocations that can exist at a time, but the state lock - * prevents anyone from using ours before we get here: - */ - WARN_ON_ONCE(error); - /* - * It shouldn't be a problem to reuse an opaque stateid value. - * I don't think it is for 4.1. But with 4.0 I worry that, for - * example, a stray write retransmission could be accepted by - * the server when it should have been rejected. Therefore, - * adopt a trick from the sctp code to attempt to maximize the - * amount of time until an id is reused, by ensuring they always - * "increase" (mod INT_MAX): - */ - - min_stateid = new_stid+1; - if (min_stateid == INT_MAX) - min_stateid = 0; - return new_stid; -} - static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) { @@ -273,9 +242,8 @@ kmem_cache *slab) if (!stid) return NULL; - if (!idr_pre_get(stateids, GFP_KERNEL)) - goto out_free; - if (idr_get_new_above(stateids, stid, min_stateid, &new_id)) + new_id = idr_alloc(stateids, stid, min_stateid, 0, GFP_KERNEL); + if (new_id < 0) goto out_free; stid->sc_client = cl; stid->sc_type = 0; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 01168865dd37..a2720071f282 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -264,7 +264,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace; + u32 nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 62c1ee128aeb..ca05f6dc3544 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -102,7 +102,8 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { if (rp->c_type == RC_REPLBUFF) kfree(rp->c_replvec.iov_base); - hlist_del(&rp->c_hash); + if (!hlist_unhashed(&rp->c_hash)) + hlist_del(&rp->c_hash); list_del(&rp->c_lru); --num_drc_entries; kmem_cache_free(drc_slab, rp); @@ -118,6 +119,10 @@ 
nfsd_reply_cache_free(struct svc_cacherep *rp) int nfsd_reply_cache_init(void) { + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + register_shrinker(&nfsd_reply_cache_shrinker); drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); @@ -128,10 +133,6 @@ int nfsd_reply_cache_init(void) if (!cache_hash) goto out_nomem; - INIT_LIST_HEAD(&lru_head); - max_drc_entries = nfsd_cache_size_limit(); - num_drc_entries = 0; - return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 13a21c8fca49..f33455b4d957 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1090,6 +1090,7 @@ static struct file_system_type nfsd_fs_type = { .mount = nfsd_mount, .kill_sb = nfsd_umount, }; +MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2a7eb536de0b..2b2e2396a869 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1013,6 +1013,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int host_err; int stable = *stablep; int use_wgather; + loff_t pos = offset; dentry = file->f_path.dentry; inode = dentry->d_inode; @@ -1025,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. 
*/ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); set_fs(oldfs); if (host_err < 0) goto out_nfserr; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3c991dc84f2f..c7d1f9f18b09 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1361,6 +1361,7 @@ struct file_system_type nilfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("nilfs2"); static void nilfs_inode_init_once(void *obj) { diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 4a8289f8b16c..82650d52d916 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3079,6 +3079,7 @@ static struct file_system_type ntfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ntfs"); /* Stable names for the slab caches. */ static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache"; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 4c5fc8d77dc2..12bafb7265ce 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -640,6 +640,7 @@ static struct file_system_type dlmfs_fs_type = { .mount = dlmfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("ocfs2_dlmfs"); static int __init init_dlmfs_fs(void) { diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 9b6910dec4ba..01b85165552b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1266,6 +1266,7 @@ static struct file_system_type ocfs2_fs_type = { .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, .next = NULL }; +MODULE_ALIAS_FS("ocfs2"); static int ocfs2_check_set_options(struct super_block *sb, struct mount_options *options) diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 25d715c7c87a..d8b0afde2179 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -572,6 +572,7 @@ static struct file_system_type omfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; 
+MODULE_ALIAS_FS("omfs"); static int __init init_omfs_fs(void) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index ae47fa7efb9d..75885ffde44e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -432,6 +432,7 @@ static struct file_system_type openprom_fs_type = { .mount = openprom_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("openpromfs"); static void op_inode_init_once(void *data) { diff --git a/fs/pipe.c b/fs/pipe.c index 64a494cef0a0..2234f3f61f8d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -863,6 +863,9 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) { int ret = -ENOENT; + if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE))) + return -EINVAL; + mutex_lock(&inode->i_mutex); if (inode->i_pipe) { diff --git a/fs/pnode.c b/fs/pnode.c index 3e000a51ac0d..8b29d2164da6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -9,6 +9,7 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/nsproxy.h> #include "internal.h" #include "pnode.h" @@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest, int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, struct mount *source_mnt, struct list_head *tree_list) { + struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; @@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); + /* Notice when we are propagating across user namespaces */ + if (m->mnt_ns->user_ns != user_ns) + type |= CL_UNPRIVILEGED; + child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); diff --git a/fs/pnode.h b/fs/pnode.h index 19b853a3445c..a0493d5ebfbf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -23,6 +23,7 @@ #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_SHARED_TO_SLAVE 0x20 +#define CL_UNPRIVILEGED 0x40 static 
inline void set_mnt_shared(struct mount *mnt) { diff --git a/fs/proc/array.c b/fs/proc/array.c index f7ed9ee46eb9..cbd0f1b324b9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -143,6 +143,7 @@ static const char * const task_state_array[] = { "x (dead)", /* 64 */ "K (wakekill)", /* 128 */ "W (waking)", /* 256 */ + "P (parked)", /* 512 */ }; static inline const char *get_task_state(struct task_struct *tsk) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4b3b3ffb52f1..21e1a8f1659d 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -755,37 +755,8 @@ void pde_put(struct proc_dir_entry *pde) free_proc_entry(pde); } -/* - * Remove a /proc entry and free it if it's not currently in use. - */ -void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +static void entry_rundown(struct proc_dir_entry *de) { - struct proc_dir_entry **p; - struct proc_dir_entry *de = NULL; - const char *fn = name; - unsigned int len; - - spin_lock(&proc_subdir_lock); - if (__xlate_proc_name(name, &parent, &fn) != 0) { - spin_unlock(&proc_subdir_lock); - return; - } - len = strlen(fn); - - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - de = *p; - *p = de->next; - de->next = NULL; - break; - } - } - spin_unlock(&proc_subdir_lock); - if (!de) { - WARN(1, "name '%s'\n", name); - return; - } - spin_lock(&de->pde_unload_lock); /* * Stop accepting new callers into module. If you're @@ -817,6 +788,40 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) spin_lock(&de->pde_unload_lock); } spin_unlock(&de->pde_unload_lock); +} + +/* + * Remove a /proc entry and free it if it's not currently in use. 
+ */ +void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry **p; + struct proc_dir_entry *de = NULL; + const char *fn = name; + unsigned int len; + + spin_lock(&proc_subdir_lock); + if (__xlate_proc_name(name, &parent, &fn) != 0) { + spin_unlock(&proc_subdir_lock); + return; + } + len = strlen(fn); + + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (proc_match(len, fn, *p)) { + de = *p; + *p = de->next; + de->next = NULL; + break; + } + } + spin_unlock(&proc_subdir_lock); + if (!de) { + WARN(1, "name '%s'\n", name); + return; + } + + entry_rundown(de); if (S_ISDIR(de->mode)) parent->nlink--; @@ -827,3 +832,57 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); + +int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry **p; + struct proc_dir_entry *root = NULL, *de, *next; + const char *fn = name; + unsigned int len; + + spin_lock(&proc_subdir_lock); + if (__xlate_proc_name(name, &parent, &fn) != 0) { + spin_unlock(&proc_subdir_lock); + return -ENOENT; + } + len = strlen(fn); + + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (proc_match(len, fn, *p)) { + root = *p; + *p = root->next; + root->next = NULL; + break; + } + } + if (!root) { + spin_unlock(&proc_subdir_lock); + return -ENOENT; + } + de = root; + while (1) { + next = de->subdir; + if (next) { + de->subdir = next->next; + next->next = NULL; + de = next; + continue; + } + spin_unlock(&proc_subdir_lock); + + entry_rundown(de); + next = de->parent; + if (S_ISDIR(de->mode)) + next->nlink--; + de->nlink = 0; + if (de == root) + break; + pde_put(de); + + spin_lock(&proc_subdir_lock); + de = next; + } + pde_put(root); + return 0; +} +EXPORT_SYMBOL(remove_proc_subtree); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a86aebc9ba7c..869116c2afbe 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -446,9 +446,10 @@ static const struct 
file_operations proc_reg_file_ops_no_compat = { struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { - struct inode *inode = iget_locked(sb, de->low_ino); + struct inode *inode = new_inode_pseudo(sb); - if (inode && (inode->i_state & I_NEW)) { + if (inode) { + inode->i_ino = de->low_ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; @@ -476,7 +477,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_fop = de->proc_fops; } } - unlock_new_inode(inode); } else pde_put(de); return inode; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b7a47196c8c3..66b51c0383da 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -118,7 +118,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) struct super_block *sb = inode->i_sb; struct proc_inode *ei = PROC_I(inode); struct task_struct *task; - struct dentry *ns_dentry; + struct path ns_path; void *error = ERR_PTR(-EACCES); task = get_proc_task(inode); @@ -128,14 +128,14 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); - if (IS_ERR(ns_dentry)) { - error = ERR_CAST(ns_dentry); + ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); + if (IS_ERR(ns_path.dentry)) { + error = ERR_CAST(ns_path.dentry); goto out_put_task; } - dput(nd->path.dentry); - nd->path.dentry = ns_dentry; + ns_path.mnt = mntget(nd->path.mnt); + nd_jump_link(nd, &ns_path); error = NULL; out_put_task: diff --git a/fs/proc/root.c b/fs/proc/root.c index c6e9fac26bac..9c7fab1d23f0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -16,6 +16,7 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/bitops.h> +#include <linux/user_namespace.h> #include <linux/mount.h> #include <linux/pid_namespace.h> #include <linux/parser.h> @@ 
-108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } else { ns = task_active_pid_ns(current); options = data; + + if (!current_user_ns()->may_mount_proc) + return ERR_PTR(-EPERM); } sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 43098bb5723a..2e8caa62da78 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -412,6 +412,7 @@ static struct file_system_type qnx4_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("qnx4"); static int __init init_qnx4_fs(void) { diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 57199a52a351..8d941edfefa1 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -672,6 +672,7 @@ static struct file_system_type qnx6_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("qnx6"); static int __init init_qnx6_fs(void) { diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 05ae3c97f7a5..3e64169ef527 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1439,8 +1439,11 @@ static void __dquot_initialize(struct inode *inode, int type) * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); - if (unlikely(rsv)) + if (unlikely(rsv)) { + spin_lock(&dq_data_lock); dquot_resv_space(inode->i_dquot[cnt], rsv); + spin_unlock(&dq_data_lock); + } } } out_err: diff --git a/fs/read_write.c b/fs/read_write.c index a698eff457fb..e6ddc8dceb96 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -17,6 +17,7 @@ #include <linux/splice.h> #include <linux/compat.h> #include "read_write.h" +#include "internal.h" #include <asm/uaccess.h> #include <asm/unistd.h> @@ -417,6 +418,33 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof EXPORT_SYMBOL(do_sync_write); +ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) +{ + mm_segment_t old_fs; + const char __user *p; + ssize_t ret; + + if (!file->f_op || 
(!file->f_op->write && !file->f_op->aio_write)) + return -EINVAL; + + old_fs = get_fs(); + set_fs(get_ds()); + p = (__force const char __user *)buf; + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + if (file->f_op->write) + ret = file->f_op->write(file, p, count, pos); + else + ret = do_sync_write(file, p, count, pos); + set_fs(old_fs); + if (ret > 0) { + fsnotify_modify(file); + add_wchar(current, ret); + } + inc_syscw(current); + return ret; +} + ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { ssize_t ret; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 418bdc3a57da..f8a23c3078f8 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1147,8 +1147,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin "on filesystem root."); return 0; } - qf_names[qtype] = - kmalloc(strlen(arg) + 1, GFP_KERNEL); + qf_names[qtype] = kstrdup(arg, GFP_KERNEL); if (!qf_names[qtype]) { reiserfs_warning(s, "reiserfs-2502", "not enough memory " @@ -1156,7 +1155,6 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin "quotafile name."); return 0; } - strcpy(qf_names[qtype], arg); if (qtype == USRQUOTA) *mount_options |= 1 << REISERFS_USRQUOTA; else @@ -2434,6 +2432,7 @@ struct file_system_type reiserfs_fs_type = { .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("reiserfs"); MODULE_DESCRIPTION("ReiserFS journaled filesystem"); MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>"); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c196369fe408..4cce1d9552fb 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) return -ENOSPC; - if (name[0] == '.' && (name[1] == '\0' || - (name[1] == '.' && name[2] == '\0'))) + if (name[0] == '.' 
&& (namelen < 2 || + (namelen == 2 && name[1] == '.'))) return 0; dentry = lookup_one_len(name, dbuf->xadir, namelen); diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 7e8d3a80bdab..15cbc41ee365 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -599,6 +599,7 @@ static struct file_system_type romfs_fs_type = { .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("romfs"); /* * inode storage initialiser diff --git a/fs/splice.c b/fs/splice.c index 718bd0056384..29e394e49ddd 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/gfp.h> #include <linux/socket.h> +#include "internal.h" /* * Attempt to steal a page from a pipe buffer. This should perhaps go into @@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, { int ret; void *data; + loff_t tmp = sd->pos; data = buf->ops->map(pipe, buf, 0); - ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); + ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); buf->ops->unmap(pipe, buf, data); return ret; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 260e3928d4f5..60553a9053ca 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -489,6 +489,7 @@ static struct file_system_type squashfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV }; +MODULE_ALIAS_FS("squashfs"); static const struct super_operations squashfs_super_ops = { .alloc_inode = squashfs_alloc_inode, diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 2fbdff6be25c..e14512678c9b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -1020,6 +1020,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } if (filp->f_pos == 1) { if (parent_sd->s_parent) @@ -1028,6 +1030,8 @@ static int sysfs_readdir(struct file * filp, void 
* dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } mutex_lock(&sysfs_mutex); for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); @@ -1058,10 +1062,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) return 0; } +static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file_inode(file); + loff_t ret; + + mutex_lock(&inode->i_mutex); + ret = generic_file_llseek(file, offset, whence); + mutex_unlock(&inode->i_mutex); + + return ret; +} const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, .release = sysfs_dir_release, - .llseek = generic_file_llseek, + .llseek = sysfs_dir_llseek, }; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8d924b5ec733..afd83273e6ce 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/magic.h> #include <linux/slab.h> +#include <linux/user_namespace.h> #include "sysfs.h" @@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, struct super_block *sb; int error; + if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) + return ERR_PTR(-EPERM); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index a38e87bdd78d..d0c6a007ce83 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -545,6 +545,7 @@ static struct file_system_type sysv_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("sysv"); static struct file_system_type v7_fs_type = { .owner = THIS_MODULE, @@ -553,6 +554,8 @@ static struct file_system_type v7_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("v7"); +MODULE_ALIAS("v7"); static int __init init_sysv_fs(void) { @@ -586,5 +589,4 @@ static 
void __exit exit_sysv_fs(void) module_init(init_sysv_fs) module_exit(exit_sysv_fs) -MODULE_ALIAS("v7"); MODULE_LICENSE("GPL"); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ddc0f6ae65e9..f21acf0ef01f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1568,6 +1568,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->remounting_rw = 1; c->ro_mount = 0; + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + return err; + } + err = check_free_space(c); if (err) goto out; @@ -1684,12 +1690,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = dbg_check_space_info(c); } - if (c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - goto out; - } - mutex_unlock(&c->umount_mutex); return err; @@ -2174,6 +2174,7 @@ static struct file_system_type ubifs_fs_type = { .mount = ubifs_mount, .kill_sb = kill_ubifs_super, }; +MODULE_ALIAS_FS("ubifs"); /* * Inode slab cache constructor. diff --git a/fs/udf/super.c b/fs/udf/super.c index bc5b30a819e8..9ac4057a86c9 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -118,6 +118,7 @@ static struct file_system_type udf_fstype = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("udf"); static struct kmem_cache *udf_inode_cachep; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index dc8e3a861d0f..329f2f53b7ed 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1500,6 +1500,7 @@ static struct file_system_type ufs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ufs"); static int __init init_ufs_fs(void) { diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4e8f0df82d02..8459b5d8cb71 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1334,6 +1334,12 @@ _xfs_buf_ioapply( int size; int i; + /* + * Make sure we capture only current IO errors rather than stale errors + * left over from previous use of the buffer (e.g. failed readahead). 
+ */ + bp->b_error = 0; + if (bp->b_flags & XBF_WRITE) { if (bp->b_flags & XBF_SYNCIO) rw = WRITE_SYNC; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 912d83d8860a..5a30dd899d2b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -325,7 +325,7 @@ xfs_iomap_eof_want_preallocate( * rather than falling short due to things like stripe unit/width alignment of * real extents. */ -STATIC int +STATIC xfs_fsblock_t xfs_iomap_eof_prealloc_initial_size( struct xfs_mount *mp, struct xfs_inode *ip, @@ -413,7 +413,7 @@ xfs_iomap_prealloc_size( * have a large file on a small filesystem and the above * lowspace thresholds are smaller than MAXEXTLEN. */ - while (alloc_blocks >= freesp) + while (alloc_blocks && alloc_blocks >= freesp) alloc_blocks >>= 4; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c407121873b4..ea341cea68cb 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1561,6 +1561,7 @@ static struct file_system_type xfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_zones(void) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e65278f560c4..22ba56e834e2 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -437,11 +437,9 @@ void acpi_remove_dir(struct acpi_device *); */ struct acpi_bus_type { struct list_head list; - struct bus_type *bus; - /* For general devices under the bus */ + const char *name; + bool (*match)(struct device *dev); int (*find_device) (struct device *, acpi_handle *); - /* For bridges, such as PCI root bridge, IDE controller */ - int (*find_bridge) (struct device *, acpi_handle *); void (*setup)(struct device *); void (*cleanup)(struct device *); }; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 555d0337ad95..b327b5a9296d 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -235,6 +235,9 @@ extern void acpi_processor_unregister_performance(struct if a _PPC 
object exists, rmmod is disallowed then */ int acpi_processor_notify_smm(struct module *calling_module); +/* parsing the _P* objects. */ +extern int acpi_processor_get_performance_info(struct acpi_processor *pr); + /* for communication between multiple parts of the processor kernel module */ DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 1ced6413ea03..33bd2de3bc1e 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -136,12 +136,6 @@ static inline void atomic_dec(atomic_t *v) #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ - (unsigned long)(n), sizeof(*(ptr)))) - -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) - static inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h index 14883026015d..811fb1e9b061 100644 --- a/include/asm-generic/cmpxchg.h +++ b/include/asm-generic/cmpxchg.h @@ -92,6 +92,16 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) */ #include <asm-generic/cmpxchg-local.h> +#ifndef cmpxchg_local +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ + (unsigned long)(n), sizeof(*(ptr)))) +#endif + +#ifndef cmpxchg64_local +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) +#endif + #define cmpxchg(ptr, o, n) cmpxchg_local((ptr), (o), (n)) #define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n)) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 25f01d0bc149..b1b1fa6ffffe 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -99,7 
+99,12 @@ struct mmu_gather { unsigned int need_flush : 1, /* Did free PTEs */ fast_mode : 1; /* No batching */ - unsigned int fullmm; + /* we are in the middle of an operation to clear + * a full mm and can make some optimizations */ + unsigned int fullmm : 1, + /* we have performed an operation which + * requires a complete flush of the tlb */ + need_flush_all : 1; struct mmu_gather_batch *active; struct mmu_gather_batch local; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 8839b3a24660..e3e0d651c6ca 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -443,12 +443,12 @@ struct drm_crtc { * @dpms: set power state (see drm_crtc_funcs above) * @save: save connector state * @restore: restore connector state - * @reset: reset connector after state has been invalidate (e.g. resume) + * @reset: reset connector after state has been invalidated (e.g. resume) * @detect: is this connector active? * @fill_modes: fill mode list for this connector - * @set_property: property for this connector may need update + * @set_property: property for this connector may need an update * @destroy: make object go away - * @force: notify the driver the connector is forced on + * @force: notify the driver that the connector is forced on * * Each CRTC may have one or more connectors attached to it. 
The functions * below allow the core DRM code to control connectors, enumerate available modes, diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index a386b0b654cc..918e8fe2f5e9 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -581,7 +581,11 @@ {0x1002, 0x9908, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ - {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9910, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9913, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9917, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ @@ -592,6 +596,13 @@ {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9996, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9998, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9999, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x999A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x999B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x99A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x99A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x99A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ diff --git a/include/linux/ata.h b/include/linux/ata.h index 8f7a3d68371a..ee0bd9524055 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -954,7 +954,7 @@ static inline int atapi_cdb_len(const u16 *dev_id) } } -static inline bool atapi_command_packet_set(const u16 *dev_id) +static inline int atapi_command_packet_set(const u16 *dev_id) { return (dev_id[ATA_ID_CONFIG] >> 8) & 0x1f; } diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 0ea61e07a91c..7c2e030e72f1 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -12,7 +12,6 @@ struct blk_trace { int trace_state; - bool rq_based; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; diff --git a/include/linux/capability.h b/include/linux/capability.h index 98503b792369..d9a4f7f40f32 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -35,6 +35,7 @@ struct cpu_vfs_cap_data { 
#define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) +struct file; struct inode; struct dentry; struct user_namespace; @@ -211,6 +212,7 @@ extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool nsown_capable(int cap); extern bool inode_capable(const struct inode *inode, int cap); +extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h index 2c1bc1ea04ee..1d5b02a96c46 100644 --- a/include/linux/cn_proc.h +++ b/include/linux/cn_proc.h @@ -26,6 +26,7 @@ void proc_id_connector(struct task_struct *task, int which_id); void proc_sid_connector(struct task_struct *task); void proc_ptrace_connector(struct task_struct *task, int which_id); void proc_comm_connector(struct task_struct *task); +void proc_coredump_connector(struct task_struct *task); void proc_exit_connector(struct task_struct *task); #else static inline void proc_fork_connector(struct task_struct *task) @@ -48,6 +49,9 @@ static inline void proc_ptrace_connector(struct task_struct *task, int ptrace_id) {} +static inline void proc_coredump_connector(struct task_struct *task) +{} + static inline void proc_exit_connector(struct task_struct *task) {} #endif /* CONFIG_PROC_EVENTS */ diff --git a/include/linux/compat.h b/include/linux/compat.h index 76a87fb57ac2..377cd8c3395e 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -141,11 +141,11 @@ typedef struct { } compat_sigset_t; struct compat_sigaction { -#ifndef __ARCH_HAS_ODD_SIGACTION +#ifndef __ARCH_HAS_IRIX_SIGACTION compat_uptr_t sa_handler; compat_ulong_t sa_flags; #else - compat_ulong_t sa_flags; + compat_uint_t sa_flags; compat_uptr_t sa_handler; #endif #ifdef __ARCH_HAS_SA_RESTORER diff --git a/include/linux/debug_locks.h 
b/include/linux/debug_locks.h index a975de1ff59f..3bd46f766751 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(void); +extern void debug_check_no_locks_held(struct task_struct *task); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(void) +debug_check_no_locks_held(struct task_struct *task) { } #endif diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index e83ef39b3bea..fe8c4476f7e4 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -213,7 +213,7 @@ struct devfreq_simple_ondemand_data { #endif #else /* !CONFIG_PM_DEVFREQ */ -static struct devfreq *devfreq_add_device(struct device *dev, +static inline struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, const char *governor_name, void *data) @@ -221,34 +221,34 @@ static struct devfreq *devfreq_add_device(struct device *dev, return NULL; } -static int devfreq_remove_device(struct devfreq *devfreq) +static inline int devfreq_remove_device(struct devfreq *devfreq) { return 0; } -static int devfreq_suspend_device(struct devfreq *devfreq) +static inline int devfreq_suspend_device(struct devfreq *devfreq) { return 0; } -static int devfreq_resume_device(struct devfreq *devfreq) +static inline int devfreq_resume_device(struct devfreq *devfreq) { return 0; } -static struct opp *devfreq_recommended_opp(struct device *dev, +static inline struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq, u32 flags) { - return -EINVAL; + return ERR_PTR(-EINVAL); } -static int devfreq_register_opp_notifier(struct device *dev, +static inline 
int devfreq_register_opp_notifier(struct device *dev, struct devfreq *devfreq) { return -EINVAL; } -static int devfreq_unregister_opp_notifier(struct device *dev, +static inline int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq) { return -EINVAL; diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h index 2224a8c0cb64..8d5ab998a222 100644 --- a/include/linux/ecryptfs.h +++ b/include/linux/ecryptfs.h @@ -6,9 +6,8 @@ #define ECRYPTFS_VERSION_MINOR 0x04 #define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 /* These flags indicate which features are supported by the kernel - * module; userspace tools such as the mount helper read - * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine - * how to behave. */ + * module; userspace tools such as the mount helper read the feature + * bits from a sysfs handle in order to determine how to behave. */ #define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 #define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 #define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 @@ -19,13 +18,6 @@ #define ECRYPTFS_VERSIONING_HMAC 0x00000080 #define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 #define ECRYPTFS_VERSIONING_GCM 0x00000200 -#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ - | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ - | ECRYPTFS_VERSIONING_PUBKEY \ - | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY \ - | ECRYPTFS_VERSIONING_DEVMISC \ - | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 diff --git a/include/linux/edac.h b/include/linux/edac.h index 4fd4999ccb5b..0b763276f619 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -561,7 +561,6 @@ struct csrow_info { u32 ue_count; /* Uncorrectable Errors for this csrow */ u32 ce_count; /* Correctable Errors for this csrow */ - u32 nr_pages; /* combined pages count of all 
channels */ struct mem_ctl_info *mci; /* the parent */ @@ -676,11 +675,11 @@ struct mem_ctl_info { * sees memory sticks ("dimms"), and the ones that sees memory ranks. * All old memory controllers enumerate memories per rank, but most * of the recent drivers enumerate memories per DIMM, instead. - * When the memory controller is per rank, mem_is_per_rank is true. + * When the memory controller is per rank, csbased is true. */ unsigned n_layers; struct edac_mc_layer *layers; - bool mem_is_per_rank; + bool csbased; /* * DIMM info. Will eventually remove the entire csrows_info some day @@ -741,8 +740,6 @@ struct mem_ctl_info { u32 fake_inject_ue; u16 fake_inject_count; #endif - __u8 csbased : 1, /* csrow-based memory controller */ - __resv : 7; }; #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index 9bf2f1fcae27..3d7df3d32c66 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -333,6 +333,7 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); +typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size); /* * EFI Configuration Table and GUID definitions @@ -575,9 +576,15 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos #ifdef CONFIG_X86 extern void efi_late_init(void); extern void efi_free_boot_services(void); +extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size); #else static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} + +static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + return EFI_SUCCESS; +} #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern u64 efi_get_iobase (void); @@ -731,7 +738,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_query_variable_info_t 
*query_variable_info; + efi_query_variable_store_t *query_variable_store; }; struct efivars { diff --git a/include/linux/filter.h b/include/linux/filter.h index c45eabc135e1..d1248f401a56 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -48,8 +48,21 @@ extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); #ifdef CONFIG_BPF_JIT +#include <linux/linkage.h> +#include <linux/printk.h> + extern void bpf_jit_compile(struct sk_filter *fp); extern void bpf_jit_free(struct sk_filter *fp); + +static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, + u32 pass, void *image) +{ + pr_err("flen=%u proglen=%u pass=%u image=%p\n", + flen, proglen, pass, image); + if (image) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS, + 16, 1, image, proglen, false); +} #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else static inline void bpf_jit_compile(struct sk_filter *fp) @@ -126,6 +139,7 @@ enum { BPF_S_ANC_SECCOMP_LD_W, BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, + BPF_S_ANC_PAY_OFFSET, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 043a5cf8b5ba..e70df40d84f6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,7 +3,6 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED -#include <linux/debug_locks.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> @@ -49,8 +48,6 @@ extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { - if (!(current->flags & PF_NOFREEZE)) - debug_check_no_locks_held(); might_sleep(); if (likely(!freezing(current))) return false; diff --git a/include/linux/fs.h b/include/linux/fs.h index 74a907b8b950..2c28271ab9d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,6 +1825,8 @@ struct file_system_type { struct lock_class_key 
i_mutex_dir_key; }; +#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) + extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 729eded4b24f..2b93a9a5a1e6 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -50,4 +50,6 @@ static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, spin_unlock(&fs->lock); } +extern bool current_chrooted(void); + #endif /* _LINUX_FS_STRUCT_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e5ca8ef50e9b..52da2a250795 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -89,6 +89,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * that the call back has its own recursion protection. If it does * not set this, then the ftrace infrastructure will add recursion * protection for the caller. + * STUB - The ftrace_ops is just a place holder. 
*/ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -98,6 +99,7 @@ enum { FTRACE_OPS_FL_SAVE_REGS = 1 << 4, FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, + FTRACE_OPS_FL_STUB = 1 << 7, }; struct ftrace_ops { @@ -394,7 +396,6 @@ ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); -loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence); int ftrace_regex_release(struct inode *inode, struct file *file); void __init @@ -567,6 +568,8 @@ static inline int ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } #endif /* CONFIG_DYNAMIC_FTRACE */ +loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int whence); + /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); diff --git a/include/linux/hash.h b/include/linux/hash.h index 61c97ae22e01..f09a0ae4d858 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -15,6 +15,7 @@ */ #include <asm/types.h> +#include <linux/compiler.h> /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ #define GOLDEN_RATIO_PRIME_32 0x9e370001UL @@ -31,7 +32,7 @@ #error Wordsize not 32 or 64 #endif -static inline u64 hash_64(u64 val, unsigned int bits) +static __always_inline u64 hash_64(u64 val, unsigned int bits) { u64 hash = val; diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index f027f7a63511..99e379b74398 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -15,6 +15,9 @@ #include <linux/types.h> +/* For key_map array */ +#define MXT_NUM_GPIO 4 + /* Orient */ #define MXT_NORMAL 0x0 #define MXT_DIAGONAL 0x1 @@ -39,6 +42,8 @@ struct mxt_platform_data { unsigned int voltage; unsigned char orient; unsigned long irqflags; + bool is_tp; + const unsigned int key_map[MXT_NUM_GPIO]; }; #endif /* __LINUX_ATMEL_MXT_TS_H */ 
diff --git a/include/linux/idr.h b/include/linux/idr.h index a6f38b5c34e4..2640c7e99e51 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -73,8 +73,6 @@ struct idr { */ void *idr_find_slowpath(struct idr *idp, int id); -int idr_pre_get(struct idr *idp, gfp_t gfp_mask); -int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); int idr_for_each(struct idr *idp, @@ -99,7 +97,7 @@ static inline void idr_preload_end(void) /** * idr_find - return pointer for given id - * @idp: idr handle + * @idr: idr handle * @id: lookup key * * Return the pointer given the id it has been registered with. A %NULL @@ -120,19 +118,6 @@ static inline void *idr_find(struct idr *idr, int id) } /** - * idr_get_new - allocate new idr entry - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @id: pointer to the allocated handle - * - * Simple wrapper around idr_get_new_above() w/ @starting_id of zero. - */ -static inline int idr_get_new(struct idr *idp, void *ptr, int *id) -{ - return idr_get_new_above(idp, ptr, 0, id); -} - -/** * idr_for_each_entry - iterate over an idr's elements of a given type * @idp: idr handle * @entry: the type * to use as cursor @@ -143,7 +128,56 @@ static inline int idr_get_new(struct idr *idp, void *ptr, int *id) entry != NULL; \ ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) -void __idr_remove_all(struct idr *idp); /* don't use */ +/* + * Don't use the following functions. These exist only to suppress + * deprecated warnings on EXPORT_SYMBOL()s. + */ +int __idr_pre_get(struct idr *idp, gfp_t gfp_mask); +int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); +void __idr_remove_all(struct idr *idp); + +/** + * idr_pre_get - reserve resources for idr allocation + * @idp: idr handle + * @gfp_mask: memory allocation flags + * + * Part of old alloc interface. 
This is going away. Use + * idr_preload[_end]() and idr_alloc() instead. + */ +static inline int __deprecated idr_pre_get(struct idr *idp, gfp_t gfp_mask) +{ + return __idr_pre_get(idp, gfp_mask); +} + +/** + * idr_get_new_above - allocate new idr entry above or equal to a start id + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @starting_id: id to start search at + * @id: pointer to the allocated handle + * + * Part of old alloc interface. This is going away. Use + * idr_preload[_end]() and idr_alloc() instead. + */ +static inline int __deprecated idr_get_new_above(struct idr *idp, void *ptr, + int starting_id, int *id) +{ + return __idr_get_new_above(idp, ptr, starting_id, id); +} + +/** + * idr_get_new - allocate new idr entry + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @id: pointer to the allocated handle + * + * Part of old alloc interface. This is going away. Use + * idr_preload[_end]() and idr_alloc() instead. + */ +static inline int __deprecated idr_get_new(struct idr *idp, void *ptr, int *id) +{ + return __idr_get_new_above(idp, ptr, 0, id); +} /** * idr_remove_all - remove all ids from the given idr tree diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 89b4614a4722..f563907ed776 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -33,7 +33,15 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) static inline int arp_hdr_len(struct net_device *dev) { - /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + /* ARP header, device address and 2 IP addresses */ + return sizeof(struct arphdr) + dev->addr_len + sizeof(u32) * 2; +#endif + default: + /* ARP header, plus 2 device addresses, plus 2 IP addresses. 
*/ + return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + } } #endif /* _LINUX_IF_ARP_H */ diff --git a/include/linux/if_team.h b/include/linux/if_team.h index cfd21e3d5506..4474557904f6 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -112,6 +112,10 @@ struct team_mode_ops { void (*port_disabled)(struct team *team, struct team_port *port); }; +extern int team_modeop_port_enter(struct team *team, struct team_port *port); +extern void team_modeop_port_change_dev_addr(struct team *team, + struct team_port *port); + enum team_option_type { TEAM_OPTION_TYPE_U32, TEAM_OPTION_TYPE_STRING, @@ -236,7 +240,26 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team, return NULL; } -extern int team_port_set_team_dev_addr(struct team_port *port); +static inline struct team_port * +team_get_first_port_txable_rcu(struct team *team, struct team_port *port) +{ + struct team_port *cur; + + if (likely(team_port_txable(port))) + return port; + cur = port; /* @port cannot transmit: scan the ports after it, then wrap around from the list head */ + list_for_each_entry_continue_rcu(cur, &team->port_list, list) + if (team_port_txable(cur)) /* test the candidate, not @port */ + return cur; + list_for_each_entry_rcu(cur, &team->port_list, list) { + if (cur == port) + break; + if (team_port_txable(cur)) /* test the candidate, not @port */ + return cur; + } + return NULL; +} + extern int team_options_register(struct team *team, const struct team_option *option, size_t option_count); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 218a3b686d90..52bd03b38962 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -86,15 +86,15 @@ static inline int is_vlan_dev(struct net_device *dev) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, - u16 vlan_id); + __be16 vlan_proto, u16 vlan_id); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern bool vlan_do_receive(struct sk_buff **skb); 
extern struct sk_buff *vlan_untag(struct sk_buff *skb); -extern int vlan_vid_add(struct net_device *dev, unsigned short vid); -extern void vlan_vid_del(struct net_device *dev, unsigned short vid); +extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); +extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); extern int vlan_vids_add_by_dev(struct net_device *dev, const struct net_device *by_dev); @@ -104,7 +104,8 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, extern bool vlan_uses_dev(const struct net_device *dev); #else static inline struct net_device * -__vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id) +__vlan_find_dev_deep(struct net_device *real_dev, + __be16 vlan_proto, u16 vlan_id) { return NULL; } @@ -131,12 +132,12 @@ static inline struct sk_buff *vlan_untag(struct sk_buff *skb) return skb; } -static inline int vlan_vid_add(struct net_device *dev, unsigned short vid) +static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { return 0; } -static inline void vlan_vid_del(struct net_device *dev, unsigned short vid) +static inline void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { } @@ -157,9 +158,20 @@ static inline bool vlan_uses_dev(const struct net_device *dev) } #endif +static inline bool vlan_hw_offload_capable(netdev_features_t features, + __be16 proto) +{ + if (proto == htons(ETH_P_8021Q) && features & NETIF_F_HW_VLAN_CTAG_TX) + return true; + if (proto == htons(ETH_P_8021AD) && features & NETIF_F_HW_VLAN_STAG_TX) + return true; + return false; +} + /** * vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload @@ -170,7 +182,8 @@ static inline bool vlan_uses_dev(const struct net_device *dev) * * Does not change skb->protocol so this function can be used during receive. 
*/ -static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { struct vlan_ethhdr *veth; @@ -185,7 +198,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) skb->mac_header -= VLAN_HLEN; /* first, the ethernet type */ - veth->h_vlan_proto = htons(ETH_P_8021Q); + veth->h_vlan_proto = vlan_proto; /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); @@ -204,24 +217,28 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. */ -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { - skb = vlan_insert_tag(skb, vlan_tci); + skb = vlan_insert_tag(skb, vlan_proto, vlan_tci); if (skb) - skb->protocol = htons(ETH_P_8021Q); + skb->protocol = vlan_proto; return skb; } /** * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Puts the VLAN TCI in @skb->vlan_tci and lets the device do the rest */ static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { + skb->vlan_proto = vlan_proto; skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; return skb; } @@ -236,12 +253,13 @@ static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, * Assumes skb->dev is the target that will xmit this frame. * Returns a VLAN tagged skb. 
*/ -static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) { - if (skb->dev->features & NETIF_F_HW_VLAN_TX) { - return __vlan_hwaccel_put_tag(skb, vlan_tci); + if (vlan_hw_offload_capable(skb->dev->features, vlan_proto)) { + return __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); } else { - return __vlan_put_tag(skb, vlan_tci); + return __vlan_put_tag(skb, vlan_proto, vlan_tci); } } @@ -256,9 +274,9 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; - if (veth->h_vlan_proto != htons(ETH_P_8021Q)) { + if (veth->h_vlan_proto != htons(ETH_P_8021Q) && + veth->h_vlan_proto != htons(ETH_P_8021AD)) return -EINVAL; - } *vlan_tci = ntohs(veth->h_vlan_TCI); return 0; @@ -294,7 +312,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, */ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { - if (skb->dev->features & NETIF_F_HW_VLAN_TX) { + if (skb->dev->features & NETIF_F_HW_VLAN_CTAG_TX) { return __vlan_hwaccel_get_tag(skb, vlan_tci); } else { return __vlan_get_tag(skb, vlan_tci); @@ -339,7 +357,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= 1536) { + if (ntohs(proto) >= ETH_P_802_3_MIN) { skb->protocol = proto; return; } diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 1f86a97ab2e2..8bd12be0b02f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -227,14 +227,17 @@ struct st_sensor_data { }; #ifdef CONFIG_IIO_BUFFER +irqreturn_t st_sensors_trigger_handler(int irq, void *p); + +int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf); +#endif + +#ifdef CONFIG_IIO_TRIGGER int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const 
struct iio_trigger_ops *trigger_ops); void st_sensors_deallocate_trigger(struct iio_dev *indio_dev); -irqreturn_t st_sensors_trigger_handler(int irq, void *p); - -int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf); #else static inline int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index f5dbce50466e..66017028dcb3 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -37,7 +37,7 @@ void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK bool irq_work_needs_cpu(void); #else -static bool irq_work_needs_cpu(void) { return false; } +static inline bool irq_work_needs_cpu(void) { return false; } #endif #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 80d36874689b..79fdd80a42d4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -390,7 +390,6 @@ extern struct pid *session_of_pgrp(struct pid *pgrp); unsigned long int_sqrt(unsigned long); extern void bust_spinlocks(int yes); -extern void wake_up_klogd(void); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; extern int panic_on_oops; diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d2e6927bbaae..d78d28a733b1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -200,6 +200,8 @@ extern size_t vmcoreinfo_max_size; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int crash_shrink_memory(unsigned long new_size); diff --git a/include/linux/ktime.h 
b/include/linux/ktime.h index e83512f63df5..bbca12804d12 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -330,6 +330,24 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); +/** + * ktime_to_timespec_cond - convert a ktime_t variable to timespec + * format only if the variable contains data + * @kt: the ktime_t variable to convert + * @ts: the timespec variable to store the result in + * + * Returns true if there was a successful conversion, false if kt was 0. + */ +static inline bool ktime_to_timespec_cond(const ktime_t kt, struct timespec *ts) +{ + if (kt.tv64) { + *ts = ktime_to_timespec(kt); + return true; + } else { + return false; + } +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe09d77..c13958251927 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -518,7 +518,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa); + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index fa7cc7244cbd..b0bcce0ddc95 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -71,6 +71,7 @@ struct gfn_to_hva_cache { u64 generation; gpa_t gpa; unsigned long hva; + unsigned long len; struct kvm_memory_slot *memslot; }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 
91c9d109e5f1..eae7a053dc51 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -398,6 +398,7 @@ enum { ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ + ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/list.h b/include/linux/list.h index d991cc147c98..6a1f8df9144b 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -667,7 +667,9 @@ static inline void hlist_move_list(struct hlist_head *old, pos = n) #define hlist_entry_safe(ptr, type, member) \ - (ptr) ? hlist_entry(ptr, type, member) : NULL + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? hlist_entry(____ptr, type, member) : NULL; \ + }) /** * hlist_for_each_entry - iterate over list of given type diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 5b18ecde69b5..1aa4f13cdfa6 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -106,6 +106,29 @@ enum max77693_muic_reg { MAX77693_MUIC_REG_END, }; +/* MAX77693 INTMASK1~2 Register */ +#define INTMASK1_ADC1K_SHIFT 3 +#define INTMASK1_ADCERR_SHIFT 2 +#define INTMASK1_ADCLOW_SHIFT 1 +#define INTMASK1_ADC_SHIFT 0 +#define INTMASK1_ADC1K_MASK (1 << INTMASK1_ADC1K_SHIFT) +#define INTMASK1_ADCERR_MASK (1 << INTMASK1_ADCERR_SHIFT) +#define INTMASK1_ADCLOW_MASK (1 << INTMASK1_ADCLOW_SHIFT) +#define INTMASK1_ADC_MASK (1 << INTMASK1_ADC_SHIFT) + +#define INTMASK2_VIDRM_SHIFT 5 +#define INTMASK2_VBVOLT_SHIFT 4 +#define INTMASK2_DXOVP_SHIFT 3 +#define INTMASK2_DCDTMR_SHIFT 2 +#define INTMASK2_CHGDETRUN_SHIFT 1 +#define INTMASK2_CHGTYP_SHIFT 0 +#define INTMASK2_VIDRM_MASK (1 << INTMASK2_VIDRM_SHIFT) +#define INTMASK2_VBVOLT_MASK (1 << INTMASK2_VBVOLT_SHIFT) +#define INTMASK2_DXOVP_MASK (1 << INTMASK2_DXOVP_SHIFT) +#define 
INTMASK2_DCDTMR_MASK (1 << INTMASK2_DCDTMR_SHIFT) +#define INTMASK2_CHGDETRUN_MASK (1 << INTMASK2_CHGDETRUN_SHIFT) +#define INTMASK2_CHGTYP_MASK (1 << INTMASK2_CHGTYP_SHIFT) + /* MAX77693 MUIC - STATUS1~3 Register */ #define STATUS1_ADC_SHIFT (0) #define STATUS1_ADCLOW_SHIFT (5) diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index a4d13d7cd001..3bbda22721ea 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -221,6 +221,7 @@ struct palmas_clk_platform_data { }; struct palmas_platform_data { + int irq_flags; int gpio_base; /* bit value to be loaded to the POWER_CTRL register */ diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h index aaceab402ec5..6d309032dc0d 100644 --- a/include/linux/mfd/tps65912.h +++ b/include/linux/mfd/tps65912.h @@ -323,5 +323,6 @@ int tps65912_device_init(struct tps65912 *tps65912); void tps65912_device_exit(struct tps65912 *tps65912); int tps65912_irq_init(struct tps65912 *tps65912, int irq, struct tps65912_platform_data *pdata); +int tps65912_irq_exit(struct tps65912 *tps65912); #endif /* __LINUX_MFD_TPS65912_H */ diff --git a/include/linux/mfd/wm831x/auxadc.h b/include/linux/mfd/wm831x/auxadc.h index b132067e9e99..867aa23f9370 100644 --- a/include/linux/mfd/wm831x/auxadc.h +++ b/include/linux/mfd/wm831x/auxadc.h @@ -15,6 +15,8 @@ #ifndef __MFD_WM831X_AUXADC_H__ #define __MFD_WM831X_AUXADC_H__ +struct wm831x; + /* * R16429 (0x402D) - AuxADC Data */ diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index 4a3b83a77614..76c22648436f 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -20,6 +20,7 @@ #include <linux/irqdomain.h> #include <linux/list.h> #include <linux/regmap.h> +#include <linux/mfd/wm831x/auxadc.h> /* * Register values. 
@@ -355,7 +356,6 @@ enum wm831x_parent { }; struct wm831x; -enum wm831x_auxadc; typedef int (*wm831x_auxadc_read_fn)(struct wm831x *wm831x, enum wm831x_auxadc input); diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 9dbb41a4e250..8752dbbc6135 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -19,6 +19,7 @@ #define PHY_ID_KSZ9021 0x00221610 #define PHY_ID_KS8737 0x00221720 #define PHY_ID_KSZ8021 0x00221555 +#define PHY_ID_KSZ8031 0x00221556 #define PHY_ID_KSZ8041 0x00221510 #define PHY_ID_KSZ8051 0x00221550 /* same id: ks8001 Rev. A/B, and ks8721 Rev 3. */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 811f91cf5e8c..1bc5a750b330 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -140,6 +140,7 @@ enum { MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, + MLX4_DEV_CAP_FLAG_SET_ETH_SCHED = 1LL << 53, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7acc9dc73c9f..e2091b88d24c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -87,7 +87,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. 
*/ -#define VM_POPULATE 0x00001000 #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 /* Memory mapped I/O or similar */ @@ -1612,6 +1611,8 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); + struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags, diff --git a/include/linux/mman.h b/include/linux/mman.h index 61c7a87e5d2b..9aa863da287f 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -79,8 +79,6 @@ calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | - ((flags & MAP_LOCKED) ? (VM_LOCKED | VM_POPULATE) : 0) | - (((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE) ? - VM_POPULATE : 0); + _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); } #endif /* _LINUX_MMAN_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ede274957e05..c74092eebf5c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -527,7 +527,7 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } -static inline unsigned zone_end_pfn(const struct zone *zone) +static inline unsigned long zone_end_pfn(const struct zone *zone) { return zone->zone_start_pfn + zone->spanned_pages; } diff --git a/include/linux/mount.h b/include/linux/mount.h index d7029f4a191a..73005f9957ea 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,6 +47,8 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +#define MNT_LOCK_READONLY 0x400000 + struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 
7ccb3c59ed60..ef52d9c91459 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -187,6 +187,13 @@ typedef enum { * This happens with the Renesas AG-AND chips, possibly others. */ #define BBT_AUTO_REFRESH 0x00000080 +/* + * Chip requires ready check on read (for auto-incremented sequential read). + * True only for small page devices; large page devices do not support + * autoincrement. + */ +#define NAND_NEED_READRDY 0x00000100 + /* Chip does not allow subpage writes */ #define NAND_NO_SUBPAGE_WRITE 0x00000200 diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 49258e0ed1c6..141d395bbb5f 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -19,7 +19,6 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; - struct platform_device *shared_smi; /* * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default * limit of 9KiB will be used. diff --git a/include/linux/mxsfb.h b/include/linux/mxsfb.h index f14943d55315..f80af8674342 100644 --- a/include/linux/mxsfb.h +++ b/include/linux/mxsfb.h @@ -24,8 +24,8 @@ #define STMLCDIF_18BIT 2 /** pixel data bus to the display is of 18 bit width */ #define STMLCDIF_24BIT 3 /** pixel data bus to the display is of 24 bit width */ -#define FB_SYNC_DATA_ENABLE_HIGH_ACT (1 << 6) -#define FB_SYNC_DOTCLK_FAILING_ACT (1 << 7) /* failing/negtive edge sampling */ +#define MXSFB_SYNC_DATA_ENABLE_HIGH_ACT (1 << 6) +#define MXSFB_SYNC_DOTCLK_FAILING_ACT (1 << 7) /* failing/negtive edge sampling */ struct mxsfb_platform_data { struct fb_videomode *mode_list; @@ -44,6 +44,9 @@ struct mxsfb_platform_data { * allocated. If specified,fb_size must also be specified. * fb_phys must be unused by Linux. 
*/ + u32 sync; /* sync mask, contains MXSFB specifics not + * carried in fb_info->var.sync + */ }; #endif /* __LINUX_MXSFB_H */ diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 3dd39340430e..cbaa027ef5a7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -22,9 +22,12 @@ enum { NETIF_F_IPV6_CSUM_BIT, /* Can checksum TCP/UDP over IPV6 */ NETIF_F_HIGHDMA_BIT, /* Can DMA to high memory. */ NETIF_F_FRAGLIST_BIT, /* Scatter/gather IO. */ - NETIF_F_HW_VLAN_TX_BIT, /* Transmit VLAN hw acceleration */ - NETIF_F_HW_VLAN_RX_BIT, /* Receive VLAN hw acceleration */ - NETIF_F_HW_VLAN_FILTER_BIT, /* Receive filtering on VLAN */ + NETIF_F_HW_VLAN_CTAG_TX_BIT, /* Transmit VLAN CTAG HW acceleration */ + NETIF_F_HW_VLAN_CTAG_RX_BIT, /* Receive VLAN CTAG HW acceleration */ + NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */ + NETIF_F_HW_VLAN_STAG_TX_BIT, /* Transmit VLAN STAG HW acceleration */ + NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */ + NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. */ NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ @@ -42,9 +45,9 @@ enum { NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ - /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ - NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ - = NETIF_F_GSO_LAST, + NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... 
UDP TUNNEL with TSO */ + /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ + NETIF_F_GSO_UDP_TUNNEL_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ @@ -80,9 +83,12 @@ enum { #define NETIF_F_GSO_ROBUST __NETIF_F(GSO_ROBUST) #define NETIF_F_HIGHDMA __NETIF_F(HIGHDMA) #define NETIF_F_HW_CSUM __NETIF_F(HW_CSUM) -#define NETIF_F_HW_VLAN_FILTER __NETIF_F(HW_VLAN_FILTER) -#define NETIF_F_HW_VLAN_RX __NETIF_F(HW_VLAN_RX) -#define NETIF_F_HW_VLAN_TX __NETIF_F(HW_VLAN_TX) +#define NETIF_F_HW_VLAN_CTAG_FILTER __NETIF_F(HW_VLAN_CTAG_FILTER) +#define NETIF_F_HW_VLAN_CTAG_RX __NETIF_F(HW_VLAN_CTAG_RX) +#define NETIF_F_HW_VLAN_CTAG_TX __NETIF_F(HW_VLAN_CTAG_TX) +#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) +#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) +#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) #define NETIF_F_LLTX __NETIF_F(LLTX) @@ -102,7 +108,8 @@ enum { #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) -#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) +#define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) +#define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3d00fa4b314..f8898a435dc5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -144,8 +144,6 @@ static inline bool dev_xmit_complete(int rc) # else # define LL_MAX_HEADER 96 # endif -#elif IS_ENABLED(CONFIG_TR) -# define LL_MAX_HEADER 48 #else # define LL_MAX_HEADER 32 #endif @@ -210,9 +208,10 @@ struct netdev_hw_addr { #define NETDEV_HW_ADDR_T_SLAVE 3 #define NETDEV_HW_ADDR_T_UNICAST 4 #define NETDEV_HW_ADDR_T_MULTICAST 5 - bool synced; bool global_use; + int sync_cnt; int 
refcount; + int synced; struct rcu_head rcu_head; }; @@ -785,13 +784,13 @@ struct netdev_fcoe_hbainfo { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * - * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); - * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) - * this function is called when a VLAN id is registered. + * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16t vid); + * If device support VLAN filtering this function is called when a + * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); - * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) - * this function is called when a VLAN id is unregistered. + * If device support VLAN filtering this function is called when a + * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); * @@ -895,7 +894,7 @@ struct netdev_fcoe_hbainfo { * * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - * struct net_device *dev) + * struct net_device *dev, u32 filter_mask) * * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); * Called to change device carrier. 
Soft-devices (like dummy, team, etc) @@ -935,9 +934,9 @@ struct net_device_ops { struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); + __be16 proto, u16 vid); int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); + __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); int (*ndo_netpoll_setup)(struct net_device *dev, @@ -1073,6 +1072,8 @@ struct net_device { struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; + struct list_head upper_dev_list; /* List of upper devices */ + /* currently active device features */ netdev_features_t features; @@ -1145,6 +1146,13 @@ struct net_device { spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; /* Unicast mac addresses */ struct netdev_hw_addr_list mc; /* Multicast mac addresses */ + struct netdev_hw_addr_list dev_addrs; /* list of device + * hw addresses + */ +#ifdef CONFIG_SYSFS + struct kset *queues_kset; +#endif + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1177,21 +1185,11 @@ struct net_device { * avoid dirtying this cache line. */ - struct list_head upper_dev_list; /* List of upper devices */ - /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are unicast) */ - struct netdev_hw_addr_list dev_addrs; /* list of device - hw addresses */ - - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - -#ifdef CONFIG_SYSFS - struct kset *queues_kset; -#endif #ifdef CONFIG_RPS struct netdev_rx_queue *_rx; @@ -1202,18 +1200,14 @@ struct net_device { /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; -#ifdef CONFIG_RFS_ACCEL - /* CPU reverse-mapping for RX completion interrupts, indexed - * by RX queue number. Assigned by driver. 
This must only be - * set if the ndo_rx_flow_steer operation is defined. */ - struct cpu_rmap *rx_cpu_rmap; -#endif #endif rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + /* * Cache lines mostly used on transmit path @@ -1235,6 +1229,12 @@ struct net_device { #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_RFS_ACCEL + /* CPU reverse-mapping for RX completion interrupts, indexed + * by RX queue number. Assigned by driver. This must only be + * set if the ndo_rx_flow_steer operation is defined. */ + struct cpu_rmap *rx_cpu_rmap; +#endif /* These may be needed for future network-power-down code. */ @@ -1475,6 +1475,11 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) +/* Default NAPI poll() weight + * Device drivers are strongly advised to not use bigger value + */ +#define NAPI_POLL_WEIGHT 64 + /** * netif_napi_add - initialize a napi context * @dev: network device @@ -1612,6 +1617,9 @@ extern seqcount_t devnet_rename_seq; /* Device rename seq */ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_in_bond_rcu(bond, slave) \ + for_each_netdev_rcu(&init_net, slave) \ + if (netdev_master_upper_dev_get_rcu(slave) == bond) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) static inline struct net_device *next_net_device(struct net_device *dev) @@ -1684,7 +1692,6 @@ extern int netdev_refcnt_read(const struct net_device *dev); extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int init_dummy_netdev(struct net_device *dev); -extern void netdev_resync_ops(struct net_device *dev); extern struct net_device 
*dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); @@ -2621,6 +2628,7 @@ extern int dev_uc_add(struct net_device *dev, const unsigned char *addr); extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); extern int dev_uc_del(struct net_device *dev, const unsigned char *addr); extern int dev_uc_sync(struct net_device *to, struct net_device *from); +extern int dev_uc_sync_multiple(struct net_device *to, struct net_device *from); extern void dev_uc_unsync(struct net_device *to, struct net_device *from); extern void dev_uc_flush(struct net_device *dev); extern void dev_uc_init(struct net_device *dev); @@ -2632,6 +2640,7 @@ extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); extern int dev_mc_del(struct net_device *dev, const unsigned char *addr); extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); extern int dev_mc_sync(struct net_device *to, struct net_device *from); +extern int dev_mc_sync_multiple(struct net_device *to, struct net_device *from); extern void dev_mc_unsync(struct net_device *to, struct net_device *from); extern void dev_mc_flush(struct net_device *dev); extern void dev_mc_init(struct net_device *dev); @@ -2678,6 +2687,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } +__be16 skb_network_protocol(struct sk_buff *skb); + +static inline bool can_checksum_protocol(netdev_features_t features, + __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_V4_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_V6_CSUM) && + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); +} #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); @@ -2756,6 +2778,11 @@ static inline void netif_set_gso_max_size(struct 
net_device *dev, dev->gso_max_size = size; } +static inline bool netif_is_bond_master(struct net_device *dev) +{ + return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; +} + static inline bool netif_is_bond_slave(struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ee142846f56a..0060fde3160e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -289,11 +289,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #endif } -#ifdef CONFIG_PROC_FS -#include <linux/proc_fs.h> -extern struct proc_dir_entry *proc_net_netfilter; -#endif - #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index 01d25e6fc792..0214c4c146fa 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -291,6 +291,7 @@ ip_set_hash_destroy(struct ip_set *set) #define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist) #define type_pf_data_next TOKEN(TYPE, PF, _data_next) #define type_pf_data_flags TOKEN(TYPE, PF, _data_flags) +#define type_pf_data_reset_flags TOKEN(TYPE, PF, _data_reset_flags) #ifdef IP_SET_HASH_WITH_NETS #define type_pf_data_match TOKEN(TYPE, PF, _data_match) #else @@ -385,9 +386,9 @@ type_pf_resize(struct ip_set *set, bool retried) struct ip_set_hash *h = set->data; struct htable *t, *orig = h->table; u8 htable_bits = orig->htable_bits; - const struct type_pf_elem *data; + struct type_pf_elem *data; struct hbucket *n, *m; - u32 i, j; + u32 i, j, flags = 0; int ret; retry: @@ -412,9 +413,16 @@ retry: n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + 
type_pf_data_reset_flags(data, &flags); +#endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_add(m, data, AHASH_MAX(h), 0); + ret = type_pf_elem_add(m, data, AHASH_MAX(h), flags); if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + type_pf_data_flags(data, flags); +#endif read_unlock_bh(&set->lock); ahash_destroy(t); if (ret == -EAGAIN) @@ -836,9 +844,9 @@ type_pf_tresize(struct ip_set *set, bool retried) struct ip_set_hash *h = set->data; struct htable *t, *orig = h->table; u8 htable_bits = orig->htable_bits; - const struct type_pf_elem *data; + struct type_pf_elem *data; struct hbucket *n, *m; - u32 i, j; + u32 i, j, flags = 0; int ret; /* Try to cleanup once */ @@ -873,10 +881,17 @@ retry: n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { data = ahash_tdata(n, j); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + type_pf_data_reset_flags(data, &flags); +#endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0, - ip_set_timeout_get(type_pf_data_timeout(data))); + ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), flags, + ip_set_timeout_get(type_pf_data_timeout(data))); if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + type_pf_data_flags(data, flags); +#endif read_unlock_bh(&set->lock); ahash_destroy(t); if (ret == -EAGAIN) @@ -1187,6 +1202,7 @@ type_pf_gc_init(struct ip_set *set) #undef type_pf_data_tlist #undef type_pf_data_next #undef type_pf_data_flags +#undef type_pf_data_reset_flags #undef type_pf_data_match #undef type_pf_elem diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ecbb8e495912..cadb7402d7a7 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -29,10 +29,13 @@ extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(struct net *net, unsigned int group); -extern int 
nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, - int echo, gfp_t flags); -extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); +extern struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, + u32 dst_portid, gfp_t gfp_mask); +extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, + unsigned int group, int echo, gfp_t flags); +extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); +extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, + u32 portid, int flags); extern void nfnl_lock(__u8 subsys_id); extern void nfnl_unlock(__u8 subsys_id); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e0f746b7b95c..6358da5eeee8 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -15,11 +15,18 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) return (struct nlmsghdr *)skb->data; } +enum netlink_skb_flags { + NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ + NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ + NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ +}; + struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ __u32 portid; __u32 dst_group; - struct sock *ssk; + __u32 flags; + struct sock *sk; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) @@ -57,6 +64,8 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group) extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); +extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, + u32 dst_portid, gfp_t gfp_mask); extern int netlink_unicast(struct sock *ssk, struct 
sk_buff *skb, __u32 portid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c25cccaa555a..4fa3b0b9b071 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -137,6 +137,34 @@ enum { NVME_LBAF_RP_DEGRADED = 3, }; +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __u8 rsvd192[320]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + struct nvme_lba_range_type { __u8 type; __u8 attributes; diff --git a/include/linux/of_net.h b/include/linux/of_net.h index f47464188710..61bf53b02779 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -11,6 +11,16 @@ #include <linux/of.h> extern const int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); +#else +static inline const int of_get_phy_mode(struct device_node *np) +{ + return -ENODEV; +} + +static inline const void *of_get_mac_address(struct device_node *np) +{ + return NULL; +} #endif #endif /* __LINUX_OF_NET_H */ diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index d42e174bd0c8..e6b240b6196c 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -19,435 +19,6 @@ #ifndef _LINUX_OPENVSWITCH_H #define _LINUX_OPENVSWITCH_H 1 -#include <linux/types.h> - -/** - * struct ovs_header - header for OVS Generic Netlink messages. 
- * @dp_ifindex: ifindex of local port for datapath (0 to make a request not - * specific to a datapath). - * - * Attributes following the header are specific to a particular OVS Generic - * Netlink family, but all of the OVS families use this header. - */ - -struct ovs_header { - int dp_ifindex; -}; - -/* Datapaths. */ - -#define OVS_DATAPATH_FAMILY "ovs_datapath" -#define OVS_DATAPATH_MCGROUP "ovs_datapath" -#define OVS_DATAPATH_VERSION 0x1 - -enum ovs_datapath_cmd { - OVS_DP_CMD_UNSPEC, - OVS_DP_CMD_NEW, - OVS_DP_CMD_DEL, - OVS_DP_CMD_GET, - OVS_DP_CMD_SET -}; - -/** - * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. - * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local - * port". This is the name of the network device whose dp_ifindex is given in - * the &struct ovs_header. Always present in notifications. Required in - * %OVS_DP_NEW requests. May be used as an alternative to specifying - * dp_ifindex in other requests (with a dp_ifindex of 0). - * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially - * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on - * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should - * not be sent. - * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the - * datapath. Always present in notifications. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_DP_* commands. - */ -enum ovs_datapath_attr { - OVS_DP_ATTR_UNSPEC, - OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ - OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ - OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ - __OVS_DP_ATTR_MAX -}; - -#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) - -struct ovs_dp_stats { - __u64 n_hit; /* Number of flow table matches. */ - __u64 n_missed; /* Number of flow table misses. */ - __u64 n_lost; /* Number of misses not sent to userspace. 
*/ - __u64 n_flows; /* Number of flows present */ -}; - -struct ovs_vport_stats { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ -}; - -/* Fixed logical ports. */ -#define OVSP_LOCAL ((__u16)0) - -/* Packet transfer. */ - -#define OVS_PACKET_FAMILY "ovs_packet" -#define OVS_PACKET_VERSION 0x1 - -enum ovs_packet_cmd { - OVS_PACKET_CMD_UNSPEC, - - /* Kernel-to-user notifications. */ - OVS_PACKET_CMD_MISS, /* Flow table miss. */ - OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ - - /* Userspace commands. */ - OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ -}; - -/** - * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. - * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire - * packet as received, from the start of the Ethernet header onward. For - * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by - * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is - * the flow key extracted from the packet as originally received. - * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key - * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows - * userspace to adapt its flow setup strategy by comparing its notion of the - * flow key against the kernel's. - * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used - * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. - * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION - * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an - * %OVS_USERSPACE_ATTR_USERDATA attribute. 
- * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_PACKET_* commands. - */ -enum ovs_packet_attr { - OVS_PACKET_ATTR_UNSPEC, - OVS_PACKET_ATTR_PACKET, /* Packet data. */ - OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ - OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_PACKET_ATTR_USERDATA, /* u64 OVS_ACTION_ATTR_USERSPACE arg. */ - __OVS_PACKET_ATTR_MAX -}; - -#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) - -/* Virtual ports. */ - -#define OVS_VPORT_FAMILY "ovs_vport" -#define OVS_VPORT_MCGROUP "ovs_vport" -#define OVS_VPORT_VERSION 0x1 - -enum ovs_vport_cmd { - OVS_VPORT_CMD_UNSPEC, - OVS_VPORT_CMD_NEW, - OVS_VPORT_CMD_DEL, - OVS_VPORT_CMD_GET, - OVS_VPORT_CMD_SET -}; - -enum ovs_vport_type { - OVS_VPORT_TYPE_UNSPEC, - OVS_VPORT_TYPE_NETDEV, /* network device */ - OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ - __OVS_VPORT_TYPE_MAX -}; - -#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) - -/** - * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. - * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. - * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type - * of vport. - * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device - * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes - * plus a null terminator. - * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on - * this port. A value of zero indicates that upcalls should not be sent. - * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for - * packets sent or received through the vport. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_VPORT_* commands. 
- * - * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and - * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is - * optional; if not specified a free port number is automatically selected. - * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type - * of vport. - * and other attributes are ignored. - * - * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to - * look up the vport to operate on; otherwise dp_idx from the &struct - * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. - */ -enum ovs_vport_attr { - OVS_VPORT_ATTR_UNSPEC, - OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ - OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ - OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ - OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ - OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ - OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ - __OVS_VPORT_ATTR_MAX -}; - -#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) - -/* Flows. */ - -#define OVS_FLOW_FAMILY "ovs_flow" -#define OVS_FLOW_MCGROUP "ovs_flow" -#define OVS_FLOW_VERSION 0x1 - -enum ovs_flow_cmd { - OVS_FLOW_CMD_UNSPEC, - OVS_FLOW_CMD_NEW, - OVS_FLOW_CMD_DEL, - OVS_FLOW_CMD_GET, - OVS_FLOW_CMD_SET -}; - -struct ovs_flow_stats { - __u64 n_packets; /* Number of matched packets. */ - __u64 n_bytes; /* Number of matched bytes. */ -}; - -enum ovs_key_attr { - OVS_KEY_ATTR_UNSPEC, - OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. 
*/ - OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ - OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ - OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ - OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ - OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ - OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ - OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ - OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ - OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ - OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ - OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ - OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ - OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ - OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ - __OVS_KEY_ATTR_MAX -}; - -#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) - -/** - * enum ovs_frag_type - IPv4 and IPv6 fragment type - * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. - * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. - * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. - * - * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct - * ovs_key_ipv6. - */ -enum ovs_frag_type { - OVS_FRAG_TYPE_NONE, - OVS_FRAG_TYPE_FIRST, - OVS_FRAG_TYPE_LATER, - __OVS_FRAG_TYPE_MAX -}; - -#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) - -struct ovs_key_ethernet { - __u8 eth_src[6]; - __u8 eth_dst[6]; -}; - -struct ovs_key_ipv4 { - __be32 ipv4_src; - __be32 ipv4_dst; - __u8 ipv4_proto; - __u8 ipv4_tos; - __u8 ipv4_ttl; - __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ -}; - -struct ovs_key_ipv6 { - __be32 ipv6_src[4]; - __be32 ipv6_dst[4]; - __be32 ipv6_label; /* 20-bits in least-significant bits. */ - __u8 ipv6_proto; - __u8 ipv6_tclass; - __u8 ipv6_hlimit; - __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. 
*/ -}; - -struct ovs_key_tcp { - __be16 tcp_src; - __be16 tcp_dst; -}; - -struct ovs_key_udp { - __be16 udp_src; - __be16 udp_dst; -}; - -struct ovs_key_icmp { - __u8 icmp_type; - __u8 icmp_code; -}; - -struct ovs_key_icmpv6 { - __u8 icmpv6_type; - __u8 icmpv6_code; -}; - -struct ovs_key_arp { - __be32 arp_sip; - __be32 arp_tip; - __be16 arp_op; - __u8 arp_sha[6]; - __u8 arp_tha[6]; -}; - -struct ovs_key_nd { - __u32 nd_target[4]; - __u8 nd_sll[6]; - __u8 nd_tll[6]; -}; - -/** - * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. - * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow - * key. Always present in notifications. Required for all requests (except - * dumps). - * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying - * the actions to take for packets that match the key. Always present in - * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for - * %OVS_FLOW_CMD_SET requests. - * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this - * flow. Present in notifications if the stats would be nonzero. Ignored in - * requests. - * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the - * TCP flags seen on packets in this flow. Only present in notifications for - * TCP flows, and only if it would be nonzero. Ignored in requests. - * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on - * the system monotonic clock, at which a packet was last processed for this - * flow. Only present in notifications if a packet has been processed for this - * flow. Ignored in requests. - * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the - * last-used time, accumulated TCP flags, and statistics for this flow. - * Otherwise ignored in requests. Never present in notifications. - * - * These attributes follow the &struct ovs_header within the Generic Netlink - * payload for %OVS_FLOW_* commands. 
- */ -enum ovs_flow_attr { - OVS_FLOW_ATTR_UNSPEC, - OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ - OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ - OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ - OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ - OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ - __OVS_FLOW_ATTR_MAX -}; - -#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) - -/** - * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. - * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with - * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of - * %UINT32_MAX samples all packets and intermediate values sample intermediate - * fractions of packets. - * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. - * Actions are passed as nested attributes. - * - * Executes the specified actions with the given probability on a per-packet - * basis. - */ -enum ovs_sample_attr { - OVS_SAMPLE_ATTR_UNSPEC, - OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ - OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ - __OVS_SAMPLE_ATTR_MAX, -}; - -#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) - -/** - * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. - * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION - * message should be sent. Required. - * @OVS_USERSPACE_ATTR_USERDATA: If present, its u64 argument is copied to the - * %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA, - */ -enum ovs_userspace_attr { - OVS_USERSPACE_ATTR_UNSPEC, - OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ - OVS_USERSPACE_ATTR_USERDATA, /* u64 optional user-specified cookie. 
*/ - __OVS_USERSPACE_ATTR_MAX -}; - -#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) - -/** - * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. - * @vlan_tpid: Tag protocol identifier (TPID) to push. - * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set - * (but it will not be set in the 802.1Q header that is pushed). - * - * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID - * values are those that the kernel module also parses as 802.1Q headers, to - * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN - * from having surprising results. - */ -struct ovs_action_push_vlan { - __be16 vlan_tpid; /* 802.1Q TPID. */ - __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ -}; - -/** - * enum ovs_action_attr - Action types. - * - * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. - * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested - * %OVS_USERSPACE_ATTR_* attributes. - * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The - * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its - * value. - * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the - * packet. - * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. - * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in - * the nested %OVS_SAMPLE_ATTR_* attributes. - * - * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all - * fields within a header are modifiable, e.g. the IPv4 protocol and fragment - * type may not be changed. - */ - -enum ovs_action_attr { - OVS_ACTION_ATTR_UNSPEC, - OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ - OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ - OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ - OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ - OVS_ACTION_ATTR_POP_VLAN, /* No argument. 
*/ - OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ - __OVS_ACTION_ATTR_MAX -}; - -#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) +#include <uapi/linux/openvswitch.h> #endif /* _LINUX_OPENVSWITCH_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 2461033a7987..710067f3618c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -916,6 +916,7 @@ void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); +void __iomem __must_check *pci_platform_rom(struct pci_dev *pdev, size_t *size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e47ee462c2f2..1d795df6f4cf 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -799,6 +799,12 @@ static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +extern void perf_restore_debug_store(void); +#else +static inline void perf_restore_debug_store(void) { } +#endif + #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* diff --git a/include/linux/phy.h b/include/linux/phy.h index 33999adbf8c8..9e11039dd7a3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -455,6 +455,14 @@ struct phy_driver { */ void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type); + /* Some devices (e.g. qnap TS-119P II) require PHY register changes to + * enable Wake on LAN, so set_wol is provided to be called in the + * ethernet driver's set_wol function. */ + int (*set_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); + + /* See set_wol, but for checking whether Wake on LAN is enabled. 
*/ + void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) @@ -560,6 +568,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); int __init mdio_bus_init(void); void mdio_bus_exit(void); diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h index 798fb80b024b..bb3cd58d71e3 100644 --- a/include/linux/platform_data/cpsw.h +++ b/include/linux/platform_data/cpsw.h @@ -30,7 +30,7 @@ struct cpsw_platform_data { u32 channels; /* number of cpdma channels (symmetric) */ u32 slaves; /* number of slave cpgmac ports */ struct cpsw_slave_data *slave_data; - u32 cpts_active_slave; /* time stamping slave */ + u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */ u32 cpts_clock_mult; /* convert input clock ticks to nanoseconds */ u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */ u32 ale_entries; /* ale table size */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 5a710b9c578e..87a03c746f17 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -93,14 +93,20 @@ do { \ #else /* !CONFIG_PREEMPT_COUNT */ -#define preempt_disable() do { } while (0) -#define sched_preempt_enable_no_resched() do { } while (0) -#define preempt_enable_no_resched() do { } while (0) -#define preempt_enable() do { } while (0) - -#define preempt_disable_notrace() do { } while (0) -#define preempt_enable_no_resched_notrace() do { } while (0) -#define preempt_enable_notrace() do { } while (0) +/* + * Even if we don't 
have any preemption, we need preempt disable/enable + * to be barriers, so that we don't have things like get_user/put_user + * that can cause faults and scheduling migrate into our preempt-protected + * region. + */ +#define preempt_disable() barrier() +#define sched_preempt_enable_no_resched() barrier() +#define preempt_enable_no_resched() barrier() +#define preempt_enable() barrier() + +#define preempt_disable_notrace() barrier() +#define preempt_enable_no_resched_notrace() barrier() +#define preempt_enable_notrace() barrier() #endif /* CONFIG_PREEMPT_COUNT */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 1249a54d17e0..822171fcb1c8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -134,6 +134,8 @@ extern int printk_delay_msec; extern int dmesg_restrict; extern int kptr_restrict; +extern void wake_up_klogd(void); + void log_buf_kexec_setup(void); void __init setup_log_buf(int early); #else @@ -162,6 +164,10 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, return false; } +static inline void wake_up_klogd(void) +{ +} + static inline void log_buf_kexec_setup(void) { } diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 8307f2f94d86..94dfb2aa5533 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -117,6 +117,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, const struct file_operations *proc_fops, void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); +extern int remove_proc_subtree(const char *name, struct proc_dir_entry *parent); struct pid_namespace; @@ -202,6 +203,7 @@ static inline struct proc_dir_entry *proc_create_data(const char *name, return NULL; } #define remove_proc_entry(name, parent) do {} while (0) +#define remove_proc_subtree(name, parent) do {} while (0) static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} 
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 23070fd83872..7df93f52db08 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -199,6 +199,8 @@ enum regulator_type { * output when using regulator_set_voltage_sel_regmap * @enable_reg: Register for control when using regmap enable/disable ops * @enable_mask: Mask for control when using regmap enable/disable ops + * @bypass_reg: Register for control when using regmap set_bypass + * @bypass_mask: Mask for control when using regmap set_bypass * * @enable_time: Time taken for initial enable of regulator (in uS). */ diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 5ae8456d9670..c23099413ad6 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -14,6 +14,7 @@ */ #include <linux/cgroup.h> +#include <linux/errno.h> /* * The core object. the cgroup that wishes to account for some diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 489dd7bb28ec..f28544b2f9af 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -69,6 +69,15 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, int idx); +extern int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags); +extern int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode); diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..e692a022527b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -163,9 +163,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define TASK_DEAD 64 #define TASK_WAKEKILL 128 #define TASK_WAKING 256 -#define TASK_STATE_MAX 512 +#define TASK_PARKED 
512 +#define TASK_STATE_MAX 1024 -#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" +#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c11a28706fa4..3bfe8d6ee248 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -53,7 +53,9 @@ #include <linux/in.h> /* We need in_addr. */ #include <linux/in6.h> /* We need in6_addr. */ +#include <linux/skbuff.h> +#include <uapi/linux/sctp.h> /* Section 3.1. SCTP Common Header Format */ typedef struct sctphdr { @@ -63,14 +65,10 @@ typedef struct sctphdr { __le32 checksum; } __packed sctp_sctphdr_t; -#ifdef __KERNEL__ -#include <linux/skbuff.h> - static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { return (struct sctphdr *)skb_transport_header(skb); } -#endif /* Section 3.2. Chunk Field Descriptions. */ typedef struct sctp_chunkhdr { diff --git a/include/linux/security.h b/include/linux/security.h index eee7478cda70..032c366ef1c6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1012,6 +1012,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * This hook can be used by the module to update any security state * associated with the TUN device's security structure. * @security pointer to the TUN devices's security structure. + * @skb_owned_by: + * This hook sets the packet's owning sock. + * @skb is the packet. + * @sk the sock which owns the packet. * * Security hooks for XFRM operations. 
* @@ -1638,6 +1642,7 @@ struct security_operations { int (*tun_dev_attach_queue) (void *security); int (*tun_dev_attach) (struct sock *sk, void *security); int (*tun_dev_open) (void *security); + void (*skb_owned_by) (struct sk_buff *skb, struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2588,6 +2593,8 @@ int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); +void security_skb_owned_by(struct sk_buff *skb, struct sock *sk); + #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, @@ -2779,6 +2786,11 @@ static inline int security_tun_dev_open(void *security) { return 0; } + +static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index b17d765ded84..fc305713fc6d 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -6,6 +6,7 @@ enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN}; enum { SH_ETH_REG_GIGABIT, + SH_ETH_REG_FAST_RCAR, SH_ETH_REG_FAST_SH4, SH_ETH_REG_FAST_SH3_SH2 }; diff --git a/include/linux/signal.h b/include/linux/signal.h index a2dcb94ea49d..9475c5cb28bc 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -250,11 +250,11 @@ extern int show_unhandled_signals; extern int sigsuspend(sigset_t *); struct sigaction { -#ifndef __ARCH_HAS_ODD_SIGACTION +#ifndef __ARCH_HAS_IRIX_SIGACTION __sighandler_t sa_handler; unsigned long sa_flags; #else - unsigned long sa_flags; + unsigned int sa_flags; __sighandler_t sa_handler; #endif #ifdef __ARCH_HAS_SA_RESTORER diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 821c7f45d2a7..2e0ced1af3b1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,6 +32,7 @@ #include <linux/hrtimer.h> #include 
<linux/dma-mapping.h> #include <linux/netdev_features.h> +#include <net/flow_keys.h> /* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 @@ -316,6 +317,8 @@ enum { SKB_GSO_FCOE = 1 << 5, SKB_GSO_GRE = 1 << 6, + + SKB_GSO_UDP_TUNNEL = 1 << 7, }; #if BITS_PER_LONG > 32 @@ -384,9 +387,11 @@ typedef unsigned char *sk_buff_data_t; * @secmark: security marking * @mark: Generic packet mark * @dropcount: total number of sk_receive_queue overflows + * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) + * @inner_mac_header: Link layer header (encapsulation) * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header @@ -461,6 +466,7 @@ struct sk_buff { __u32 rxhash; + __be16 vlan_proto; __u16 vlan_tci; #ifdef CONFIG_NET_SCHED @@ -500,11 +506,12 @@ struct sk_buff { union { __u32 mark; __u32 dropcount; - __u32 avail_size; + __u32 reserved_tailroom; }; sk_buff_data_t inner_transport_header; sk_buff_data_t inner_network_header; + sk_buff_data_t inner_mac_header; sk_buff_data_t transport_header; sk_buff_data_t network_header; sk_buff_data_t mac_header; @@ -570,7 +577,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst); +extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, + bool force); + +/** + * skb_dst_set_noref - sets skb dst, hopefully, without taking reference + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was not taken on dst. + * If dst entry is cached, we do not take reference and dst_release + * will be avoided by refdst_drop. 
If dst entry is not cached, we take + * reference, so that last dst_release can destroy the dst immediately. + */ +static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +{ + __skb_dst_set_noref(skb, dst, false); +} + +/** + * skb_dst_set_noref_force - sets skb dst, without taking reference + * @skb: buffer + * @dst: dst entry + * + * Sets skb dst, assuming a reference was not taken on dst. + * No reference is taken and no dst_release will be called. While for + * cached dsts deferred reclaim is a basic feature, for entries that are + * not cached it is caller's job to guarantee that last dst_release for + * provided dst happens when nobody uses it, eg. after a RCU grace period. + */ +static inline void skb_dst_set_noref_force(struct sk_buff *skb, + struct dst_entry *dst) +{ + __skb_dst_set_noref(skb, dst, true); +} /** * skb_dst_is_noref - Test if skb dst isn't refcounted @@ -611,6 +651,12 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } +extern struct sk_buff *__alloc_skb_head(gfp_t priority, int node); +static inline struct sk_buff *alloc_skb_head(gfp_t priority) +{ + return __alloc_skb_head(priority, -1); +} + extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, @@ -1288,11 +1334,13 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, * do not lose pfmemalloc information as the pages would not be * allocated using __GFP_MEMALLOC. 
*/ - if (page->pfmemalloc && !page->mapping) - skb->pfmemalloc = true; frag->page.p = page; frag->page_offset = off; skb_frag_size_set(frag, size); + + page = compound_head(page); + if (page->pfmemalloc && !page->mapping) + skb->pfmemalloc = true; } /** @@ -1447,7 +1495,10 @@ static inline int skb_tailroom(const struct sk_buff *skb) */ static inline int skb_availroom(const struct sk_buff *skb) { - return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len; + if (skb_is_nonlinear(skb)) + return 0; + + return skb->end - skb->tail - skb->reserved_tailroom; } /** @@ -1466,6 +1517,7 @@ static inline void skb_reserve(struct sk_buff *skb, int len) static inline void skb_reset_inner_headers(struct sk_buff *skb) { + skb->inner_mac_header = skb->mac_header; skb->inner_network_header = skb->network_header; skb->inner_transport_header = skb->transport_header; } @@ -1511,6 +1563,22 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) +{ + return skb->head + skb->inner_mac_header; +} + +static inline void skb_reset_inner_mac_header(struct sk_buff *skb) +{ + skb->inner_mac_header = skb->data - skb->head; +} + +static inline void skb_set_inner_mac_header(struct sk_buff *skb, + const int offset) +{ + skb_reset_inner_mac_header(skb); + skb->inner_mac_header += offset; +} static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != ~0U; @@ -1604,6 +1672,21 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header = skb->data + offset; } +static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) +{ + return skb->inner_mac_header; +} + +static inline void skb_reset_inner_mac_header(struct sk_buff *skb) +{ + skb->inner_mac_header = skb->data; +} + +static inline void skb_set_inner_mac_header(struct sk_buff *skb, + const int offset) +{ + 
skb->inner_mac_header = skb->data + offset; +} static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != NULL; @@ -1661,6 +1744,19 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_probe_transport_header(struct sk_buff *skb, + const int offset_hint) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return; + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_set_transport_header(skb, offset_hint); +} + static inline void skb_mac_header_rebuild(struct sk_buff *skb) { if (skb_mac_header_was_set(skb)) { @@ -2638,6 +2734,13 @@ static inline void nf_reset(struct sk_buff *skb) #endif } +static inline void nf_reset_trace(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) + skb->nf_trace = 0; +#endif +} + /* Note: This doesn't put any conntrack and bridge info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) { @@ -2799,6 +2902,8 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); +u32 __skb_get_poff(const struct sk_buff *skb); + /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index b2b1afbb3202..aa327a8105ad 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -26,6 +26,20 @@ struct at86rf230_platform_data { int rstn; int slp_tr; int dig2; + + /* Setting the irq_type will configure the driver to request + * the platform irq trigger type according to the given value + * and configure the interrupt polarity of the device to the + * corresponding polarity. 
+ * + * Allowed values are: IRQF_TRIGGER_RISING, IRQF_TRIGGER_FALLING, + * IRQF_TRIGGER_HIGH and IRQF_TRIGGER_LOW + * + * Setting it to 0, the driver does not touch the trigger type + * configuration of the interrupt and sets the interrupt polarity + * of the device to high active (the default value). + */ + int irq_type; }; #endif diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index a26e2fb604e6..e2369c167dbd 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -16,7 +16,10 @@ * In the debug case, 1 means unlocked, 0 means locked. (the values * are inverted, to catch initialization bugs) * - * No atomicity anywhere, we are on UP. + * No atomicity anywhere, we are on UP. However, we still need + * the compiler barriers, because we do not want the compiler to + * move potentially faulting instructions (notably user accesses) + * into the locked sequence, resulting in non-atomic execution. */ #ifdef CONFIG_DEBUG_SPINLOCK @@ -25,6 +28,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; + barrier(); } static inline void @@ -32,6 +36,7 @@ arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { local_irq_save(flags); lock->slock = 0; + barrier(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) @@ -39,32 +44,34 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) char oldval = lock->slock; lock->slock = 0; + barrier(); return oldval > 0; } static inline void arch_spin_unlock(arch_spinlock_t *lock) { + barrier(); lock->slock = 1; } /* * Read-write spinlocks. No debug version. 
*/ -#define arch_read_lock(lock) do { (void)(lock); } while (0) -#define arch_write_lock(lock) do { (void)(lock); } while (0) -#define arch_read_trylock(lock) ({ (void)(lock); 1; }) -#define arch_write_trylock(lock) ({ (void)(lock); 1; }) -#define arch_read_unlock(lock) do { (void)(lock); } while (0) -#define arch_write_unlock(lock) do { (void)(lock); } while (0) +#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched.c and kernel_lock.c: */ -# define arch_spin_lock(lock) do { (void)(lock); } while (0) -# define arch_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) -# define arch_spin_unlock(lock) do { (void)(lock); } while (0) -# define arch_spin_trylock(lock) ({ (void)(lock); 1; }) +# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) +# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) +# define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0) +# define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ #define arch_spin_is_contended(lock) (((void)(lock), 0)) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 2de42f9401d2..a5ffd32642fd 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -25,6 +25,7 @@ extern int swiotlb_force; extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); +unsigned long swiotlb_size_or_default(void); extern int 
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); /* diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f28408c07dc2..5adbc33d1ab3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -90,9 +90,6 @@ struct tcp_options_received { sack_ok : 4, /* SACK seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ - u8 cookie_plus:6, /* bytes in authenticator/cookie option */ - cookie_out_never:1, - cookie_in_always:1; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ @@ -102,7 +99,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; - rx_opt->cookie_plus = 0; } /* This is the max number of SACKS that we'll generate and process. It's safe @@ -191,20 +187,19 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ - u32 frto_highmark; /* snd_nxt when RTO occurred */ u16 advmss; /* Advertised MSS */ - u8 frto_counter; /* Number of new acks after RTO */ + u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ repair : 1, - unused : 1; + frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - early_retrans_delayed:1, /* Delayed ER timer installed */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. 
*/ /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ @@ -320,12 +315,6 @@ struct tcp_sock { struct tcp_md5sig_info __rcu *md5sig_info; #endif - /* When the cookie options are generated and exchanged, then this - * object holds a reference to them (cookie_values->kref). Also - * contains related tcp_cookie_transactions fields. - */ - struct tcp_cookie_values *cookie_values; - /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; /* fastopen_rsk points to request_sock that resulted in this big @@ -361,10 +350,6 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif - /* Few sockets in timewait have cookies; in that case, then this - * object holds a reference to them (tw_cookie_values->kref). - */ - struct tcp_cookie_values *tw_cookie_values; }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f0bd7f90a90d..e3c0ae9bb1fa 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -44,7 +44,7 @@ /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" #define THERMAL_GENL_VERSION 0x01 -#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_group" +#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_grp" /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h new file mode 100644 index 000000000000..cbb20afdbc01 --- /dev/null +++ b/include/linux/ucs2_string.h @@ -0,0 +1,14 @@ +#ifndef _LINUX_UCS2_STRING_H_ +#define _LINUX_UCS2_STRING_H_ + +#include <linux/types.h> /* for size_t */ +#include <linux/stddef.h> /* for NULL */ + +typedef u16 ucs2_char_t; + +unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength); +unsigned long ucs2_strlen(const ucs2_char_t *s); +unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); +int 
ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); + +#endif /* _LINUX_UCS2_STRING_H_ */ diff --git a/include/linux/udp.h b/include/linux/udp.h index 9d81de123c90..42278bbf7a88 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -68,6 +68,7 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + void (*encap_destroy)(struct sock *sk); }; static inline struct udp_sock *udp_sk(const struct sock *sk) diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 3b8f9d4fc3fe..cc25b70af33c 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -127,6 +127,7 @@ struct cdc_ncm_ctx { u16 connected; }; +extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign); diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 3c671c1b37f6..8860594d6364 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -60,7 +60,7 @@ struct usb_configuration; * @name: For diagnostics, identifies the function. * @strings: tables of strings, keyed by identifiers assigned during bind() * and by language IDs provided in control requests - * @descriptors: Table of full (or low) speed descriptors, using interface and + * @fs_descriptors: Table of full (or low) speed descriptors, using interface and * string identifiers assigned during @bind(). If this pointer is null, * the function will not be available at full speed (or at low speed). 
* @hs_descriptors: Table of high speed descriptors, using interface and @@ -290,6 +290,7 @@ enum { * after function notifications * @resume: Notifies configuration when the host restarts USB traffic, * before function notifications + * @gadget_driver: Gadget driver controlling this driver * * Devices default to reporting self powered operation. Devices which rely * on bus powered operation should report this in their @bind method. diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 0a78df5f6cfd..59694b5e5e90 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -357,6 +357,7 @@ struct hc_driver { */ int (*disable_usb3_lpm_timeout)(struct usb_hcd *, struct usb_device *, enum usb3_link_state state); + int (*find_raw_port_number)(struct usb_hcd *, int); }; extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb); @@ -396,6 +397,7 @@ extern int usb_hcd_is_primary_hcd(struct usb_hcd *hcd); extern int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags); extern void usb_remove_hcd(struct usb_hcd *hcd); +extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index ef9be7e1e190..1819b59aab2a 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -66,6 +66,7 @@ * port. * @flags: usb serial port flags * @write_wait: a wait_queue_head_t used by the port. + * @delta_msr_wait: modem-status-change wait queue * @work: work queue entry for the line discipline waking up. 
* @throttled: nonzero if the read urb is inactive to throttle the device * @throttle_req: nonzero if the tty wants to throttle us @@ -112,6 +113,7 @@ struct usb_serial_port { unsigned long flags; wait_queue_head_t write_wait; + wait_queue_head_t delta_msr_wait; struct work_struct work; char throttled; char throttle_req; diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 6f033a415ecb..5c295c26ad37 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -181,8 +181,16 @@ /*-------------------------------------------------------------------------*/ +#if IS_ENABLED(CONFIG_USB_ULPI) struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, unsigned int flags); +#else +static inline struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, + unsigned int flags) +{ + return NULL; +} +#endif #ifdef CONFIG_USB_ULPI_VIEWPORT /* access ops for controllers with a viewport register */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0e5ac93bab10..da46327fca17 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -72,6 +72,7 @@ struct usbnet { # define EVENT_DEVICE_REPORT_IDLE 8 # define EVENT_NO_RUNTIME_PM 9 # define EVENT_RX_KILL 10 +# define EVENT_LINK_CHANGE 11 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -245,5 +246,6 @@ extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); extern int usbnet_nway_reset(struct net_device *net); extern int usbnet_manage_power(struct usbnet *, int); +extern void usbnet_link_change(struct usbnet *, bool, bool); #endif /* __LINUX_USB_USBNET_H */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4ce009324933..b6b215f13b45 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -26,6 +26,8 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + bool may_mount_sysfs; + bool may_mount_proc; }; extern struct user_namespace 
init_user_ns; @@ -82,4 +84,6 @@ static inline void put_user_ns(struct user_namespace *ns) #endif +void update_mnt_policy(struct user_namespace *userns); + #endif /* _LINUX_USER_H */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ff6714e6d0f5..2d7a5e045908 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -58,12 +58,6 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq); unsigned int virtqueue_get_vring_size(struct virtqueue *vq); -/* FIXME: Obsolete accessor, but required for virtio_net merge. */ -static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq) -{ - return vq->index; -} - /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h new file mode 100644 index 000000000000..0805eecba8f7 --- /dev/null +++ b/include/linux/vm_sockets.h @@ -0,0 +1,23 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _VM_SOCKETS_H +#define _VM_SOCKETS_H + +#include <uapi/linux/vm_sockets.h> + +int vm_sockets_get_local_cid(void); + +#endif /* _VM_SOCKETS_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 40be2a0d8ae1..84a6440f1f19 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -199,6 +199,7 @@ extern bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); +extern int inet6addr_notifier_call_chain(unsigned long val, void *v); extern void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, struct ipv6_devconf *devconf); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 0a996a3517ed..a8836e8445cc 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -29,7 +29,8 @@ struct unix_address { struct unix_skb_parms { struct pid *pid; /* Skb credentials */ - const struct cred *cred; + kuid_t uid; + kgid_t gid; struct scm_fp_list *fp; /* Passed files */ #ifdef CONFIG_SECURITY_NETWORK u32 secid; /* Security ID */ diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h index ef2dd9438bb1..028b754ae9b1 100644 --- a/include/net/caif/caif_dev.h +++ b/include/net/caif/caif_dev.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/caif_device.h b/include/net/caif/caif_device.h index d02f044adb8a..d6e3c4267c81 100644 --- a/include/net/caif/caif_device.h +++ b/include/net/caif/caif_device.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/caif_hsi.h 
b/include/net/caif/caif_hsi.h index bcb9cc3ce98b..4795e817afe5 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -1,6 +1,5 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com * Author: Daniel Martensson / daniel.martensson@stericsson.com * Dmitry.Tarnyagin / dmitry.tarnyagin@stericsson.com * License terms: GNU General Public License (GPL) version 2 diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 0f3a39125f90..94e5ed64dc6d 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/caif_shm.h b/include/net/caif/caif_shm.h deleted file mode 100644 index 5bcce55438cf..000000000000 --- a/include/net/caif/caif_shm.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) ST-Ericsson AB 2010 - * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com - * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com - * License terms: GNU General Public License (GPL) version 2 - */ - -#ifndef CAIF_SHM_H_ -#define CAIF_SHM_H_ - -struct shmdev_layer { - u32 shm_base_addr; - u32 shm_total_sz; - u32 shm_id; - u32 shm_loopback; - void *hmbx; - int (*pshmdev_mbxsend) (u32 shm_id, u32 mbx_msg); - int (*pshmdev_mbxsetup) (void *pshmdrv_cb, - struct shmdev_layer *pshm_dev, void *pshm_drv); - struct net_device *pshm_netdev; -}; - -extern int caif_shmcore_probe(struct shmdev_layer *pshm_dev); -extern void caif_shmcore_remove(struct net_device *pshm_netdev); - -#endif diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h index 90b4ff8bad83..70bfd017581f 100644 --- a/include/net/caif/cfcnfg.h +++ b/include/net/caif/cfcnfg.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur 
Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h index 9e5425b4a1d7..f2ae33d23baf 100644 --- a/include/net/caif/cfctrl.h +++ b/include/net/caif/cfctrl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cffrml.h b/include/net/caif/cffrml.h index afac1a48cce7..a06e33fbaa8b 100644 --- a/include/net/caif/cffrml.h +++ b/include/net/caif/cffrml.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfmuxl.h b/include/net/caif/cfmuxl.h index 5847a196b8ad..752999572f21 100644 --- a/include/net/caif/cfmuxl.h +++ b/include/net/caif/cfmuxl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 83a89ba3005b..1c1ad46250d5 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h index f121299a3427..b5b020f3c72e 100644 --- a/include/net/caif/cfserl.h +++ b/include/net/caif/cfserl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) 
version 2 */ diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h index 0f5905241843..cd47705c2cc3 100644 --- a/include/net/caif/cfsrvl.h +++ b/include/net/caif/cfsrvl.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 2581638f4a3d..0fee0617fb7d 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -24,7 +24,7 @@ struct cgroup_cls_state u32 classid; }; -extern void sock_update_classid(struct sock *sk, struct task_struct *task); +extern void sock_update_classid(struct sock *sk); #if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) static inline u32 task_cls_classid(struct task_struct *p) @@ -61,7 +61,7 @@ static inline u32 task_cls_classid(struct task_struct *p) } #endif #else /* !CGROUP_NET_CLS_CGROUP */ -static inline void sock_update_classid(struct sock *sk, struct task_struct *task) +static inline void sock_update_classid(struct sock *sk) { } diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 1ee9d4bda30d..74004af31c48 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -1,24 +1,9 @@ #ifndef _NET_DN_FIB_H #define _NET_DN_FIB_H -/* WARNING: The ordering of these elements must match ordering - * of RTA_* rtnetlink attribute numbers. 
- */ -struct dn_kern_rta { - void *rta_dst; - void *rta_src; - int *rta_iif; - int *rta_oif; - void *rta_gw; - u32 *rta_priority; - void *rta_prefsrc; - struct rtattr *rta_mx; - struct rtattr *rta_mp; - unsigned char *rta_protoinfo; - u32 *rta_flow; - struct rta_cacheinfo *rta_ci; - struct rta_session *rta_sess; -}; +#include <linux/netlink.h> + +extern const struct nla_policy rtm_dn_policy[]; struct dn_fib_res { struct fib_rule *r; @@ -93,10 +78,10 @@ struct dn_fib_table { u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, - struct dn_kern_rta *rta, struct nlmsghdr *n, + struct nlattr *attrs[], struct nlmsghdr *n, struct netlink_skb_parms *req); int (*delete)(struct dn_fib_table *t, struct rtmsg *r, - struct dn_kern_rta *rta, struct nlmsghdr *n, + struct nlattr *attrs[], struct nlmsghdr *n, struct netlink_skb_parms *req); int (*lookup)(struct dn_fib_table *t, const struct flowidn *fld, struct dn_fib_res *res); @@ -116,13 +101,12 @@ extern void dn_fib_cleanup(void); extern int dn_fib_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); extern struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, - struct dn_kern_rta *rta, + struct nlattr *attrs[], const struct nlmsghdr *nlh, int *errp); extern int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res); extern void dn_fib_release_info(struct dn_fib_info *fi); -extern __le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type); extern void dn_fib_flush(void); extern void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res); diff --git a/include/net/dst.h b/include/net/dst.h index 853cda11e518..1f8fd109e225 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -413,13 +413,15 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->ops->neigh_lookup(dst, NULL, 
daddr); + struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr); + return IS_ERR(n) ? NULL : n; } static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - return dst->ops->neigh_lookup(dst, skb, NULL); + struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL); + return IS_ERR(n) ? NULL : n; } static inline void dst_link_failure(struct sk_buff *skb) diff --git a/include/net/firewire.h b/include/net/firewire.h new file mode 100644 index 000000000000..31bcbfe7a220 --- /dev/null +++ b/include/net/firewire.h @@ -0,0 +1,25 @@ +#ifndef _NET_FIREWIRE_H +#define _NET_FIREWIRE_H + +/* Pseudo L2 address */ +#define FWNET_ALEN 16 +union fwnet_hwaddr { + u8 u[FWNET_ALEN]; + /* "Hardware address" defined in RFC2734/RF3146 */ + struct { + __be64 uniq_id; /* EUI-64 */ + u8 max_rec; /* max packet size */ + u8 sspd; /* max speed */ + __be16 fifo_hi; /* hi 16bits of FIFO addr */ + __be32 fifo_lo; /* lo 32bits of FIFO addr */ + } __packed uc; +}; + +/* Pseudo L2 Header */ +#define FWNET_HLEN 18 +struct fwnet_header { + u8 h_dest[FWNET_ALEN]; /* destination address */ + __be16 h_proto; /* packet type ID field */ +} __packed; + +#endif diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 80461c1ae9ef..bb8271d487b7 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -9,6 +9,7 @@ struct flow_keys { __be32 ports; __be16 port16[2]; }; + u16 thoff; u8 ip_proto; }; diff --git a/include/net/gre.h b/include/net/gre.h index 82665474bcb7..9f03a390c826 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -2,6 +2,7 @@ #define __LINUX_GRE_H #include <linux/skbuff.h> +#include <net/ip_tunnels.h> #define GREPROTO_CISCO 0 #define GREPROTO_PPTP 1 @@ -12,7 +13,57 @@ struct gre_protocol { void (*err_handler)(struct sk_buff *skb, u32 info); }; +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 + int gre_add_protocol(const struct gre_protocol *proto, u8 version); 
int gre_del_protocol(const struct gre_protocol *proto, u8 version); +static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +{ + __be16 tflags = 0; + + if (flags & GRE_CSUM) + tflags |= TUNNEL_CSUM; + if (flags & GRE_ROUTING) + tflags |= TUNNEL_ROUTING; + if (flags & GRE_KEY) + tflags |= TUNNEL_KEY; + if (flags & GRE_SEQ) + tflags |= TUNNEL_SEQ; + if (flags & GRE_STRICT) + tflags |= TUNNEL_STRICT; + if (flags & GRE_REC) + tflags |= TUNNEL_REC; + if (flags & GRE_VERSION) + tflags |= TUNNEL_VERSION; + + return tflags; +} + +static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) +{ + __be16 flags = 0; + + if (tflags & TUNNEL_CSUM) + flags |= GRE_CSUM; + if (tflags & TUNNEL_ROUTING) + flags |= GRE_ROUTING; + if (tflags & TUNNEL_KEY) + flags |= GRE_KEY; + if (tflags & TUNNEL_SEQ) + flags |= GRE_SEQ; + if (tflags & TUNNEL_STRICT) + flags |= GRE_STRICT; + if (tflags & TUNNEL_REC) + flags |= GRE_REC; + if (tflags & TUNNEL_VERSION) + flags |= GRE_VERSION; + + return flags; +} + #endif diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index d104c882fc29..8196d5d40359 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -85,6 +85,8 @@ struct wpan_phy; * Use wpan_wpy_put to put that reference. */ struct ieee802154_mlme_ops { + /* The following fields are optional (can be NULL). */ + int (*assoc_req)(struct net_device *dev, struct ieee802154_addr *addr, u8 channel, u8 page, u8 cap); @@ -101,6 +103,8 @@ struct ieee802154_mlme_ops { int (*scan_req)(struct net_device *dev, u8 type, u32 channels, u8 page, u8 duration); + /* The fields below are required. 
*/ + struct wpan_phy *(*get_phy)(const struct net_device *dev); /* @@ -110,7 +114,6 @@ struct ieee802154_mlme_ops { u16 (*get_pan_id)(const struct net_device *dev); u16 (*get_short_addr)(const struct net_device *dev); u8 (*get_dsn)(const struct net_device *dev); - u8 (*get_bsn)(const struct net_device *dev); }; /* The IEEE 802.15.4 standard defines 2 type of the devices: diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 93563221d29a..100fb8cec17c 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -71,6 +71,8 @@ struct inet6_ifaddr { struct inet6_ifaddr *ifpub; int regen_count; #endif + bool tokenized; + struct rcu_head rcu; }; @@ -187,6 +189,8 @@ struct inet6_dev { struct list_head tempaddr_list; #endif + struct in6_addr token; + struct neigh_parms *nd_parms; struct inet6_dev *next; struct ipv6_devconf cnf; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 183292722f6e..de2c78529afa 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -133,6 +133,8 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ +#define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { @@ -222,7 +224,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, when = max_when; } - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || + what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 
76c3fe5ecc2e..6f41b45e819e 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,10 +43,23 @@ struct inet_frag_queue { #define INETFRAGS_HASHSZ 64 +/* averaged: + * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / + * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or + * struct frag_queue)) + */ +#define INETFRAGS_MAXDEPTH 128 + +struct inet_frag_bucket { + struct hlist_head chain; + spinlock_t chain_lock; +}; + struct inet_frags { - struct hlist_head hash[INETFRAGS_HASHSZ]; + struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; /* This rwlock is a global lock (seperate per IPv4, IPv6 and * netfilter). Important to keep this on a seperate cacheline. + * Its primarily a rebuild protection rwlock. */ rwlock_t lock ____cacheline_aligned_in_smp; int secret_interval; @@ -76,6 +89,8 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock); +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { @@ -134,6 +149,7 @@ static inline void inet_frag_lru_del(struct inet_frag_queue *q) { spin_lock(&q->net->lru_lock); list_del(&q->lru_list); + q->net->nqueues--; spin_unlock(&q->net->lru_lock); } @@ -142,6 +158,19 @@ static inline void inet_frag_lru_add(struct netns_frags *nf, { spin_lock(&nf->lru_lock); list_add_tail(&q->lru_list, &nf->lru_list); + q->net->nqueues++; spin_unlock(&nf->lru_lock); } + +/* RFC 3168 support : + * We want to check ECN values of all fragments, do detect invalid combinations. + * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. 
+ */ +#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ +#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ +#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ +#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ + +extern const u8 ip_frag_ecn_table[16]; + #endif diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index e03047f7090b..4da5de10d1d4 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -3,6 +3,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> +#include <linux/if_tunnel.h> #include <linux/ip6_tunnel.h> #define IP6TUNNEL_ERR_TIMEO (30*HZ) @@ -68,4 +69,24 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_device_stats *stats = &dev->stats; + int pkt_len, err; + + nf_reset(skb); + pkt_len = skb->len; + err = ip6_local_out(skb); + + if (net_xmit_eval(err) == 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } +} #endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9497be1ad4c0..e49db91593a9 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -152,18 +152,16 @@ struct fib_result_nl { }; #ifdef CONFIG_IP_ROUTE_MULTIPATH - #define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) - -#define FIB_TABLE_HASHSZ 2 - #else /* CONFIG_IP_ROUTE_MULTIPATH */ - #define FIB_RES_NH(res) ((res).fi->fib_nh[0]) +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +#ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 - -#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +#else +#define FIB_TABLE_HASHSZ 2 +#endif extern __be32 fib_info_update_nh_saddr(struct net *net, struct 
fib_nh *nh); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h new file mode 100644 index 000000000000..4b6f0b28f41f --- /dev/null +++ b/include/net/ip_tunnels.h @@ -0,0 +1,177 @@ +#ifndef __NET_IP_TUNNELS_H +#define __NET_IP_TUNNELS_H 1 + +#include <linux/if_tunnel.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <linux/u64_stats_sync.h> +#include <net/dsfield.h> +#include <net/gro_cells.h> +#include <net/inet_ecn.h> +#include <net/ip.h> +#include <net/rtnetlink.h> + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +/* Keep error state on tunnel for 30 sec */ +#define IPTUNNEL_ERR_TIMEO (30*HZ) + +/* 6rd prefix/relay information */ +#ifdef CONFIG_IPV6_SIT_6RD +struct ip_tunnel_6rd_parm { + struct in6_addr prefix; + __be32 relay_prefix; + u16 prefixlen; + u16 relay_prefixlen; +}; +#endif + +struct ip_tunnel_prl_entry { + struct ip_tunnel_prl_entry __rcu *next; + __be32 addr; + u16 flags; + struct rcu_head rcu_head; +}; + +struct ip_tunnel { + struct ip_tunnel __rcu *next; + struct hlist_node hash_node; + struct net_device *dev; + + int err_count; /* Number of arrived ICMP errors */ + unsigned long err_time; /* Time when the last ICMP error + * arrived */ + + /* These four fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ + __u32 o_seqno; /* The last output seqno */ + int hlen; /* Precalculated header length */ + int mlink; + + struct ip_tunnel_parm parms; + + /* for SIT */ +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd_parm ip6rd; +#endif + struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ + unsigned int prl_count; /* # of entries in PRL */ + int ip_tnl_net_id; + struct gro_cells gro_cells; +}; + +#define TUNNEL_CSUM __cpu_to_be16(0x01) +#define TUNNEL_ROUTING __cpu_to_be16(0x02) +#define TUNNEL_KEY __cpu_to_be16(0x04) +#define TUNNEL_SEQ __cpu_to_be16(0x08) +#define TUNNEL_STRICT 
__cpu_to_be16(0x10) +#define TUNNEL_REC __cpu_to_be16(0x20) +#define TUNNEL_VERSION __cpu_to_be16(0x40) +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) + +struct tnl_ptk_info { + __be16 flags; + __be16 proto; + __be32 key; + __be32 seq; +}; + +#define PACKET_RCVD 0 +#define PACKET_REJECT 1 + +#define IP_TNL_HASH_BITS 10 +#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) + +struct ip_tunnel_net { + struct hlist_head *tunnels; + struct net_device *fb_tunnel_dev; +}; + +int ip_tunnel_init(struct net_device *dev); +void ip_tunnel_uninit(struct net_device *dev); +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname); + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params); +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); + +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot); +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key); + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error); +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p); +void ip_tunnel_setup(struct net_device *dev, int net_id); + +/* Extract dsfield from inner protocol */ +static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, + const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_get_dsfield((const struct ipv6hdr *)iph); + else + 
return 0; +} + +/* Propogate ECN bits out */ +static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, + const struct sk_buff *skb) +{ + u8 inner = ip_tunnel_get_dsfield(iph, skb); + + return INET_ECN_encapsulate(tos, inner); +} + +static inline void tunnel_ip_select_ident(struct sk_buff *skb, + const struct iphdr *old_iph, + struct dst_entry *dst) +{ + struct iphdr *iph = ip_hdr(skb); + + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); +} + +static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err; + int pkt_len = skb->len - skb_transport_offset(skb); + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + + nf_reset(skb); + + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } +} +#endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68c69d54d392..f9f5b057b480 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, dst->ip = src->ip; } +static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst, + const union nf_inet_addr *src) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + dst->in6 = src->in6; + return; + } +#endif + dst->ip = src->ip; + dst->all[1] = 0; + dst->all[2] = 0; + dst->all[3] = 0; +} + static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, const union nf_inet_addr *b) { @@ -344,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, #define LeaveFunction(level) do {} while (0) #endif -#define IP_VS_WAIT_WHILE(expr) while (expr) { 
cpu_relax(); } - /* * The port number of FTP service (in network order). @@ -459,7 +472,7 @@ struct ip_vs_estimator { struct ip_vs_stats { struct ip_vs_stats_user ustats; /* statistics */ struct ip_vs_estimator est; /* estimator */ - struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ + struct ip_vs_cpu_stats __percpu *cpustats; /* per cpu counters */ spinlock_t lock; /* spin lock */ struct ip_vs_stats_user ustats0; /* reset values */ }; @@ -566,20 +579,19 @@ struct ip_vs_conn_param { */ struct ip_vs_conn { struct hlist_node c_list; /* hashed list heads */ -#ifdef CONFIG_NET_NS - struct net *net; /* Name space */ -#endif /* Protocol, addresses and port numbers */ - u16 af; /* address family */ __be16 cport; - __be16 vport; __be16 dport; - __u32 fwmark; /* Fire wall mark from skb */ + __be16 vport; + u16 af; /* address family */ union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ volatile __u32 flags; /* status flags */ __u16 protocol; /* Which protocol (TCP/UDP) */ +#ifdef CONFIG_NET_NS + struct net *net; /* Name space */ +#endif /* counter and timer */ atomic_t refcnt; /* reference count */ @@ -593,6 +605,7 @@ struct ip_vs_conn { * state transition triggerd * synchronization */ + __u32 fwmark; /* Fire wall mark from skb */ unsigned long sync_endtime; /* jiffies + sent_retries */ /* Control members */ @@ -620,6 +633,8 @@ struct ip_vs_conn { const struct ip_vs_pe *pe; char *pe_data; __u8 pe_data_len; + + struct rcu_head rcu_head; }; /* @@ -695,10 +710,9 @@ struct ip_vs_dest_user_kern { * and the forwarding entries */ struct ip_vs_service { - struct list_head s_list; /* for normal service table */ - struct list_head f_list; /* for fwmark-based service table */ + struct hlist_node s_list; /* for normal service table */ + struct hlist_node f_list; /* for fwmark-based service table */ atomic_t refcnt; /* reference counter */ - atomic_t usecnt; /* use counter */ u16 af; /* 
address family */ __u16 protocol; /* which protocol (TCP/UDP) */ @@ -713,25 +727,35 @@ struct ip_vs_service { struct list_head destinations; /* real server d-linked list */ __u32 num_dests; /* number of servers */ struct ip_vs_stats stats; /* statistics for the service */ - struct ip_vs_app *inc; /* bind conns to this app inc */ /* for scheduling */ - struct ip_vs_scheduler *scheduler; /* bound scheduler object */ - rwlock_t sched_lock; /* lock sched_data */ + struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */ + spinlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ /* alternate persistence engine */ - struct ip_vs_pe *pe; + struct ip_vs_pe __rcu *pe; + + struct rcu_head rcu_head; }; +/* Information for cached dst */ +struct ip_vs_dest_dst { + struct dst_entry *dst_cache; /* destination cache entry */ + u32 dst_cookie; + union nf_inet_addr dst_saddr; + struct rcu_head rcu_head; +}; +/* In grace period after removing */ +#define IP_VS_DEST_STATE_REMOVING 0x01 /* * The real server destination forwarding entry * with ip address, port number, and so on. 
*/ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ - struct list_head d_list; /* for table with all the dests */ + struct hlist_node d_list; /* for table with all the dests */ u16 af; /* address family */ __be16 port; /* port number of the server */ @@ -742,6 +766,7 @@ struct ip_vs_dest { atomic_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ + unsigned long state; /* state flags */ /* connection counters and thresholds */ atomic_t activeconns; /* active connections */ @@ -752,10 +777,7 @@ struct ip_vs_dest { /* for destination cache */ spinlock_t dst_lock; /* lock of dst_cache */ - struct dst_entry *dst_cache; /* destination cache entry */ - u32 dst_rtos; /* RT_TOS(tos) for dst */ - u32 dst_cookie; - union nf_inet_addr dst_saddr; + struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ @@ -763,6 +785,10 @@ struct ip_vs_dest { __be16 vport; /* virtual port number */ union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ + + struct list_head t_list; /* in dest_trash */ + struct rcu_head rcu_head; + unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -778,9 +804,13 @@ struct ip_vs_scheduler { /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); /* scheduling service finish */ - int (*done_service)(struct ip_vs_service *svc); - /* scheduler updating service */ - int (*update_service)(struct ip_vs_service *svc); + void (*done_service)(struct ip_vs_service *svc); + /* dest is linked */ + int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); + /* dest is unlinked */ + int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); + /* dest is updated */ + int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); /* selecting a server from the given service */ struct ip_vs_dest* (*schedule)(struct ip_vs_service 
*svc, @@ -819,6 +849,7 @@ struct ip_vs_app { struct ip_vs_app *app; /* its real application */ __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ + struct rcu_head rcu_head; /* * output hook: Process packet in inout direction, diff set for TCP. @@ -881,6 +912,9 @@ struct ipvs_master_sync_state { struct netns_ipvs *ipvs; }; +/* How much time to keep dests in trash */ +#define IP_VS_DEST_TRASH_PERIOD (120 * HZ) + /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ @@ -892,7 +926,7 @@ struct netns_ipvs { #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - struct list_head rs_table[IP_VS_RTAB_SIZE]; + struct hlist_head rs_table[IP_VS_RTAB_SIZE]; /* ip_vs_app */ struct list_head app_list; /* ip_vs_proto */ @@ -904,7 +938,6 @@ struct netns_ipvs { #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) struct list_head tcp_apps[TCP_APP_TAB_SIZE]; - spinlock_t tcp_app_lock; #endif /* ip_vs_proto_udp */ #ifdef CONFIG_IP_VS_PROTO_UDP @@ -912,7 +945,6 @@ struct netns_ipvs { #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) struct list_head udp_apps[UDP_APP_TAB_SIZE]; - spinlock_t udp_app_lock; #endif /* ip_vs_proto_sctp */ #ifdef CONFIG_IP_VS_PROTO_SCTP @@ -921,7 +953,6 @@ struct netns_ipvs { #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) /* Hash table for SCTP application incarnations */ struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; - spinlock_t sctp_app_lock; #endif /* ip_vs_conn */ atomic_t conn_count; /* connection counter */ @@ -931,9 +962,10 @@ struct netns_ipvs { int num_services; /* no of virtual services */ - rwlock_t rs_lock; /* real services table */ /* Trash for destinations */ struct list_head dest_trash; + spinlock_t dest_trash_lock; + struct timer_list dest_trash_timer; /* expiration timer */ /* Service counters */ atomic_t ftpsvc_counter; atomic_t nullsvc_counter; @@ 
-976,6 +1008,7 @@ struct netns_ipvs { int sysctl_sync_retries; int sysctl_nat_icmp_send; int sysctl_pmtu_disc; + int sysctl_backup_only; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1067,6 +1100,12 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return ipvs->sysctl_pmtu_disc; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return ipvs->sync_state & IP_VS_STATE_BACKUP && + ipvs->sysctl_backup_only; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1114,6 +1153,11 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return 0; +} + #endif /* @@ -1169,9 +1213,19 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, int inverse); +/* Get reference to gain full access to conn. + * By default, RCU read-side critical sections have access only to + * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference. 
+ */ +static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp) +{ + return atomic_inc_not_zero(&cp->refcnt); +} + /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) { + smp_mb__before_atomic_dec(); atomic_dec(&cp->refcnt); } extern void ip_vs_conn_put(struct ip_vs_conn *cp); @@ -1286,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); -void ip_vs_unbind_pe(struct ip_vs_service *svc); int register_ip_vs_pe(struct ip_vs_pe *pe); int unregister_ip_vs_pe(struct ip_vs_pe *pe); struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); @@ -1334,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler); -extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); +extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * @@ -1354,17 +1407,12 @@ extern struct ip_vs_stats ip_vs_stats; extern int sysctl_ip_vs_sync_ver; extern struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport); -static inline void ip_vs_service_put(struct ip_vs_service *svc) -{ - atomic_dec(&svc->usecnt); -} - -extern struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, __be16 dport); +extern bool 
+ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); @@ -1376,8 +1424,18 @@ extern struct ip_vs_dest * ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, __u32 fwmark, __u32 flags); -extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); +extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp); + +static inline void ip_vs_dest_hold(struct ip_vs_dest *dest) +{ + atomic_inc(&dest->refcnt); +} +static inline void ip_vs_dest_put(struct ip_vs_dest *dest) +{ + smp_mb__before_atomic_dec(); + atomic_dec(&dest->refcnt); +} /* * IPVS sync daemon data and function prototypes @@ -1416,7 +1474,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, struct ip_vs_iphdr *iph); -extern void ip_vs_dst_reset(struct ip_vs_dest *dest); +extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head); #ifdef CONFIG_IP_VS_IPV6 extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, diff --git a/include/net/ipip.h b/include/net/ipip.h deleted file mode 100644 index fd19625ff99d..000000000000 --- a/include/net/ipip.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef __NET_IPIP_H -#define __NET_IPIP_H 1 - -#include <linux/if_tunnel.h> -#include <net/gro_cells.h> -#include <net/ip.h> - -/* Keep error state on tunnel for 30 sec */ -#define IPTUNNEL_ERR_TIMEO (30*HZ) - -/* 6rd prefix/relay information */ -struct ip_tunnel_6rd_parm { - struct in6_addr prefix; - __be32 relay_prefix; - u16 prefixlen; - u16 relay_prefixlen; -}; - -struct ip_tunnel { - struct ip_tunnel __rcu *next; - struct net_device *dev; - - int err_count; /* Number of arrived ICMP errors */ - unsigned long err_time; /* Time 
when the last ICMP error arrived */ - - /* These four fields used only by GRE */ - __u32 i_seqno; /* The last seen seqno */ - __u32 o_seqno; /* The last output seqno */ - int hlen; /* Precalculated GRE header length */ - int mlink; - - struct ip_tunnel_parm parms; - - /* for SIT */ -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd_parm ip6rd; -#endif - struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ - unsigned int prl_count; /* # of entries in PRL */ - - struct gro_cells gro_cells; -}; - -struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry __rcu *next; - __be32 addr; - u16 flags; - struct rcu_head rcu_head; -}; - -static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) -{ - int err; - struct iphdr *iph = ip_hdr(skb); - int pkt_len = skb->len - skb_transport_offset(skb); - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - - nf_reset(skb); - skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); - - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } -} - -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - if (iph->frag_off & htons(IP_DF)) - iph->id = 0; - else { - /* Use inner packet iph-id if possible. 
*/ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); - } -} -#endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 64d12e77719a..0810aa57c780 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -217,7 +217,7 @@ struct ipv6_txoptions { }; struct ip6_flowlabel { - struct ip6_flowlabel *next; + struct ip6_flowlabel __rcu *next; __be32 label; atomic_t users; struct in6_addr dst; @@ -238,9 +238,9 @@ struct ip6_flowlabel { #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) struct ipv6_fl_socklist { - struct ipv6_fl_socklist *next; - struct ip6_flowlabel *fl; - struct rcu_head rcu; + struct ipv6_fl_socklist __rcu *next; + struct ip6_flowlabel *fl; + struct rcu_head rcu; }; extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); @@ -320,6 +320,18 @@ static inline int ipv6_addr_src_scope(const struct in6_addr *addr) return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } +static inline bool __ipv6_addr_needs_scope_id(int type) +{ + return type & IPV6_ADDR_LINKLOCAL || + (type & IPV6_ADDR_MULTICAST && + (type & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL))); +} + +static inline __u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface) +{ + return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ? 
iface : 0; +} + static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) { return memcmp(a1, a2, sizeof(struct in6_addr)); @@ -466,6 +478,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + u8 ecn; }; void ip6_frag_init(struct inet_frag_queue *q, void *a); @@ -485,6 +498,7 @@ struct frag_queue { int iif; unsigned int csum; __u16 nhoffset; + u8 ecn; }; void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index f74109144d3f..f132924cc9da 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -256,7 +256,8 @@ static inline __u32 irlmp_get_daddr(const struct lsap_cb *self) return (self && self->lap) ? self->lap->daddr : 0; } -extern const char *irlmp_reasons[]; +const char *irlmp_reason_str(LM_REASON reason); + extern int sysctl_discovery_timeout; extern int sysctl_discovery_slots; extern int sysctl_discovery; diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index cc7c19732389..714cc9a54a4c 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -130,6 +130,14 @@ struct iucv_sock { enum iucv_tx_notify n); }; +struct iucv_skb_cb { + u32 class; /* target class of message */ + u32 tag; /* tag associated with message */ + u32 offset; /* offset for skb receival */ +}; + +#define IUCV_SKB_CB(__skb) ((struct iucv_skb_cb *)&((__skb)->cb[0])) + /* iucv socket options (SOL_IUCV) */ #define SO_IPRMDATA_MSG 0x0080 /* send/recv IPRM_DATA msgs */ #define SO_MSGLIMIT 0x1000 /* get/set IUCV MSGLIMIT */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index de644bcd8613..b17697827482 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -17,6 +17,7 @@ #include <net/netns/ipv6.h> #include <net/netns/sctp.h> #include <net/netns/dccp.h> +#include <net/netns/netfilter.h> #include <net/netns/x_tables.h> #if defined(CONFIG_NF_CONNTRACK) || 
defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> @@ -94,6 +95,7 @@ struct net { struct netns_dccp dccp; #endif #ifdef CONFIG_NETFILTER + struct netns_nf nf; struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index caca0c4d6b4b..644d9c223d24 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -184,7 +184,7 @@ extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); extern void nf_ct_dying_timeout(struct nf_conn *ct); -extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report); +extern void nf_conntrack_flush_report(struct net *net, u32 portid, int report); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 930275fa2ea6..fb2b6234e937 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -27,6 +27,7 @@ extern unsigned int nf_conntrack_in(struct net *net, extern int nf_conntrack_init_net(struct net *net); extern void nf_conntrack_cleanup_net(struct net *net); +extern void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); extern int nf_conntrack_proto_pernet_init(struct net *net); extern void nf_conntrack_proto_pernet_fini(struct net *net); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cbbae7621e22..3f3aecbc8632 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -88,7 +88,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, - u32 pid, int 
report); + u32 portid, int report); static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { nf_ct_unlink_expect_report(exp, 0, 0); @@ -106,7 +106,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report); + u32 portid, int report); static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect) { return nf_ct_expect_related_report(expect, 0, 0); diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index e991bd0a27af..31f1fb9eb784 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -49,12 +49,18 @@ struct nf_logger { int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger); -void nf_log_unbind_pf(u_int8_t pf); +void nf_log_set(struct net *net, u_int8_t pf, + const struct nf_logger *logger); +void nf_log_unset(struct net *net, const struct nf_logger *logger); + +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger); +void nf_log_unbind_pf(struct net *net, u_int8_t pf); /* Calls the registered backend logging function */ -__printf(7, 8) -void nf_log_packet(u_int8_t pf, +__printf(8, 9) +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 1242f371718b..005e2c2e39a9 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,6 +71,7 @@ struct netns_ipv6 { struct fib_rules_ops *mr6_rules_ops; #endif #endif + atomic_t dev_addr_genid; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h new file mode 100644 index 
000000000000..88740024ccf3 --- /dev/null +++ b/include/net/netns/netfilter.h @@ -0,0 +1,18 @@ +#ifndef __NETNS_NETFILTER_H +#define __NETNS_NETFILTER_H + +#include <linux/proc_fs.h> +#include <linux/netfilter.h> + +struct nf_logger; + +struct netns_nf { +#if defined CONFIG_PROC_FS + struct proc_dir_entry *proc_netfilter; +#endif + const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *nf_log_dir_header; +#endif +}; +#endif diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 1d04b6f0fbd4..50ab8c26ab59 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -29,7 +29,7 @@ struct cgroup_netprio_state { struct cgroup_subsys_state css; }; -extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); +extern void sock_update_netprioidx(struct sock *sk); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -68,7 +68,7 @@ static inline u32 task_netprioidx(struct task_struct *p) return 0; } -#define sock_update_netprioidx(sk, task) +#define sock_update_netprioidx(sk) #endif /* CONFIG_NETPRIO_CGROUP */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a51dbd17c2de..59795e42c8b6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -27,19 +27,13 @@ struct sk_buff; struct dst_entry; struct proto; -/* empty to "strongly type" an otherwise void parameter. 
- */ -struct request_values { -}; - struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(struct sock *sk, - struct request_sock *req, - struct request_values *rvp); + struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(struct sock *sk, @@ -54,7 +48,7 @@ extern int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); /* struct request_sock - mini sock to represent a connection request */ struct request_sock { - struct request_sock *dl_next; /* Must be first member! */ + struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 5a15fabd6a75..702664833a53 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -4,7 +4,7 @@ #include <linux/rtnetlink.h> #include <net/netlink.h> -typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *); +typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); diff --git a/include/net/scm.h b/include/net/scm.h index 975cca01048b..8de2d37d2077 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -26,7 +26,6 @@ struct scm_fp_list { struct scm_cookie { struct pid *pid; /* Skb credentials */ - const struct cred *cred; struct scm_fp_list *fp; /* Passed files */ struct scm_creds creds; /* Skb credentials */ #ifdef CONFIG_SECURITY_NETWORK @@ -51,23 +50,18 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co #endif /* CONFIG_SECURITY_NETWORK */ static __inline__ void scm_set_cred(struct scm_cookie *scm, - struct pid *pid, const struct cred *cred) + struct pid *pid, kuid_t uid, kgid_t gid) { scm->pid = get_pid(pid); - 
scm->cred = cred ? get_cred(cred) : NULL; scm->creds.pid = pid_vnr(pid); - scm->creds.uid = cred ? cred->euid : INVALID_UID; - scm->creds.gid = cred ? cred->egid : INVALID_GID; + scm->creds.uid = uid; + scm->creds.gid = gid; } static __inline__ void scm_destroy_cred(struct scm_cookie *scm) { put_pid(scm->pid); scm->pid = NULL; - - if (scm->cred) - put_cred(scm->cred); - scm->cred = NULL; } static __inline__ void scm_destroy(struct scm_cookie *scm) @@ -81,8 +75,10 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, bool forcecreds) { memset(scm, 0, sizeof(*scm)); + scm->creds.uid = INVALID_UID; + scm->creds.gid = INVALID_GID; if (forcecreds) - scm_set_cred(scm, task_tgid(current), current_cred()); + scm_set_cred(scm, task_tgid(current), current_uid(), current_gid()); unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index a7dd5c50df79..ca50e0751e47 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -49,7 +49,6 @@ #include <linux/sctp.h> #include <linux/ipv6.h> /* For ipv6hdr. */ -#include <net/sctp/user.h> #include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0e0f9d2322e3..1bd4c4144fe8 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -399,7 +399,6 @@ struct sctp_stream { struct sctp_ssnmap { struct sctp_stream in; struct sctp_stream out; - int malloced; }; struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, @@ -715,8 +714,7 @@ struct sctp_packet { has_sack:1, /* This packet contains a SACK chunk. */ has_auth:1, /* This packet contains an AUTH chunk */ has_data:1, /* This packet contains at least 1 DATA chunk */ - ipfragok:1, /* So let ip fragment this packet */ - malloced:1; /* Is it malloced? 
*/ + ipfragok:1; /* So let ip fragment this packet */ }; struct sctp_packet *sctp_packet_init(struct sctp_packet *, @@ -780,10 +778,7 @@ struct sctp_transport { hb_sent:1, /* Is the Path MTU update pending on this tranport */ - pmtu_pending:1, - - /* Is this structure kfree()able? */ - malloced:1; + pmtu_pending:1; /* Has this transport moved the ctsn since we last sacked */ __u32 sack_generation; @@ -992,8 +987,6 @@ struct sctp_inq { * messages. */ struct work_struct immediate; - - int malloced; /* Is this structure kfree()able? */ }; void sctp_inq_init(struct sctp_inq *); @@ -1062,9 +1055,6 @@ struct sctp_outq { /* Is this structure empty? */ char empty; - - /* Are we kfree()able? */ - char malloced; }; void sctp_outq_init(struct sctp_association *, struct sctp_outq *); @@ -1102,8 +1092,6 @@ struct sctp_bind_addr { * peer(s) in INIT and INIT ACK chunks. */ struct list_head address_list; - - int malloced; /* Are we kfree()able? */ }; void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); @@ -1174,11 +1162,9 @@ struct sctp_ep_common { /* Some fields to help us manage this object. * refcnt - Reference count access to this object. * dead - Do not attempt to use this object. - * malloced - Do we need to kfree this object? */ atomic_t refcnt; - char dead; - char malloced; + bool dead; /* What socket does this endpoint belong to? */ struct sock *sk; diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index ff1b8ba73ab1..00e50ba3f24b 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -49,7 +49,6 @@ /* A structure to carry information to the ULP (e.g. Sockets API) */ struct sctp_ulpq { - char malloced; char pd_mode; struct sctp_association *asoc; struct sk_buff_head reasm; diff --git a/include/net/sock.h b/include/net/sock.h index 14f6e9d19dc7..5c97b0fc5623 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -667,6 +667,7 @@ enum sock_flags { * user-space instead. 
*/ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ + SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) @@ -2158,10 +2159,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, * @sk: socket sending this packet * @tx_flags: filled with instructions for time stamping * - * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if - * parameters are invalid. + * Currently only depends on SOCK_TIMESTAMPING* flags. */ -extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); +extern void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/include/net/tcp.h b/include/net/tcp.h index cf0694d4ad60..5bba80fbd1d9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -179,7 +179,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ -#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. 
See draft-ietf-tcpm-experimental-options-00.txt @@ -273,7 +272,6 @@ extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; -extern int sysctl_tcp_frto_response; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; @@ -284,7 +282,6 @@ extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; -extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; @@ -373,6 +370,7 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern void tcp_release_cb(struct sock *sk); +extern void tcp_wfree(struct sk_buff *skb); extern void tcp_write_timer_handler(struct sock *sk); extern void tcp_delack_timer_handler(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); @@ -425,8 +423,6 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, bool fastopen); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); -extern bool tcp_use_frto(struct sock *sk); -extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); @@ -454,7 +450,7 @@ extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); extern void tcp_parse_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx, const u8 **hvpp, + struct tcp_options_received *opt_rx, int estab, struct 
tcp_fastopen_cookie *foc); extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); @@ -476,7 +472,6 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc); extern int tcp_disconnect(struct sock *sk, int flags); @@ -543,6 +538,8 @@ extern bool tcp_syn_flood_action(struct sock *sk, extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); +extern void tcp_send_loss_probe(struct sock *sk); +extern bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); @@ -756,7 +753,6 @@ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ CA_EVENT_CWND_RESTART, /* congestion window restart */ CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ - CA_EVENT_FRTO, /* fast recovery timeout */ CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_FAST_ACK, /* in sequence ack */ CA_EVENT_SLOW_ACK, /* other ack */ @@ -873,8 +869,8 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { tp->do_early_retrans = sysctl_tcp_early_retrans && - !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3; - tp->early_retrans_delayed = 0; + sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && + sysctl_tcp_reordering == 3; } static inline void tcp_disable_early_retrans(struct tcp_sock *tp) @@ -1030,50 +1026,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) #endif } -/* Packet is added to VJ-style prequeue for processing in process - * context, if a reader task is waiting. Apparently, this exciting - * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) - * failed somewhere. 
Latency? Burstiness? Well, at least now we will - * see, why it failed. 8)8) --ANK - * - * NOTE: is this not too big to inline? - */ -static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (sysctl_tcp_low_latency || !tp->ucopy.task) - return false; - - if (skb->len <= tcp_hdrlen(skb) && - skb_queue_len(&tp->ucopy.prequeue) == 0) - return false; - - __skb_queue_tail(&tp->ucopy.prequeue, skb); - tp->ucopy.memory += skb->truesize; - if (tp->ucopy.memory > sk->sk_rcvbuf) { - struct sk_buff *skb1; - - BUG_ON(sock_owned_by_user(sk)); - - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { - sk_backlog_rcv(sk, skb1); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPPREQUEUEDROPPED); - } - - tp->ucopy.memory = 0; - } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk_sleep(sk), - POLLIN | POLLRDNORM | POLLRDBAND); - if (!inet_csk_ack_scheduled(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * tcp_rto_min(sk)) / 4, - TCP_RTO_MAX); - } - return true; -} - +extern bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE @@ -1630,91 +1583,6 @@ struct tcp_request_sock_ops { #endif }; -/* Using SHA1 for now, define some constants. - */ -#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS) -#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4) -#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS) - -extern int tcp_cookie_generator(u32 *bakery); - -/** - * struct tcp_cookie_values - each socket needs extra space for the - * cookies, together with (optional) space for any SYN data. - * - * A tcp_sock contains a pointer to the current value, and this is - * cloned to the tcp_timewait_sock. - * - * @cookie_pair: variable data from the option exchange. - * - * @cookie_desired: user specified tcpct_cookie_desired. Zero - * indicates default (sysctl_tcp_cookie_size). - * After cookie sent, remembers size of cookie. 
- * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. - * - * @s_data_desired: user specified tcpct_s_data_desired. When the - * constant payload is specified (@s_data_constant), - * holds its length instead. - * Range 0 to TCP_MSS_DESIRED. - * - * @s_data_payload: constant data that is to be included in the - * payload of SYN or SYNACK segments when the - * cookie option is present. - */ -struct tcp_cookie_values { - struct kref kref; - u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; - u8 cookie_pair_size; - u8 cookie_desired; - u16 s_data_desired:11, - s_data_constant:1, - s_data_in:1, - s_data_out:1, - s_data_unused:2; - u8 s_data_payload[0]; -}; - -static inline void tcp_cookie_values_release(struct kref *kref) -{ - kfree(container_of(kref, struct tcp_cookie_values, kref)); -} - -/* The length of constant payload data. Note that s_data_desired is - * overloaded, depending on s_data_constant: either the length of constant - * data (returned here) or the limit on variable data. - */ -static inline int tcp_s_data_size(const struct tcp_sock *tp) -{ - return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) - ? tp->cookie_values->s_data_desired - : 0; -} - -/** - * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. - * - * As tcp_request_sock has already been extended in other places, the - * only remaining method is to pass stack values along as function - * parameters. These parameters are not needed after sending SYNACK. - * - * @cookie_bakery: cryptographic secret and message workspace. - * - * @cookie_plus: bytes in authenticator/cookie option, copied from - * struct tcp_options_received (above). 
- */ -struct tcp_extend_values { - struct request_values rv; - u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; - u8 cookie_plus:6, - cookie_out_never:1, - cookie_in_always:1; -}; - -static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) -{ - return (struct tcp_extend_values *)rvp; -} - extern void tcp_v4_init(void); extern void tcp_init(void); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 24c8886fd969..ae16531d0d35 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -162,6 +162,7 @@ struct xfrm_state { xfrm_address_t saddr; int header_len; int trailer_len; + u32 extra_flags; } props; struct xfrm_lifetime_cfg lft; diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 399162b50a8d..e1379b4e8faf 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -1074,7 +1074,8 @@ void fc_rport_terminate_io(struct fc_rport *); /* * DISCOVERY LAYER *****************************/ -int fc_disc_init(struct fc_lport *); +void fc_disc_init(struct fc_lport *); +void fc_disc_config(struct fc_lport *, void *); static inline struct fc_lport *fc_disc_lport(struct fc_disc *disc) { diff --git a/include/sound/max98090.h b/include/sound/max98090.h index 95efb13f8478..95efb13f8478 100755..100644 --- a/include/sound/max98090.h +++ b/include/sound/max98090.h diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index e1ef63d4a5c4..44a30b108683 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -488,6 +488,7 @@ struct snd_soc_dapm_path { /* status */ u32 connect:1; /* source and sink widgets are connected */ u32 walked:1; /* path has been walked */ + u32 walking:1; /* path is in the process of being walked */ u32 weak:1; /* path ignored for power management */ int (*connected)(struct snd_soc_dapm_widget *source, diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 9961726523d0..9c1467357b03 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -257,6 +257,7 @@ 
TRACE_EVENT(block_bio_bounce, /** * block_bio_complete - completed all work on the block operation + * @q: queue holding the block operation * @bio: block operation completed * @error: io error value * @@ -265,9 +266,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct bio *bio, int error), + TP_PROTO(struct request_queue *q, struct bio *bio, int error), - TP_ARGS(bio, error), + TP_ARGS(q, bio, error), TP_STRUCT__entry( __field( dev_t, dev ) @@ -278,8 +279,7 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev ? - bio->bi_bdev->bd_dev : 0; + __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; __entry->error = error; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5a8671e8a67f..e5586caff67a 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -147,7 +147,7 @@ TRACE_EVENT(sched_switch, __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, - { 128, "W" }) : "R", + { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", __entry->prev_state & TASK_STATE_MAX ? 
"+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 4ef3acbba5da..c5d2e3a1cf68 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -74,4 +74,6 @@ #define SO_LOCK_FILTER 44 +#define SO_SELECT_ERR_QUEUE 45 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5c8a1d25e21c..ab5d4992e568 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -285,6 +285,7 @@ header-y += nvram.h header-y += omap3isp.h header-y += omapfb.h header-y += oom.h +header-y += openvswitch.h header-y += packet_diag.h header-y += param.h header-y += parport.h @@ -331,6 +332,7 @@ header-y += rtnetlink.h header-y += scc.h header-y += sched.h header-y += screen_info.h +header-y += sctp.h header-y += sdla.h header-y += seccomp.h header-y += securebits.h diff --git a/include/uapi/linux/acct.h b/include/uapi/linux/acct.h index 11b6ca3e0873..df2f9a0bba6a 100644 --- a/include/uapi/linux/acct.h +++ b/include/uapi/linux/acct.h @@ -107,10 +107,12 @@ struct acct_v3 #define ACORE 0x08 /* ... dumped core */ #define AXSIG 0x10 /* ... was killed by a signal */ -#ifdef __BIG_ENDIAN +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) #define ACCT_BYTEORDER 0x80 /* accounting file is big endian */ -#else +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) #define ACCT_BYTEORDER 0x00 /* accounting file is little endian */ +#else +#error unspecified endianness #endif #ifndef __KERNEL__ diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index 86fa7a71336a..bb2554f7fbd1 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -62,9 +62,9 @@ struct io_event { __s64 res2; /* secondary result */ }; -#if defined(__LITTLE_ENDIAN) +#if defined(__BYTE_ORDER) ? 
__BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) #define PADDED(x,y) x, y -#elif defined(__BIG_ENDIAN) +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) #define PADDED(x,y) y, x #else #error edit for your odd byteorder. diff --git a/include/uapi/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h index 3f3bac6af7bc..586e9f98184f 100644 --- a/include/uapi/linux/caif/caif_socket.h +++ b/include/uapi/linux/caif/caif_socket.h @@ -1,7 +1,7 @@ /* linux/caif_socket.h * CAIF Definitions for CAIF socket and network layer * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/uapi/linux/caif/if_caif.h b/include/uapi/linux/caif/if_caif.h index 5e7eed4edf51..7618aabe8c6b 100644 --- a/include/uapi/linux/caif/if_caif.h +++ b/include/uapi/linux/caif/if_caif.h @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/ sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h index 0d7b49973bb3..f6c271035bbd 100644 --- a/include/uapi/linux/cn_proc.h +++ b/include/uapi/linux/cn_proc.h @@ -56,7 +56,9 @@ struct proc_event { PROC_EVENT_PTRACE = 0x00000100, PROC_EVENT_COMM = 0x00000200, /* "next" should be 0x00000400 */ - /* "last" is the last process event: exit */ + /* "last" is the last process event: exit, + * while "next to last" is coredumping event */ + PROC_EVENT_COREDUMP = 0x40000000, PROC_EVENT_EXIT = 0x80000000 } what; __u32 cpu; @@ -110,11 +112,17 @@ struct proc_event { char comm[16]; } comm; + struct coredump_proc_event { + __kernel_pid_t process_pid; + __kernel_pid_t process_tgid; + } coredump; + struct exit_proc_event { __kernel_pid_t process_pid; __kernel_pid_t process_tgid; __u32 exit_code, exit_signal; } 
exit; + } event_data; }; diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 9cfde6941099..8eb9ccaa5b48 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -129,7 +129,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_ALU_XOR_X 40 #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 -#define SKF_AD_MAX 52 +#define SKF_AD_PAY_OFFSET 52 +#define SKF_AD_MAX 56 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 4c43b4448792..706d035fa748 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -95,15 +95,10 @@ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H -#ifdef __linux__ +#ifdef __KERNEL__ #include <linux/types.h> #else #include <stdint.h> -#define __u64 uint64_t -#define __s64 int64_t -#define __u32 uint32_t -#define __s32 int32_t -#define __u16 uint16_t #endif /* @@ -139,42 +134,42 @@ userspace works under 64bit kernels */ struct fuse_attr { - __u64 ino; - __u64 size; - __u64 blocks; - __u64 atime; - __u64 mtime; - __u64 ctime; - __u32 atimensec; - __u32 mtimensec; - __u32 ctimensec; - __u32 mode; - __u32 nlink; - __u32 uid; - __u32 gid; - __u32 rdev; - __u32 blksize; - __u32 padding; + uint64_t ino; + uint64_t size; + uint64_t blocks; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint32_t rdev; + uint32_t blksize; + uint32_t padding; }; struct fuse_kstatfs { - __u64 blocks; - __u64 bfree; - __u64 bavail; - __u64 files; - __u64 ffree; - __u32 bsize; - __u32 namelen; - __u32 frsize; - __u32 padding; - __u32 spare[6]; + uint64_t blocks; + uint64_t bfree; + uint64_t bavail; + uint64_t files; + uint64_t ffree; + uint32_t bsize; + uint32_t namelen; + uint32_t frsize; + uint32_t padding; + uint32_t spare[6]; }; struct fuse_file_lock { - __u64 
start; - __u64 end; - __u32 type; - __u32 pid; /* tgid */ + uint64_t start; + uint64_t end; + uint32_t type; + uint32_t pid; /* tgid */ }; /** @@ -364,143 +359,143 @@ enum fuse_notify_code { #define FUSE_COMPAT_ENTRY_OUT_SIZE 120 struct fuse_entry_out { - __u64 nodeid; /* Inode ID */ - __u64 generation; /* Inode generation: nodeid:gen must - be unique for the fs's lifetime */ - __u64 entry_valid; /* Cache timeout for the name */ - __u64 attr_valid; /* Cache timeout for the attributes */ - __u32 entry_valid_nsec; - __u32 attr_valid_nsec; + uint64_t nodeid; /* Inode ID */ + uint64_t generation; /* Inode generation: nodeid:gen must + be unique for the fs's lifetime */ + uint64_t entry_valid; /* Cache timeout for the name */ + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t entry_valid_nsec; + uint32_t attr_valid_nsec; struct fuse_attr attr; }; struct fuse_forget_in { - __u64 nlookup; + uint64_t nlookup; }; struct fuse_forget_one { - __u64 nodeid; - __u64 nlookup; + uint64_t nodeid; + uint64_t nlookup; }; struct fuse_batch_forget_in { - __u32 count; - __u32 dummy; + uint32_t count; + uint32_t dummy; }; struct fuse_getattr_in { - __u32 getattr_flags; - __u32 dummy; - __u64 fh; + uint32_t getattr_flags; + uint32_t dummy; + uint64_t fh; }; #define FUSE_COMPAT_ATTR_OUT_SIZE 96 struct fuse_attr_out { - __u64 attr_valid; /* Cache timeout for the attributes */ - __u32 attr_valid_nsec; - __u32 dummy; + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t attr_valid_nsec; + uint32_t dummy; struct fuse_attr attr; }; #define FUSE_COMPAT_MKNOD_IN_SIZE 8 struct fuse_mknod_in { - __u32 mode; - __u32 rdev; - __u32 umask; - __u32 padding; + uint32_t mode; + uint32_t rdev; + uint32_t umask; + uint32_t padding; }; struct fuse_mkdir_in { - __u32 mode; - __u32 umask; + uint32_t mode; + uint32_t umask; }; struct fuse_rename_in { - __u64 newdir; + uint64_t newdir; }; struct fuse_link_in { - __u64 oldnodeid; + uint64_t oldnodeid; }; struct 
fuse_setattr_in { - __u32 valid; - __u32 padding; - __u64 fh; - __u64 size; - __u64 lock_owner; - __u64 atime; - __u64 mtime; - __u64 unused2; - __u32 atimensec; - __u32 mtimensec; - __u32 unused3; - __u32 mode; - __u32 unused4; - __u32 uid; - __u32 gid; - __u32 unused5; + uint32_t valid; + uint32_t padding; + uint64_t fh; + uint64_t size; + uint64_t lock_owner; + uint64_t atime; + uint64_t mtime; + uint64_t unused2; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t unused3; + uint32_t mode; + uint32_t unused4; + uint32_t uid; + uint32_t gid; + uint32_t unused5; }; struct fuse_open_in { - __u32 flags; - __u32 unused; + uint32_t flags; + uint32_t unused; }; struct fuse_create_in { - __u32 flags; - __u32 mode; - __u32 umask; - __u32 padding; + uint32_t flags; + uint32_t mode; + uint32_t umask; + uint32_t padding; }; struct fuse_open_out { - __u64 fh; - __u32 open_flags; - __u32 padding; + uint64_t fh; + uint32_t open_flags; + uint32_t padding; }; struct fuse_release_in { - __u64 fh; - __u32 flags; - __u32 release_flags; - __u64 lock_owner; + uint64_t fh; + uint32_t flags; + uint32_t release_flags; + uint64_t lock_owner; }; struct fuse_flush_in { - __u64 fh; - __u32 unused; - __u32 padding; - __u64 lock_owner; + uint64_t fh; + uint32_t unused; + uint32_t padding; + uint64_t lock_owner; }; struct fuse_read_in { - __u64 fh; - __u64 offset; - __u32 size; - __u32 read_flags; - __u64 lock_owner; - __u32 flags; - __u32 padding; + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t read_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; }; #define FUSE_COMPAT_WRITE_IN_SIZE 24 struct fuse_write_in { - __u64 fh; - __u64 offset; - __u32 size; - __u32 write_flags; - __u64 lock_owner; - __u32 flags; - __u32 padding; + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t write_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; }; struct fuse_write_out { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; 
#define FUSE_COMPAT_STATFS_SIZE 48 @@ -510,32 +505,32 @@ struct fuse_statfs_out { }; struct fuse_fsync_in { - __u64 fh; - __u32 fsync_flags; - __u32 padding; + uint64_t fh; + uint32_t fsync_flags; + uint32_t padding; }; struct fuse_setxattr_in { - __u32 size; - __u32 flags; + uint32_t size; + uint32_t flags; }; struct fuse_getxattr_in { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; struct fuse_getxattr_out { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; struct fuse_lk_in { - __u64 fh; - __u64 owner; + uint64_t fh; + uint64_t owner; struct fuse_file_lock lk; - __u32 lk_flags; - __u32 padding; + uint32_t lk_flags; + uint32_t padding; }; struct fuse_lk_out { @@ -543,134 +538,135 @@ struct fuse_lk_out { }; struct fuse_access_in { - __u32 mask; - __u32 padding; + uint32_t mask; + uint32_t padding; }; struct fuse_init_in { - __u32 major; - __u32 minor; - __u32 max_readahead; - __u32 flags; + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; }; struct fuse_init_out { - __u32 major; - __u32 minor; - __u32 max_readahead; - __u32 flags; - __u16 max_background; - __u16 congestion_threshold; - __u32 max_write; + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; + uint16_t max_background; + uint16_t congestion_threshold; + uint32_t max_write; }; #define CUSE_INIT_INFO_MAX 4096 struct cuse_init_in { - __u32 major; - __u32 minor; - __u32 unused; - __u32 flags; + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; }; struct cuse_init_out { - __u32 major; - __u32 minor; - __u32 unused; - __u32 flags; - __u32 max_read; - __u32 max_write; - __u32 dev_major; /* chardev major */ - __u32 dev_minor; /* chardev minor */ - __u32 spare[10]; + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; + uint32_t max_read; + uint32_t max_write; + uint32_t dev_major; /* chardev major */ + uint32_t dev_minor; /* chardev minor */ + uint32_t spare[10]; }; struct 
fuse_interrupt_in { - __u64 unique; + uint64_t unique; }; struct fuse_bmap_in { - __u64 block; - __u32 blocksize; - __u32 padding; + uint64_t block; + uint32_t blocksize; + uint32_t padding; }; struct fuse_bmap_out { - __u64 block; + uint64_t block; }; struct fuse_ioctl_in { - __u64 fh; - __u32 flags; - __u32 cmd; - __u64 arg; - __u32 in_size; - __u32 out_size; + uint64_t fh; + uint32_t flags; + uint32_t cmd; + uint64_t arg; + uint32_t in_size; + uint32_t out_size; }; struct fuse_ioctl_iovec { - __u64 base; - __u64 len; + uint64_t base; + uint64_t len; }; struct fuse_ioctl_out { - __s32 result; - __u32 flags; - __u32 in_iovs; - __u32 out_iovs; + int32_t result; + uint32_t flags; + uint32_t in_iovs; + uint32_t out_iovs; }; struct fuse_poll_in { - __u64 fh; - __u64 kh; - __u32 flags; - __u32 events; + uint64_t fh; + uint64_t kh; + uint32_t flags; + uint32_t events; }; struct fuse_poll_out { - __u32 revents; - __u32 padding; + uint32_t revents; + uint32_t padding; }; struct fuse_notify_poll_wakeup_out { - __u64 kh; + uint64_t kh; }; struct fuse_fallocate_in { - __u64 fh; - __u64 offset; - __u64 length; - __u32 mode; - __u32 padding; + uint64_t fh; + uint64_t offset; + uint64_t length; + uint32_t mode; + uint32_t padding; }; struct fuse_in_header { - __u32 len; - __u32 opcode; - __u64 unique; - __u64 nodeid; - __u32 uid; - __u32 gid; - __u32 pid; - __u32 padding; + uint32_t len; + uint32_t opcode; + uint64_t unique; + uint64_t nodeid; + uint32_t uid; + uint32_t gid; + uint32_t pid; + uint32_t padding; }; struct fuse_out_header { - __u32 len; - __s32 error; - __u64 unique; + uint32_t len; + int32_t error; + uint64_t unique; }; struct fuse_dirent { - __u64 ino; - __u64 off; - __u32 namelen; - __u32 type; + uint64_t ino; + uint64_t off; + uint32_t namelen; + uint32_t type; char name[]; }; #define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) +#define FUSE_DIRENT_ALIGN(x) \ + (((x) + 
sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) @@ -685,47 +681,47 @@ struct fuse_direntplus { FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) struct fuse_notify_inval_inode_out { - __u64 ino; - __s64 off; - __s64 len; + uint64_t ino; + int64_t off; + int64_t len; }; struct fuse_notify_inval_entry_out { - __u64 parent; - __u32 namelen; - __u32 padding; + uint64_t parent; + uint32_t namelen; + uint32_t padding; }; struct fuse_notify_delete_out { - __u64 parent; - __u64 child; - __u32 namelen; - __u32 padding; + uint64_t parent; + uint64_t child; + uint32_t namelen; + uint32_t padding; }; struct fuse_notify_store_out { - __u64 nodeid; - __u64 offset; - __u32 size; - __u32 padding; + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; }; struct fuse_notify_retrieve_out { - __u64 notify_unique; - __u64 nodeid; - __u64 offset; - __u32 size; - __u32 padding; + uint64_t notify_unique; + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; }; /* Matches the size of fuse_write_in */ struct fuse_notify_retrieve_in { - __u64 dummy1; - __u64 offset; - __u32 size; - __u32 dummy2; - __u64 dummy3; - __u64 dummy4; + uint64_t dummy1; + uint64_t offset; + uint32_t size; + uint32_t dummy2; + uint64_t dummy3; + uint64_t dummy4; }; #endif /* _LINUX_FUSE_H */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 798032d01112..ade07f1c491a 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -94,6 +94,9 @@ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value + * then the frame is Ethernet II. Else it is 802.3 */ + /* * Non DIX types. Won't clash for 1500 types. 
*/ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c4edfe11f1f7..e3163544f339 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -201,6 +201,7 @@ enum { IFLA_INET6_MCAST, /* MC things. What of them? */ IFLA_INET6_CACHEINFO, /* time values and max reasm size */ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ + IFLA_INET6_TOKEN, /* device token */ __IFLA_INET6_MAX }; @@ -249,6 +250,7 @@ enum { IFLA_VLAN_FLAGS, IFLA_VLAN_EGRESS_QOS, IFLA_VLAN_INGRESS_QOS, + IFLA_VLAN_PROTOCOL, __IFLA_VLAN_MAX, }; @@ -295,7 +297,7 @@ enum macvlan_mode { enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, - IFLA_VXLAN_GROUP, + IFLA_VXLAN_REMOTE, IFLA_VXLAN_LINK, IFLA_VXLAN_LOCAL, IFLA_VXLAN_TTL, diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index f9a60375f0d0..8136658ea477 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -55,6 +55,8 @@ struct sockaddr_ll { #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 #define PACKET_FANOUT_CPU 2 +#define PACKET_FANOUT_ROLLOVER 3 +#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index adb068c53c4e..f175212420ab 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -21,6 +21,9 @@ enum { NDA_CACHEINFO, NDA_PROBES, NDA_VLAN, + NDA_PORT, + NDA_VNI, + NDA_IFINDEX, __NDA_MAX }; diff --git a/include/uapi/linux/netfilter/xt_NFQUEUE.h b/include/uapi/linux/netfilter/xt_NFQUEUE.h index 9eafdbbb401c..8bb5fe657d34 100644 --- a/include/uapi/linux/netfilter/xt_NFQUEUE.h +++ b/include/uapi/linux/netfilter/xt_NFQUEUE.h @@ -26,4 +26,13 @@ struct xt_NFQ_info_v2 { __u16 bypass; }; +struct xt_NFQ_info_v3 { + __u16 queuenum; + __u16 queues_total; + __u16 flags; +#define NFQ_FLAG_BYPASS 0x01 /* for compatibility with v2 */ +#define NFQ_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ 
+#define NFQ_FLAG_MASK 0x03 +}; + #endif /* _XT_NFQ_TARGET_H */ diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h index b47f61b9e082..dfd8bc2268cf 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h @@ -4,9 +4,9 @@ #include <linux/types.h> struct ip6t_frag { - __u32 ids[2]; /* Security Parameter Index */ + __u32 ids[2]; /* Identification range */ __u32 hdrlen; /* Header Length */ - __u8 flags; /* */ + __u8 flags; /* Flags */ __u8 invflags; /* Inverse flags */ }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 78d5b8a546d6..1a85940f8ab7 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -1,6 +1,7 @@ #ifndef _UAPI__LINUX_NETLINK_H #define _UAPI__LINUX_NETLINK_H +#include <linux/kernel.h> #include <linux/socket.h> /* for __kernel_sa_family_t */ #include <linux/types.h> @@ -78,7 +79,7 @@ struct nlmsghdr { #define NLMSG_ALIGNTO 4U #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) #define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) -#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN)) +#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) #define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ @@ -105,11 +106,42 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 struct nl_pktinfo { __u32 group; }; +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +enum 
nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, + NL_MMAP_STATUS_VALID, + NL_MMAP_STATUS_COPY, + NL_MMAP_STATUS_SKIP, +}; + +#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO +#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) +#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) + #define NET_MAJOR 36 /* Major 36 is reserved for networking */ enum { diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h new file mode 100644 index 000000000000..4e31db4eea41 --- /dev/null +++ b/include/uapi/linux/netlink_diag.h @@ -0,0 +1,52 @@ +#ifndef __NETLINK_DIAG_H__ +#define __NETLINK_DIAG_H__ + +#include <linux/types.h> + +struct netlink_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u16 pad; + __u32 ndiag_ino; + __u32 ndiag_show; + __u32 ndiag_cookie[2]; +}; + +struct netlink_diag_msg { + __u8 ndiag_family; + __u8 ndiag_type; + __u8 ndiag_protocol; + __u8 ndiag_state; + + __u32 ndiag_portid; + __u32 ndiag_dst_portid; + __u32 ndiag_dst_group; + __u32 ndiag_ino; + __u32 ndiag_cookie[2]; +}; + +struct netlink_diag_ring { + __u32 ndr_block_size; + __u32 ndr_block_nr; + __u32 ndr_frame_size; + __u32 ndr_frame_nr; +}; + +enum { + NETLINK_DIAG_MEMINFO, + NETLINK_DIAG_GROUPS, + NETLINK_DIAG_RX_RING, + NETLINK_DIAG_TX_RING, + + __NETLINK_DIAG_MAX, +}; + +#define NETLINK_DIAG_MAX (__NETLINK_DIAG_MAX - 1) + +#define NDIAG_PROTO_ALL ((__u8) ~0) + +#define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ +#define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ + +#endif diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h new file mode 100644 index 000000000000..405918dd7b3f --- /dev/null +++ b/include/uapi/linux/openvswitch.h @@ -0,0 +1,456 @@ + +/* + * Copyright (c) 2007-2011 Nicira Networks. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _UAPI__LINUX_OPENVSWITCH_H +#define _UAPI__LINUX_OPENVSWITCH_H 1 + +#include <linux/types.h> +#include <linux/if_ether.h> + +/** + * struct ovs_header - header for OVS Generic Netlink messages. + * @dp_ifindex: ifindex of local port for datapath (0 to make a request not + * specific to a datapath). + * + * Attributes following the header are specific to a particular OVS Generic + * Netlink family, but all of the OVS families use this header. + */ + +struct ovs_header { + int dp_ifindex; +}; + +/* Datapaths. */ + +#define OVS_DATAPATH_FAMILY "ovs_datapath" +#define OVS_DATAPATH_MCGROUP "ovs_datapath" +#define OVS_DATAPATH_VERSION 0x1 + +enum ovs_datapath_cmd { + OVS_DP_CMD_UNSPEC, + OVS_DP_CMD_NEW, + OVS_DP_CMD_DEL, + OVS_DP_CMD_GET, + OVS_DP_CMD_SET +}; + +/** + * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. + * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local + * port". This is the name of the network device whose dp_ifindex is given in + * the &struct ovs_header. Always present in notifications. Required in + * %OVS_DP_NEW requests. May be used as an alternative to specifying + * dp_ifindex in other requests (with a dp_ifindex of 0). 
+ * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially + * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on + * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should + * not be sent. + * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the + * datapath. Always present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_DP_* commands. + */ +enum ovs_datapath_attr { + OVS_DP_ATTR_UNSPEC, + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + __OVS_DP_ATTR_MAX +}; + +#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) + +struct ovs_dp_stats { + __u64 n_hit; /* Number of flow table matches. */ + __u64 n_missed; /* Number of flow table misses. */ + __u64 n_lost; /* Number of misses not sent to userspace. */ + __u64 n_flows; /* Number of flows present */ +}; + +struct ovs_vport_stats { + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ +}; + +/* Fixed logical ports. */ +#define OVSP_LOCAL ((__u32)0) + +/* Packet transfer. */ + +#define OVS_PACKET_FAMILY "ovs_packet" +#define OVS_PACKET_VERSION 0x1 + +enum ovs_packet_cmd { + OVS_PACKET_CMD_UNSPEC, + + /* Kernel-to-user notifications. */ + OVS_PACKET_CMD_MISS, /* Flow table miss. */ + OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ + + /* Userspace commands. */ + OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ +}; + +/** + * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. 
+ * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire + * packet as received, from the start of the Ethernet header onward. For + * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by + * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is + * the flow key extracted from the packet as originally received. + * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key + * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows + * userspace to adapt its flow setup strategy by comparing its notion of the + * flow key against the kernel's. + * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used + * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. + * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content + * specified there. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_PACKET_* commands. + */ +enum ovs_packet_attr { + OVS_PACKET_ATTR_UNSPEC, + OVS_PACKET_ATTR_PACKET, /* Packet data. */ + OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ + OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ + __OVS_PACKET_ATTR_MAX +}; + +#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) + +/* Virtual ports. 
*/ + +#define OVS_VPORT_FAMILY "ovs_vport" +#define OVS_VPORT_MCGROUP "ovs_vport" +#define OVS_VPORT_VERSION 0x1 + +enum ovs_vport_cmd { + OVS_VPORT_CMD_UNSPEC, + OVS_VPORT_CMD_NEW, + OVS_VPORT_CMD_DEL, + OVS_VPORT_CMD_GET, + OVS_VPORT_CMD_SET +}; + +enum ovs_vport_type { + OVS_VPORT_TYPE_UNSPEC, + OVS_VPORT_TYPE_NETDEV, /* network device */ + OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ + __OVS_VPORT_TYPE_MAX +}; + +#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) + +/** + * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. + * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. + * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type + * of vport. + * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device + * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes + * plus a null terminator. + * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. + * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that + * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on + * this port. A value of zero indicates that upcalls should not be sent. + * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for + * packets sent or received through the vport. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_VPORT_* commands. + * + * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and + * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is + * optional; if not specified a free port number is automatically selected. + * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type + * of vport. + * and other attributes are ignored. 
+ * + * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to + * look up the vport to operate on; otherwise dp_idx from the &struct + * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. + */ +enum ovs_vport_attr { + OVS_VPORT_ATTR_UNSPEC, + OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ + OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ + OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ + OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ + OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ + OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ + __OVS_VPORT_ATTR_MAX +}; + +#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) + +/* Flows. */ + +#define OVS_FLOW_FAMILY "ovs_flow" +#define OVS_FLOW_MCGROUP "ovs_flow" +#define OVS_FLOW_VERSION 0x1 + +enum ovs_flow_cmd { + OVS_FLOW_CMD_UNSPEC, + OVS_FLOW_CMD_NEW, + OVS_FLOW_CMD_DEL, + OVS_FLOW_CMD_GET, + OVS_FLOW_CMD_SET +}; + +struct ovs_flow_stats { + __u64 n_packets; /* Number of matched packets. */ + __u64 n_bytes; /* Number of matched bytes. */ +}; + +enum ovs_key_attr { + OVS_KEY_ATTR_UNSPEC, + OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. 
*/ + OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ + OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ + OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ + OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ + OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ + OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ + OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ + OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ + OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ + OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ + OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ + OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ + OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ + OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ + __OVS_KEY_ATTR_MAX +}; + +#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) + +/** + * enum ovs_frag_type - IPv4 and IPv6 fragment type + * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. + * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. + * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. + * + * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct + * ovs_key_ipv6. + */ +enum ovs_frag_type { + OVS_FRAG_TYPE_NONE, + OVS_FRAG_TYPE_FIRST, + OVS_FRAG_TYPE_LATER, + __OVS_FRAG_TYPE_MAX +}; + +#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) + +struct ovs_key_ethernet { + __u8 eth_src[ETH_ALEN]; + __u8 eth_dst[ETH_ALEN]; +}; + +struct ovs_key_ipv4 { + __be32 ipv4_src; + __be32 ipv4_dst; + __u8 ipv4_proto; + __u8 ipv4_tos; + __u8 ipv4_ttl; + __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ +}; + +struct ovs_key_ipv6 { + __be32 ipv6_src[4]; + __be32 ipv6_dst[4]; + __be32 ipv6_label; /* 20-bits in least-significant bits. */ + __u8 ipv6_proto; + __u8 ipv6_tclass; + __u8 ipv6_hlimit; + __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. 
*/ +}; + +struct ovs_key_tcp { + __be16 tcp_src; + __be16 tcp_dst; +}; + +struct ovs_key_udp { + __be16 udp_src; + __be16 udp_dst; +}; + +struct ovs_key_icmp { + __u8 icmp_type; + __u8 icmp_code; +}; + +struct ovs_key_icmpv6 { + __u8 icmpv6_type; + __u8 icmpv6_code; +}; + +struct ovs_key_arp { + __be32 arp_sip; + __be32 arp_tip; + __be16 arp_op; + __u8 arp_sha[ETH_ALEN]; + __u8 arp_tha[ETH_ALEN]; +}; + +struct ovs_key_nd { + __u32 nd_target[4]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; +}; + +/** + * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. + * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow + * key. Always present in notifications. Required for all requests (except + * dumps). + * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying + * the actions to take for packets that match the key. Always present in + * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for + * %OVS_FLOW_CMD_SET requests. + * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this + * flow. Present in notifications if the stats would be nonzero. Ignored in + * requests. + * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the + * TCP flags seen on packets in this flow. Only present in notifications for + * TCP flows, and only if it would be nonzero. Ignored in requests. + * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on + * the system monotonic clock, at which a packet was last processed for this + * flow. Only present in notifications if a packet has been processed for this + * flow. Ignored in requests. + * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the + * last-used time, accumulated TCP flags, and statistics for this flow. + * Otherwise ignored in requests. Never present in notifications. + * + * These attributes follow the &struct ovs_header within the Generic Netlink + * payload for %OVS_FLOW_* commands. 
+ */ +enum ovs_flow_attr { + OVS_FLOW_ATTR_UNSPEC, + OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ + OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ + OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ + OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ + __OVS_FLOW_ATTR_MAX +}; + +#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) + +/** + * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. + * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with + * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of + * %UINT32_MAX samples all packets and intermediate values sample intermediate + * fractions of packets. + * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. + * Actions are passed as nested attributes. + * + * Executes the specified actions with the given probability on a per-packet + * basis. + */ +enum ovs_sample_attr { + OVS_SAMPLE_ATTR_UNSPEC, + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + __OVS_SAMPLE_ATTR_MAX, +}; + +#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) + +/** + * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. + * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION + * message should be sent. Required. + * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is + * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. + */ +enum ovs_userspace_attr { + OVS_USERSPACE_ATTR_UNSPEC, + OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ + OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. 
*/ + __OVS_USERSPACE_ATTR_MAX +}; + +#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) + +/** + * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. + * @vlan_tpid: Tag protocol identifier (TPID) to push. + * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set + * (but it will not be set in the 802.1Q header that is pushed). + * + * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID + * values are those that the kernel module also parses as 802.1Q headers, to + * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN + * from having surprising results. + */ +struct ovs_action_push_vlan { + __be16 vlan_tpid; /* 802.1Q TPID. */ + __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ +}; + +/** + * enum ovs_action_attr - Action types. + * + * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. + * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested + * %OVS_USERSPACE_ATTR_* attributes. + * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The + * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its + * value. + * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the + * packet. + * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. + * @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in + * the nested %OVS_SAMPLE_ATTR_* attributes. + * + * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all + * fields within a header are modifiable, e.g. the IPv4 protocol and fragment + * type may not be changed. + */ + +enum ovs_action_attr { + OVS_ACTION_ATTR_UNSPEC, + OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ + OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ + OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ + OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ + OVS_ACTION_ATTR_POP_VLAN, /* No argument. 
*/ + OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ + __OVS_ACTION_ATTR_MAX +}; + +#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) + +#endif /* _LINUX_OPENVSWITCH_H */ diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index 93f5fa94a431..afafd703ad92 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -33,9 +33,11 @@ enum { PACKET_DIAG_TX_RING, PACKET_DIAG_FANOUT, - PACKET_DIAG_MAX, + __PACKET_DIAG_MAX, }; +#define PACKET_DIAG_MAX (__PACKET_DIAG_MAX - 1) + struct packet_diag_info { __u32 pdi_index; __u32 pdi_version; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 32aef0a439ef..dbd71b0c7d8c 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -348,6 +348,7 @@ enum { TCA_HTB_INIT, TCA_HTB_CTAB, TCA_HTB_RTAB, + TCA_HTB_DIRECT_QLEN, __TCA_HTB_MAX, }; diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index ee753536ab70..fe1a5406d4d9 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -145,16 +145,18 @@ typedef struct mdp_superblock_s { __u32 failed_disks; /* 4 Number of failed disks */ __u32 spare_disks; /* 5 Number of spare disks */ __u32 sb_csum; /* 6 checksum of the whole superblock */ -#ifdef __BIG_ENDIAN +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) __u32 events_hi; /* 7 high-order of superblock update count */ __u32 events_lo; /* 8 low-order of superblock update count */ __u32 cp_events_hi; /* 9 high-order of checkpoint update count */ __u32 cp_events_lo; /* 10 low-order of checkpoint update count */ -#else +#elif defined(__BYTE_ORDER) ? 
__BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) __u32 events_lo; /* 7 low-order of superblock update count */ __u32 events_hi; /* 8 high-order of superblock update count */ __u32 cp_events_lo; /* 9 low-order of checkpoint update count */ __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ +#else +#error unspecified endianness #endif __u32 recovery_cp; /* 11 recovery checkpoint sector count */ /* There are only valid for minor_version > 90 */ diff --git a/include/net/sctp/user.h b/include/uapi/linux/sctp.h index 9a0ae091366d..66b466e4ca08 100644 --- a/include/net/sctp/user.h +++ b/include/uapi/linux/sctp.h @@ -42,15 +42,17 @@ * Jon Grimm <jgrimm@us.ibm.com> * Daisy Chang <daisyc@us.ibm.com> * Ryan Layer <rmlayer@us.ibm.com> - * Ardelle Fan <ardelle.fan@intel.com> + * Ardelle Fan <ardelle.fan@intel.com> * Sridhar Samudrala <sri@us.ibm.com> + * Inaky Perez-Gonzalez <inaky.gonzalez@intel.com> + * Vlad Yasevich <vladislav.yasevich@hp.com> * * Any bugs reported given to us we will try to fix... any fixes shared will * be incorporated into the next SCTP release. */ -#ifndef __net_sctp_user_h__ -#define __net_sctp_user_h__ +#ifndef _UAPI_SCTP_H +#define _UAPI_SCTP_H #include <linux/types.h> #include <linux/socket.h> @@ -165,17 +167,23 @@ enum sctp_sinfo_flags { SCTP_ADDR_OVER = 2, /* Override the primary destination. */ SCTP_ABORT=4, /* Send an ABORT message to the peer. */ SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ - SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ + SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ }; +typedef union { + __u8 raw; + struct sctp_initmsg init; + struct sctp_sndrcvinfo sndrcv; +} sctp_cmsg_data_t; /* These are cmsg_types. 
*/ typedef enum sctp_cmsg_type { SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */ +#define SCTP_INIT SCTP_INIT SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */ +#define SCTP_SNDRCV SCTP_SNDRCV } sctp_cmsg_t; - /* * 5.3.1.1 SCTP_ASSOC_CHANGE * @@ -345,6 +353,12 @@ struct sctp_pdapi_event { enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; +/* + * 5.3.1.8. SCTP_AUTHENTICATION_EVENT + * + * When a receiver is using authentication this message will provide + * notifications regarding new keys being made active as well as errors. + */ struct sctp_authkey_event { __u16 auth_type; __u16 auth_flags; @@ -421,15 +435,23 @@ union sctp_notification { enum sctp_sn_type { SCTP_SN_TYPE_BASE = (1<<15), SCTP_ASSOC_CHANGE, +#define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE SCTP_PEER_ADDR_CHANGE, +#define SCTP_PEER_ADDR_CHANGE SCTP_PEER_ADDR_CHANGE SCTP_SEND_FAILED, +#define SCTP_SEND_FAILED SCTP_SEND_FAILED SCTP_REMOTE_ERROR, +#define SCTP_REMOTE_ERROR SCTP_REMOTE_ERROR SCTP_SHUTDOWN_EVENT, +#define SCTP_SHUTDOWN_EVENT SCTP_SHUTDOWN_EVENT SCTP_PARTIAL_DELIVERY_EVENT, +#define SCTP_PARTIAL_DELIVERY_EVENT SCTP_PARTIAL_DELIVERY_EVENT SCTP_ADAPTATION_INDICATION, +#define SCTP_ADAPTATION_INDICATION SCTP_ADAPTATION_INDICATION SCTP_AUTHENTICATION_EVENT, #define SCTP_AUTHENTICATION_INDICATION SCTP_AUTHENTICATION_EVENT SCTP_SENDER_DRY_EVENT, +#define SCTP_SENDER_DRY_EVENT SCTP_SENDER_DRY_EVENT }; /* Notification error codes used to fill up the error fields in some @@ -454,7 +476,7 @@ typedef enum sctp_sn_error { * * The protocol parameters used to initialize and bound retransmission * timeout (RTO) are tunable. See [SCTP] for more information on how - * these parameters are used in RTO calculation. + * these parameters are used in RTO calculation. 
*/ struct sctp_rtoinfo { sctp_assoc_t srto_assoc_id; @@ -504,6 +526,9 @@ struct sctp_prim { struct sockaddr_storage ssp_addr; } __attribute__((packed, aligned(4))); +/* For backward compatibility use, define the old name too */ +#define sctp_setprim sctp_prim + /* * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER) * @@ -564,12 +589,27 @@ struct sctp_authchunk { * * This option gets or sets the list of HMAC algorithms that the local * endpoint requires the peer to use. -*/ + */ +#ifndef __KERNEL__ +/* This here is only used by user space as is. It might not be a good idea + * to export/reveal the whole structure with reserved fields etc. + */ +enum { + SCTP_AUTH_HMAC_ID_SHA1 = 1, + SCTP_AUTH_HMAC_ID_SHA256 = 3, +}; +#endif + struct sctp_hmacalgo { __u32 shmac_num_idents; __u16 shmac_idents[]; }; +/* Sadly, user and kernel space have different names for + * this structure member, so this is to not break anything. + */ +#define shmac_number_of_idents shmac_num_idents + /* * 7.1.20. Set a shared key (SCTP_AUTH_KEY) * @@ -691,6 +731,24 @@ struct sctp_authchunks { uint8_t gauth_chunks[]; }; +/* The broken spelling has been released already in lksctp-tools header, + * so don't break anyone, now that it's fixed. + */ +#define guth_number_of_chunks gauth_number_of_chunks + +/* Association states. */ +enum sctp_sstat_state { + SCTP_EMPTY = 0, + SCTP_CLOSED = 1, + SCTP_COOKIE_WAIT = 2, + SCTP_COOKIE_ECHOED = 3, + SCTP_ESTABLISHED = 4, + SCTP_SHUTDOWN_PENDING = 5, + SCTP_SHUTDOWN_SENT = 6, + SCTP_SHUTDOWN_RECEIVED = 7, + SCTP_SHUTDOWN_ACK_SENT = 8, +}; + /* * 8.2.6. Get the Current Identifiers of Associations * (SCTP_GET_ASSOC_ID_LIST) @@ -705,15 +763,20 @@ struct sctp_assoc_ids { /* * 8.3, 8.5 get all peer/local addresses in an association. 
- * This parameter struct is used by SCTP_GET_PEER_ADDRS and + * This parameter struct is used by SCTP_GET_PEER_ADDRS and * SCTP_GET_LOCAL_ADDRS socket options used internally to implement - * sctp_getpaddrs() and sctp_getladdrs() API. + * sctp_getpaddrs() and sctp_getladdrs() API. */ struct sctp_getaddrs_old { sctp_assoc_t assoc_id; int addr_num; +#ifdef __KERNEL__ struct sockaddr __user *addrs; +#else + struct sockaddr *addrs; +#endif }; + struct sctp_getaddrs { sctp_assoc_t assoc_id; /*input*/ __u32 addr_num; /*output*/ @@ -779,4 +842,5 @@ struct sctp_paddrthlds { __u16 spt_pathmaxrxt; __u16 spt_pathpfthld; }; -#endif /* __net_sctp_user_h__ */ + +#endif /* _UAPI_SCTP_H */ diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index b6a23a483d74..74c2bf7211f8 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -51,7 +51,10 @@ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ #define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ #define PORT_BRCM_TRUMANAGE 25 -#define PORT_MAX_8250 25 /* max port ID */ +#define PORT_ALTR_16550_F32 26 /* Altera 16550 UART with 32 FIFOs */ +#define PORT_ALTR_16550_F64 27 /* Altera 16550 UART with 64 FIFOs */ +#define PORT_ALTR_16550_F128 28 /* Altera 16550 UART with 128 FIFOs */ +#define PORT_MAX_8250 28 /* max port ID */ /* * ARM specific type numbers. 
These are not currently guaranteed diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b49eab89c9fd..fefdec91c68b 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -202,6 +202,8 @@ enum LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */ LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ + LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ + LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ @@ -245,6 +247,7 @@ enum LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */ LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 6b1ead0b0c9d..8d776ebc4829 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -102,7 +102,6 @@ enum { #define TCP_QUICKACK 12 /* Block/reenable quick acks */ #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ -#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. 
after 1 dupack */ #define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ @@ -199,29 +198,4 @@ struct tcp_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; -/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */ -#define TCP_COOKIE_MIN 8 /* 64-bits */ -#define TCP_COOKIE_MAX 16 /* 128-bits */ -#define TCP_COOKIE_PAIR_SIZE (2*TCP_COOKIE_MAX) - -/* Flags for both getsockopt and setsockopt */ -#define TCP_COOKIE_IN_ALWAYS (1 << 0) /* Discard SYN without cookie */ -#define TCP_COOKIE_OUT_NEVER (1 << 1) /* Prohibit outgoing cookies, - * supercedes everything. */ - -/* Flags for getsockopt */ -#define TCP_S_DATA_IN (1 << 2) /* Was data received? */ -#define TCP_S_DATA_OUT (1 << 3) /* Was data sent? */ - -/* TCP_COOKIE_TRANSACTIONS data */ -struct tcp_cookie_transactions { - __u16 tcpct_flags; /* see above */ - __u8 __tcpct_pad1; /* zero */ - __u8 tcpct_cookie_desired; /* bytes */ - __u16 tcpct_s_data_desired; /* bytes of variable data */ - __u16 tcpct_used; /* bytes in value */ - __u8 tcpct_value[TCP_MSS_DEFAULT]; -}; - - #endif /* _UAPI_LINUX_TCP_H */ diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h index b8a24941db21..b9e2a6a7446f 100644 --- a/include/uapi/linux/unix_diag.h +++ b/include/uapi/linux/unix_diag.h @@ -39,9 +39,11 @@ enum { UNIX_DIAG_MEMINFO, UNIX_DIAG_SHUTDOWN, - UNIX_DIAG_MAX, + __UNIX_DIAG_MAX, }; +#define UNIX_DIAG_MAX (__UNIX_DIAG_MAX - 1) + struct unix_diag_vfs { __u32 udiag_vfs_ino; __u32 udiag_vfs_dev; diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index df91301847ec..b4ed5d895699 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -13,12 +13,10 @@ * more details. */ -#ifndef _VM_SOCKETS_H_ -#define _VM_SOCKETS_H_ +#ifndef _UAPI_VM_SOCKETS_H +#define _UAPI_VM_SOCKETS_H -#if !defined(__KERNEL__) -#include <sys/socket.h> -#endif +#include <linux/socket.h> /* Option name for STREAM socket buffer size. 
Use as the option name in * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that @@ -137,14 +135,13 @@ #define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) /* Address structure for vSockets. The address family should be set to - * whatever vmci_sock_get_af_value_fd() returns. The structure members should - * all align on their natural boundaries without resorting to compiler packing - * directives. The total size of this structure should be exactly the same as - * that of struct sockaddr. + * AF_VSOCK. The structure members should all align on their natural + * boundaries without resorting to compiler packing directives. The total size + * of this structure should be exactly the same as that of struct sockaddr. */ struct sockaddr_vm { - sa_family_t svm_family; + __kernel_sa_family_t svm_family; unsigned short svm_reserved1; unsigned int svm_port; unsigned int svm_cid; @@ -156,8 +153,4 @@ struct sockaddr_vm { #define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) -#if defined(__KERNEL__) -int vm_sockets_get_local_cid(void); -#endif - -#endif +#endif /* _UAPI_VM_SOCKETS_H */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 28e493b5b94c..a8cd6a4a2970 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -297,6 +297,7 @@ enum xfrm_attr_type_t { XFRMA_MARK, /* struct xfrm_mark */ XFRMA_TFCPAD, /* __u32 */ XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ + XFRMA_SA_EXTRA_FLAGS, /* __u32 */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -367,6 +368,8 @@ struct xfrm_usersa_info { #define XFRM_STATE_ESN 128 }; +#define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 + struct xfrm_usersa_id { xfrm_address_t daddr; __be32 spi; diff --git a/include/video/atmel_lcdc.h b/include/video/atmel_lcdc.h index 28447f1594fa..8deb22672ada 100644 --- a/include/video/atmel_lcdc.h +++ b/include/video/atmel_lcdc.h @@ -30,7 +30,6 @@ */ #define ATMEL_LCDC_WIRING_BGR 0 #define ATMEL_LCDC_WIRING_RGB 1 -#define ATMEL_LCDC_WIRING_RGB555 2 /* 
LCD Controller info data structure, stored in device platform_data */ @@ -62,6 +61,7 @@ struct atmel_lcdfb_info { void (*atmel_lcdfb_power_control)(int on); struct fb_monspecs *default_monspecs; u32 pseudo_palette[16]; + bool have_intensity_bit; }; #define ATMEL_LCDC_DMABADDR1 0x00 diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 01c3d62436ef..ffd4652de91c 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -138,11 +138,21 @@ struct blkif_request_discard { uint8_t _pad3; } __attribute__((__packed__)); +struct blkif_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad3; /* offsetof(blkif_req..,u.other.id)==8*/ +#endif + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; + struct blkif_request_other other; } u; } __attribute__((__packed__)); diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index 9dfc12000980..3ef3fe05ee99 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -13,6 +13,24 @@ #include <xen/interface/grant_table.h> /* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. 
Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* * Notifications after enqueuing any type of message should be conditional on * the appropriate req_event or rsp_event field in the shared ring. * If the client sends notification for rx requests then it should specify @@ -47,6 +65,7 @@ #define _XEN_NETTXF_extra_info (3) #define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info) +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF struct xen_netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ uint16_t offset; /* Offset within buffer page */ diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 1844d31f4552..7000bb1f6e96 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -251,6 +251,12 @@ struct physdev_pci_device_add { #define PHYSDEVOP_pci_device_remove 26 #define PHYSDEVOP_restore_msi_ext 27 +/* + * Dom0 should use these two to announce MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. 
+ */ +#define PHYSDEVOP_prepare_msix 30 +#define PHYSDEVOP_release_msix 31 struct physdev_pci_device { /* IN */ uint16_t seg; diff --git a/init/Kconfig b/init/Kconfig index 22616cd434bc..5341d7232c3a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -28,10 +28,6 @@ config BUILDTIME_EXTABLE_SORT menu "General setup" -config EXPERIMENTAL - bool - default y - config BROKEN bool diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e5c4f609f22c..e4e47f647446 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - if (!(flags & MS_KERNMOUNT)) - data = current->nsproxy->ipc_ns; + if (!(flags & MS_KERNMOUNT)) { + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + /* Don't allow mounting unless the caller has CAP_SYS_ADMIN + * over the ipc namespace. + */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + data = ns; + } return mount_ns(fs_type, flags, data, mqueue_fill_super); } @@ -840,7 +848,8 @@ out_putfd: fd = error; } mutex_unlock(&root->d_inode->i_mutex); - mnt_drop_write(mnt); + if (!ro) + mnt_drop_write(mnt); out_putname: putname(name); return fd; diff --git a/ipc/msg.c b/ipc/msg.c index 950572f9d796..fede1d06ef30 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -820,15 +820,17 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, struct msg_msg *copy = NULL; unsigned long copy_number = 0; + ns = current->nsproxy->ipc_ns; + if (msqid < 0 || (long) bufsz < 0) return -EINVAL; if (msgflg & MSG_COPY) { - copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, &copy_number); + copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax), + msgflg, &msgtyp, &copy_number); if (IS_ERR(copy)) return PTR_ERR(copy); } mode = convert_mode(&msgtyp, msgflg); - ns = current->nsproxy->ipc_ns; msq = msg_lock_check(ns, msqid); if (IS_ERR(msq)) { @@ -870,6 +872,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, 
goto out_unlock; break; } + msg = ERR_PTR(-EAGAIN); } else break; msg_counter++; diff --git a/ipc/msgutil.c b/ipc/msgutil.c index ebfcbfa8b7f2..5df8e4bf1db0 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -117,9 +117,6 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) if (alen > DATALEN_MSG) alen = DATALEN_MSG; - dst->next = NULL; - dst->security = NULL; - memcpy(dst + 1, src + 1, alen); len -= alen; diff --git a/kernel/audit.c b/kernel/audit.c index d596e5355f15..488f85f76335 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -58,7 +58,7 @@ #ifdef CONFIG_SECURITY #include <linux/security.h> #endif -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/freezer.h> #include <linux/tty.h> #include <linux/pid_namespace.h> @@ -910,7 +910,7 @@ static void audit_receive_skb(struct sk_buff *skb) { struct nlmsghdr *nlh; /* - * len MUST be signed for NLMSG_NEXT to be able to dec it below 0 + * len MUST be signed for nlmsg_next to be able to dec it below 0 * if the nlmsg_len was not aligned */ int len; @@ -919,13 +919,13 @@ static void audit_receive_skb(struct sk_buff *skb) nlh = nlmsg_hdr(skb); len = skb->len; - while (NLMSG_OK(nlh, len)) { + while (nlmsg_ok(nlh, len)) { err = audit_receive_msg(skb, nlh); /* if err or if this message says it wants a response */ if (err || (nlh->nlmsg_flags & NLM_F_ACK)) netlink_ack(skb, nlh, err); - nlh = NLMSG_NEXT(nlh, len); + nlh = nlmsg_next(nlh, &len); } } @@ -1483,7 +1483,7 @@ void audit_log_end(struct audit_buffer *ab) audit_log_lost("rate limit exceeded"); } else { struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); - nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0); + nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN; if (audit_pid) { skb_queue_tail(&audit_skb_queue, ab->skb); diff --git a/kernel/capability.c b/kernel/capability.c index 493d97259484..f6c2ce5701e1 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -393,6 +393,30 @@ bool ns_capable(struct user_namespace *ns, int cap) 
EXPORT_SYMBOL(ns_capable); /** + * file_ns_capable - Determine if the file's opener had a capability in effect + * @file: The file we want to check + * @ns: The usernamespace we want the capability in + * @cap: The capability to be tested for + * + * Return true if task that opened the file had a capability in effect + * when the file was opened. + * + * This does not set PF_SUPERPRIV because the caller may not + * actually be privileged. + */ +bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap) +{ + if (WARN_ON_ONCE(!cap_valid(cap))) + return false; + + if (security_capable(file->f_cred, ns, cap) == 0) + return true; + + return false; +} +EXPORT_SYMBOL(file_ns_capable); + +/** * capable - Determine if the current task has a superior capability in effect * @cap: The capability to be tested for * diff --git a/kernel/events/core.c b/kernel/events/core.c index b0cd86501c30..4d3124b39277 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4434,12 +4434,15 @@ static void perf_event_task_event(struct perf_task_event *task_event) if (ctxn < 0) goto next; ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) + perf_event_task_ctx(ctx, task_event); } - if (ctx) - perf_event_task_ctx(ctx, task_event); next: put_cpu_ptr(pmu->pmu_cpu_context); } + if (task_event->task_ctx) + perf_event_task_ctx(task_event->task_ctx, task_event); + rcu_read_unlock(); } @@ -4734,7 +4737,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) } else { if (arch_vma_name(mmap_event->vma)) { name = strncpy(tmp, arch_vma_name(mmap_event->vma), - sizeof(tmp)); + sizeof(tmp) - 1); + tmp[sizeof(tmp) - 1] = '\0'; goto got_name; } @@ -5327,7 +5331,7 @@ static void sw_perf_event_destroy(struct perf_event *event) static int perf_swevent_init(struct perf_event *event) { - int event_id = event->attr.config; + u64 event_id = event->attr.config; if (event->attr.type != PERF_TYPE_SOFTWARE) return -ENOENT; @@ -5647,6 +5651,7 @@ static void 
perf_swevent_init_hrtimer(struct perf_event *event) event->attr.sample_period = NSEC_PER_SEC / freq; hwc->sample_period = event->attr.sample_period; local64_set(&hwc->period_left, hwc->sample_period); + hwc->last_period = hwc->sample_period; event->attr.freq = 0; } } @@ -5982,6 +5987,7 @@ skip_type: if (pmu->pmu_cpu_context) goto got_cpu_context; + ret = -ENOMEM; pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); if (!pmu->pmu_cpu_context) goto free_dev; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index d56a64c99a8b..eb675c4d59df 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -16,7 +16,7 @@ struct ring_buffer { int page_order; /* allocation order */ #endif int nr_pages; /* nr of data pages */ - int writable; /* are we writable */ + int overwrite; /* can overwrite itself */ atomic_t poll; /* POLL_ for wakeups */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 23cb34ff3973..97fddb09762b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -18,12 +18,24 @@ static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, unsigned long offset, unsigned long head) { - unsigned long mask; + unsigned long sz = perf_data_size(rb); + unsigned long mask = sz - 1; - if (!rb->writable) + /* + * check if user-writable + * overwrite : over-write its own tail + * !overwrite: buffer possibly drops events. 
+ */ + if (rb->overwrite) return true; - mask = perf_data_size(rb) - 1; + /* + * verify that payload is not bigger than buffer + * otherwise masking logic may fail to detect + * the "not enough space" condition + */ + if ((head - offset) > sz) + return false; offset = (offset - tail) & mask; head = (head - tail) & mask; @@ -212,7 +224,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->watermark = max_size / 2; if (flags & RING_BUFFER_WRITABLE) - rb->writable = 1; + rb->overwrite = 0; + else + rb->overwrite = 1; atomic_set(&rb->refcount, 1); diff --git a/kernel/exit.c b/kernel/exit.c index 51e485ca9935..60bc027c61c3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -835,7 +835,7 @@ void do_exit(long code) /* * Make sure we are holding no locks: */ - debug_check_no_locks_held(); + debug_check_no_locks_held(tsk); /* * We can do this unlocked here. The futex code uses this flag * just to verify whether the pi state cleanup has been done diff --git a/kernel/fork.c b/kernel/fork.c index 8d932b1c9056..1766d324d5e3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1141,6 +1141,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) + return ERR_PTR(-EINVAL); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -1807,7 +1810,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) * If unsharing a user namespace must also unshare the thread. */ if (unshare_flags & CLONE_NEWUSER) - unshare_flags |= CLONE_THREAD; + unshare_flags |= CLONE_THREAD | CLONE_FS; /* * If unsharing a pid namespace must also unshare the thread. 
*/ diff --git a/kernel/futex.c b/kernel/futex.c index f0090a993dab..b26dcfc02c94 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -223,7 +223,8 @@ static void drop_futex_key_refs(union futex_key *key) * @rw: mapping needs to be read/write (values: VERIFY_READ, * VERIFY_WRITE) * - * Returns a negative error code or 0 + * Return: a negative error code or 0 + * * The key words are stored in *key on success. * * For shared mappings, it's (page->index, file_inode(vma->vm_file), @@ -705,9 +706,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, * be "current" except in the case of requeue pi. * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * - * Returns: - * 0 - ready to wait - * 1 - acquired the lock + * Return: + * 0 - ready to wait; + * 1 - acquired the lock; * <0 - error * * The hb->lock and futex_key refs shall be held by the caller. @@ -1191,9 +1192,9 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. * hb1 and hb2 must be held by the caller. * - * Returns: - * 0 - failed to acquire the lock atomicly - * 1 - acquired the lock + * Return: + * 0 - failed to acquire the lock atomically; + * 1 - acquired the lock; * <0 - error */ static int futex_proxy_trylock_atomic(u32 __user *pifutex, @@ -1254,8 +1255,8 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire * uaddr2 atomically on behalf of the top waiter. * - * Returns: - * >=0 - on success, the number of tasks requeued or woken + * Return: + * >=0 - on success, the number of tasks requeued or woken; * <0 - on error */ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, @@ -1536,8 +1537,8 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) * The q->lock_ptr must not be held by the caller. 
A call to unqueue_me() must * be paired with exactly one earlier call to queue_me(). * - * Returns: - * 1 - if the futex_q was still queued (and we removed unqueued it) + * Return: + * 1 - if the futex_q was still queued (and we removed unqueued it); * 0 - if the futex_q was already removed by the waking thread */ static int unqueue_me(struct futex_q *q) @@ -1707,9 +1708,9 @@ static long futex_wait_restart(struct restart_block *restart); * the pi_state owner as well as handle race conditions that may allow us to * acquire the lock. Must be called with the hb lock held. * - * Returns: - * 1 - success, lock taken - * 0 - success, lock not taken + * Return: + * 1 - success, lock taken; + * 0 - success, lock not taken; * <0 - on error (-EFAULT) */ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) @@ -1824,8 +1825,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * Return with the hb lock held and a q.key reference on success, and unlocked * with no q.key reference on failure. * - * Returns: - * 0 - uaddr contains val and hb has been locked + * Return: + * 0 - uaddr contains val and hb has been locked; * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked */ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, @@ -2203,9 +2204,9 @@ pi_faulted: * the wakeup and return the appropriate error code to the caller. Must be * called with the hb lock held. 
* - * Returns - * 0 - no early wakeup detected - * <0 - -ETIMEDOUT or -ERESTARTNOINTR + * Return: + * 0 = no early wakeup detected; + * <0 = -ETIMEDOUT or -ERESTARTNOINTR */ static inline int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, @@ -2247,7 +2248,6 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * @val: the expected value of uaddr * @abs_time: absolute timeout * @bitset: 32 bit wakeup bitset set by userspace, defaults to all - * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) * @uaddr2: the pi futex we will take prior to returning to user-space * * The caller will wait on uaddr and will be requeued by futex_requeue() to @@ -2258,7 +2258,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * there was a need to. * * We call schedule in futex_wait_queue_me() when we enqueue and return there - * via the following: + * via the following-- * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() * 2) wakeup on uaddr2 after a requeue * 3) signal @@ -2276,8 +2276,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * * If 4 or 7, we cleanup and return with -ETIMEDOUT. 
* - * Returns: - * 0 - On success + * Return: + * 0 - On success; * <0 - On error */ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3feb..14be27feda49 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -63,6 +63,7 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { + .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), .clock_base = { { @@ -1642,8 +1643,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - raw_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; timerqueue_init_head(&cpu_base->clock_base[i].active); diff --git a/kernel/kexec.c b/kernel/kexec.c index bddd3d7a74b6..ffd4e111fd67 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -55,7 +55,7 @@ struct resource crashk_res = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; struct resource crashk_low_res = { - .name = "Crash kernel low", + .name = "Crash kernel", .start = 0, .end = 0, .flags = IORESOURCE_BUSY | IORESOURCE_MEM @@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline, return 0; } +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + /* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. 
*/ +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + if (!ck_cmdline) + return NULL; + + return ck_cmdline; +} + static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - const char *name) + const char *name, + const char *suffix) { - char *p = cmdline, *ck_cmdline = NULL; char *first_colon, *first_space; + char *ck_cmdline; BUG_ON(!crash_size || !crash_base); *crash_size = 0; *crash_base = 0; - /* find crashkernel and use the last one if there are more */ - p = strstr(p, name); - while (p) { - ck_cmdline = p; - p = strstr(p+1, name); - } + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); if (!ck_cmdline) return 
-EINVAL; ck_cmdline += strlen(name); + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + crash_base, suffix); /* * if the commandline contains a ':', then that's the extended * syntax -- if not, it must be the classic syntax @@ -1413,13 +1492,26 @@ static int __init __parse_crashkernel(char *cmdline, return 0; } +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + */ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel="); + "crashkernel=", NULL); +} + +int __init parse_crashkernel_high(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); } int __init parse_crashkernel_low(char *cmdline, @@ -1428,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_low="); + "crashkernel=", suffix_tbl[SUFFIX_LOW]); } static void update_vmcoreinfo_note(void) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e35be53f6613..3fed7f0cbcdf 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -794,16 +794,16 @@ out: } #ifdef CONFIG_SYSCTL -/* This should be called with kprobe_mutex locked */ static void __kprobes optimize_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; unsigned int i; + mutex_lock(&kprobe_mutex); /* If optimization is already allowed, just return */ if (kprobes_allow_optimization) - return; + goto out; kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { @@ -813,18 +813,22 @@ static void __kprobes optimize_all_kprobes(void) optimize_kprobe(p); } 
printk(KERN_INFO "Kprobes globally optimized\n"); +out: + mutex_unlock(&kprobe_mutex); } -/* This should be called with kprobe_mutex locked */ static void __kprobes unoptimize_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; unsigned int i; + mutex_lock(&kprobe_mutex); /* If optimization is already prohibited, just return */ - if (!kprobes_allow_optimization) + if (!kprobes_allow_optimization) { + mutex_unlock(&kprobe_mutex); return; + } kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { @@ -834,11 +838,14 @@ static void __kprobes unoptimize_all_kprobes(void) unoptimize_kprobe(p, false); } } + mutex_unlock(&kprobe_mutex); + /* Wait for unoptimizing completion */ wait_for_kprobe_optimizer(); printk(KERN_INFO "Kprobes globally unoptimized\n"); } +static DEFINE_MUTEX(kprobe_sysctl_mutex); int sysctl_kprobes_optimization; int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, @@ -846,7 +853,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, { int ret; - mutex_lock(&kprobe_mutex); + mutex_lock(&kprobe_sysctl_mutex); sysctl_kprobes_optimization = kprobes_allow_optimization ? 
1 : 0; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); @@ -854,7 +861,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, optimize_all_kprobes(); else unoptimize_all_kprobes(); - mutex_unlock(&kprobe_mutex); + mutex_unlock(&kprobe_sysctl_mutex); return ret; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 691dc2ef9baf..9eb7fed0bbaa 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { - __set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_PARKED); while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) complete(&self->parked); schedule(); - __set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_PARKED); } clear_bit(KTHREAD_IS_PARKED, &self->flags); __set_current_state(TASK_RUNNING); @@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), } EXPORT_SYMBOL(kthread_create_on_node); -static void __kthread_bind(struct task_struct *p, unsigned int cpu) +static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state) { + /* Must have done schedule() in kthread() before we set_task_cpu */ + if (!wait_task_inactive(p, state)) { + WARN_ON(1); + return; + } /* It's safe because the task is inactive. 
*/ do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_THREAD_BOUND; @@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu) */ void kthread_bind(struct task_struct *p, unsigned int cpu) { - /* Must have done schedule() in kthread() before we set_task_cpu */ - if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { - WARN_ON(1); - return; - } - __kthread_bind(p, cpu); + __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(kthread_bind); @@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k) return NULL; } +static void __kthread_unpark(struct task_struct *k, struct kthread *kthread) +{ + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * We clear the IS_PARKED bit here as we don't wait + * until the task has left the park code. So if we'd + * park before that happens we'd see the IS_PARKED bit + * which might be about to be cleared. + */ + if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) + __kthread_bind(k, kthread->cpu, TASK_PARKED); + wake_up_state(k, TASK_PARKED); + } +} + /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). @@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k) { struct kthread *kthread = task_get_live_kthread(k); - if (kthread) { - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); - /* - * We clear the IS_PARKED bit here as we don't wait - * until the task has left the park code. So if we'd - * park before that happens we'd see the IS_PARKED bit - * which might be about to be cleared. 
- */ - if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) - __kthread_bind(k, kthread->cpu); - wake_up_process(k); - } - } + if (kthread) + __kthread_unpark(k, kthread); put_task_struct(k); } @@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k) trace_sched_kthread_stop(k); if (kthread) { set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + __kthread_unpark(k, kthread); wake_up_process(k); wait_for_completion(&kthread->exited); } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 259db207b5d9..8a0efac4f99d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4088,7 +4088,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); -static void print_held_locks_bug(void) +static void print_held_locks_bug(struct task_struct *curr) { if (!debug_locks_off()) return; @@ -4097,21 +4097,22 @@ static void print_held_locks_bug(void) printk("\n"); printk("=====================================\n"); - printk("[ BUG: %s/%d still has locks held! ]\n", - current->comm, task_pid_nr(current)); + printk("[ BUG: lock held at task exit time! 
]\n"); print_kernel_ident(); printk("-------------------------------------\n"); - lockdep_print_held_locks(current); + printk("%s/%d is exiting with locks still held!\n", + curr->comm, task_pid_nr(curr)); + lockdep_print_held_locks(curr); + printk("\nstack backtrace:\n"); dump_stack(); } -void debug_check_no_locks_held(void) +void debug_check_no_locks_held(struct task_struct *task) { - if (unlikely(current->lockdep_depth > 0)) - print_held_locks_bug(); + if (unlikely(task->lockdep_depth > 0)) + print_held_locks_bug(task); } -EXPORT_SYMBOL_GPL(debug_check_no_locks_held); void debug_show_all_locks(void) { diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index c1c3dc1c6023..bea15bdf82b0 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) int nr; int rc; struct task_struct *task, *me = current; + int init_pids = thread_group_leader(me) ? 1 : 2; /* Don't allow any more processes into the pid namespace */ disable_pid_allocation(pid_ns); @@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) */ for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (pid_ns->nr_hashed == 1) + if (pid_ns->nr_hashed == init_pids) break; schedule(); } diff --git a/kernel/printk.c b/kernel/printk.c index 0b31715f335a..abbdd9e2ac82 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -63,8 +63,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) 
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ -DECLARE_WAIT_QUEUE_HEAD(log_wait); - int console_printk[4] = { DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ @@ -224,6 +222,7 @@ struct log { static DEFINE_RAW_SPINLOCK(logbuf_lock); #ifdef CONFIG_PRINTK +DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static u32 syslog_idx; @@ -1957,45 +1956,6 @@ int is_console_locked(void) return console_locked; } -/* - * Delayed printk version, for scheduler-internal messages: - */ -#define PRINTK_BUF_SIZE 512 - -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_SCHED 0x02 - -static DEFINE_PER_CPU(int, printk_pending); -static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); - -static void wake_up_klogd_work_func(struct irq_work *irq_work) -{ - int pending = __this_cpu_xchg(printk_pending, 0); - - if (pending & PRINTK_PENDING_SCHED) { - char *buf = __get_cpu_var(printk_sched_buf); - printk(KERN_WARNING "[sched_delayed] %s", buf); - } - - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); -} - -static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { - .func = wake_up_klogd_work_func, - .flags = IRQ_WORK_LAZY, -}; - -void wake_up_klogd(void) -{ - preempt_disable(); - if (waitqueue_active(&log_wait)) { - this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); - irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); - } - preempt_enable(); -} - static void console_cont_flush(char *text, size_t size) { unsigned long flags; @@ -2458,6 +2418,44 @@ static int __init printk_late_init(void) late_initcall(printk_late_init); #if defined CONFIG_PRINTK +/* + * Delayed printk version, for scheduler-internal messages: + */ +#define PRINTK_BUF_SIZE 512 + +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_SCHED 
0x02 + +static DEFINE_PER_CPU(int, printk_pending); +static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); + +static void wake_up_klogd_work_func(struct irq_work *irq_work) +{ + int pending = __this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_SCHED) { + char *buf = __get_cpu_var(printk_sched_buf); + printk(KERN_WARNING "[sched_delayed] %s", buf); + } + + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); +} + +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { + .func = wake_up_klogd_work_func, + .flags = IRQ_WORK_LAZY, +}; + +void wake_up_klogd(void) +{ + preempt_disable(); + if (waitqueue_active(&log_wait)) { + this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + } + preempt_enable(); +} int printk_sched(const char *fmt, ...) { diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c685e31492df..c3ae1446461c 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd) u64 this_clock, remote_clock; u64 *ptr, old_val, val; +#if BITS_PER_LONG != 64 +again: + /* + * Careful here: The local and the remote clock values need to + * be read out atomic as we need to compare the values and + * then update either the local or the remote side. So the + * cmpxchg64 below only protects one readout. + * + * We must reread via sched_clock_local() in the retry case on + * 32bit as an NMI could use sched_clock_local() via the + * tracer and hit between the readout of + * the low32bit and the high 32bit portion. + */ + this_clock = sched_clock_local(my_scd); + /* + * We must enforce atomic readout on 32bit, otherwise the + * update on the remote cpu can hit inbetween the readout of + * the low32bit and the high 32bit portion. 
+ */ + remote_clock = cmpxchg64(&scd->clock, 0, 0); +#else + /* + * On 64bit the read of [my]scd->clock is atomic versus the + * update, so we can avoid the above 32bit dance. + */ sched_clock_local(my_scd); again: this_clock = my_scd->clock; remote_clock = scd->clock; +#endif /* * Use the opportunity that we have both locks diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f12624a393c..67d04651f44b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1498,8 +1498,10 @@ static void try_to_wake_up_local(struct task_struct *p) { struct rq *rq = task_rq(p); - BUG_ON(rq != this_rq()); - BUG_ON(p == current); + if (WARN_ON_ONCE(rq != this_rq()) || + WARN_ON_ONCE(p == current)) + return; + lockdep_assert_held(&rq->lock); if (!raw_spin_trylock(&p->pi_lock)) { @@ -4999,7 +5001,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) } static int min_load_idx = 0; -static int max_load_idx = CPU_LOAD_IDX_MAX; +static int max_load_idx = CPU_LOAD_IDX_MAX-1; static void set_table_entry(struct ctl_table *entry, diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ed12cbb135f4..e93cca92f38b 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -310,7 +310,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) t = tsk; do { - task_cputime(tsk, &utime, &stime); + task_cputime(t, &utime, &stime); times->utime += utime; times->stime += stime; times->sum_exec_runtime += task_sched_runtime(t); diff --git a/kernel/signal.c b/kernel/signal.c index 2ec870a4c3c4..06ff7764ab7c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -32,6 +32,7 @@ #include <linux/user_namespace.h> #include <linux/uprobes.h> #include <linux/compat.h> +#include <linux/cn_proc.h> #define CREATE_TRACE_POINTS #include <trace/events/signal.h> @@ -485,6 +486,9 @@ flush_signal_handlers(struct task_struct *t, int force_default) if (force_default || ka->sa.sa_handler != SIG_IGN) ka->sa.sa_handler = SIG_DFL; ka->sa.sa_flags = 0; +#ifdef 
__ARCH_HAS_SA_RESTORER + ka->sa.sa_restorer = NULL; +#endif sigemptyset(&ka->sa.sa_mask); ka++; } @@ -2347,6 +2351,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) print_fatal_signal(info->si_signo); + proc_coredump_connector(current); /* * If it was able to dump core, this kills all * other threads in the group and synchronizes with @@ -2682,7 +2687,7 @@ static int do_sigpending(void *set, unsigned long sigsetsize) /** * sys_rt_sigpending - examine a pending signal that has been raised * while blocked - * @set: stores pending signals + * @uset: stores pending signals * @sigsetsize: size of sigset_t type or larger */ SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) @@ -2945,7 +2950,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) static int do_tkill(pid_t tgid, pid_t pid, int sig) { - struct siginfo info; + struct siginfo info = {}; info.si_signo = sig; info.si_errno = 0; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 25d3d8b6e4e1..02fc5c933673 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -131,7 +131,7 @@ static int smpboot_thread_fn(void *data) continue; } - //BUG_ON(td->cpu != smp_processor_id()); + BUG_ON(td->cpu != smp_processor_id()); /* Check for state change setup */ switch (td->status) { @@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) } get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; - if (ht->create) - ht->create(cpu); + if (ht->create) { + /* + * Make sure that the task has actually scheduled out + * into park position, before calling the create + * callback. At least the migration thread callback + * requires that the task is off the runqueue. 
+ */ + if (!wait_task_inactive(tsk, TASK_PARKED)) + WARN_ON(1); + else + ht->create(cpu); + } return 0; } diff --git a/kernel/sys.c b/kernel/sys.c index 81f56445fba9..0da73cf73e60 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -324,7 +324,6 @@ void kernel_restart_prepare(char *cmd) system_state = SYSTEM_RESTART; usermodehelper_disable(); device_shutdown(); - syscore_shutdown(); } /** @@ -370,6 +369,7 @@ void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); disable_nonboot_cpus(); + syscore_shutdown(); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); else @@ -395,6 +395,7 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); + disable_nonboot_cpus(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); @@ -2185,9 +2186,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; -static int __orderly_poweroff(void) +static int __orderly_poweroff(bool force) { - int argc; char **argv; static char *envp[] = { "HOME=/", @@ -2196,20 +2196,40 @@ static int __orderly_poweroff(void) }; int ret; - argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); - if (argv == NULL) { + argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); + if (argv) { + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + argv_free(argv); + } else { printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", - __func__, poweroff_cmd); - return -ENOMEM; + __func__, poweroff_cmd); + ret = -ENOMEM; } - ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, - NULL, NULL, NULL); - argv_free(argv); + if (ret && force) { + printk(KERN_WARNING "Failed to start orderly shutdown: " + "forcing the issue\n"); + /* + * I guess this should try to kick off some daemon to sync and + * poweroff asap. Or not even bother syncing if we're doing an + * emergency shutdown? 
+ */ + emergency_sync(); + kernel_power_off(); + } return ret; } +static bool poweroff_force; + +static void poweroff_work_func(struct work_struct *work) +{ + __orderly_poweroff(poweroff_force); +} + +static DECLARE_WORK(poweroff_work, poweroff_work_func); + /** * orderly_poweroff - Trigger an orderly system poweroff * @force: force poweroff if command execution fails @@ -2219,21 +2239,9 @@ static int __orderly_poweroff(void) */ int orderly_poweroff(bool force) { - int ret = __orderly_poweroff(); - - if (ret && force) { - printk(KERN_WARNING "Failed to start orderly shutdown: " - "forcing the issue\n"); - - /* - * I guess this should try to kick off some daemon to sync and - * poweroff asap. Or not even bother syncing if we're doing an - * emergency shutdown? - */ - emergency_sync(); - kernel_power_off(); - } - - return ret; + if (force) /* do not override the pending "true" */ + poweroff_force = true; + schedule_work(&poweroff_work); + return 0; } EXPORT_SYMBOL_GPL(orderly_poweroff); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 2fb8cb88df8d..7f32fe0e52cd 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -67,7 +67,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) */ int tick_check_broadcast_device(struct clock_event_device *dev) { - if ((tick_broadcast_device.evtdev && + if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || + (tick_broadcast_device.evtdev && tick_broadcast_device.evtdev->rating >= dev->rating) || (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 192473b22799..fc382d6e2765 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -414,24 +414,28 @@ config PROBE_EVENTS def_bool n config DYNAMIC_FTRACE - bool "enable/disable ftrace tracepoints dynamically" + bool "enable/disable function tracing dynamically" depends on FUNCTION_TRACER depends on HAVE_DYNAMIC_FTRACE default y help - This option will 
modify all the calls to ftrace dynamically - (will patch them out of the binary image and replace them - with a No-Op instruction) as they are called. A table is - created to dynamically enable them again. + This option will modify all the calls to function tracing + dynamically (will patch them out of the binary image and + replace them with a No-Op instruction) on boot up. During + compile time, a table is made of all the locations that ftrace + can function trace, and this table is linked into the kernel + image. When this is enabled, functions can be individually + enabled, and the functions not enabled will not affect + performance of the system. + + See the files in /sys/kernel/debug/tracing: + available_filter_functions + set_ftrace_filter + set_ftrace_notrace This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise has native performance as long as no tracing is active. - The changes to the code are done by a kernel thread that - wakes up once a second and checks to see if any ftrace calls - were made. If so, it runs stop_machine (stops all CPUS) - and modifies the code to jump over the call to ftrace. 
- config DYNAMIC_FTRACE_WITH_REGS def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e5b8c272eec..5a0f781cd729 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -739,12 +739,6 @@ static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, struct request *rq) { - struct blk_trace *bt = q->blk_trace; - - /* if control ever passes through here, it's a request based driver */ - if (unlikely(bt && !bt->rq_based)) - bt->rq_based = true; - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); } @@ -780,24 +774,10 @@ static void blk_add_trace_bio_bounce(void *ignore, blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } -static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error) +static void blk_add_trace_bio_complete(void *ignore, + struct request_queue *q, struct bio *bio, + int error) { - struct request_queue *q; - struct blk_trace *bt; - - if (!bio->bi_bdev) - return; - - q = bdev_get_queue(bio->bi_bdev); - bt = q->blk_trace; - - /* - * Request based drivers will generate both rq and bio completions. - * Ignore bio ones. 
- */ - if (likely(!bt) || bt->rq_based) - return; - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ab25b88aae56..b3fde6d7b7fc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -66,7 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, }; /* ftrace_enabled is a method to turn ftrace on or off */ @@ -694,7 +694,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) free_page(tmp); } - free_page((unsigned long)stat->pages); stat->pages = NULL; stat->start = NULL; @@ -1053,6 +1052,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) static struct pid * const ftrace_swapper_pid = &init_struct_pid; +loff_t +ftrace_filter_lseek(struct file *file, loff_t offset, int whence) +{ + loff_t ret; + + if (file->f_mode & FMODE_READ) + ret = seq_lseek(file, offset, whence); + else + file->f_pos = ret = 1; + + return ret; +} + #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD @@ -2613,7 +2625,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) * routine, you can use ftrace_filter_write() for the write * routine if @flag has FTRACE_ITER_FILTER set, or * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. - * ftrace_regex_lseek() should be used as the lseek routine, and + * ftrace_filter_lseek() should be used as the lseek routine, and * release must call ftrace_regex_release(). 
*/ int @@ -2697,19 +2709,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file) inode, file); } -loff_t -ftrace_regex_lseek(struct file *file, loff_t offset, int whence) -{ - loff_t ret; - - if (file->f_mode & FMODE_READ) - ret = seq_lseek(file, offset, whence); - else - file->f_pos = ret = 1; - - return ret; -} - static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; @@ -3104,8 +3103,8 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, continue; } - hlist_del(&entry->node); - call_rcu(&entry->rcu, ftrace_free_entry_rcu); + hlist_del_rcu(&entry->node); + call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu); } } __disable_ftrace_function_probe(); @@ -3441,14 +3440,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; static int __init set_ftrace_notrace(char *str) { - strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); + strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_notrace=", set_ftrace_notrace); static int __init set_ftrace_filter(char *str) { - strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); + strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_filter=", set_ftrace_filter); @@ -3571,7 +3570,7 @@ static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; @@ -3579,7 +3578,7 @@ static const struct file_operations ftrace_notrace_fops = { .open = ftrace_notrace_open, .read = seq_read, .write = ftrace_notrace_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; @@ -3784,8 +3783,8 @@ static const struct file_operations ftrace_graph_fops = { .open = ftrace_graph_open, .read = seq_read, .write = ftrace_graph_write, + .llseek = ftrace_filter_lseek, .release = ftrace_graph_release, - .llseek = 
seq_lseek, }; #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -4131,7 +4130,8 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); trace_recursion_set(TRACE_CONTROL_BIT); do_for_each_ftrace_op(op, ftrace_control_list) { - if (!ftrace_function_local_disabled(op) && + if (!(op->flags & FTRACE_OPS_FL_STUB) && + !ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); @@ -4439,7 +4439,7 @@ static const struct file_operations ftrace_pid_fops = { .open = ftrace_pid_open, .write = ftrace_pid_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_pid_release, }; @@ -4555,12 +4555,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_startup_sysctl(); /* we are starting ftrace again */ - if (ftrace_ops_list != &ftrace_list_end) { - if (ftrace_ops_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_ops_list->func; - else - ftrace_trace_function = ftrace_ops_list_func; - } + if (ftrace_ops_list != &ftrace_list_end) + update_ftrace_function(); } else { /* stopping ftrace calls (just send to ftrace_stub) */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c2e2c2310374..66338c4f7f4b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -132,7 +132,7 @@ static char *default_bootup_tracer; static int __init set_cmdline_ftrace(char *str) { - strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); + strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; /* We are using ftrace early, expand it */ ring_buffer_expanded = 1; @@ -162,7 +162,7 @@ static char *trace_boot_options __initdata; static int __init set_trace_boot_options(char *str) { - strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); + strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); trace_boot_options = trace_boot_options_buf; return 0; } @@ -704,7 +704,7 @@ 
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct ring_buffer *buf = tr->buffer; + struct ring_buffer *buf; if (trace_stop_count) return; @@ -719,6 +719,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&ftrace_max_lock); + buf = tr->buffer; tr->buffer = max_tr.buffer; max_tr.buffer = buf; @@ -743,8 +744,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) + if (!current_trace->allocated_snapshot) { + /* Only the nop tracer should hit this when disabling */ + WARN_ON_ONCE(current_trace != &nop_trace); return; + } arch_spin_lock(&ftrace_max_lock); @@ -2400,6 +2404,27 @@ static void test_ftrace_alive(struct seq_file *m) seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); } +#ifdef CONFIG_TRACER_MAX_TRACE +static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) +{ + if (iter->trace->allocated_snapshot) + seq_printf(m, "#\n# * Snapshot is allocated *\n#\n"); + else + seq_printf(m, "#\n# * Snapshot is freed *\n#\n"); + + seq_printf(m, "# Snapshot commands:\n"); + seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); + seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); + seq_printf(m, "# Takes a snapshot of the main buffer.\n"); + seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n"); + seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); + seq_printf(m, "# is not a '0' or '1')\n"); +} +#else +/* Should never be called */ +static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } +#endif + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2411,7 +2436,9 @@ static int s_show(struct 
seq_file *m, void *v) seq_puts(m, "#\n"); test_ftrace_alive(m); } - if (iter->trace && iter->trace->print_header) + if (iter->snapshot && trace_empty(iter)) + print_snapshot_help(m, iter); + else if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); else trace_default_header(m); @@ -2857,11 +2884,25 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg) return -EINVAL; } -static void set_tracer_flags(unsigned int mask, int enabled) +/* Some tracers require overwrite to stay enabled */ +int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) +{ + if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set) + return -1; + + return 0; +} + +int set_tracer_flag(unsigned int mask, int enabled) { /* do nothing if flag is already set */ if (!!(trace_flags & mask) == !!enabled) - return; + return 0; + + /* Give the tracer a chance to approve the change */ + if (current_trace->flag_changed) + if (current_trace->flag_changed(current_trace, mask, !!enabled)) + return -EINVAL; if (enabled) trace_flags |= mask; @@ -2871,18 +2912,24 @@ static void set_tracer_flags(unsigned int mask, int enabled) if (mask == TRACE_ITER_RECORD_CMD) trace_event_enable_cmd_record(enabled); - if (mask == TRACE_ITER_OVERWRITE) + if (mask == TRACE_ITER_OVERWRITE) { ring_buffer_change_overwrite(global_trace.buffer, enabled); +#ifdef CONFIG_TRACER_MAX_TRACE + ring_buffer_change_overwrite(max_tr.buffer, enabled); +#endif + } if (mask == TRACE_ITER_PRINTK) trace_printk_start_stop_comm(enabled); + + return 0; } static int trace_set_options(char *option) { char *cmp; int neg = 0; - int ret = 0; + int ret = -ENODEV; int i; cmp = strstrip(option); @@ -2892,19 +2939,20 @@ static int trace_set_options(char *option) cmp += 2; } + mutex_lock(&trace_types_lock); + for (i = 0; trace_options[i]; i++) { if (strcmp(cmp, trace_options[i]) == 0) { - set_tracer_flags(1 << i, !neg); + ret = set_tracer_flag(1 << i, !neg); break; } } /* If no option could be set, test the 
specific tracer options */ - if (!trace_options[i]) { - mutex_lock(&trace_types_lock); + if (!trace_options[i]) ret = set_tracer_option(current_trace, cmp, neg); - mutex_unlock(&trace_types_lock); - } + + mutex_unlock(&trace_types_lock); return ret; } @@ -2914,6 +2962,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2923,7 +2972,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - trace_set_options(buf); + ret = trace_set_options(buf); + if (ret < 0) + return ret; *ppos += cnt; @@ -3227,6 +3278,9 @@ static int tracing_set_tracer(const char *buf) goto out; trace_branch_disable(); + + current_trace->enabled = false; + if (current_trace->reset) current_trace->reset(tr); @@ -3271,6 +3325,7 @@ static int tracing_set_tracer(const char *buf) } current_trace = t; + current_trace->enabled = true; trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); @@ -4144,8 +4199,6 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, default: if (current_trace->allocated_snapshot) tracing_reset_online_cpus(&max_tr); - else - ret = -EINVAL; break; } @@ -4759,7 +4812,13 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, if (val != 0 && val != 1) return -EINVAL; - set_tracer_flags(1 << index, val); + + mutex_lock(&trace_types_lock); + ret = set_tracer_flag(1 << index, val); + mutex_unlock(&trace_types_lock); + + if (ret < 0) + return ret; *ppos += cnt; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 57d7e5397d56..2081971367ea 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -283,11 +283,15 @@ struct tracer { enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(u32 old_flags, u32 bit, int set); + /* Return 0 if OK with change, else return non-zero */ + int 
(*flag_changed)(struct tracer *tracer, + u32 mask, int set); struct tracer *next; struct tracer_flags *flags; bool print_max; bool use_max_tr; bool allocated_snapshot; + bool enabled; }; @@ -943,6 +947,8 @@ extern const char *__stop___trace_bprintk_fmt[]; void trace_printk_init_buffers(void); void trace_printk_start_comm(void); +int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); +int set_tracer_flag(unsigned int mask, int enabled); #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 713a2cac4881..443b25b43b4f 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -32,7 +32,7 @@ enum { static int trace_type __read_mostly; -static int save_lat_flag; +static int save_flags; static void stop_irqsoff_tracer(struct trace_array *tr, int graph); static int start_irqsoff_tracer(struct trace_array *tr, int graph); @@ -558,8 +558,11 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph) static void __irqsoff_tracer_init(struct trace_array *tr) { - save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; - trace_flags |= TRACE_ITER_LATENCY_FMT; + save_flags = trace_flags; + + /* non overwrite screws up the latency tracers */ + set_tracer_flag(TRACE_ITER_OVERWRITE, 1); + set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); tracing_max_latency = 0; irqsoff_trace = tr; @@ -573,10 +576,13 @@ static void __irqsoff_tracer_init(struct trace_array *tr) static void irqsoff_tracer_reset(struct trace_array *tr) { + int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; + int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; + stop_irqsoff_tracer(tr, is_graph()); - if (!save_lat_flag) - trace_flags &= ~TRACE_ITER_LATENCY_FMT; + set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); + set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); } static void irqsoff_tracer_start(struct trace_array *tr) @@ -609,6 +615,7 @@ static struct 
tracer irqsoff_tracer __read_mostly = .print_line = irqsoff_print_line, .flags = &tracer_flags, .set_flag = irqsoff_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, #endif @@ -642,6 +649,7 @@ static struct tracer preemptoff_tracer __read_mostly = .print_line = irqsoff_print_line, .flags = &tracer_flags, .set_flag = irqsoff_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, #endif @@ -677,6 +685,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .print_line = irqsoff_print_line, .flags = &tracer_flags, .set_flag = irqsoff_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, #endif diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 75aa97fbe1a1..fde652c9a511 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -36,7 +36,7 @@ static void __wakeup_reset(struct trace_array *tr); static int wakeup_graph_entry(struct ftrace_graph_ent *trace); static void wakeup_graph_return(struct ftrace_graph_ret *trace); -static int save_lat_flag; +static int save_flags; #define TRACE_DISPLAY_GRAPH 1 @@ -540,8 +540,11 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int __wakeup_tracer_init(struct trace_array *tr) { - save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; - trace_flags |= TRACE_ITER_LATENCY_FMT; + save_flags = trace_flags; + + /* non overwrite screws up the latency tracers */ + set_tracer_flag(TRACE_ITER_OVERWRITE, 1); + set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); tracing_max_latency = 0; wakeup_trace = tr; @@ -563,12 +566,15 @@ static int wakeup_rt_tracer_init(struct trace_array *tr) static void wakeup_tracer_reset(struct trace_array *tr) { + int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; + int overwrite_flag = save_flags & 
TRACE_ITER_OVERWRITE; + stop_wakeup_tracer(tr); /* make sure we put back any tasks we are tracing */ wakeup_reset(tr); - if (!save_lat_flag) - trace_flags &= ~TRACE_ITER_LATENCY_FMT; + set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); + set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); } static void wakeup_tracer_start(struct trace_array *tr) @@ -594,6 +600,7 @@ static struct tracer wakeup_tracer __read_mostly = .print_line = wakeup_print_line, .flags = &tracer_flags, .set_flag = wakeup_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif @@ -615,6 +622,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = .print_line = wakeup_print_line, .flags = &tracer_flags, .set_flag = wakeup_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 42ca822fc701..83a8b5b7bd35 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -322,7 +322,7 @@ static const struct file_operations stack_trace_filter_fops = { .open = stack_trace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; diff --git a/kernel/user.c b/kernel/user.c index e81978e8c03b..8e635a18ab52 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,8 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, + .may_mount_sysfs = true, + .may_mount_proc = true, }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 8b650837083e..e134d8f365dd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,10 +21,12 @@ #include <linux/uaccess.h> #include <linux/ctype.h> #include <linux/projid.h> +#include <linux/fs_struct.h> 
static struct kmem_cache *user_ns_cachep __read_mostly; -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) @@ -60,6 +62,15 @@ int create_user_ns(struct cred *new) kgid_t group = new->egid; int ret; + /* + * Verify that we can not violate the policy of which files + * may be accessed that is specified by the root directory, + * by verifying that the root directory is at the root of the + * mount namespace which allows all files to be accessed. + */ + if (current_chrooted()) + return -EPERM; + /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who * created a user_namespace. @@ -86,6 +97,8 @@ int create_user_ns(struct cred *new) set_cred_user_ns(new, ns); + update_mnt_policy(ns); + return 0; } @@ -600,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (map->nr_extents != 0) goto out; - /* Require the appropriate privilege CAP_SETUID or CAP_SETGID - * over the user namespace in order to set the id mapping. + /* + * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) + if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) goto out; /* Get a buffer */ @@ -688,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. 
*/ - if (!new_idmap_permitted(ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) goto out; /* Map the lower ids from the parent user namespace to the @@ -775,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t &ns->projid_map, &ns->parent->projid_map); } -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { /* Allow mapping to your own filesystem ids */ @@ -783,12 +797,12 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, current_fsuid())) + if (uid_eq(uid, file->f_cred->fsuid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, current_fsgid())) + if (gid_eq(gid, file->f_cred->fsgid)) return true; } } @@ -799,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. + * And the opener of the id file also had the appropriate capability. 
*/ - if (ns_capable(ns->parent, cap_setid)) + if (ns_capable(ns->parent, cap_setid) && + file_ns_capable(file, ns->parent, cap_setid)) return true; return false; @@ -837,6 +853,9 @@ static int userns_install(struct nsproxy *nsproxy, void *ns) if (atomic_read(&current->mm->mm_users) > 1) return -EINVAL; + if (current->fs->users != 1) + return -EINVAL; + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 81f2457811eb..b48cd597145d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -457,11 +457,12 @@ static int worker_pool_assign_id(struct worker_pool *pool) int ret; mutex_lock(&worker_pool_idr_mutex); - idr_pre_get(&worker_pool_idr, GFP_KERNEL); - ret = idr_get_new(&worker_pool_idr, pool, &pool->id); + ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); + if (ret >= 0) + pool->id = ret; mutex_unlock(&worker_pool_idr_mutex); - return ret; + return ret < 0 ? ret : 0; } /* @@ -3446,28 +3447,34 @@ static void wq_unbind_fn(struct work_struct *work) spin_unlock_irq(&pool->lock); mutex_unlock(&pool->assoc_mutex); - } - /* - * Call schedule() so that we cross rq->lock and thus can guarantee - * sched callbacks see the %WORKER_UNBOUND flag. This is necessary - * as scheduler callbacks may be invoked from other cpus. - */ - schedule(); + /* + * Call schedule() so that we cross rq->lock and thus can + * guarantee sched callbacks see the %WORKER_UNBOUND flag. + * This is necessary as scheduler callbacks may be invoked + * from other cpus. + */ + schedule(); - /* - * Sched callbacks are disabled now. Zap nr_running. After this, - * nr_running stays zero and need_more_worker() and keep_working() - * are always true as long as the worklist is not empty. Pools on - * @cpu now behave as unbound (in terms of concurrency management) - * pools which are served by workers tied to the CPU. 
- * - * On return from this function, the current worker would trigger - * unbound chain execution of pending work items if other workers - * didn't already. - */ - for_each_std_worker_pool(pool, cpu) + /* + * Sched callbacks are disabled now. Zap nr_running. + * After this, nr_running stays zero and need_more_worker() + * and keep_working() are always true as long as the + * worklist is not empty. This pool now behaves as an + * unbound (in terms of concurrency management) pool which + * are served by workers tied to the pool. + */ atomic_set(&pool->nr_running, 0); + + /* + * With concurrency management just turned off, a busy + * worker blocking could lead to lengthy stalls. Kick off + * unbound chain execution of currently pending work items. + */ + spin_lock_irq(&pool->lock); + wake_up_worker(pool); + spin_unlock_irq(&pool->lock); + } } /* diff --git a/lib/Kconfig b/lib/Kconfig index 3958dc4389f9..fe01d418b09a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -404,4 +404,7 @@ config OID_REGISTRY help Enable fast lookup object identifier registry. 
+config UCS2_STRING + tristate + endmenu diff --git a/lib/Makefile b/lib/Makefile index d7946ff75b2e..6e2cc561f761 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -174,3 +174,5 @@ quiet_cmd_build_OID_registry = GEN $@ cmd_build_OID_registry = perl $(srctree)/$(src)/build_OID_registry $< $@ clean-files += oid_registry_data.c + +obj-$(CONFIG_UCS2_STRING) += ucs2_string.o diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c index 9681d54b95d1..f8e0e5367398 100644 --- a/lib/bust_spinlocks.c +++ b/lib/bust_spinlocks.c @@ -8,6 +8,7 @@ */ #include <linux/kernel.h> +#include <linux/printk.h> #include <linux/spinlock.h> #include <linux/tty.h> #include <linux/wait.h> @@ -28,5 +29,3 @@ void __attribute__((weak)) bust_spinlocks(int yes) wake_up_klogd(); } } - - diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 5e396accd3d0..d87a17a819d0 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -862,17 +862,21 @@ static void check_unmap(struct dma_debug_entry *ref) entry = bucket_find_exact(bucket, ref); if (!entry) { + /* must drop lock before calling dma_mapping_error */ + put_hash_bucket(bucket, &flags); + if (dma_mapping_error(ref->dev, ref->dev_addr)) { err_printk(ref->dev, NULL, - "DMA-API: device driver tries " - "to free an invalid DMA memory address\n"); - return; + "DMA-API: device driver tries to free an " + "invalid DMA memory address\n"); + } else { + err_printk(ref->dev, NULL, + "DMA-API: device driver tries to free DMA " + "memory it has not allocated [device " + "address=0x%016llx] [size=%llu bytes]\n", + ref->dev_addr, ref->size); } - err_printk(ref->dev, NULL, "DMA-API: device driver tries " - "to free DMA memory it has not allocated " - "[device address=0x%016llx] [size=%llu bytes]\n", - ref->dev_addr, ref->size); - goto out; + return; } if (ref->size != entry->size) { @@ -936,7 +940,6 @@ static void check_unmap(struct dma_debug_entry *ref) hash_bucket_del(entry); dma_entry_free(entry); -out: put_hash_bucket(bucket, &flags); } @@ -1082,13 +1085,27 @@ void 
debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) ref.dev = dev; ref.dev_addr = dma_addr; bucket = get_hash_bucket(&ref, &flags); - entry = bucket_find_exact(bucket, &ref); - if (!entry) - goto out; + list_for_each_entry(entry, &bucket->list, list) { + if (!exact_match(&ref, entry)) + continue; + + /* + * The same physical address can be mapped multiple + * times. Without a hardware IOMMU this results in the + * same device addresses being put into the dma-debug + * hash multiple times too. This can result in false + * positives being reported. Therefore we implement a + * best-fit algorithm here which updates the first entry + * from the hash which fits the reference value and is + * not currently listed as being checked. + */ + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + entry->map_err_type = MAP_ERR_CHECKED; + break; + } + } - entry->map_err_type = MAP_ERR_CHECKED; -out: put_hash_bucket(bucket, &flags); } EXPORT_SYMBOL(debug_dma_mapping_error); diff --git a/lib/idr.c b/lib/idr.c index 73f4d53c02f3..322e2816f2fb 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -106,8 +106,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) if (layer_idr) return get_from_free_list(layer_idr); - /* try to allocate directly from kmem_cache */ - new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + /* + * Try to allocate directly from kmem_cache. We want to try this + * before preload buffer; otherwise, non-preloading idr_alloc() + * users will end up taking advantage of preloading ones. As the + * following is allowed to fail for preloaded cases, suppress + * warning this time. + */ + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN); if (new) return new; @@ -115,18 +121,24 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) * Try to fetch one from the per-cpu preload buffer if in process * context. See idr_preload() for details. 
*/ - if (in_interrupt()) - return NULL; - - preempt_disable(); - new = __this_cpu_read(idr_preload_head); - if (new) { - __this_cpu_write(idr_preload_head, new->ary[0]); - __this_cpu_dec(idr_preload_cnt); - new->ary[0] = NULL; + if (!in_interrupt()) { + preempt_disable(); + new = __this_cpu_read(idr_preload_head); + if (new) { + __this_cpu_write(idr_preload_head, new->ary[0]); + __this_cpu_dec(idr_preload_cnt); + new->ary[0] = NULL; + } + preempt_enable(); + if (new) + return new; } - preempt_enable(); - return new; + + /* + * Both failed. Try kmem_cache again w/o adding __GFP_NOWARN so + * that memory allocation failure warning is printed as intended. + */ + return kmem_cache_zalloc(idr_layer_cache, gfp_mask); } static void idr_layer_rcu_free(struct rcu_head *head) @@ -184,20 +196,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) } } -/** - * idr_pre_get - reserve resources for idr allocation - * @idp: idr handle - * @gfp_mask: memory allocation flags - * - * This function should be called prior to calling the idr_get_new* functions. - * It preallocates enough memory to satisfy the worst possible allocation. The - * caller should pass in GFP_KERNEL if possible. This of course requires that - * no spinning locks be held. - * - * If the system is REALLY out of memory this function returns %0, - * otherwise %1. 
- */ -int idr_pre_get(struct idr *idp, gfp_t gfp_mask) +int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) { while (idp->id_free_cnt < MAX_IDR_FREE) { struct idr_layer *new; @@ -208,13 +207,12 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) } return 1; } -EXPORT_SYMBOL(idr_pre_get); +EXPORT_SYMBOL(__idr_pre_get); /** * sub_alloc - try to allocate an id without growing the tree depth * @idp: idr handle * @starting_id: id to start search at - * @id: pointer to the allocated handle * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer * @gfp_mask: allocation mask for idr_layer_alloc() * @layer_idr: optional idr passed to idr_layer_alloc() @@ -376,25 +374,7 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, idr_mark_full(pa, id); } -/** - * idr_get_new_above - allocate new idr entry above or equal to a start id - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @starting_id: id to start search at - * @id: pointer to the allocated handle - * - * This is the allocate id function. It should be called with any - * required locks. - * - * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill - * IDR's preallocation and then retry the idr_get_new_above() call. - * - * If the idr is full idr_get_new_above() will return %-ENOSPC. - * - * @id returns a value in the range @starting_id ... 
%0x7fffffff - */ -int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) +int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { struct idr_layer *pa[MAX_IDR_LEVEL + 1]; int rv; @@ -407,7 +387,7 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) *id = rv; return 0; } -EXPORT_SYMBOL(idr_get_new_above); +EXPORT_SYMBOL(__idr_get_new_above); /** * idr_preload - preload for idr_alloc() @@ -569,8 +549,7 @@ void idr_remove(struct idr *idp, int id) struct idr_layer *p; struct idr_layer *to_free; - /* see comment in idr_find_slowpath() */ - if (WARN_ON_ONCE(id < 0)) + if (id < 0) return; sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); @@ -667,15 +646,7 @@ void *idr_find_slowpath(struct idr *idp, int id) int n; struct idr_layer *p; - /* - * If @id is negative, idr_find() used to ignore the sign bit and - * performed lookup with the rest of bits, which is weird and can - * lead to very obscure bugs. We're now returning NULL for all - * negative IDs but just in case somebody was depending on the sign - * bit being ignored, let's trigger WARN_ON_ONCE() so that they can - * be detected and fixed. WARN_ON_ONCE() can later be removed. 
- */ - if (WARN_ON_ONCE(id < 0)) + if (id < 0) return NULL; p = rcu_dereference_raw(idp->top); @@ -824,8 +795,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) int n; struct idr_layer *p, *old_p; - /* see comment in idr_find_slowpath() */ - if (WARN_ON_ONCE(id < 0)) + if (id < 0) return ERR_PTR(-EINVAL); p = idp->top; @@ -918,7 +888,7 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) int ida_pre_get(struct ida *ida, gfp_t gfp_mask) { /* allocate idr_layers */ - if (!idr_pre_get(&ida->idr, gfp_mask)) + if (!__idr_pre_get(&ida->idr, gfp_mask)) return 0; /* allocate free_bitmap */ diff --git a/lib/kobject.c b/lib/kobject.c index e07ee1fcd6f1..a65486613d79 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -529,6 +529,13 @@ struct kobject *kobject_get(struct kobject *kobj) return kobj; } +static struct kobject *kobject_get_unless_zero(struct kobject *kobj) +{ + if (!kref_get_unless_zero(&kobj->kref)) + kobj = NULL; + return kobj; +} + /* * kobject_cleanup - free kobject resources. * @kobj: object to cleanup @@ -751,7 +758,7 @@ struct kobject *kset_find_obj(struct kset *kset, const char *name) list_for_each_entry(k, &kset->list, entry) { if (kobject_name(k) && !strcmp(kobject_name(k), name)) { - ret = kobject_get(k); + ret = kobject_get_unless_zero(k); break; } } diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bfe02b8fc55b..d23762e6652c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -105,9 +105,9 @@ setup_io_tlb_npages(char *str) if (!strcmp(str, "force")) swiotlb_force = 1; - return 1; + return 0; } -__setup("swiotlb=", setup_io_tlb_npages); +early_param("swiotlb", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? 
*/ unsigned long swiotlb_nr_tbl(void) @@ -115,6 +115,18 @@ unsigned long swiotlb_nr_tbl(void) return io_tlb_nslabs; } EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); + +/* default to 64MB */ +#define IO_TLB_DEFAULT_SIZE (64UL<<20) +unsigned long swiotlb_size_or_default(void) +{ + unsigned long size; + + size = io_tlb_nslabs << IO_TLB_SHIFT; + + return size ? size : (IO_TLB_DEFAULT_SIZE); +} + /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) @@ -188,8 +200,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) void __init swiotlb_init(int verbose) { - /* default to 64MB */ - size_t default_size = 64UL<<20; + size_t default_size = IO_TLB_DEFAULT_SIZE; unsigned char *vstart; unsigned long bytes; diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c new file mode 100644 index 000000000000..6f500ef2301d --- /dev/null +++ b/lib/ucs2_string.c @@ -0,0 +1,51 @@ +#include <linux/ucs2_string.h> +#include <linux/module.h> + +/* Return the number of unicode characters in data */ +unsigned long +ucs2_strnlen(const ucs2_char_t *s, size_t maxlength) +{ + unsigned long length = 0; + + while (*s++ != 0 && length < maxlength) + length++; + return length; +} +EXPORT_SYMBOL(ucs2_strnlen); + +unsigned long +ucs2_strlen(const ucs2_char_t *s) +{ + return ucs2_strnlen(s, ~0UL); +} +EXPORT_SYMBOL(ucs2_strlen); + +/* + * Return the number of bytes is the length of this string + * Note: this is NOT the same as the number of unicode characters + */ +unsigned long +ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength) +{ + return ucs2_strnlen(data, maxlength/sizeof(ucs2_char_t)) * sizeof(ucs2_char_t); +} +EXPORT_SYMBOL(ucs2_strsize); + +int +ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) +{ + while (1) { + if (len == 0) + return 0; + if (*a < *b) + return -1; + if (*a > *b) + return 1; + if (*a == 0) /* implies *b == 0 */ + return 0; + a++; + b++; + len--; + } +} 
+EXPORT_SYMBOL(ucs2_strncmp); diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 82a04d7ba99e..08837db52d94 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -15,7 +15,7 @@ config XZ_DEC_X86 config XZ_DEC_POWERPC bool "PowerPC BCJ filter decoder" - default y if POWERPC + default y if PPC select XZ_DEC_BCJ config XZ_DEC_IA64 diff --git a/mm/Kconfig b/mm/Kconfig index ae55c1e04d10..3bea74f1ccfe 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -286,8 +286,12 @@ config NR_QUICK default "1" config VIRT_TO_BUS - def_bool y - depends on HAVE_VIRT_TO_BUS + bool + help + An architecture should select this if it implements the + deprecated interface virt_to_bus(). All new architectures + should probably not select this. + config MMU_NOTIFIER bool diff --git a/mm/fremap.c b/mm/fremap.c index 0cd4c11488ed..87da3590c61e 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -129,7 +129,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, struct vm_area_struct *vma; int err = -EINVAL; int has_write_lock = 0; - vm_flags_t vm_flags; + vm_flags_t vm_flags = 0; if (prot) return err; @@ -204,10 +204,8 @@ get_write_lock: unsigned long addr; struct file *file = get_file(vma->vm_file); - vm_flags = vma->vm_flags; - if (!(flags & MAP_NONBLOCK)) - vm_flags |= VM_POPULATE; - addr = mmap_region(file, start, size, vm_flags, pgoff); + addr = mmap_region(file, start, size, + vma->vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; @@ -226,12 +224,6 @@ get_write_lock: mutex_unlock(&mapping->i_mmap_mutex); } - if (!(flags & MAP_NONBLOCK) && !(vma->vm_flags & VM_POPULATE)) { - if (!has_write_lock) - goto get_write_lock; - vma->vm_flags |= VM_POPULATE; - } - if (vma->vm_flags & VM_LOCKED) { /* * drop PG_Mlocked flag for over-mapped range @@ -254,7 +246,8 @@ get_write_lock: */ out: - vm_flags = vma->vm_flags; + if (vma) + vm_flags = vma->vm_flags; if (likely(!has_write_lock)) up_read(&mm->mmap_sem); else diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 
0a0be33bb199..1a12f5b9a0ab 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2124,8 +2124,12 @@ int hugetlb_report_node_meminfo(int nid, char *buf) /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ unsigned long hugetlb_total_pages(void) { - struct hstate *h = &default_hstate; - return h->nr_huge_pages * pages_per_huge_page(h); + struct hstate *h; + unsigned long nr_total_pages = 0; + + for_each_hstate(h) + nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h); + return nr_total_pages; } static int hugetlb_acct_memory(struct hstate *h, long delta) @@ -2957,7 +2961,17 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } - if (absent || + /* + * We need call hugetlb_fault for both hugepages under migration + * (in which case hugetlb_fault waits for the migration,) and + * hwpoisoned hugepages (in which case we need to prevent the + * caller from accessing to them.) In order to do this, we use + * here is_swap_pte instead of is_hugetlb_entry_migration and + * is_hugetlb_entry_hwpoisoned. This is because it simply covers + * both cases, and because we can't follow correct pages + * directly from any kind of swap entries. + */ + if (absent || is_swap_pte(huge_ptep_get(pte)) || ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) { int ret; @@ -489,7 +489,7 @@ out: page = NULL; */ static inline int get_kpfn_nid(unsigned long kpfn) { - return ksm_merge_across_nodes ? 0 : pfn_to_nid(kpfn); + return ksm_merge_across_nodes ? 
0 : NUMA(pfn_to_nid(kpfn)); } static void remove_node_from_stable_tree(struct stable_node *stable_node) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 53b8201b31eb..2b552224f5cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3012,6 +3012,8 @@ void memcg_update_array_size(int num) memcg_limited_groups_array_size = memcg_caches_array_size(num); } +static void kmem_cache_destroy_work_func(struct work_struct *w); + int memcg_update_cache_size(struct kmem_cache *s, int num_groups) { struct memcg_cache_params *cur_params = s->memcg_params; @@ -3031,6 +3033,8 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) return -ENOMEM; } + INIT_WORK(&s->memcg_params->destroy, + kmem_cache_destroy_work_func); s->memcg_params->is_root_cache = true; /* @@ -3078,6 +3082,8 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, if (!s->memcg_params) return -ENOMEM; + INIT_WORK(&s->memcg_params->destroy, + kmem_cache_destroy_work_func); if (memcg) { s->memcg_params->memcg = memcg; s->memcg_params->root_cache = root_cache; @@ -3358,8 +3364,6 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) list_for_each_entry(params, &memcg->memcg_slab_caches, list) { cachep = memcg_params_to_cache(params); cachep->memcg_params->dead = true; - INIT_WORK(&cachep->memcg_params->destroy, - kmem_cache_destroy_work_func); schedule_work(&cachep->memcg_params->destroy); } mutex_unlock(&memcg->slab_caches_mutex); diff --git a/mm/memory.c b/mm/memory.c index 494526ae024a..ba94dec5b259 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -216,6 +216,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) tlb->mm = mm; tlb->fullmm = fullmm; + tlb->need_flush_all = 0; tlb->start = -1UL; tlb->end = 0; tlb->need_flush = 0; @@ -2392,6 +2393,53 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(remap_pfn_range); +/** + * vm_iomap_memory - remap memory to userspace + * @vma: user vma to map to 
+ * @start: start of area + * @len: size of area + * + * This is a simplified io_remap_pfn_range() for common driver use. The + * driver just needs to give us the physical memory range to be mapped, + * we'll figure out the rest from the vma information. + * + * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get + * whatever write-combining details or similar. + */ +int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) +{ + unsigned long vm_len, pfn, pages; + + /* Check that the physical memory area passed in looks valid */ + if (start + len < start) + return -EINVAL; + /* + * You *really* shouldn't map things that aren't page-aligned, + * but we've historically allowed it because IO memory might + * just have smaller alignment. + */ + len += start & ~PAGE_MASK; + pfn = start >> PAGE_SHIFT; + pages = (len + ~PAGE_MASK) >> PAGE_SHIFT; + if (pfn + pages < pfn) + return -EINVAL; + + /* We start the mapping 'vm_pgoff' pages into the area */ + if (vma->vm_pgoff > pages) + return -EINVAL; + pfn += vma->vm_pgoff; + pages -= vma->vm_pgoff; + + /* Can we fit all of the mapping? */ + vm_len = vma->vm_end - vma->vm_start; + if (vm_len >> PAGE_SHIFT > pages) + return -EINVAL; + + /* Ok, let it rip */ + return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); +} +EXPORT_SYMBOL(vm_iomap_memory); + static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b81a367b9f39..ee3765760818 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1779,7 +1779,11 @@ void try_offline_node(int nid) for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; - if (zone->wait_table) + /* + * wait_table may be allocated from boot memory, + * here only free if it's allocated by vmalloc. 
+ */ + if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } @@ -1801,7 +1805,7 @@ int __ref remove_memory(int nid, u64 start, u64 size) int retry = 1; start_pfn = PFN_DOWN(start); - end_pfn = start_pfn + PFN_DOWN(size); + end_pfn = PFN_UP(start + size - 1); /* * When CONFIG_MEMCG is on, one memory block may be used by other diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 31d26637b658..74310017296e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2390,9 +2390,9 @@ restart: *mpol_new = *n->policy; atomic_set(&mpol_new->refcnt, 1); - sp_node_init(n_new, n->end, end, mpol_new); - sp_insert(sp, n_new); + sp_node_init(n_new, end, n->end, mpol_new); n->end = start; + sp_insert(sp, n_new); n_new = NULL; mpol_new = NULL; break; diff --git a/mm/mlock.c b/mm/mlock.c index 1c5e33fce639..79b7cf7d1bca 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -358,7 +358,7 @@ static int do_mlock(unsigned long start, size_t len, int on) newflags = vma->vm_flags & ~VM_LOCKED; if (on) - newflags |= VM_LOCKED | VM_POPULATE; + newflags |= VM_LOCKED; tmp = vma->vm_end; if (tmp > end) @@ -418,8 +418,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) * range with the first VMA. Also, skip undesirable VMA types. 
*/ nend = min(end, vma->vm_end); - if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_POPULATE)) != - VM_POPULATE) + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; @@ -492,9 +491,9 @@ static int do_mlockall(int flags) struct vm_area_struct * vma, * prev = NULL; if (flags & MCL_FUTURE) - current->mm->def_flags |= VM_LOCKED | VM_POPULATE; + current->mm->def_flags |= VM_LOCKED; else - current->mm->def_flags &= ~(VM_LOCKED | VM_POPULATE); + current->mm->def_flags &= ~VM_LOCKED; if (flags == MCL_FUTURE) goto out; @@ -503,7 +502,7 @@ static int do_mlockall(int flags) newflags = vma->vm_flags & ~VM_LOCKED; if (flags & MCL_CURRENT) - newflags |= VM_LOCKED | VM_POPULATE; + newflags |= VM_LOCKED; /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); diff --git a/mm/mmap.c b/mm/mmap.c index 2664a47cec93..0db0de1c2fbe 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1306,7 +1306,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } addr = mmap_region(file, addr, len, vm_flags, pgoff); - if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE)) + if (!IS_ERR_VALUE(addr) && + ((vm_flags & VM_LOCKED) || + (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) *populate = len; return addr; } @@ -1938,7 +1940,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) /* Check the cache first. */ /* (Cache hit rate is typically around 35%.) 
*/ - vma = mm->mmap_cache; + vma = ACCESS_ONCE(mm->mmap_cache); if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { struct rb_node *rb_node; diff --git a/mm/nommu.c b/mm/nommu.c index e19328087534..2f3ea749c318 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -821,7 +821,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) struct vm_area_struct *vma; /* check the cache first */ - vma = mm->mmap_cache; + vma = ACCESS_ONCE(mm->mmap_cache); if (vma && vma->vm_start <= addr && vma->vm_end > addr) return vma; diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 926b46649749..fd26d0433509 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -429,12 +429,6 @@ compat_process_vm_rw(compat_pid_t pid, if (flags != 0) return -EINVAL; - if (!access_ok(VERIFY_READ, lvec, liovcnt * sizeof(*lvec))) - goto out; - - if (!access_ok(VERIFY_READ, rvec, riovcnt * sizeof(*rvec))) - goto out; - if (vm_write) rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV, iovstack_l, @@ -459,8 +453,6 @@ free_iovecs: kfree(iov_r); if (iov_l != iovstack_l) kfree(iov_l); - -out: return rc; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 88c5fed8b9a4..669fba39be1a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3188,9 +3188,9 @@ int kswapd_run(int nid) if (IS_ERR(pgdat->kswapd)) { /* failure at boot is fatal */ BUG_ON(system_state == SYSTEM_BOOTING); - pgdat->kswapd = NULL; pr_err("Failed to start kswapd on node %d\n", nid); ret = PTR_ERR(pgdat->kswapd); + pgdat->kswapd = NULL; } return ret; } diff --git a/net/802/garp.c b/net/802/garp.c index 8456f5d98b85..5d9630a0eb93 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. 
*/ del_timer_sync(&app->join_timer); + + spin_lock_bh(&app->lock); garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); garp_pdu_queue(app); + spin_unlock_bh(&app->lock); + garp_queue_xmit(app); dev_mc_del(dev, appl->proto.group_address); diff --git a/net/802/mrp.c b/net/802/mrp.c index a4cc3229952a..e085bcc754f6 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -870,8 +870,12 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + + spin_lock(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); mrp_pdu_queue(app); + spin_unlock(&app->lock); + mrp_queue_xmit(app); dev_mc_del(dev, appl->group_address); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index 8f7517df41a5..b85a91fa61f1 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -3,7 +3,7 @@ # config VLAN_8021Q - tristate "802.1Q VLAN Support" + tristate "802.1Q/802.1ad VLAN Support" ---help--- Select this and you will be able to create 802.1Q VLAN interfaces on your ethernet interfaces. 802.1Q VLAN supports almost diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a18714469bf7..9424f3718ea7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -51,14 +51,18 @@ const char vlan_version[] = DRV_VERSION; /* End of global variables definitions. 
*/ -static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) +static int vlan_group_prealloc_vid(struct vlan_group *vg, + __be16 vlan_proto, u16 vlan_id) { struct net_device **array; + unsigned int pidx, vidx; unsigned int size; ASSERT_RTNL(); - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + pidx = vlan_proto_idx(vlan_proto); + vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN; + array = vg->vlan_devices_arrays[pidx][vidx]; if (array != NULL) return 0; @@ -67,7 +71,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) if (array == NULL) return -ENOBUFS; - vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array; + vg->vlan_devices_arrays[pidx][vidx] = array; return 0; } @@ -86,13 +90,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) @@ -100,7 +97,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); - vlan_group_set_device(grp, vlan_id, NULL); + vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL); /* Because unregister_netdevice_queue() makes sure at least one rcu * grace period is respected before device freeing, * we dont need to call synchronize_net() here. @@ -114,11 +111,19 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). 
+ */ + if (vlan_id) + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); + /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) +int vlan_check_real_dev(struct net_device *real_dev, + __be16 protocol, u16 vlan_id) { const char *name = real_dev->name; @@ -127,7 +132,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) return -EOPNOTSUPP; } - if (vlan_find_dev(real_dev, vlan_id) != NULL) + if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL) return -EEXIST; return 0; @@ -142,7 +147,7 @@ int register_vlan_dev(struct net_device *dev) struct vlan_group *grp; int err; - err = vlan_vid_add(real_dev, vlan_id); + err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id); if (err) return err; @@ -160,7 +165,7 @@ int register_vlan_dev(struct net_device *dev) goto out_uninit_gvrp; } - err = vlan_group_prealloc_vid(grp, vlan_id); + err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id); if (err < 0) goto out_uninit_mvrp; @@ -181,7 +186,7 @@ int register_vlan_dev(struct net_device *dev) /* So, got the sucker initialized, now lets place * it into our local structure. 
*/ - vlan_group_set_device(grp, vlan_id, dev); + vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; return 0; @@ -195,7 +200,7 @@ out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: - vlan_vid_del(real_dev, vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); return err; } @@ -213,7 +218,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) if (vlan_id >= VLAN_VID_MASK) return -ERANGE; - err = vlan_check_real_dev(real_dev, vlan_id); + err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id); if (err < 0) return err; @@ -255,6 +260,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) new_dev->mtu = real_dev->mtu; new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT); + vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q); vlan_dev_priv(new_dev)->vlan_id = vlan_id; vlan_dev_priv(new_dev)->real_dev = real_dev; vlan_dev_priv(new_dev)->dent = NULL; @@ -301,7 +307,7 @@ static void vlan_transfer_features(struct net_device *dev, { vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_TX) + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; @@ -341,16 +347,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; struct vlan_dev_priv *vlan; + bool last = false; LIST_HEAD(list); if (is_vlan_dev(dev)) __vlan_device_event(dev, event); if ((event == NETDEV_UP) && - (dev->features & NETIF_F_HW_VLAN_FILTER)) { + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); - vlan_vid_add(dev, 0); + vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } vlan_info = rtnl_dereference(dev->vlan_info); @@ -365,22 +372,13 @@ static int vlan_device_event(struct notifier_block *unused, 
unsigned long event, switch (event) { case NETDEV_CHANGE: /* Propagate real device state to vlan devices */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) netif_stacked_transfer_operstate(dev, vlandev); - } break; case NETDEV_CHANGEADDR: /* Adjust unicast filters on underlying device */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; @@ -390,11 +388,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_CHANGEMTU: - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { if (vlandev->mtu <= dev->mtu) continue; @@ -404,26 +398,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_FEAT_CHANGE: /* Propagate device features to underlying device */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) vlan_transfer_features(dev, vlandev); - } - break; case NETDEV_DOWN: - if (dev->features & NETIF_F_HW_VLAN_FILTER) - vlan_vid_del(dev, 0); + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); /* Put all VLANs for this dev in the down state too. */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; @@ -437,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_UP: /* Put all VLANs for this dev in the up state too. 
*/ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (flgs & IFF_UP) continue; @@ -458,17 +438,15 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (dev->reg_state != NETREG_UNREGISTERING) break; - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) { /* removal of last vid destroys vlan_info, abort * afterwards */ if (vlan_info->nr_vids == 1) - i = VLAN_N_VID; + last = true; unregister_vlan_dev(vlandev, &list); + if (last) + break; } unregister_netdevice_many(&list); break; @@ -482,13 +460,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER: /* Propagate to vlan devices */ - for (i = 0; i < VLAN_N_VID; i++) { - vlandev = vlan_group_get_device(grp, i); - if (!vlandev) - continue; - + vlan_group_for_each_dev(grp, i, vlandev) call_netdevice_notifiers(event, vlandev); - } break; } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 670f1e8cfc0f..ba5983f34c42 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -49,6 +49,7 @@ struct netpoll; * @ingress_priority_map: ingress priority mappings * @nr_egress_mappings: number of egress priority mappings * @egress_priority_map: hash of egress priority mappings + * @vlan_proto: VLAN encapsulation protocol * @vlan_id: VLAN identifier * @flags: device flags * @real_dev: underlying netdevice @@ -62,6 +63,7 @@ struct vlan_dev_priv { unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; + __be16 vlan_proto; u16 vlan_id; u16 flags; @@ -87,10 +89,17 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 #define VLAN_GROUP_ARRAY_PART_LEN 
(VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) +enum vlan_protos { + VLAN_PROTO_8021Q = 0, + VLAN_PROTO_8021AD, + VLAN_PROTO_NUM, +}; + struct vlan_group { unsigned int nr_vlan_devs; struct hlist_node hlist; /* linked list */ - struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; + struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM] + [VLAN_GROUP_ARRAY_SPLIT_PARTS]; }; struct vlan_info { @@ -103,37 +112,67 @@ struct vlan_info { struct rcu_head rcu; }; -static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, - u16 vlan_id) +static inline unsigned int vlan_proto_idx(__be16 proto) +{ + switch (proto) { + case __constant_htons(ETH_P_8021Q): + return VLAN_PROTO_8021Q; + case __constant_htons(ETH_P_8021AD): + return VLAN_PROTO_8021AD; + default: + BUG(); + return 0; + } +} + +static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg, + unsigned int pidx, + u16 vlan_id) { struct net_device **array; - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + + array = vg->vlan_devices_arrays[pidx] + [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL; } +static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, + __be16 vlan_proto, + u16 vlan_id) +{ + return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id); +} + static inline void vlan_group_set_device(struct vlan_group *vg, - u16 vlan_id, + __be16 vlan_proto, u16 vlan_id, struct net_device *dev) { struct net_device **array; if (!vg) return; - array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)] + [vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; } /* Must be invoked with rcu_read_lock or with RTNL. 
*/ static inline struct net_device *vlan_find_dev(struct net_device *real_dev, - u16 vlan_id) + __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); if (vlan_info) - return vlan_group_get_device(&vlan_info->grp, vlan_id); + return vlan_group_get_device(&vlan_info->grp, + vlan_proto, vlan_id); return NULL; } +#define vlan_group_for_each_dev(grp, i, dev) \ + for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \ + if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \ + (i) % VLAN_N_VID))) + /* found in vlan_dev.c */ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); @@ -142,7 +181,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); -int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); +int vlan_check_real_dev(struct net_device *real_dev, + __be16 protocol, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index f3b6f515eba6..8a15eaadc4bd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -8,11 +8,12 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; + __be16 vlan_proto = skb->vlan_proto; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; - vlan_dev = vlan_find_dev(skb->dev, vlan_id); + vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id); if (!vlan_dev) return false; @@ -38,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp) * original position later */ skb_push(skb, offset); - skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto, + skb->vlan_tci); 
if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); @@ -62,12 +64,13 @@ bool vlan_do_receive(struct sk_buff **skbp) /* Must be invoked with rcu_read_lock. */ struct net_device *__vlan_find_dev_deep(struct net_device *dev, - u16 vlan_id) + __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); if (vlan_info) { - return vlan_group_get_device(&vlan_info->grp, vlan_id); + return vlan_group_get_device(&vlan_info->grp, + vlan_proto, vlan_id); } else { /* * Lower devices of master uppers (bonding, team) do not have @@ -78,7 +81,8 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev, upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - return __vlan_find_dev_deep(upper_dev, vlan_id); + return __vlan_find_dev_deep(upper_dev, + vlan_proto, vlan_id); } return NULL; @@ -125,7 +129,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) vhdr = (struct vlan_hdr *) skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); - __vlan_hwaccel_put_tag(skb, vlan_tci); + __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); @@ -153,10 +157,11 @@ EXPORT_SYMBOL(vlan_untag); static void vlan_group_free(struct vlan_group *grp) { - int i; + int i, j; - for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) - kfree(grp->vlan_devices_arrays[i]); + for (i = 0; i < VLAN_PROTO_NUM; i++) + for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++) + kfree(grp->vlan_devices_arrays[i][j]); } static void vlan_info_free(struct vlan_info *vlan_info) @@ -185,35 +190,49 @@ static struct vlan_info *vlan_info_alloc(struct net_device *dev) struct vlan_vid_info { struct list_head list; - unsigned short vid; + __be16 proto; + u16 vid; int refcount; }; +static bool vlan_hw_filter_capable(const struct net_device *dev, + const struct vlan_vid_info *vid_info) +{ + if (vid_info->proto == htons(ETH_P_8021Q) && + dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + return true; + if (vid_info->proto == 
htons(ETH_P_8021AD) && + dev->features & NETIF_F_HW_VLAN_STAG_FILTER) + return true; + return false; +} + static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info, - unsigned short vid) + __be16 proto, u16 vid) { struct vlan_vid_info *vid_info; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { - if (vid_info->vid == vid) + if (vid_info->proto == proto && vid_info->vid == vid) return vid_info; } return NULL; } -static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid) +static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid) { struct vlan_vid_info *vid_info; vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL); if (!vid_info) return NULL; + vid_info->proto = proto; vid_info->vid = vid; return vid_info; } -static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, +static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, struct vlan_vid_info **pvid_info) { struct net_device *dev = vlan_info->real_dev; @@ -221,12 +240,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, struct vlan_vid_info *vid_info; int err; - vid_info = vlan_vid_info_alloc(vid); + vid_info = vlan_vid_info_alloc(proto, vid); if (!vid_info) return -ENOMEM; - if (dev->features & NETIF_F_HW_VLAN_FILTER) { - err = ops->ndo_vlan_rx_add_vid(dev, vid); + if (vlan_hw_filter_capable(dev, vid_info)) { + err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); if (err) { kfree(vid_info); return err; @@ -238,7 +257,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, return 0; } -int vlan_vid_add(struct net_device *dev, unsigned short vid) +int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; @@ -254,9 +273,9 @@ int vlan_vid_add(struct net_device *dev, unsigned short vid) return -ENOMEM; vlan_info_created = true; } - vid_info = vlan_vid_info_get(vlan_info, vid); + vid_info = 
vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) { - err = __vlan_vid_add(vlan_info, vid, &vid_info); + err = __vlan_vid_add(vlan_info, proto, vid, &vid_info); if (err) goto out_free_vlan_info; } @@ -279,14 +298,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, { struct net_device *dev = vlan_info->real_dev; const struct net_device_ops *ops = dev->netdev_ops; - unsigned short vid = vid_info->vid; + __be16 proto = vid_info->proto; + u16 vid = vid_info->vid; int err; - if (dev->features & NETIF_F_HW_VLAN_FILTER) { - err = ops->ndo_vlan_rx_kill_vid(dev, vid); + if (vlan_hw_filter_capable(dev, vid_info)) { + err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); if (err) { - pr_warn("failed to kill vid %d for device %s\n", - vid, dev->name); + pr_warn("failed to kill vid %04x/%d for device %s\n", + proto, vid, dev->name); } } list_del(&vid_info->list); @@ -294,7 +314,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, vlan_info->nr_vids--; } -void vlan_vid_del(struct net_device *dev, unsigned short vid) +void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; @@ -305,7 +325,7 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid) if (!vlan_info) return; - vid_info = vlan_vid_info_get(vlan_info, vid); + vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) return; vid_info->refcount--; @@ -333,7 +353,7 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { - err = vlan_vid_add(dev, vid_info->vid); + err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; } @@ -343,7 +363,7 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { - vlan_vid_del(dev, vid_info->vid); + vlan_vid_del(dev, vid_info->proto, vid_info->vid); } return err; @@ -363,7 +383,7 @@ void vlan_vids_del_by_dev(struct net_device *dev, return; 
list_for_each_entry(vid_info, &vlan_info->vid_list, list) - vlan_vid_del(dev, vid_info->vid); + vlan_vid_del(dev, vid_info->proto, vid_info->vid); } EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 19cf81bf9f69..8af508536d36 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -99,6 +99,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, const void *daddr, const void *saddr, unsigned int len) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_hdr *vhdr; unsigned int vhdrlen = 0; u16 vlan_tci = 0; @@ -120,8 +121,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, else vhdr->h_vlan_encapsulated_proto = htons(len); - skb->protocol = htons(ETH_P_8021Q); - type = ETH_P_8021Q; + skb->protocol = vlan->vlan_proto; + type = ntohs(vlan->vlan_proto); vhdrlen = VLAN_HLEN; } @@ -161,12 +162,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... 
*/ - if (veth->h_vlan_proto != htons(ETH_P_8021Q) || + if (veth->h_vlan_proto != vlan->vlan_proto || vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_hwaccel_put_tag(skb, vlan_tci); + skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci); } skb->dev = vlan->real_dev; @@ -583,7 +584,7 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_TX) { + if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c index 6f9755352760..66a80320b032 100644 --- a/net/8021q/vlan_gvrp.c +++ b/net/8021q/vlan_gvrp.c @@ -32,6 +32,8 @@ int vlan_gvrp_request_join(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return 0; return garp_request_join(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } @@ -41,6 +43,8 @@ void vlan_gvrp_request_leave(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return; garp_request_leave(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c index d9ec1d5964aa..e0fe091801b0 100644 --- a/net/8021q/vlan_mvrp.c +++ b/net/8021q/vlan_mvrp.c @@ -38,6 +38,8 @@ int vlan_mvrp_request_join(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return 0; return mrp_request_join(vlan->real_dev, &vlan_mrp_app, &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); } @@ -47,6 
+49,8 @@ void vlan_mvrp_request_leave(const struct net_device *dev) const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); + if (vlan->vlan_proto != htons(ETH_P_8021Q)) + return; mrp_request_leave(vlan->real_dev, &vlan_mrp_app, &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); } diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 1789658b7cd7..309129732285 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = { [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) }, [IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED }, [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED }, + [IFLA_VLAN_PROTOCOL] = { .type = NLA_U16 }, }; static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = { @@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (!data) return -EINVAL; + if (data[IFLA_VLAN_PROTOCOL]) { + switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) { + case __constant_htons(ETH_P_8021Q): + case __constant_htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } + if (data[IFLA_VLAN_ID]) { id = nla_get_u16(data[IFLA_VLAN_ID]); if (id >= VLAN_VID_MASK) @@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev; + __be16 proto; int err; if (!data[IFLA_VLAN_ID]) @@ -118,11 +130,17 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, if (!real_dev) return -ENODEV; - vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); - vlan->real_dev = real_dev; - vlan->flags = VLAN_FLAG_REORDER_HDR; + if (data[IFLA_VLAN_PROTOCOL]) + proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]); + else + proto = htons(ETH_P_8021Q); + + vlan->vlan_proto = proto; + vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); + vlan->real_dev = real_dev; + vlan->flags = VLAN_FLAG_REORDER_HDR; 
- err = vlan_check_real_dev(real_dev, vlan->vlan_id); + err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id); if (err < 0) return err; @@ -151,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - return nla_total_size(2) + /* IFLA_VLAN_ID */ + return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ + nla_total_size(2) + /* IFLA_VLAN_ID */ sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); @@ -166,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) struct nlattr *nest; unsigned int i; - if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id)) + if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) || + nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id)) goto nla_put_failure; if (vlan->flags) { f.flags = vlan->flags; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 74dea377fe5b..de2e950a0a7a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -655,7 +655,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, - //.zc_request = p9_virtio_zc_request, + .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, /* * We leave one entry for input and one entry for response diff --git a/net/Kconfig b/net/Kconfig index 6f676ab885be..1a2221630e6a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -23,6 +23,15 @@ menuconfig NET if NET +config NETLINK_MMAP + bool "Netlink: mmaped IO" + help + This option enables support for memory mapped netlink IO. This + reduces overhead by avoiding copying data between kernel- and + userspace. + + If unsure, say N. 
+ config WANT_COMPAT_NETLINK_MESSAGES bool help @@ -217,6 +226,7 @@ source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" +source "net/netlink/Kconfig" config RPS boolean diff --git a/net/atm/common.c b/net/atm/common.c index 7b491006eaf4..737bef59ce89 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,6 +531,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; + msg->msg_namelen = 0; + if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/atm/lec.h b/net/atm/lec.h index a86aff9a3c04..4149db1b7885 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -58,7 +58,7 @@ struct lane2_ops { * field in h_type field. Data follows immediately after header. * 2. LLC Data frames whose total length, including LLC field and data, * but not padding required to meet the minimum data frame length, - * is less than 1536(0x0600) MUST be encoded by placing that length + * is less than ETH_P_802_3_MIN MUST be encoded by placing that length * in the h_type field. The LLC field follows header immediately. * 3. LLC data frames longer than this maximum MUST be encoded by placing * the value 0 in the h_type field. diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 7b11f8bc5071..e277e38f736b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1642,6 +1642,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, ax25_address src; const unsigned char *mac = skb_mac_header(skb); + memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, &digi, NULL, NULL); sax->sax25_family = AF_AX25; diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 8d8afb134b3a..fa780b76630e 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -36,6 +36,20 @@ config BATMAN_ADV_DAT mesh networks. 
If you think that your network does not need this option you can safely remove it and save some space. +config BATMAN_ADV_NC + bool "Network Coding" + depends on BATMAN_ADV + default n + help + This option enables network coding, a mechanism that aims to + increase the overall network throughput by fusing multiple + packets in one transmission. + Note that interfaces controlled by batman-adv must be manually + configured to have promiscuous mode enabled in order to make + network coding work. + If you think that your network does not need this feature you + can safely disable it and save some space. + config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index e45e3b4e32e3..acbac2a9c62f 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # @@ -30,6 +30,7 @@ batman-adv-y += hard-interface.o batman-adv-y += hash.o batman-adv-y += icmp_socket.o batman-adv-y += main.o +batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o batman-adv-y += ring_buffer.o batman-adv-y += routing.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a0b253ecadaf..071f288b77a8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -27,6 +27,7 @@ #include "hard-interface.h" #include "send.h" #include "bat_algo.h" +#include "network-coding.h" static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, @@ -1185,6 +1186,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (!orig_neigh_node) goto out; + /* Update nc_nodes of the originator */ + batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node, + batadv_ogm_packet, is_single_hop_neigh); + orig_neigh_router = 
batadv_orig_node_get_router(orig_neigh_node); /* drop packet if sender is not a direct neighbor and if we @@ -1288,7 +1293,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; /* unpack the aggregated packets and process them one by one */ - do { + while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, + batadv_ogm_packet->tt_num_changes)) { tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff, @@ -1299,8 +1305,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, packet_pos = packet_buff + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tt_num_changes)); + } kfree_skb(skb); return NET_RX_SUCCESS; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6a4f728680ae..379061c72549 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -341,7 +341,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, } if (vid != -1) - skb = vlan_insert_tag(skb, vid); + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 6ae86516db4d..f186a55b23c3 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -32,6 +32,7 @@ #include "icmp_socket.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" +#include "network-coding.h" static struct dentry *batadv_debugfs; @@ -310,6 +311,14 @@ struct batadv_debuginfo { const struct file_operations fops; }; +#ifdef CONFIG_BATMAN_ADV_NC +static int batadv_nc_nodes_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, 
batadv_nc_nodes_seq_print_text, net_dev); +} +#endif + #define BATADV_DEBUGINFO(_name, _mode, _open) \ struct batadv_debuginfo batadv_debuginfo_##_name = { \ .attr = { .name = __stringify(_name), \ @@ -348,6 +357,9 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); +#ifdef CONFIG_BATMAN_ADV_NC +static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); +#endif static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_originators, @@ -362,6 +374,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { #endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, +#ifdef CONFIG_BATMAN_ADV_NC + &batadv_debuginfo_nc_nodes, +#endif NULL, }; @@ -431,6 +446,9 @@ int batadv_debugfs_add_meshif(struct net_device *dev) } } + if (batadv_nc_init_debugfs(bat_priv) < 0) + goto rem_attr; + return 0; rem_attr: debugfs_remove_recursive(bat_priv->debug_dir); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index d54188a112ea..8e15d966d9b0 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -816,7 +816,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; - struct batadv_hard_iface *primary_if = NULL; if (!atomic_read(&bat_priv->distributed_arp_table)) goto out; @@ -838,22 +837,18 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); if (dat_entry) { - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - primary_if->soft_iface, ip_dst, hw_src, + bat_priv->soft_iface, ip_dst, hw_src, dat_entry->mac_addr, 
hw_src); if (!skb_new) goto out; skb_reset_mac_header(skb_new); skb_new->protocol = eth_type_trans(skb_new, - primary_if->soft_iface); + bat_priv->soft_iface); bat_priv->stats.rx_packets++; bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; - primary_if->soft_iface->last_rx = jiffies; + bat_priv->soft_iface->last_rx = jiffies; netif_rx(skb_new); batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); @@ -866,8 +861,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, out: if (dat_entry) batadv_dat_entry_free_ref(dat_entry); - if (primary_if) - batadv_hardif_free_ref(primary_if); return ret; } @@ -887,7 +880,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, __be32 ip_src, ip_dst; uint8_t *hw_src; struct sk_buff *skb_new; - struct batadv_hard_iface *primary_if = NULL; struct batadv_dat_entry *dat_entry = NULL; bool ret = false; int err; @@ -912,12 +904,8 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!dat_entry) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - primary_if->soft_iface, ip_dst, hw_src, + bat_priv->soft_iface, ip_dst, hw_src, dat_entry->mac_addr, hw_src); if (!skb_new) @@ -941,8 +929,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, out: if (dat_entry) batadv_dat_entry_free_ref(dat_entry); - if (primary_if) - batadv_hardif_free_ref(primary_if); if (ret) kfree_skb(skb); return ret; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 34f99a46ec1d..f105219f4a4b 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -500,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) rcu_read_unlock(); if (gw_count == 0) - seq_printf(seq, "No gateways in range ...\n"); + seq_puts(seq, "No gateways in range ...\n"); out: if (primary_if) diff --git 
a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 368219e026a9..522243aff2f3 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); } +/** + * batadv_master_del_slave - remove hard_iface from the current master interface + * @slave: the interface enslaved in another master + * @master: the master from which slave has to be removed + * + * Invoke ndo_del_slave on master passing slave as argument. In this way slave + * is free'd and master can correctly change its internal state. + * Return 0 on success, a negative value representing the error otherwise + */ +static int batadv_master_del_slave(struct batadv_hard_iface *slave, + struct net_device *master) +{ + int ret; + + if (!master) + return 0; + + ret = -EBUSY; + if (master->netdev_ops->ndo_del_slave) + ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev); + + return ret; +} + int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, const char *iface_name) { struct batadv_priv *bat_priv; - struct net_device *soft_iface; + struct net_device *soft_iface, *master; __be16 ethertype = __constant_htons(ETH_P_BATMAN); int ret; @@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (!atomic_inc_not_zero(&hard_iface->refcount)) goto out; - /* hard-interface is part of a bridge */ - if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT) - pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. 
Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n", - hard_iface->net_dev->name); - soft_iface = dev_get_by_name(&init_net, iface_name); if (!soft_iface) { @@ -347,12 +366,24 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, goto err_dev; } + /* check if the interface is enslaved in another virtual one and + * in that case unlink it first + */ + master = netdev_master_upper_dev_get(hard_iface->net_dev); + ret = batadv_master_del_slave(hard_iface, master); + if (ret) + goto err_dev; + hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + if (ret) + goto err_dev; + ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface); if (ret < 0) - goto err_dev; + goto err_upper; hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; @@ -362,7 +393,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); bat_priv->num_ifaces--; hard_iface->if_status = BATADV_IF_NOT_IN_USE; - goto err_dev; + goto err_upper; } hard_iface->batman_adv_ptype.type = ethertype; @@ -401,14 +432,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, out: return 0; +err_upper: + netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface); err_dev: + hard_iface->soft_iface = NULL; dev_put(soft_iface); err: batadv_hardif_free_ref(hard_iface); return ret; } -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, + enum batadv_hard_if_cleanup autodel) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; @@ -446,9 +481,10 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) dev_put(hard_iface->soft_iface); /* nobody uses this 
interface anymore */ - if (!bat_priv->num_ifaces) - batadv_softif_destroy(hard_iface->soft_iface); + if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO) + batadv_softif_destroy_sysfs(hard_iface->soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); hard_iface->soft_iface = NULL; batadv_hardif_free_ref(hard_iface); @@ -533,7 +569,8 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; @@ -563,6 +600,11 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; + if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { + batadv_sysfs_add_meshif(net_dev); + return NOTIFY_DONE; + } + hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && event == NETDEV_REGISTER) hard_iface = batadv_hardif_add_interface(net_dev); diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 308437d52e22..49892881a7c5 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -29,13 +29,24 @@ enum batadv_hard_if_state { BATADV_IF_I_WANT_YOU, }; +/** + * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal + * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface + * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed + */ +enum batadv_hard_if_cleanup { + BATADV_IF_CLEANUP_KEEP, + BATADV_IF_CLEANUP_AUTO, +}; + extern struct notifier_block batadv_hard_if_notifier; struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, const char 
*iface_name); -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface); +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, + enum batadv_hard_if_cleanup autodel); void batadv_hardif_remove_interfaces(void); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 0488d70c8c35..3e30a0f1b908 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -35,6 +35,7 @@ #include "vis.h" #include "hash.h" #include "bat_algo.h" +#include "network-coding.h" /* List manipulations on hardif_list have to be rtnl_lock()'ed, @@ -70,6 +71,7 @@ static int __init batadv_init(void) batadv_debugfs_init(); register_netdevice_notifier(&batadv_hard_if_notifier); + rtnl_link_register(&batadv_link_ops); pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n", BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); @@ -80,6 +82,7 @@ static int __init batadv_init(void) static void __exit batadv_exit(void) { batadv_debugfs_destroy(); + rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); batadv_hardif_remove_interfaces(); @@ -135,6 +138,10 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; + ret = batadv_nc_init(bat_priv); + if (ret < 0) + goto err; + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); @@ -157,6 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_gw_node_purge(bat_priv); batadv_originator_free(bat_priv); + batadv_nc_free(bat_priv); batadv_tt_free(bat_priv); @@ -169,7 +177,13 @@ void batadv_mesh_free(struct net_device *soft_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -int batadv_is_my_mac(const uint8_t *addr) +/** + * batadv_is_my_mac - check if the given mac address belongs to any of the real + * interfaces in the current mesh + * @bat_priv: the 
bat priv with all the soft interface information + * @addr: the address to check + */ +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -178,6 +192,9 @@ int batadv_is_my_mac(const uint8_t *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; @@ -411,7 +428,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; - seq_printf(seq, "Available routing algorithms:\n"); + seq_puts(seq, "Available routing algorithms:\n"); hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { seq_printf(seq, "%s\n", bat_algo_ops->name); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ced08b936a96..59a0d6af15c8 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.1.0" +#define BATADV_SOURCE_VERSION "2013.2.0" #endif /* B.A.T.M.A.N. 
parameters */ @@ -105,6 +105,8 @@ #define BATADV_RESET_PROTECTION_MS 30000 #define BATADV_EXPECTED_SEQNO_RANGE 65536 +#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */ + enum batadv_mesh_state { BATADV_MESH_INACTIVE, BATADV_MESH_ACTIVE, @@ -150,6 +152,7 @@ enum batadv_uev_type { #include <linux/percpu.h> #include <linux/slab.h> #include <net/sock.h> /* struct sock */ +#include <net/rtnetlink.h> #include <linux/jiffies.h> #include <linux/seq_file.h> #include "types.h" @@ -162,7 +165,7 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -int batadv_is_my_mac(const uint8_t *addr); +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, @@ -185,6 +188,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr); * @BATADV_DBG_TT: translation table messages * @BATADV_DBG_BLA: bridge loop avoidance messages * @BATADV_DBG_DAT: ARP snooping and DAT related messages + * @BATADV_DBG_NC: network coding related messages * @BATADV_DBG_ALL: the union of all the above log levels */ enum batadv_dbg_level { @@ -193,7 +197,8 @@ enum batadv_dbg_level { BATADV_DBG_TT = BIT(2), BATADV_DBG_BLA = BIT(3), BATADV_DBG_DAT = BIT(4), - BATADV_DBG_ALL = 31, + BATADV_DBG_NC = BIT(5), + BATADV_DBG_ALL = 63, }; #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -298,4 +303,10 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv, return sum; } +/* Define a macro to reach the control buffer of the skb. The members of the + * control buffer are defined in struct batadv_skb_cb in types.h. + * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h. 
+ */ +#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c new file mode 100644 index 000000000000..f7c54305a918 --- /dev/null +++ b/net/batman-adv/network-coding.c @@ -0,0 +1,1822 @@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/debugfs.h> + +#include "main.h" +#include "hash.h" +#include "network-coding.h" +#include "send.h" +#include "originator.h" +#include "hard-interface.h" +#include "routing.h" + +static struct lock_class_key batadv_nc_coding_hash_lock_class_key; +static struct lock_class_key batadv_nc_decoding_hash_lock_class_key; + +static void batadv_nc_worker(struct work_struct *work); +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); + +/** + * batadv_nc_start_timer - initialise the nc periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_start_timer(struct batadv_priv *bat_priv) +{ + queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work, + msecs_to_jiffies(10)); +} + +/** + * batadv_nc_init - initialise coding hash table and start house keeping + * @bat_priv: the 
bat priv with all the soft interface information + * + * Returns 0 on success or if a hash table is already set up, -ENOMEM otherwise. + */ +int batadv_nc_init(struct batadv_priv *bat_priv) +{ + bat_priv->nc.timestamp_fwd_flush = jiffies; + bat_priv->nc.timestamp_sniffed_purge = jiffies; + + if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash) + return 0; + + bat_priv->nc.coding_hash = batadv_hash_new(128); + if (!bat_priv->nc.coding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + &batadv_nc_coding_hash_lock_class_key); + + bat_priv->nc.decoding_hash = batadv_hash_new(128); + if (!bat_priv->nc.decoding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, + &batadv_nc_decoding_hash_lock_class_key); + + /* Register our packet type */ + if (batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet) < 0) + goto err; + + INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); + batadv_nc_start_timer(bat_priv); + + return 0; + +err: + return -ENOMEM; +} + +/** + * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ + atomic_set(&bat_priv->network_coding, 1); + bat_priv->nc.min_tq = 200; + bat_priv->nc.max_fwd_delay = 10; + bat_priv->nc.max_buffer_time = 200; +} + +/** + * batadv_nc_init_orig - initialise the nc fields of an orig_node + * @orig_node: the orig_node which is going to be initialised + */ +void batadv_nc_init_orig(struct batadv_orig_node *orig_node) +{ + INIT_LIST_HEAD(&orig_node->in_coding_list); + INIT_LIST_HEAD(&orig_node->out_coding_list); + spin_lock_init(&orig_node->in_coding_list_lock); + spin_lock_init(&orig_node->out_coding_list_lock); +} + +/** + * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove + * its refcount on the orig_node + * @rcu: rcu pointer of the nc node + */ +static void batadv_nc_node_free_rcu(struct rcu_head *rcu) +{ + struct batadv_nc_node *nc_node; + + nc_node =
container_of(rcu, struct batadv_nc_node, rcu); + batadv_orig_node_free_ref(nc_node->orig_node); + kfree(nc_node); +} + +/** + * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly + * frees it + * @nc_node: the nc node to free + */ +static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) +{ + if (atomic_dec_and_test(&nc_node->refcount)) + call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); +} + +/** + * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly + * frees it + * @nc_path: the nc path to free + */ +static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) +{ + if (atomic_dec_and_test(&nc_path->refcount)) + kfree_rcu(nc_path, rcu); +} + +/** + * batadv_nc_packet_free - frees nc packet + * @nc_packet: the nc packet to free + */ +static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) +{ + if (nc_packet->skb) + kfree_skb(nc_packet->skb); + + batadv_nc_path_free_ref(nc_packet->nc_path); + kfree(nc_packet); +} + +/** + * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged + * @bat_priv: the bat priv with all the soft interface information + * @nc_node: the nc node to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, + struct batadv_nc_node *nc_node) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT); +} + +/** + * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /*
purge the path when no packets has been added for 10 times the + * max_fwd_delay time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_fwd_delay * 10); +} + +/** + * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /* purge the path when no packets has been added for 10 times the + * max_buffer time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_buffer_time*10); +} + +/** + * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale + * entries + * @bat_priv: the bat priv with all the soft interface information + * @list: list of nc nodes + * @lock: nc node list lock + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. 
This function takes the nc node as argument and has to return + * a boolean value: true if the entry has to be deleted, false + * otherwise + */ +static void +batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, + struct list_head *list, + spinlock_t *lock, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + struct batadv_nc_node *nc_node, *nc_node_tmp; + + /* For each nc_node in list */ + spin_lock_bh(lock); + list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_node)) + continue; + + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Removing nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + list_del_rcu(&nc_node->list); + batadv_nc_node_free_ref(nc_node); + } + spin_unlock_bh(lock); +} + +/** + * batadv_nc_purge_orig - purges all nc node data attached of the given + * originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig_node with the nc node entries to be purged + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. 
This function takes the nc node as argument and has to return + * a boolean value: true is the entry has to be deleted, false + * otherwise + */ +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + /* Check ingoing nc_node's of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list, + &orig_node->in_coding_list_lock, + to_purge); + + /* Check outgoing nc_node's of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list, + &orig_node->out_coding_list_lock, + to_purge); +} + +/** + * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they + * have timed out nc nodes + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + uint32_t i; + + if (!hash) + return; + + /* For each orig_node */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) + batadv_nc_purge_orig(bat_priv, orig_node, + batadv_nc_to_purge_nc_node); + rcu_read_unlock(); + } +} + +/** + * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove + * unused ones + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc paths to check + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. 
This function takes the nc node as argument and has to return + * a boolean value: true is the entry has to be deleted, false + * otherwise + */ +static void batadv_nc_purge_paths(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_path *)) +{ + struct hlist_head *head; + struct hlist_node *node_tmp; + struct batadv_nc_path *nc_path; + spinlock_t *lock; /* Protects lists in hash */ + uint32_t i; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + lock = &hash->list_locks[i]; + + /* For each nc_path in this bin */ + spin_lock_bh(lock); + hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_path)) + continue; + + /* purging an non-empty nc_path should never happen, but + * is observed under high CPU load. Delay the purging + * until next iteration to allow the packet_list to be + * emptied first. 
+ */ + if (unlikely(!list_empty(&nc_path->packet_list))) { + net_ratelimited_function(printk, + KERN_WARNING + "Skipping free of non-empty nc_path (%pM -> %pM)!\n", + nc_path->prev_hop, + nc_path->next_hop); + continue; + } + + /* nc_path is unused, so remove it */ + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Remove nc_path %pM -> %pM\n", + nc_path->prev_hop, nc_path->next_hop); + hlist_del_rcu(&nc_path->hash_entry); + batadv_nc_path_free_ref(nc_path); + } + spin_unlock_bh(lock); + } +} + +/** + * batadv_nc_hash_key_gen - computes the nc_path hash key + * @key: buffer to hold the final hash key + * @src: source ethernet mac address going into the hash key + * @dst: destination ethernet mac address going into the hash key + */ +static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, + const char *dst) +{ + memcpy(key->prev_hop, src, sizeof(key->prev_hop)); + memcpy(key->next_hop, dst, sizeof(key->next_hop)); +} + +/** + * batadv_nc_hash_choose - compute the hash value for an nc path + * @data: data to hash + * @size: size of the hash table + * + * Returns the selected index in the hash table for the given data.
+ */ +static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size) +{ + const struct batadv_nc_path *nc_path = data; + uint32_t hash = 0; + + hash = batadv_hash_bytes(hash, &nc_path->prev_hop, + sizeof(nc_path->prev_hop)); + hash = batadv_hash_bytes(hash, &nc_path->next_hop, + sizeof(nc_path->next_hop)); + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +/** + * batadv_nc_hash_compare - comparing function used in the network coding hash + * tables + * @node: node in the local table + * @data2: second object to compare the node to + * + * Returns 1 if the two entry are the same, 0 otherwise + */ +static int batadv_nc_hash_compare(const struct hlist_node *node, + const void *data2) +{ + const struct batadv_nc_path *nc_path1, *nc_path2; + + nc_path1 = container_of(node, struct batadv_nc_path, hash_entry); + nc_path2 = data2; + + /* Return 1 if the two keys are identical */ + if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop, + sizeof(nc_path1->prev_hop)) != 0) + return 0; + + if (memcmp(nc_path1->next_hop, nc_path2->next_hop, + sizeof(nc_path1->next_hop)) != 0) + return 0; + + return 1; +} + +/** + * batadv_nc_hash_find - search for an existing nc path and return it + * @hash: hash table containing the nc path + * @data: search key + * + * Returns the nc_path if found, NULL otherwise. 
+ */ +static struct batadv_nc_path * +batadv_nc_hash_find(struct batadv_hashtable *hash, + void *data) +{ + struct hlist_head *head; + struct batadv_nc_path *nc_path, *nc_path_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = batadv_nc_hash_choose(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, head, hash_entry) { + if (!batadv_nc_hash_compare(&nc_path->hash_entry, data)) + continue; + + if (!atomic_inc_not_zero(&nc_path->refcount)) + continue; + + nc_path_tmp = nc_path; + break; + } + rcu_read_unlock(); + + return nc_path_tmp; +} + +/** + * batadv_nc_send_packet - send non-coded packet and free nc_packet struct + * @nc_packet: the nc packet to send + */ +static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) +{ + batadv_send_skb_packet(nc_packet->skb, + nc_packet->neigh_node->if_incoming, + nc_packet->nc_path->next_hop); + nc_packet->skb = NULL; + batadv_nc_packet_free(nc_packet); +} + +/** + * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet. + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path the packet belongs to + * @nc_packet: the nc packet to be checked + * + * Checks whether the given sniffed (overheard) nc_packet has hit its buffering + * timeout. If so, the packet is no longer kept and the entry deleted from the + * queue. Has to be called with the appropriate locks. + * + * Returns false as soon as the entry in the fifo queue has not been timed out + * yet and true otherwise. 
+ */ +static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path, + struct batadv_nc_packet *nc_packet) +{ + unsigned long timeout = bat_priv->nc.max_buffer_time; + bool res = false; + + /* Packets are added to tail, so the remaining packets did not time + * out and we can stop processing the current queue + */ + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && + !batadv_has_timed_out(nc_packet->timestamp, timeout)) + goto out; + + /* purge nc packet */ + list_del(&nc_packet->list); + batadv_nc_packet_free(nc_packet); + + res = true; + +out: + return res; +} + +/** + * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet. + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path the packet belongs to + * @nc_packet: the nc packet to be checked + * + * Checks whether the given nc packet has hit its forward timeout. If so, the + * packet is no longer delayed, immediately sent and the entry deleted from the + * queue. Has to be called with the appropriate locks. + * + * Returns false as soon as the entry in the fifo queue has not been timed out + * yet and true otherwise. 
+ */ +static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path, + struct batadv_nc_packet *nc_packet) +{ + unsigned long timeout = bat_priv->nc.max_fwd_delay; + + /* Packets are added to tail, so the remaining packets did not time + * out and we can stop processing the current queue + */ + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && + !batadv_has_timed_out(nc_packet->timestamp, timeout)) + return false; + + /* Send packet */ + batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); + batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, + nc_packet->skb->len + ETH_HLEN); + list_del(&nc_packet->list); + batadv_nc_send_packet(nc_packet); + + return true; +} + +/** + * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed out + * nc packets + * @bat_priv: the bat priv with all the soft interface information + * @hash: to be processed hash table + * @process_fn: Function called to process given nc packet. Should return true + * to encourage this function to proceed with the next packet. + * Otherwise the rest of the current queue is skipped. 
+ */ +static void +batadv_nc_process_nc_paths(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + bool (*process_fn)(struct batadv_priv *, + struct batadv_nc_path *, + struct batadv_nc_packet *)) +{ + struct hlist_head *head; + struct batadv_nc_packet *nc_packet, *nc_packet_tmp; + struct batadv_nc_path *nc_path; + bool ret; + int i; + + if (!hash) + return; + + /* Loop hash table bins */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + /* Loop coding paths */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, head, hash_entry) { + /* Loop packets */ + spin_lock_bh(&nc_path->packet_list_lock); + list_for_each_entry_safe(nc_packet, nc_packet_tmp, + &nc_path->packet_list, list) { + ret = process_fn(bat_priv, nc_path, nc_packet); + if (!ret) + break; + } + spin_unlock_bh(&nc_path->packet_list_lock); + } + rcu_read_unlock(); + } +} + +/** + * batadv_nc_worker - periodic task for house keeping related to network coding + * @work: kernel work struct + */ +static void batadv_nc_worker(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_priv_nc *priv_nc; + struct batadv_priv *bat_priv; + unsigned long timeout; + + delayed_work = container_of(work, struct delayed_work, work); + priv_nc = container_of(delayed_work, struct batadv_priv_nc, work); + bat_priv = container_of(priv_nc, struct batadv_priv, nc); + + batadv_nc_purge_orig_hash(bat_priv); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, + batadv_nc_to_purge_nc_path_coding); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, + batadv_nc_to_purge_nc_path_decoding); + + timeout = bat_priv->nc.max_fwd_delay; + + if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) { + batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash, + batadv_nc_fwd_flush); + bat_priv->nc.timestamp_fwd_flush = jiffies; + } + + if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge, + bat_priv->nc.max_buffer_time)) { + 
batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash, + batadv_nc_sniffed_purge); + bat_priv->nc.timestamp_sniffed_purge = jiffies; + } + + /* Schedule a new check */ + batadv_nc_start_timer(bat_priv); +} + +/** + * batadv_can_nc_with_orig - checks whether the given orig node is suitable for + * coding or not + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: neighboring orig node which may be used as nc candidate + * @ogm_packet: incoming ogm packet also used for the checks + * + * Returns true if: + * 1) The OGM must have the most recent sequence number. + * 2) The TTL must be decremented by one and only one. + * 3) The OGM must be received from the first hop from orig_node. + * 4) The TQ value of the OGM must be above bat_priv->nc.min_tq. + */ +static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_ogm_packet *ogm_packet) +{ + if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) + return false; + if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + return false; + if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) + return false; + if (ogm_packet->tq < bat_priv->nc.min_tq) + return false; + + return true; +} + +/** + * batadv_nc_find_nc_node - search for an existing nc node and return it + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found, NULL otherwise. 
+ */ +static struct batadv_nc_node +*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node, *nc_node_out = NULL; + struct list_head *list; + + if (in_coding) + list = &orig_neigh_node->in_coding_list; + else + list = &orig_neigh_node->out_coding_list; + + /* Traverse list of nc_nodes to orig_node */ + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, list, list) { + if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) + continue; + + if (!atomic_inc_not_zero(&nc_node->refcount)) + continue; + + /* Found a match */ + nc_node_out = nc_node; + break; + } + rcu_read_unlock(); + + return nc_node_out; +} + +/** + * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was + * not found + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found or created, NULL in case of an error. 
+ */ +static struct batadv_nc_node +*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node; + spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ + struct list_head *list; + + /* Check if nc_node is already added */ + nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); + + /* Node found */ + if (nc_node) + return nc_node; + + nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); + if (!nc_node) + return NULL; + + if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) + goto free; + + /* Initialize nc_node */ + INIT_LIST_HEAD(&nc_node->list); + memcpy(nc_node->addr, orig_node->orig, ETH_ALEN); + nc_node->orig_node = orig_neigh_node; + atomic_set(&nc_node->refcount, 2); + + /* Select ingoing or outgoing coding node */ + if (in_coding) { + lock = &orig_neigh_node->in_coding_list_lock; + list = &orig_neigh_node->in_coding_list; + } else { + lock = &orig_neigh_node->out_coding_list_lock; + list = &orig_neigh_node->out_coding_list; + } + + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + + /* Add nc_node to orig_node */ + spin_lock_bh(lock); + list_add_tail_rcu(&nc_node->list, list); + spin_unlock_bh(lock); + + return nc_node; + +free: + kfree(nc_node); + return NULL; +} + +/** + * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs + * (best called on incoming OGMs) + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @ogm_packet: incoming ogm packet + * @is_single_hop_neigh: orig_node is a single hop neighbor + */ +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct 
batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh) +{ + struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* accept ogms from 'good' neighbors and single hop neighbors */ + if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) && + !is_single_hop_neigh) + goto out; + + /* Add orig_node as in_nc_node on hop */ + in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node, + orig_neigh_node, true); + if (!in_nc_node) + goto out; + + in_nc_node->last_seen = jiffies; + + /* Add hop as out_nc_node on orig_node */ + out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node, + orig_node, false); + if (!out_nc_node) + goto out; + + out_nc_node->last_seen = jiffies; + +out: + if (in_nc_node) + batadv_nc_node_free_ref(in_nc_node); + if (out_nc_node) + batadv_nc_node_free_ref(out_nc_node); +} + +/** + * batadv_nc_get_path - get existing nc_path or allocate a new one + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc path + * @src: ethernet source address - first half of the nc path search key + * @dst: ethernet destination address - second half of the nc path search key + * + * Returns pointer to nc_path if the path was found or created, returns NULL + * on error. 
+ */ +static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + uint8_t *src, + uint8_t *dst) +{ + int hash_added; + struct batadv_nc_path *nc_path, nc_path_key; + + batadv_nc_hash_key_gen(&nc_path_key, src, dst); + + /* Search for existing nc_path */ + nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key); + + if (nc_path) { + /* Set timestamp to delay removal of nc_path */ + nc_path->last_valid = jiffies; + return nc_path; + } + + /* No existing nc_path was found; create a new */ + nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC); + + if (!nc_path) + return NULL; + + /* Initialize nc_path */ + INIT_LIST_HEAD(&nc_path->packet_list); + spin_lock_init(&nc_path->packet_list_lock); + atomic_set(&nc_path->refcount, 2); + nc_path->last_valid = jiffies; + memcpy(nc_path->next_hop, dst, ETH_ALEN); + memcpy(nc_path->prev_hop, src, ETH_ALEN); + + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n", + nc_path->prev_hop, + nc_path->next_hop); + + /* Add nc_path to hash table */ + hash_added = batadv_hash_add(hash, batadv_nc_hash_compare, + batadv_nc_hash_choose, &nc_path_key, + &nc_path->hash_entry); + + if (hash_added < 0) { + kfree(nc_path); + return NULL; + } + + return nc_path; +} + +/** + * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair + * selection of a receiver with slightly lower TQ than the other + * @tq: to be weighted tq value + * + * Returns the randomly weighted tq value. + */ +static uint8_t batadv_nc_random_weight_tq(uint8_t tq) +{ + uint8_t rand_val; + unsigned int rand_tq; + + get_random_bytes(&rand_val, sizeof(rand_val)); + + /* randomize the estimated packet loss (max TQ - estimated TQ); the + * product may not fit into a uint8_t, so it is kept in an unsigned + * int until it has been normalized + */ + rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq); + + /* normalize the randomized packet loss */ + rand_tq /= BATADV_TQ_MAX_VALUE; + + /* convert to (randomized) estimated tq again */ + return BATADV_TQ_MAX_VALUE - rand_tq; +} + +/** + * batadv_nc_memxor - XOR destination with source + * @dst: byte array to XOR into + * @src: byte
array to XOR from + * @len: length of destination array + */ +static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len; ++i) + dst[i] ^= src[i]; +} + +/** + * batadv_nc_code_packets - code a received unicast_packet with an nc packet + * into a coded_packet and send it + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to forward + * @ethhdr: pointer to the ethernet header inside the skb + * @nc_packet: structure containing the packet to the skb can be coded with + * @neigh_node: next hop to forward packet to + * + * Returns true if both packets are consumed, false otherwise. + */ +static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct ethhdr *ethhdr, + struct batadv_nc_packet *nc_packet, + struct batadv_neigh_node *neigh_node) +{ + uint8_t tq_weighted_neigh, tq_weighted_coding; + struct sk_buff *skb_dest, *skb_src; + struct batadv_unicast_packet *packet1; + struct batadv_unicast_packet *packet2; + struct batadv_coded_packet *coded_packet; + struct batadv_neigh_node *neigh_tmp, *router_neigh; + struct batadv_neigh_node *router_coding = NULL; + uint8_t *first_source, *first_dest, *second_source, *second_dest; + __be32 packet_id1, packet_id2; + size_t count; + bool res = false; + int coding_len; + int unicast_size = sizeof(*packet1); + int coded_size = sizeof(*coded_packet); + int header_add = coded_size - unicast_size; + + router_neigh = batadv_orig_node_get_router(neigh_node->orig_node); + if (!router_neigh) + goto out; + + neigh_tmp = nc_packet->neigh_node; + router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node); + if (!router_coding) + goto out; + + tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg); + tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg); + + /* Select one destination for the MAC-header dst-field based on + * weighted TQ-values. 
+ */ + if (tq_weighted_neigh >= tq_weighted_coding) { + /* Destination from nc_packet is selected for MAC-header */ + first_dest = nc_packet->nc_path->next_hop; + first_source = nc_packet->nc_path->prev_hop; + second_dest = neigh_node->addr; + second_source = ethhdr->h_source; + packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet2 = (struct batadv_unicast_packet *)skb->data; + packet_id1 = nc_packet->packet_id; + packet_id2 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet2)); + } else { + /* Destination for skb is selected for MAC-header */ + first_dest = neigh_node->addr; + first_source = ethhdr->h_source; + second_dest = nc_packet->nc_path->next_hop; + second_source = nc_packet->nc_path->prev_hop; + packet1 = (struct batadv_unicast_packet *)skb->data; + packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet_id1 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet1)); + packet_id2 = nc_packet->packet_id; + } + + /* Instead of zero padding the smallest data buffer, we + * code into the largest. 
+ */ + if (skb->len <= nc_packet->skb->len) { + skb_dest = nc_packet->skb; + skb_src = skb; + } else { + skb_dest = skb; + skb_src = nc_packet->skb; + } + + /* coding_len is used when decoding the shorter packet */ + coding_len = skb_src->len - unicast_size; + + if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0) + goto out; + + skb_push(skb_dest, header_add); + + coded_packet = (struct batadv_coded_packet *)skb_dest->data; + skb_reset_mac_header(skb_dest); + + coded_packet->header.packet_type = BATADV_CODED; + coded_packet->header.version = BATADV_COMPAT_VERSION; + coded_packet->header.ttl = packet1->header.ttl; + + /* Info about first unicast packet */ + memcpy(coded_packet->first_source, first_source, ETH_ALEN); + memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN); + coded_packet->first_crc = packet_id1; + coded_packet->first_ttvn = packet1->ttvn; + + /* Info about second unicast packet */ + memcpy(coded_packet->second_dest, second_dest, ETH_ALEN); + memcpy(coded_packet->second_source, second_source, ETH_ALEN); + memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); + coded_packet->second_crc = packet_id2; + coded_packet->second_ttl = packet2->header.ttl; + coded_packet->second_ttvn = packet2->ttvn; + coded_packet->coded_len = htons(coding_len); + + /* This is where the magic happens: Code skb_src into skb_dest */ + batadv_nc_memxor(skb_dest->data + coded_size, + skb_src->data + unicast_size, coding_len); + + /* Update counters accordingly */ + if (BATADV_SKB_CB(skb_src)->decoded && + BATADV_SKB_CB(skb_dest)->decoded) { + /* Both packets are recoded */ + count = skb_src->len + ETH_HLEN; + count += skb_dest->len + ETH_HLEN; + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count); + } else if (!BATADV_SKB_CB(skb_src)->decoded && + !BATADV_SKB_CB(skb_dest)->decoded) { + /* Both packets are newly coded */ + count = skb_src->len + ETH_HLEN; + count +=
skb_dest->len + ETH_HLEN; + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count); + } else if (BATADV_SKB_CB(skb_src)->decoded && + !BATADV_SKB_CB(skb_dest)->decoded) { + /* skb_src recoded and skb_dest is newly coded */ + batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, + skb_src->len + ETH_HLEN); + batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, + skb_dest->len + ETH_HLEN); + } else if (!BATADV_SKB_CB(skb_src)->decoded && + BATADV_SKB_CB(skb_dest)->decoded) { + /* skb_src is newly coded and skb_dest is recoded */ + batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, + skb_src->len + ETH_HLEN); + batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, + skb_dest->len + ETH_HLEN); + } + + /* skb_src is now coded into skb_dest, so free it */ + kfree_skb(skb_src); + + /* avoid duplicate free of skb from nc_packet */ + nc_packet->skb = NULL; + batadv_nc_packet_free(nc_packet); + + /* Send the coded packet and return true */ + batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest); + res = true; +out: + if (router_neigh) + batadv_neigh_node_free_ref(router_neigh); + if (router_coding) + batadv_neigh_node_free_ref(router_coding); + return res; +} + +/** + * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst. + * @skb: data skb to forward + * @dst: destination mac address of the other skb to code with + * @src: source mac address of skb + * + * Whenever we network code a packet we have to check whether we received it in + * a network coded form. If so, we may not be able to use it for coding because + * some neighbors may also have received (overheard) the packet in the network + * coded form without being able to decode it. 
It is hard to know which of the + * neighboring nodes was able to decode the packet, therefore we can only + * re-code the packet if the source of the previous encoded packet is involved. + * Since the source encoded the packet we can be certain it has all necessary + * decode information. + * + * Returns true if coding of a decoded packet is allowed. + */ +static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, + uint8_t *dst, uint8_t *src) +{ + if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) + return false; + else + return true; +} + +/** + * batadv_nc_path_search - Find the coding path matching in_nc_node and + * out_nc_node to retrieve a buffered packet that can be used for coding. + * @bat_priv: the bat priv with all the soft interface information + * @in_nc_node: pointer to skb next hop's neighbor nc node + * @out_nc_node: pointer to skb source's neighbor nc node + * @skb: data skb to forward + * @eth_dst: next hop mac address of skb + * + * Returns true if coding of a decoded skb is allowed. 
+ */ +static struct batadv_nc_packet * +batadv_nc_path_search(struct batadv_priv *bat_priv, + struct batadv_nc_node *in_nc_node, + struct batadv_nc_node *out_nc_node, + struct sk_buff *skb, + uint8_t *eth_dst) +{ + struct batadv_nc_path *nc_path, nc_path_key; + struct batadv_nc_packet *nc_packet_out = NULL; + struct batadv_nc_packet *nc_packet, *nc_packet_tmp; + struct batadv_hashtable *hash = bat_priv->nc.coding_hash; + int idx; + + if (!hash) + return NULL; + + /* Create almost path key */ + batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr, + out_nc_node->addr); + idx = batadv_nc_hash_choose(&nc_path_key, hash->size); + + /* Check for coding opportunities in this nc_path */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) { + if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr)) + continue; + + if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr)) + continue; + + spin_lock_bh(&nc_path->packet_list_lock); + if (list_empty(&nc_path->packet_list)) { + spin_unlock_bh(&nc_path->packet_list_lock); + continue; + } + + list_for_each_entry_safe(nc_packet, nc_packet_tmp, + &nc_path->packet_list, list) { + if (!batadv_nc_skb_coding_possible(nc_packet->skb, + eth_dst, + in_nc_node->addr)) + continue; + + /* Coding opportunity is found! */ + list_del(&nc_packet->list); + nc_packet_out = nc_packet; + break; + } + + spin_unlock_bh(&nc_path->packet_list_lock); + break; + } + rcu_read_unlock(); + + return nc_packet_out; +} + +/** + * batadv_nc_skb_src_search - Loops through the list of neighoring nodes of the + * skb's sender (may be equal to the originator). + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to forward + * @eth_dst: next hop mac address of skb + * @eth_src: source mac address of skb + * @in_nc_node: pointer to skb next hop's neighbor nc node + * + * Returns an nc packet if a suitable coding packet was found, NULL otherwise. 
+ */ +static struct batadv_nc_packet * +batadv_nc_skb_src_search(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst, + uint8_t *eth_src, + struct batadv_nc_node *in_nc_node) +{ + struct batadv_orig_node *orig_node; + struct batadv_nc_node *out_nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + orig_node = batadv_orig_hash_find(bat_priv, eth_src); + if (!orig_node) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(out_nc_node, + &orig_node->out_coding_list, list) { + /* Check if the skb is decoded and if recoding is possible */ + if (!batadv_nc_skb_coding_possible(skb, + out_nc_node->addr, eth_src)) + continue; + + /* Search for an opportunity in this nc_path */ + nc_packet = batadv_nc_path_search(bat_priv, in_nc_node, + out_nc_node, skb, eth_dst); + if (nc_packet) + break; + } + rcu_read_unlock(); + + batadv_orig_node_free_ref(orig_node); + return nc_packet; +} + +/** + * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the + * unicast skb before it is stored for use in later decoding + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + * @eth_dst_new: new destination mac address of skb + */ +static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst_new) +{ + struct ethhdr *ethhdr; + + /* Copy skb header to change the mac header */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (!skb) + return; + + /* Set the mac header as if we actually sent the packet uncoded */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); + memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); + memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + /* Add the packet to the decoding packet pool */ + batadv_nc_skb_store_for_decoding(bat_priv, skb); + + /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free + * our ref + */ + 
kfree_skb(skb); +} + +/** + * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst. + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Loops through list of neighboring nodes the next hop has a good connection to + * (receives OGMs with a sufficient quality). We need to find a neighbor of our + * next hop that potentially sent a packet which our next hop also received + * (overheard) and has stored for later decoding. + * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +static bool batadv_nc_skb_dst_search(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_orig_node *orig_node = neigh_node->orig_node; + struct batadv_nc_node *nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) { + /* Search for coding opportunity with this in_nc_node */ + nc_packet = batadv_nc_skb_src_search(bat_priv, skb, + neigh_node->addr, + ethhdr->h_source, nc_node); + + /* Opportunity was found, so stop searching */ + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + return false; + + /* Save packets for later decoding */ + batadv_nc_skb_store_before_coding(bat_priv, skb, + neigh_node->addr); + batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb, + nc_packet->neigh_node->addr); + + /* Code and send packets */ + if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet, + neigh_node)) + return true; + + /* out of mem ? Coding failed - we have to free the buffered packet + * to avoid memleaks. The skb passed as argument will be dealt with + * by the calling function. 
+ */ + batadv_nc_send_packet(nc_packet); + return false; +} + +/** + * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding + * @skb: skb to add to path + * @nc_path: path to add skb to + * @neigh_node: next hop to forward packet to + * @packet_id: checksum to identify packet + * + * Returns true if the packet was buffered or false in case of an error. + */ +static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, + struct batadv_nc_path *nc_path, + struct batadv_neigh_node *neigh_node, + __be32 packet_id) +{ + struct batadv_nc_packet *nc_packet; + + nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC); + if (!nc_packet) + return false; + + /* Initialize nc_packet */ + nc_packet->timestamp = jiffies; + nc_packet->packet_id = packet_id; + nc_packet->skb = skb; + nc_packet->neigh_node = neigh_node; + nc_packet->nc_path = nc_path; + + /* Add coding packet to list */ + spin_lock_bh(&nc_path->packet_list_lock); + list_add_tail(&nc_packet->list, &nc_path->packet_list); + spin_unlock_bh(&nc_path->packet_list_lock); + + return true; +} + +/** + * batadv_nc_skb_forward - try to code a packet or add it to the coding packet + * buffer + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + const struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* We only handle unicast packets */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if 
(packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Try to find a coding opportunity and send the skb if one is found */ + if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr)) + return true; + + /* Find or create a nc_path for this src-dst pair */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.coding_hash, + ethhdr->h_source, + neigh_node->addr); + + if (!nc_path) + goto out; + + /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id)) + goto free_nc_path; + + /* Packet is consumed */ + return true; + +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + /* Packet is not consumed */ + return false; +} + +/** + * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used + * when decoding coded packets + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + */ +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* Check for supported packet type */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if (packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Find existing nc_path or create a new */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.decoding_hash, + ethhdr->h_source, + ethhdr->h_dest); + + if (!nc_path) + goto out; + + /* Clone skb and adjust skb->data to point at batman header */ + skb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto free_nc_path; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto free_skb; + + if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN))) + goto free_skb; + 
+ /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id)) + goto free_skb; + + batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER); + return; + +free_skb: + kfree_skb(skb); +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + return; +} + +/** + * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet + * should be saved in the decoding buffer and, if so, store it there + * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast skb to store + */ +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + + if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) + return; + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + batadv_nc_skb_store_for_decoding(bat_priv, skb); +} + +/** + * batadv_nc_skb_decode_packet - decode given skb using the decode data stored + * in nc_packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast skb to decode + * @nc_packet: decode data needed to decode the skb + * + * Returns pointer to decoded unicast packet if the packet was decoded or NULL + * in case of an error. 
+ */ +static struct batadv_unicast_packet * +batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, + struct batadv_nc_packet *nc_packet) +{ + const int h_size = sizeof(struct batadv_unicast_packet); + const int h_diff = sizeof(struct batadv_coded_packet) - h_size; + struct batadv_unicast_packet *unicast_packet; + struct batadv_coded_packet coded_packet_tmp; + struct ethhdr *ethhdr, ethhdr_tmp; + uint8_t *orig_dest, ttl, ttvn; + unsigned int coding_len; + + /* Save headers temporarily */ + memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp)); + memcpy(&ethhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp)); + + if (skb_cow(skb, 0) < 0) + return NULL; + + if (unlikely(!skb_pull_rcsum(skb, h_diff))) + return NULL; + + /* Data points to batman header, so set mac header 14 bytes before + * and network to data + */ + skb_set_mac_header(skb, -ETH_HLEN); + skb_reset_network_header(skb); + + /* Reconstruct original mac header */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); + memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr)); + + /* Select the correct unicast header information based on the location + * of our mac address in the coded_packet header + */ + if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) { + /* If we are the second destination the packet was overheard, + * so the Ethernet address must be copied to h_dest and + * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST + */ + memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN); + skb->pkt_type = PACKET_HOST; + + orig_dest = coded_packet_tmp.second_orig_dest; + ttl = coded_packet_tmp.second_ttl; + ttvn = coded_packet_tmp.second_ttvn; + } else { + orig_dest = coded_packet_tmp.first_orig_dest; + ttl = coded_packet_tmp.header.ttl; + ttvn = coded_packet_tmp.first_ttvn; + } + + coding_len = ntohs(coded_packet_tmp.coded_len); + + if (coding_len > skb->len) + return NULL; + + /* Here the magic is reversed: + * extract the missing packet from the received coded packet + */ 
+ batadv_nc_memxor(skb->data + h_size, + nc_packet->skb->data + h_size, + coding_len); + + /* Resize decoded skb if decoded with larger packet */ + if (nc_packet->skb->len > coding_len + h_size) + pskb_trim_rcsum(skb, coding_len + h_size); + + /* Create decoded unicast packet */ + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_packet->header.packet_type = BATADV_UNICAST; + unicast_packet->header.version = BATADV_COMPAT_VERSION; + unicast_packet->header.ttl = ttl; + memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); + unicast_packet->ttvn = ttvn; + + batadv_nc_packet_free(nc_packet); + return unicast_packet; +} + +/** + * batadv_nc_find_decoding_packet - search through buffered decoding data to + * find the data needed to decode the coded packet + * @bat_priv: the bat priv with all the soft interface information + * @ethhdr: pointer to the ethernet header inside the coded packet + * @coded: coded packet we try to find decode data for + * + * Returns pointer to nc packet if the needed data was found or NULL otherwise. 
+ */ +static struct batadv_nc_packet * +batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, + struct ethhdr *ethhdr, + struct batadv_coded_packet *coded) +{ + struct batadv_hashtable *hash = bat_priv->nc.decoding_hash; + struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL; + struct batadv_nc_path *nc_path, nc_path_key; + uint8_t *dest, *source; + __be32 packet_id; + int index; + + if (!hash) + return NULL; + + /* Select the correct packet id based on the location of our mac-addr */ + dest = ethhdr->h_source; + if (!batadv_is_my_mac(bat_priv, coded->second_dest)) { + source = coded->second_source; + packet_id = coded->second_crc; + } else { + source = coded->first_source; + packet_id = coded->first_crc; + } + + batadv_nc_hash_key_gen(&nc_path_key, source, dest); + index = batadv_nc_hash_choose(&nc_path_key, hash->size); + + /* Search for matching coding path */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) { + /* Find matching nc_packet */ + spin_lock_bh(&nc_path->packet_list_lock); + list_for_each_entry(tmp_nc_packet, + &nc_path->packet_list, list) { + if (packet_id == tmp_nc_packet->packet_id) { + list_del(&tmp_nc_packet->list); + + nc_packet = tmp_nc_packet; + break; + } + } + spin_unlock_bh(&nc_path->packet_list_lock); + + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + batadv_dbg(BATADV_DBG_NC, bat_priv, + "No decoding packet found for %u\n", packet_id); + + return nc_packet; +} + +/** + * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the + * resulting unicast packet + * @skb: incoming coded packet + * @recv_if: pointer to interface this packet was received on + */ +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_unicast_packet *unicast_packet; + struct batadv_coded_packet *coded_packet; + struct batadv_nc_packet *nc_packet; + 
struct ethhdr *ethhdr; + int hdr_size = sizeof(*coded_packet); + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + return NET_RX_DROP; + + /* Make sure we can access (and remove) header */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return NET_RX_DROP; + + coded_packet = (struct batadv_coded_packet *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* Verify frame is destined for us */ + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && + !batadv_is_my_mac(bat_priv, coded_packet->second_dest)) + return NET_RX_DROP; + + /* Update stat counter */ + if (batadv_is_my_mac(bat_priv, coded_packet->second_dest)) + batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED); + + nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr, + coded_packet); + if (!nc_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + return NET_RX_DROP; + } + + /* Make skb's linear, because decoding accesses the entire buffer */ + if (skb_linearize(skb) < 0) + goto free_nc_packet; + + if (skb_linearize(nc_packet->skb) < 0) + goto free_nc_packet; + + /* Decode the packet */ + unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet); + if (!unicast_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + goto free_nc_packet; + } + + /* Mark packet as decoded to do correct recoding when forwarding */ + BATADV_SKB_CB(skb)->decoded = true; + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES, + skb->len + ETH_HLEN); + return batadv_recv_unicast_packet(skb, recv_if); + +free_nc_packet: + batadv_nc_packet_free(nc_packet); + return NET_RX_DROP; +} + +/** + * batadv_nc_free - clean up network coding memory + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_free(struct batadv_priv *bat_priv) +{ + batadv_recv_handler_unregister(BATADV_CODED); + cancel_delayed_work_sync(&bat_priv->nc.work); + + 
batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.coding_hash); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.decoding_hash); +} + +/** + * batadv_nc_nodes_seq_print_text - print the nc node information + * @seq: seq file to print on + * @offset: not used + */ +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct batadv_hard_iface *primary_if; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + struct batadv_nc_node *nc_node; + int i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + goto out; + + /* Traverse list of originators */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + /* For each orig_node in this bin */ + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + seq_printf(seq, "Node: %pM\n", orig_node->orig); + + seq_puts(seq, " Ingoing: "); + /* For each in_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->in_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n"); + + seq_puts(seq, " Outgoing: "); + /* For out_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->out_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n\n"); + } + rcu_read_unlock(); + } + +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return 0; +} + +/** + * batadv_nc_init_debugfs - create nc folder and related files in debugfs + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) +{ + struct dentry *nc_dir, *file; + + nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir); + if (!nc_dir) 
+ goto out; + + file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.min_tq); + if (!file) + goto out; + + file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_fwd_delay); + if (!file) + goto out; + + file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_buffer_time); + if (!file) + goto out; + + return 0; + +out: + return -ENOMEM; +} diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h new file mode 100644 index 000000000000..4fa6d0caddbd --- /dev/null +++ b/net/batman-adv/network-coding.h @@ -0,0 +1,123 @@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_ +#define _NET_BATMAN_ADV_NETWORK_CODING_H_ + +#ifdef CONFIG_BATMAN_ADV_NC + +int batadv_nc_init(struct batadv_priv *bat_priv); +void batadv_nc_free(struct batadv_priv *bat_priv); +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh); +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)); +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); +void batadv_nc_init_orig(struct batadv_orig_node *orig_node); +bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr); +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb); +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb); +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset); +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); + +#else /* ifdef CONFIG_BATMAN_ADV_NC */ + +static inline int batadv_nc_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_nc_free(struct batadv_priv *bat_priv) +{ + return; +} + +static inline void +batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh) +{ + return; +} + +static inline void +batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + 
struct batadv_nc_node *)) +{ + return; +} + +static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ + return; +} + +static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) +{ + return; +} + +static inline bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + return false; +} + +static inline void +batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return; +} + +static inline void +batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return; +} + +static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq, + void *offset) +{ + return 0; +} + +static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) +{ + return 0; +} + +#endif /* ifdef CONFIG_BATMAN_ADV_NC */ + +#endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 96fb80b724dc..2f3452546636 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -28,6 +28,7 @@ #include "unicast.h" #include "soft-interface.h" #include "bridge_loop_avoidance.h" +#include "network-coding.h" /* hash class keys */ static struct lock_class_key batadv_orig_hash_lock_class_key; @@ -142,6 +143,9 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) spin_unlock_bh(&orig_node->neigh_list_lock); + /* Free nc_nodes */ + batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); + batadv_frag_list_free(&orig_node->frag_list); batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, "originator timed out"); @@ -219,6 +223,8 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, spin_lock_init(&orig_node->neigh_list_lock); spin_lock_init(&orig_node->tt_buff_lock); + batadv_nc_init_orig(orig_node); + /* extra reference for return */ atomic_set(&orig_node->refcount, 2); @@ -459,7 +465,7 @@ int 
batadv_orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node_tmp->tq_avg); } - seq_printf(seq, "\n"); + seq_puts(seq, "\n"); batman_count++; next: @@ -469,7 +475,7 @@ next: } if (batman_count == 0) - seq_printf(seq, "No batman nodes in range ...\n"); + seq_puts(seq, "No batman nodes in range ...\n"); out: if (primary_if) diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index ed0aa89bbf8b..a51ccfc39da4 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -30,6 +30,7 @@ enum batadv_packettype { BATADV_TT_QUERY = 0x07, BATADV_ROAM_ADV = 0x08, BATADV_UNICAST_4ADDR = 0x09, + BATADV_CODED = 0x0a, }; /** @@ -278,4 +279,36 @@ struct batadv_tt_change { uint8_t addr[ETH_ALEN]; } __packed; +/** + * struct batadv_coded_packet - network coded packet + * @header: common batman packet header and ttl of first included packet + * @reserved: Align following fields to 2-byte boundaries + * @first_source: original source of first included packet + * @first_orig_dest: original destinal of first included packet + * @first_crc: checksum of first included packet + * @first_ttvn: tt-version number of first included packet + * @second_ttl: ttl of second packet + * @second_dest: second receiver of this coded packet + * @second_source: original source of second included packet + * @second_orig_dest: original destination of second included packet + * @second_crc: checksum of second included packet + * @second_ttvn: tt version number of second included packet + * @coded_len: length of network coded part of the payload + */ +struct batadv_coded_packet { + struct batadv_header header; + uint8_t first_ttvn; + /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ + uint8_t first_source[ETH_ALEN]; + uint8_t first_orig_dest[ETH_ALEN]; + __be32 first_crc; + uint8_t second_ttl; + uint8_t second_ttvn; + uint8_t second_dest[ETH_ALEN]; + uint8_t second_source[ETH_ALEN]; + uint8_t second_orig_dest[ETH_ALEN]; + __be32 second_crc; + __be16 
coded_len; +}; + #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 5ee21cebbbb0..2f1f88923df8 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -29,6 +29,7 @@ #include "unicast.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" +#include "network-coding.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -402,7 +403,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, goto out; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) goto out; icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; @@ -416,7 +417,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, } /* packet for me */ - if (batadv_is_my_mac(icmp_packet->dst)) + if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); /* TTL exceeded */ @@ -548,27 +549,39 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) +/** + * batadv_check_unicast_packet - Check for malformed unicast packets + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: size of header to pull + * + * Check for short header and bad addresses in given packet. Returns negative + * value when check fails and 0 otherwise. The negative value depends on the + * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, + * and -EREMOTE for non-local (other host) destination. 
+ */ +static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return -1; + return -ENODATA; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) - return -1; + return -EBADR; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return -1; + return -EBADR; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) - return -1; + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) + return -EREMOTE; return 0; } @@ -582,7 +595,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) char tt_flag; size_t packet_size; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; /* I could need to modify it */ @@ -614,7 +627,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) case BATADV_TT_RESPONSE: batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); - if (batadv_is_my_mac(tt_query->dst)) { + if (batadv_is_my_mac(bat_priv, tt_query->dst)) { /* packet needs to be linearized to access the TT * changes */ @@ -657,14 +670,15 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, + sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data; - if (!batadv_is_my_mac(roam_adv_packet->dst)) + if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst)) return batadv_route_unicast_packet(skb, recv_if); /* check 
if it is a backbone gateway. we don't accept @@ -850,14 +864,17 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet->header.ttl--; - /* Update stats counter */ - batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); - batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, - skb->len + ETH_HLEN); - - /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + /* network code packet if possible */ + if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { ret = NET_RX_SUCCESS; + } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { + ret = NET_RX_SUCCESS; + + /* Update stats counter */ + batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); + batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, + skb->len + ETH_HLEN); + } out: if (neigh_node) @@ -967,7 +984,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * last time) the packet had an updated information or not */ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - if (!batadv_is_my_mac(unicast_packet->dest)) { + if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); /* if it is not possible to find the orig_node representing the @@ -1033,7 +1050,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_unicast_4addr_packet *unicast_4addr_packet; uint8_t *orig_addr; struct batadv_orig_node *orig_node = NULL; - int hdr_size = sizeof(*unicast_packet); + int check, hdr_size = sizeof(*unicast_packet); bool is4addr; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -1044,14 +1061,23 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + /* function returns -EREMOTE for promiscuous packets */ + check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); + + /* Even though the packet is not for us, we might save it to use for + * 
decoding a later received coded packet + */ + if (check == -EREMOTE) + batadv_nc_skb_store_sniffed_unicast(bat_priv, skb); + + if (check < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) return NET_RX_DROP; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { if (is4addr) { batadv_dat_inc_counter(bat_priv, unicast_4addr_packet->subtype); @@ -1088,7 +1114,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, struct sk_buff *new_skb = NULL; int ret; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) @@ -1097,7 +1123,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -1151,13 +1177,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* ignore broadcasts sent by myself */ - if (batadv_is_my_mac(ethhdr->h_source)) + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto out; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ - if (batadv_is_my_mac(bcast_packet->orig)) + if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; if (bcast_packet->header.ttl < 2) @@ -1243,14 +1269,14 @@ int batadv_recv_vis_packet(struct sk_buff *skb, ethhdr = (struct ethhdr *)skb_mac_header(skb); /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return NET_RX_DROP; /* ignore own packets */ - if (batadv_is_my_mac(vis_packet->vis_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig)) return NET_RX_DROP; - if 
(batadv_is_my_mac(vis_packet->sender_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig)) return NET_RX_DROP; switch (vis_packet->vis_type) { diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index a67cffde37ae..263cfd1ccee7 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -27,6 +27,7 @@ #include "vis.h" #include "gateway_common.h" #include "originator.h" +#include "network-coding.h" #include <linux/if_ether.h> @@ -39,6 +40,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct ethhdr *ethhdr; if (hard_iface->if_status != BATADV_IF_ACTIVE) @@ -70,6 +72,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb->dev = hard_iface->net_dev; + /* Save a clone of the skb to use when decoding coded packets */ + batadv_nc_skb_store_for_decoding(bat_priv, skb); + /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. 
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2711e870f557..6f20d339e33a 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -37,6 +37,7 @@ #include <linux/if_ether.h> #include "unicast.h" #include "bridge_loop_avoidance.h" +#include "network-coding.h" static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); @@ -401,55 +402,6 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_init - Late stage initialization of soft interface - * @dev: registered network device to modify - * - * Returns error code on failures - */ -static int batadv_softif_init(struct net_device *dev) -{ - batadv_set_lockdep_class(dev); - - return 0; -} - -static const struct net_device_ops batadv_netdev_ops = { - .ndo_init = batadv_softif_init, - .ndo_open = batadv_interface_open, - .ndo_stop = batadv_interface_release, - .ndo_get_stats = batadv_interface_stats, - .ndo_set_mac_address = batadv_interface_set_mac_addr, - .ndo_change_mtu = batadv_interface_change_mtu, - .ndo_start_xmit = batadv_interface_tx, - .ndo_validate_addr = eth_validate_addr -}; - -static void batadv_interface_setup(struct net_device *dev) -{ - struct batadv_priv *priv = netdev_priv(dev); - - ether_setup(dev); - - dev->netdev_ops = &batadv_netdev_ops; - dev->destructor = free_netdev; - dev->tx_queue_len = 0; - - /* can't call min_mtu, because the needed variables - * have not been initialized yet - */ - dev->mtu = ETH_DATA_LEN; - /* reserve more space in the skbuff for our header */ - dev->hard_header_len = BATADV_HEADER_LEN; - - /* generate random address */ - eth_hw_addr_random(dev); - - SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); - - memset(priv, 0, sizeof(*priv)); -} - -/** * batadv_softif_destroy_finish - cleans up the remains of a softif * @work: work queue item * @@ -465,7 +417,6 @@ static void batadv_softif_destroy_finish(struct work_struct *work) cleanup_work); soft_iface = 
bat_priv->soft_iface; - batadv_debugfs_del_meshif(soft_iface); batadv_sysfs_del_meshif(soft_iface); rtnl_lock(); @@ -473,21 +424,22 @@ static void batadv_softif_destroy_finish(struct work_struct *work) rtnl_unlock(); } -struct net_device *batadv_softif_create(const char *name) +/** + * batadv_softif_init_late - late stage initialization of soft interface + * @dev: registered network device to modify + * + * Returns error code on failures + */ +static int batadv_softif_init_late(struct net_device *dev) { - struct net_device *soft_iface; struct batadv_priv *bat_priv; int ret; size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM; - soft_iface = alloc_netdev(sizeof(*bat_priv), name, - batadv_interface_setup); - - if (!soft_iface) - goto out; + batadv_set_lockdep_class(dev); - bat_priv = netdev_priv(soft_iface); - bat_priv->soft_iface = soft_iface; + bat_priv = netdev_priv(dev); + bat_priv->soft_iface = dev; INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as @@ -495,14 +447,7 @@ struct net_device *batadv_softif_create(const char *name) */ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); if (!bat_priv->bat_counters) - goto free_soft_iface; - - ret = register_netdevice(soft_iface); - if (ret < 0) { - pr_err("Unable to register the batman interface '%s': %i\n", - name, ret); - goto free_bat_counters; - } + return -ENOMEM; atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); @@ -540,49 +485,196 @@ struct net_device *batadv_softif_create(const char *name) bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; - ret = batadv_algo_select(bat_priv, batadv_routing_algo); - if (ret < 0) - goto unreg_soft_iface; + batadv_nc_init_bat_priv(bat_priv); - ret = batadv_sysfs_add_meshif(soft_iface); + ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) - goto unreg_soft_iface; + goto free_bat_counters; - ret = batadv_debugfs_add_meshif(soft_iface); 
+ ret = batadv_debugfs_add_meshif(dev); if (ret < 0) - goto unreg_sysfs; + goto free_bat_counters; - ret = batadv_mesh_init(soft_iface); + ret = batadv_mesh_init(dev); if (ret < 0) goto unreg_debugfs; - return soft_iface; + return 0; unreg_debugfs: - batadv_debugfs_del_meshif(soft_iface); -unreg_sysfs: - batadv_sysfs_del_meshif(soft_iface); -unreg_soft_iface: - free_percpu(bat_priv->bat_counters); - unregister_netdevice(soft_iface); - return NULL; - + batadv_debugfs_del_meshif(dev); free_bat_counters: free_percpu(bat_priv->bat_counters); -free_soft_iface: - free_netdev(soft_iface); + + return ret; +} + +/** + * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should become the slave interface + * + * Return 0 if successful or error otherwise. + */ +static int batadv_softif_slave_add(struct net_device *dev, + struct net_device *slave_dev) +{ + struct batadv_hard_iface *hard_iface; + int ret = -EINVAL; + + hard_iface = batadv_hardif_get_by_netdev(slave_dev); + if (!hard_iface || hard_iface->soft_iface != NULL) + goto out; + + ret = batadv_hardif_enable_interface(hard_iface, dev->name); + out: - return NULL; + if (hard_iface) + batadv_hardif_free_ref(hard_iface); + return ret; } -void batadv_softif_destroy(struct net_device *soft_iface) +/** + * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should be removed from the master interface + * + * Return 0 if successful or error otherwise. 
+ */ +static int batadv_softif_slave_del(struct net_device *dev, + struct net_device *slave_dev) +{ + struct batadv_hard_iface *hard_iface; + int ret = -EINVAL; + + hard_iface = batadv_hardif_get_by_netdev(slave_dev); + + if (!hard_iface || hard_iface->soft_iface != dev) + goto out; + + batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP); + ret = 0; + +out: + if (hard_iface) + batadv_hardif_free_ref(hard_iface); + return ret; +} + +static const struct net_device_ops batadv_netdev_ops = { + .ndo_init = batadv_softif_init_late, + .ndo_open = batadv_interface_open, + .ndo_stop = batadv_interface_release, + .ndo_get_stats = batadv_interface_stats, + .ndo_set_mac_address = batadv_interface_set_mac_addr, + .ndo_change_mtu = batadv_interface_change_mtu, + .ndo_start_xmit = batadv_interface_tx, + .ndo_validate_addr = eth_validate_addr, + .ndo_add_slave = batadv_softif_slave_add, + .ndo_del_slave = batadv_softif_slave_del, +}; + +/** + * batadv_softif_free - Deconstructor of batadv_soft_interface + * @dev: Device to cleanup and remove + */ +static void batadv_softif_free(struct net_device *dev) +{ + batadv_debugfs_del_meshif(dev); + batadv_mesh_free(dev); + + /* some scheduled RCU callbacks need the bat_priv struct to accomplish + * their tasks. 
Wait for them all to be finished before freeing the + * netdev and its private data (bat_priv) + */ + rcu_barrier(); + + free_netdev(dev); +} + +/** + * batadv_softif_init_early - early stage initialization of soft interface + * @dev: registered network device to modify + */ +static void batadv_softif_init_early(struct net_device *dev) +{ + struct batadv_priv *priv = netdev_priv(dev); + + ether_setup(dev); + + dev->netdev_ops = &batadv_netdev_ops; + dev->destructor = batadv_softif_free; + dev->tx_queue_len = 0; + + /* can't call min_mtu, because the needed variables + * have not been initialized yet + */ + dev->mtu = ETH_DATA_LEN; + /* reserve more space in the skbuff for our header */ + dev->hard_header_len = BATADV_HEADER_LEN; + + /* generate random address */ + eth_hw_addr_random(dev); + + SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + + memset(priv, 0, sizeof(*priv)); +} + +struct net_device *batadv_softif_create(const char *name) +{ + struct net_device *soft_iface; + int ret; + + soft_iface = alloc_netdev(sizeof(struct batadv_priv), name, + batadv_softif_init_early); + if (!soft_iface) + return NULL; + + soft_iface->rtnl_link_ops = &batadv_link_ops; + + ret = register_netdevice(soft_iface); + if (ret < 0) { + pr_err("Unable to register the batman interface '%s': %i\n", + name, ret); + free_netdev(soft_iface); + return NULL; + } + + return soft_iface; +} + +/** + * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs + * @soft_iface: the to-be-removed batman-adv interface + */ +void batadv_softif_destroy_sysfs(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); - batadv_mesh_free(soft_iface); queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); } +/** + * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink + * @soft_iface: the to-be-removed batman-adv interface + * @head: list pointer + */ +static void batadv_softif_destroy_netlink(struct net_device *soft_iface, + struct 
list_head *head) +{ + struct batadv_hard_iface *hard_iface; + + list_for_each_entry(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface == soft_iface) + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_KEEP); + } + + batadv_sysfs_del_meshif(soft_iface); + unregister_netdevice_queue(soft_iface, head); +} + int batadv_softif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) @@ -591,6 +683,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev) return 0; } +struct rtnl_link_ops batadv_link_ops __read_mostly = { + .kind = "batadv", + .priv_size = sizeof(struct batadv_priv), + .setup = batadv_softif_init_early, + .dellink = batadv_softif_destroy_netlink, +}; + /* ethtool */ static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -662,6 +761,17 @@ static const struct { { "dat_put_rx" }, { "dat_cached_reply_tx" }, #endif +#ifdef CONFIG_BATMAN_ADV_NC + { "nc_code" }, + { "nc_code_bytes" }, + { "nc_recode" }, + { "nc_recode_bytes" }, + { "nc_buffer" }, + { "nc_decode" }, + { "nc_decode_bytes" }, + { "nc_decode_failed" }, + { "nc_sniffed" }, +#endif }; static void batadv_get_strings(struct net_device *dev, uint32_t stringset, diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 43182e5e603a..2f2472c2ea0d 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -25,7 +25,8 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node); struct net_device *batadv_softif_create(const char *name); -void batadv_softif_destroy(struct net_device *soft_iface); +void batadv_softif_destroy_sysfs(struct net_device *soft_iface); int batadv_softif_is_valid(const struct net_device *net_dev); +extern struct rtnl_link_ops batadv_link_ops; #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git 
a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index afbba319d73a..15a22efa9a67 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -442,6 +442,9 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, #ifdef CONFIG_BATMAN_ADV_DEBUG BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); #endif +#ifdef CONFIG_BATMAN_ADV_NC +BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL); +#endif static struct batadv_attribute *batadv_mesh_attrs[] = { &batadv_attr_aggregated_ogms, @@ -464,6 +467,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_DEBUG &batadv_attr_log_level, #endif +#ifdef CONFIG_BATMAN_ADV_NC + &batadv_attr_network_coding, +#endif NULL, }; @@ -582,13 +588,15 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, } if (status_tmp == BATADV_IF_NOT_IN_USE) { - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); goto unlock; } /* if the interface already is in use */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); ret = batadv_hardif_enable_interface(hard_iface, buff); @@ -688,15 +696,10 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, enum batadv_uev_action action, const char *data) { int ret = -ENOMEM; - struct batadv_hard_iface *primary_if; struct kobject *bat_kobj; char *uevent_env[4] = { NULL, NULL, NULL, NULL }; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - bat_kobj = &primary_if->soft_iface->dev.kobj; + bat_kobj = &bat_priv->soft_iface->dev.kobj; uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) + strlen(batadv_uev_type_str[type]) + 1, @@ -732,9 +735,6 @@ out: kfree(uevent_env[1]); kfree(uevent_env[2]); - if (primary_if) - batadv_hardif_free_ref(primary_if); - if (ret) 
batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 98a66a021a60..5e89deeb9542 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -385,25 +385,19 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv, int *packet_buff_len, int min_packet_len) { - struct batadv_hard_iface *primary_if; int req_len; - primary_if = batadv_primary_if_get_selected(bat_priv); - req_len = min_packet_len; req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented */ - if ((!primary_if) || (req_len > primary_if->soft_iface->mtu)) + if (req_len > bat_priv->soft_iface->mtu) req_len = min_packet_len; batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, min_packet_len, req_len); - - if (primary_if) - batadv_hardif_free_ref(primary_if); } static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, @@ -908,7 +902,7 @@ out_remove: /* remove address from local hash if present */ local_flags = batadv_tt_local_remove(bat_priv, tt_addr, "global tt received", - !!(flags & BATADV_TT_CLIENT_ROAM)); + flags & BATADV_TT_CLIENT_ROAM); tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; if (!(flags & BATADV_TT_CLIENT_ROAM)) @@ -1580,7 +1574,7 @@ static int batadv_tt_global_valid(const void *entry_ptr, static struct sk_buff * batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, struct batadv_hashtable *hash, - struct batadv_hard_iface *primary_if, + struct batadv_priv *bat_priv, int (*valid_cb)(const void *, const void *), void *cb_data) { @@ -1594,8 +1588,8 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, uint32_t i; size_t len; - if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { - tt_len = 
primary_if->soft_iface->mtu - tt_query_size; + if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) { + tt_len = bat_priv->soft_iface->mtu - tt_query_size; tt_len -= tt_len % sizeof(struct batadv_tt_change); } tt_tot = tt_len / sizeof(struct batadv_tt_change); @@ -1715,7 +1709,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, { struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; - struct batadv_hard_iface *primary_if = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; int ret = false; unsigned char *tt_buff; @@ -1740,10 +1733,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, if (!res_dst_orig_node) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_request->ttvn; @@ -1791,7 +1780,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, skb = batadv_tt_response_fill_table(tt_len, ttvn, bat_priv->tt.global_hash, - primary_if, + bat_priv, batadv_tt_global_valid, req_dst_orig_node); if (!skb) @@ -1828,8 +1817,6 @@ out: batadv_orig_node_free_ref(res_dst_orig_node); if (req_dst_orig_node) batadv_orig_node_free_ref(req_dst_orig_node); - if (primary_if) - batadv_hardif_free_ref(primary_if); if (!ret) kfree_skb(skb); return ret; @@ -1907,7 +1894,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, skb = batadv_tt_response_fill_table(tt_len, ttvn, bat_priv->tt.local_hash, - primary_if, + bat_priv, batadv_tt_local_valid_entry, NULL); if (!skb) @@ -1953,7 +1940,7 @@ out: bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tt_query_packet *tt_request) { - if (batadv_is_my_mac(tt_request->dst)) { + if (batadv_is_my_mac(bat_priv, tt_request->dst)) { /* don't answer backbone gws! 
*/ if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src)) return true; @@ -2528,7 +2515,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, if (!tt_global_entry) goto out; - ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM); + ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; batadv_tt_global_entry_free_ref(tt_global_entry); out: return ret; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 4cd87a0b5b80..aba8364c3689 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -128,6 +128,10 @@ struct batadv_hard_iface { * @bond_list: list of bonding candidates * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner + * @in_coding_list: list of nodes this orig can hear + * @out_coding_list: list of nodes that can hear this orig + * @in_coding_list_lock: protects in_coding_list + * @out_coding_list_lock: protects out_coding_list */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; @@ -171,6 +175,12 @@ struct batadv_orig_node { struct list_head bond_list; atomic_t refcount; struct rcu_head rcu; +#ifdef CONFIG_BATMAN_ADV_NC + struct list_head in_coding_list; + struct list_head out_coding_list; + spinlock_t in_coding_list_lock; /* Protects in_coding_list */ + spinlock_t out_coding_list_lock; /* Protects out_coding_list */ +#endif }; /** @@ -265,6 +275,17 @@ struct batadv_bcast_duplist_entry { * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet * counter + * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter + * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter + * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter + * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter + * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding + * 
@BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter + * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter + * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet + * counter + * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc + * mode. * @BATADV_CNT_NUM: number of traffic counters */ enum batadv_counters { @@ -292,6 +313,17 @@ enum batadv_counters { BATADV_CNT_DAT_PUT_RX, BATADV_CNT_DAT_CACHED_REPLY_TX, #endif +#ifdef CONFIG_BATMAN_ADV_NC + BATADV_CNT_NC_CODE, + BATADV_CNT_NC_CODE_BYTES, + BATADV_CNT_NC_RECODE, + BATADV_CNT_NC_RECODE_BYTES, + BATADV_CNT_NC_BUFFER, + BATADV_CNT_NC_DECODE, + BATADV_CNT_NC_DECODE_BYTES, + BATADV_CNT_NC_DECODE_FAILED, + BATADV_CNT_NC_SNIFFED, +#endif BATADV_CNT_NUM, }; @@ -428,6 +460,35 @@ struct batadv_priv_dat { #endif /** + * struct batadv_priv_nc - per mesh interface network coding private data + * @work: work queue callback item for cleanup + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq + * @max_fwd_delay: maximum packet forward delay to allow coding of packets + * @max_buffer_time: buffer time for sniffed packets used to decoding + * @timestamp_fwd_flush: timestamp of last forward packet queue flush + * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge + * @coding_hash: Hash table used to buffer skbs while waiting for another + * incoming skb to code it with. Skbs are added to the buffer just before being + * forwarded in routing.c + * @decoding_hash: Hash table used to buffer skbs that might be needed to decode + * a received coded skb. The buffer is used for 1) skbs arriving on the + * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs + * forwarded by batman-adv. 
+ */ +struct batadv_priv_nc { + struct delayed_work work; + struct dentry *debug_dir; + u8 min_tq; + u32 max_fwd_delay; + u32 max_buffer_time; + unsigned long timestamp_fwd_flush; + unsigned long timestamp_sniffed_purge; + struct batadv_hashtable *coding_hash; + struct batadv_hashtable *decoding_hash; +}; + +/** * struct batadv_priv - per mesh interface data * @mesh_state: current status of the mesh (inactive/active/deactivating) * @soft_iface: net device which holds this struct as private data @@ -470,6 +531,8 @@ struct batadv_priv_dat { * @tt: translation table data * @vis: vis data * @dat: distributed arp table data + * @network_coding: bool indicating whether network coding is enabled + * @batadv_priv_nc: network coding data */ struct batadv_priv { atomic_t mesh_state; @@ -522,6 +585,10 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DAT struct batadv_priv_dat dat; #endif +#ifdef CONFIG_BATMAN_ADV_NC + atomic_t network_coding; + struct batadv_priv_nc nc; +#endif /* CONFIG_BATMAN_ADV_NC */ }; /** @@ -702,6 +769,75 @@ struct batadv_tt_roam_node { }; /** + * struct batadv_nc_node - network coding node + * @list: next and prev pointer for the list handling + * @addr: the node's mac address + * @refcount: number of contexts the object is used by + * @rcu: struct used for freeing in an RCU-safe manner + * @orig_node: pointer to corresponding orig node struct + * @last_seen: timestamp of last ogm received from this node + */ +struct batadv_nc_node { + struct list_head list; + uint8_t addr[ETH_ALEN]; + atomic_t refcount; + struct rcu_head rcu; + struct batadv_orig_node *orig_node; + unsigned long last_seen; +}; + +/** + * struct batadv_nc_path - network coding path + * @hash_entry: next and prev pointer for the list handling + * @rcu: struct used for freeing in an RCU-safe manner + * @refcount: number of contexts the object is used by + * @packet_list: list of buffered packets for this path + * @packet_list_lock: access lock for packet list + * @next_hop: next hop 
(destination) of path + * @prev_hop: previous hop (source) of path + * @last_valid: timestamp for last validation of path + */ +struct batadv_nc_path { + struct hlist_node hash_entry; + struct rcu_head rcu; + atomic_t refcount; + struct list_head packet_list; + spinlock_t packet_list_lock; /* Protects packet_list */ + uint8_t next_hop[ETH_ALEN]; + uint8_t prev_hop[ETH_ALEN]; + unsigned long last_valid; +}; + +/** + * struct batadv_nc_packet - network coding packet used when coding and + * decoding packets + * @list: next and prev pointer for the list handling + * @packet_id: crc32 checksum of skb data + * @timestamp: field containing the info when the packet was added to path + * @neigh_node: pointer to original next hop neighbor of skb + * @skb: skb which can be encoded or used for decoding + * @nc_path: pointer to path this nc packet is attached to + */ +struct batadv_nc_packet { + struct list_head list; + __be32 packet_id; + unsigned long timestamp; + struct batadv_neigh_node *neigh_node; + struct sk_buff *skb; + struct batadv_nc_path *nc_path; +}; + +/** + * batadv_skb_cb - control buffer structure used to store private data relevant + * to batman-adv in the skb->cb buffer in skbs. 
+ * @decoded: Marks a skb as decoded, which is checked when searching for coding + * opportunities in network-coding.c + */ +struct batadv_skb_cb { + bool decoded; +}; + +/** * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded * @list: list node for batadv_socket_client::queue_list * @send_time: execution time for delayed_work (packet sending) diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 50e079f00be6..0bb3b5982f94 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -122,7 +122,7 @@ batadv_frag_search_packet(struct list_head *head, { struct batadv_frag_packet_list_entry *tfp; struct batadv_unicast_frag_packet *tmp_up = NULL; - int is_head_tmp, is_head; + bool is_head_tmp, is_head; uint16_t search_seqno; if (up->flags & BATADV_UNI_FRAG_HEAD) @@ -130,7 +130,7 @@ batadv_frag_search_packet(struct list_head *head, else search_seqno = ntohs(up->seqno)-1; - is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); + is_head = up->flags & BATADV_UNI_FRAG_HEAD; list_for_each_entry(tfp, head, list) { if (!tfp->skb) @@ -142,7 +142,7 @@ batadv_frag_search_packet(struct list_head *head, tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data; if (tfp->seqno == search_seqno) { - is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD); + is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD; if (is_head_tmp != is_head) return tfp; else diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index c053244b97bd..1625e5793a89 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -149,7 +149,7 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq, hlist_for_each_entry(entry, if_list, list) { if (entry->primary) - seq_printf(seq, "PRIMARY, "); + seq_puts(seq, "PRIMARY, "); else seq_printf(seq, "SEC %pM, ", entry->addr); } @@ -207,7 +207,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, if (batadv_compare_eth(entry->addr, packet->vis_orig)) batadv_vis_data_read_prim_sec(seq, list); - 
seq_printf(seq, "\n"); + seq_puts(seq, "\n"); } } @@ -477,7 +477,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, /* Are we the target for this VIS packet? */ if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && - batadv_is_my_mac(vis_packet->target_orig)) + batadv_is_my_mac(bat_priv, vis_packet->target_orig)) are_target = 1; spin_lock_bh(&bat_priv->vis.hash_lock); @@ -496,7 +496,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_send_list_add(bat_priv, info); /* ... we're not the recipient (and thus need to forward). */ - } else if (!batadv_is_my_mac(packet->target_orig)) { + } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) { batadv_send_list_add(bat_priv, info); } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 81598e588f7f..e5338f787d68 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -221,6 +221,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -228,8 +230,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; @@ -413,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index e58c8b32589c..4b488ec26105 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb) struct ethhdr *eh = (void *) skb->data; u16 proto = ntohs(eh->h_proto); - if (proto >= 1536) + if (proto >= ETH_P_802_3_MIN) return proto; if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 3786ddc45152..a8638b58c4bf 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -608,6 +608,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); + msg->msg_namelen = 0; return 0; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9e62102443dc..373d81e6e8f0 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -700,6 +700,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { sco_conn_defer_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; + msg->msg_namelen = 0; release_sock(sk); return 0; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index d5f1d3fd4b28..967312803e41 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -66,7 +66,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } - mdst = br_mdb_get(br, skb); + mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else @@ -348,10 +348,10 @@ void br_dev_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX; + NETIF_F_NETNS_LOCAL | 
NETIF_F_HW_VLAN_CTAG_TX; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_TX; br->dev = dev; spin_lock_init(&br->lock); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b0812c91c0f0..c581f1200ef7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -161,9 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!pv) return; - for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid); - vid < BR_VLAN_BITMAP_LEN; - vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { + for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { f = __br_fdb_get(br, br->dev->dev_addr, vid); if (f && f->is_local && !f->dst) fdb_delete(br, f); @@ -423,7 +421,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } @@ -724,13 +722,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); - while (vid < BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); if (err) goto out; - vid = find_next_bit(pv->vlan_bitmap, - BR_VLAN_BITMAP_LEN, vid+1); } } @@ -815,11 +810,8 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], * vlan on this port. 
*/ err = -ENOENT; - vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); - while (vid < BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { err &= __br_fdb_delete(p, addr, vid); - vid = find_next_bit(pv->vlan_bitmap, - BR_VLAN_BITMAP_LEN, vid+1); } } out: diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ef1b91431c6b..4cdba60926ff 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -67,7 +67,8 @@ void br_port_carrier_check(struct net_bridge_port *p) struct net_device *dev = p->dev; struct net_bridge *br = p->br; - if (netif_running(dev) && netif_oper_up(dev)) + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); if (!netif_running(br->dev)) @@ -148,7 +149,6 @@ static void del_nbp(struct net_bridge_port *p) dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); - synchronize_net(); netdev_upper_dev_unlink(dev, br->dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 480330151898..828e2bcc1f52 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -97,7 +97,7 @@ int br_handle_frame_finish(struct sk_buff *skb) if (is_broadcast_ether_addr(dest)) skb2 = skb; else if (is_multicast_ether_addr(dest)) { - mdst = br_mdb_get(br, skb); + mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 9f97b850fc65..19942e38fd2d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -80,6 +80,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, port = p->port; if (port) { struct br_mdb_entry e; + memset(&e, 0, sizeof(e)); e.ifindex = port->dev->ifindex; e.state = p->state; if (p->addr.proto == htons(ETH_P_IP)) @@ -136,6 +137,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) break; bpm = nlmsg_data(nlh); + memset(bpm, 0, 
sizeof(*bpm)); bpm->ifindex = dev->ifindex; if (br_mdb_fill_info(skb, cb, dev) < 0) goto out; @@ -171,6 +173,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, return -EMSGSIZE; bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; nest = nla_nest_start(skb, MDBA_MDB); @@ -228,6 +231,7 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, { struct br_mdb_entry entry; + memset(&entry, 0, sizeof(entry)); entry.ifindex = port->dev->ifindex; entry.addr.proto = group->proto; entry.addr.u.ip4 = group->u.ip4; @@ -378,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, return ret; } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct br_mdb_entry *entry; @@ -454,7 +458,7 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net_device *dev; struct br_mdb_entry *entry; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 10e6fce1bb62..81f2389f78eb 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -132,7 +132,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get( #endif struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb); struct br_ip ip; @@ -144,6 +144,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return NULL; ip.proto = skb->protocol; + ip.vid = vid; switch (skb->protocol) { case htons(ETH_P_IP): @@ -1368,7 +1369,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, return -EINVAL; if (iph->protocol != IPPROTO_IGMP) { - if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) + if 
(!ipv4_is_local_multicast(iph->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fe43bc7b063f..1ed75bfd8d1d 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,8 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); + vlan = __vlan_find_dev_deep(br, skb->vlan_proto, + vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 27aa3ee517ce..8e3abf564798 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -29,6 +29,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + 0; } @@ -135,10 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; pvid = br_get_pvid(pv); - for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); - vid < BR_VLAN_BITMAP_LEN; - vid = find_next_bit(pv->vlan_bitmap, - BR_VLAN_BITMAP_LEN, vid+1)) { + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { vinfo.vid = vid; vinfo.flags = 0; if (vid == pvid) @@ -329,6 +327,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); @@ -353,17 +352,14 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) /* Change state and parameters 
on port. */ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { - struct ifinfomsg *ifm; struct nlattr *protinfo; struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err; - - ifm = nlmsg_data(nlh); + int err = 0; - protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!protinfo && !afspec) return 0; @@ -371,7 +367,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) /* We want to accept dev as bridge itself if the AF_SPEC * is set to see if someone is setting vlan info on the brigde */ - if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) + if (!p && !afspec) return -EINVAL; if (p && protinfo) { @@ -412,14 +408,11 @@ out: /* Delete port information */ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) { - struct ifinfomsg *ifm; struct nlattr *afspec; struct net_bridge_port *p; int err; - ifm = nlmsg_data(nlh); - - afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6d314c4e6bcb..d2c043a857b6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -156,6 +156,7 @@ struct net_bridge_port #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 +#define BR_ADMIN_COST 0x00000010 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -442,7 +443,7 @@ extern int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb); extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb); + struct sk_buff *skb, u16 vid); extern void br_multicast_add_port(struct 
net_bridge_port *port); extern void br_multicast_del_port(struct net_bridge_port *port); extern void br_multicast_enable_port(struct net_bridge_port *port); @@ -504,7 +505,7 @@ static inline int br_multicast_rcv(struct net_bridge *br, } static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { return NULL; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 0bdb4ebd362b..d45e760141bb 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -288,6 +288,7 @@ int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) path_cost > BR_MAX_PATH_COST) return -ERANGE; + p->flags |= BR_ADMIN_COST; p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 93dde75923f0..bd58b45f5f90 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -34,6 +34,7 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { + const struct net_device_ops *ops; struct net_bridge_port *p = NULL; struct net_bridge *br; struct net_device *dev; @@ -53,15 +54,17 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) br = v->parent.br; dev = br->dev; } + ops = dev->netdev_ops; - if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) { + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { /* Add VLAN to the device filter if it is supported. * Stricly speaking, this is not necessary now, since * devices are made promiscuous by the bridge, but if * that ever changes this code will allow tagged * traffic to enter the bridge. 
*/ - err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); + err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), + vid); if (err) return err; } @@ -82,8 +85,8 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) return 0; out_filt: - if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) - dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid); return err; } @@ -97,9 +100,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; + const struct net_device_ops *ops = dev->netdev_ops; - if (dev->features & NETIF_F_HW_VLAN_FILTER) - dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid); } clear_bit(vid, v->vlan_bitmap); @@ -171,7 +175,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, * mac header. */ skb_push(skb, ETH_HLEN); - skb = __vlan_put_tag(skb, skb->vlan_tci); + skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci); if (!skb) goto out; /* put skb->data back to where it was */ @@ -213,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* PVID is set on this port. Any untagged ingress * frame is considered to belong to this vlan. */ - __vlan_hwaccel_put_tag(skb, pvid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); return true; } diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 92de5e5f9db2..9878eb8204c5 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, const char *prefix) { unsigned int bitmask; + struct net *net = dev_net(in ? 
in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; spin_lock_bh(&ebt_log_lock); printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", @@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, + par->in, par->out, &li, "%s", info->prefix); else ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + par->out, &li, info->prefix); return EBT_CONTINUE; } @@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = { .me = THIS_MODULE, }; +static int __net_init ebt_log_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger); + return 0; +} + +static void __net_exit ebt_log_net_fini(struct net *net) +{ + nf_log_unset(net, &ebt_log_logger); +} + +static struct pernet_operations ebt_log_net_ops = { + .init = ebt_log_net_init, + .exit = ebt_log_net_fini, +}; + static int __init ebt_log_init(void) { int ret; + ret = register_pernet_subsys(&ebt_log_net_ops); + if (ret < 0) + goto err_pernet; + ret = xt_register_target(&ebt_log_tg_reg); if (ret < 0) - return ret; + goto err_target; + nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); - return 0; + + return ret; + +err_target: + unregister_pernet_subsys(&ebt_log_net_ops); +err_pernet: + return ret; } static void __exit ebt_log_fini(void) { + unregister_pernet_subsys(&ebt_log_net_ops); nf_log_unregister(&ebt_log_logger); xt_unregister_target(&ebt_log_tg_reg); } diff --git a/net/bridge/netfilter/ebt_nflog.c 
b/net/bridge/netfilter/ebt_nflog.c index 5be68bbcc341..59ac7952010d 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, - &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 3bf43f7bb9d4..fc1905c51417 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -35,12 +35,13 @@ #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include "../br_private.h" @@ -62,13 +63,22 @@ typedef struct { spinlock_t lock; /* the per-queue lock */ } ebt_ulog_buff_t; -static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; -static struct sock *ebtulognl; +static int ebt_ulog_net_id __read_mostly; +struct ebt_ulog_net { + unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS]; + ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; + struct sock *ebtulognl; +}; + +static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net) +{ + return net_generic(net, ebt_ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroup) +static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup) { - 
ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup]; del_timer(&ub->timer); @@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroup + 1; - netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); + netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; @@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - spin_lock_bh(&ulog_buffers[data].lock); - if (ulog_buffers[data].skb) - ulog_send(data); - spin_unlock_bh(&ulog_buffers[data].lock); + struct ebt_ulog_net *ebt = container_of((void *)data, + struct ebt_ulog_net, + nlgroup[*(unsigned int *)data]); + + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data]; + spin_lock_bh(&ub->lock); + if (ub->skb) + ulog_send(ebt, *(unsigned int *)data); + spin_unlock_bh(&ub->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, ebt_ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; + struct net *net = dev_net(in ? 
in : out); + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); unsigned int group = uloginfo->nlgroup; - ebt_ulog_buff_t *ub = &ulog_buffers[group]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group]; spinlock_t *lock = &ub->lock; ktime_t kt; @@ -134,7 +151,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, else copy_len = uloginfo->cprange; - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); if (size > nlbufsiz) { pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); return; @@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (!(ub->skb = ulog_alloc_skb(size))) goto unlock; } else if (size > skb_tailroom(ub->skb)) { - ulog_send(group); + ulog_send(ebt, group); if (!(ub->skb = ulog_alloc_skb(size))) goto unlock; @@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, ub->lastnlh = nlh; if (ub->qlen >= uloginfo->qthreshold) - ulog_send(group); + ulog_send(ebt, group); else if (!timer_pending(&ub->timer)) { ub->timer.expires = jiffies + flushtimeout * HZ / 100; add_timer(&ub->timer); @@ -277,56 +294,89 @@ static struct nf_logger ebt_ulog_logger __read_mostly = { .me = THIS_MODULE, }; -static int __init ebt_ulog_init(void) +static int __net_init ebt_ulog_net_init(struct net *net) { - int ret; int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); + struct netlink_kernel_cfg cfg = { .groups = EBT_ULOG_MAXNLGROUPS, }; - if (nlbufsiz >= 128*1024) { - pr_warning("Netlink buffer has to be <= 128kB," - " please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; - } - /* initialize ulog_buffers */ for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - spin_lock_init(&ulog_buffers[i].lock); + ebt->nlgroup[i] = i; + setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer, + (unsigned long)&ebt->nlgroup[i]); + spin_lock_init(&ebt->ulog_buffers[i].lock); } - 
ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); - if (!ebtulognl) - ret = -ENOMEM; - else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) - netlink_kernel_release(ebtulognl); + ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ebt->ebtulognl) + return -ENOMEM; - if (ret == 0) - nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); - - return ret; + nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger); + return 0; } -static void __exit ebt_ulog_fini(void) +static void __net_exit ebt_ulog_net_fini(struct net *net) { - ebt_ulog_buff_t *ub; int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - nf_log_unregister(&ebt_ulog_logger); - xt_unregister_target(&ebt_ulog_tg_reg); + nf_log_unset(net, &ebt_ulog_logger); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i]; del_timer(&ub->timer); - spin_lock_bh(&ub->lock); + if (ub->skb) { kfree_skb(ub->skb); ub->skb = NULL; } - spin_unlock_bh(&ub->lock); } - netlink_kernel_release(ebtulognl); + netlink_kernel_release(ebt->ebtulognl); +} + +static struct pernet_operations ebt_ulog_net_ops = { + .init = ebt_ulog_net_init, + .exit = ebt_ulog_net_fini, + .id = &ebt_ulog_net_id, + .size = sizeof(struct ebt_ulog_net), +}; + +static int __init ebt_ulog_init(void) +{ + int ret; + + if (nlbufsiz >= 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB," + "please try a smaller nlbufsiz parameter.\n"); + return -EINVAL; + } + + ret = register_pernet_subsys(&ebt_ulog_net_ops); + if (ret) + goto out_pernet; + + ret = xt_register_target(&ebt_ulog_tg_reg); + if (ret) + goto out_target; + + nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ebt_ulog_net_ops); +out_pernet: + return ret; +} + +static void __exit ebt_ulog_fini(void) +{ + nf_log_unregister(&ebt_ulog_logger); + xt_unregister_target(&ebt_ulog_tg_reg); + unregister_pernet_subsys(&ebt_ulog_net_ops); } 
module_init(ebt_ulog_init); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 40d8258bf74f..70f656ce0f4a 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { net->xt.broute_table = ebt_register_table(net, &broute_table); - if (IS_ERR(net->xt.broute_table)) - return PTR_ERR(net->xt.broute_table); - return 0; + return PTR_RET(net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d493c91a562..3d110c4fc787 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) + if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && FWINV2(e->ethproto != ethproto, EBT_IPROTO)) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 21760f008974..1f9ece1a9c34 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -1,7 +1,7 @@ /* * CAIF Interface registration. * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * * Borrowed heavily from file: pn_dev.c. 
Thanks to Remi Denis-Courmont @@ -301,10 +301,11 @@ static void dev_flowctrl(struct net_device *dev, int on) } void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, - struct cflayer *link_support, int head_room, - struct cflayer **layer, int (**rcv_func)( - struct sk_buff *, struct net_device *, - struct packet_type *, struct net_device *)) + struct cflayer *link_support, int head_room, + struct cflayer **layer, + int (**rcv_func)(struct sk_buff *, struct net_device *, + struct packet_type *, + struct net_device *)) { struct caif_device_entry *caifd; enum cfcnfg_phy_preference pref; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 095259f83902..05a41c7ec304 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -197,8 +197,8 @@ static void cfsk_put(struct cflayer *layr) /* Packet Control Callback function called from CAIF */ static void caif_ctrl_cb(struct cflayer *layr, - enum caif_ctrlcmd flow, - int phyid) + enum caif_ctrlcmd flow, + int phyid) { struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); switch (flow) { @@ -274,7 +274,7 @@ static void caif_check_flow_release(struct sock *sk) * changed locking, address handling and added MSG_TRUNC. */ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) + struct msghdr *m, size_t len, int flags) { struct sock *sk = sock->sk; @@ -286,6 +286,8 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; + m->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -346,8 +348,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) * changed locking calls, changed address handling. 
*/ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) + struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int copied = 0; @@ -462,7 +464,7 @@ out: * CAIF flow-on and sock_writable. */ static long caif_wait_for_flow_on(struct caifsock *cf_sk, - int wait_writeable, long timeo, int *err) + int wait_writeable, long timeo, int *err) { struct sock *sk = &cf_sk->sk; DEFINE_WAIT(wait); @@ -516,7 +518,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -591,7 +593,7 @@ err: * and other minor adaptations. */ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -670,7 +672,7 @@ out_err: } static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) + int lvl, int opt, char __user *ov, unsigned int ol) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -932,7 +934,7 @@ static int caif_release(struct socket *sock) /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ static unsigned int caif_poll(struct file *file, - struct socket *sock, poll_table *wait) + struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask; @@ -1022,7 +1024,7 @@ static void caif_sock_destructor(struct sock *sk) } static int caif_create(struct net *net, struct socket *sock, int protocol, - int kern) + int kern) { struct sock *sk = NULL; struct caifsock *cf_sk = NULL; diff --git 
a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ef8ebaa993cf..942e00a425fd 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -1,7 +1,7 @@ /* * CAIF USB handler * Copyright (C) ST-Ericsson AB 2011 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 * */ @@ -75,7 +75,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { if (layr->up && layr->up->ctrlcmd) layr->up->ctrlcmd(layr->up, ctrl, layr->id); @@ -121,7 +121,7 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *arg) { struct net_device *dev = arg; struct caif_dev_common common; diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index f1dbddb95a6c..fa39fc298708 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -61,11 +61,11 @@ struct cfcnfg { }; static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, - enum cfctrl_srv serv, u8 phyid, - struct cflayer *adapt_layer); + enum cfctrl_srv serv, u8 phyid, + struct cflayer *adapt_layer); static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *adapt_layer); + struct cflayer *adapt_layer); static void cfctrl_resp_func(void); static void cfctrl_enum_resp(void); @@ -131,7 +131,7 @@ static void cfctrl_resp_func(void) } static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, - u8 phyid) + u8 phyid) { struct cfcnfg_phyinfo *phy; @@ -216,8 +216,8 @@ static const int protohead[CFCTRL_SRV_MASK] = { 
static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, - struct caif_connect_request *s, - struct cfctrl_link_param *l) + struct caif_connect_request *s, + struct cfctrl_link_param *l) { struct dev_info *dev_info; enum cfcnfg_phy_preference pref; @@ -301,8 +301,7 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, struct cflayer *adap_layer, int *ifindex, - int *proto_head, - int *proto_tail) + int *proto_head, int *proto_tail) { struct cflayer *frml; struct cfcnfg_phyinfo *phy; @@ -364,7 +363,7 @@ unlock: EXPORT_SYMBOL(caif_connect_client); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *adapt_layer) + struct cflayer *adapt_layer) { if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) adapt_layer->ctrlcmd(adapt_layer, @@ -526,7 +525,7 @@ out_err: EXPORT_SYMBOL(cfcnfg_add_phy_layer); int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, - bool up) + bool up) { struct cfcnfg_phyinfo *phyinfo; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index a376ec1ac0a7..2bd4b58f4372 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -20,12 +20,12 @@ #ifdef CAIF_NO_LOOP static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt){ + int cmd, struct cfpkt *pkt){ return -1; } #else static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt); + int cmd, struct cfpkt *pkt); #endif static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, @@ -72,7 +72,7 @@ void cfctrl_remove(struct cflayer *layer) } static bool param_eq(const struct cfctrl_link_param *p1, - const struct cfctrl_link_param *p2) + const struct 
cfctrl_link_param *p2) { bool eq = p1->linktype == p2->linktype && @@ -197,8 +197,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) } int cfctrl_linkup_request(struct cflayer *layer, - struct cfctrl_link_param *param, - struct cflayer *user_layer) + struct cfctrl_link_param *param, + struct cflayer *user_layer) { struct cfctrl *cfctrl = container_obj(layer); u32 tmp32; @@ -301,7 +301,7 @@ int cfctrl_linkup_request(struct cflayer *layer, } int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, - struct cflayer *client) + struct cflayer *client) { int ret; struct cfpkt *pkt; @@ -555,7 +555,7 @@ error: } static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfctrl *this = container_obj(layr); switch (ctrl) { diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 2914659eb9b2..7aae0b56829e 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index a63f4a5f5aff..3bdddb32d55a 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 0a7df7ef062d..8bc7caa28e64 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -2,7 +2,7 @@ * CAIF Framing Layer. 
* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -28,7 +28,7 @@ struct cffrml { static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); static u32 cffrml_rcv_error; static u32 cffrml_rcv_checsum_error; @@ -167,7 +167,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { if (layr->up && layr->up->ctrlcmd) layr->up->ctrlcmd(layr->up, ctrl, layr->id); diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 94b08612a4d8..8c5d6386319f 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -42,7 +42,7 @@ struct cfmuxl { static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); struct cflayer *cfmuxl_create(void) @@ -244,7 +244,7 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfmuxl *muxl = container_obj(layr); struct cflayer *layer; diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 863dedd91bb6..6493351f39c6 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: 
Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -266,8 +266,8 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt) } inline u16 cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16, void *, u16), - u16 data) + u16 (*iter_func)(u16, void *, u16), + u16 data) { /* * Don't care about the performance hit of linearizing, @@ -307,8 +307,8 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len) } struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, - struct cfpkt *addpkt, - u16 expectlen) + struct cfpkt *addpkt, + u16 expectlen) { struct sk_buff *dst = pkt_to_skb(dstpkt); struct sk_buff *add = pkt_to_skb(addpkt); diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index 2b563ad04597..61d7617d9249 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -43,7 +43,7 @@ static void cfrfml_release(struct cflayer *layer) } struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, - int mtu_size) + int mtu_size) { int tmp; struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); @@ -69,7 +69,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, } static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, - struct cfpkt *pkt, int *err) + struct cfpkt *pkt, int *err) { struct cfpkt *tmppkt; *err = -EPROTO; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 8e68b97f13ee..ce60f06d76de 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -29,7 +29,7 @@ struct cfserl { static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); 
static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); struct cflayer *cfserl_create(int instance, bool use_stx) { @@ -182,7 +182,7 @@ static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt) } static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { layr->up->ctrlcmd(layr->up, ctrl, phyid); } diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index ba217e90765e..353f793d1b3b 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ @@ -25,7 +25,7 @@ #define container_obj(layr) container_of(layr, struct cfsrvl, layer) static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfsrvl *service = container_obj(layr); @@ -158,10 +158,9 @@ static void cfsrvl_release(struct cflayer *layer) } void cfsrvl_init(struct cfsrvl *service, - u8 channel_id, - struct dev_info *dev_info, - bool supports_flowctrl - ) + u8 channel_id, + struct dev_info *dev_info, + bool supports_flowctrl) { caif_assert(offsetof(struct cfsrvl, layer) == 0); service->open = false; @@ -207,8 +206,8 @@ void caif_free_client(struct cflayer *adap_layer) EXPORT_SYMBOL(caif_free_client); void caif_client_register_refcnt(struct cflayer *adapt_layer, - void (*hold)(struct cflayer *lyr), - void (*put)(struct cflayer *lyr)) + void (*hold)(struct cflayer *lyr), + void (*put)(struct cflayer *lyr)) { struct cfsrvl *service; diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 86d2dadb4b73..1728fa4471cf 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur 
Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 910ab0661f66..262224581efa 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index a8e2a2d758a5..b3b110e8a350 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * Author: Sjur Brendeland * License terms: GNU General Public License (GPL) version 2 */ diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index e597733affb8..7344a8fa1bb0 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -1,7 +1,7 @@ /* * Copyright (C) ST-Ericsson AB 2010 - * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com - * Daniel Martensson / Daniel.Martensson@stericsson.com + * Authors: Sjur Brendeland + * Daniel Martensson * License terms: GNU General Public License (GPL) version 2 */ @@ -167,7 +167,7 @@ static void chnl_put(struct cflayer *lyr) } static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, - int phyid) + int phyid) { struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); pr_debug("NET flowctrl func called flow: %s\n", @@ -443,7 +443,7 @@ nla_put_failure: } static void caif_netlink_parms(struct nlattr *data[], - struct caif_connect_request *conn_req) + struct caif_connect_request *conn_req) { if (!data) { pr_warn("no params data found\n"); @@ -488,7 +488,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, } static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[]) { struct chnl_net *caifdev; ASSERT_RTNL(); diff --git a/net/can/af_can.c 
b/net/can/af_can.c index c48e5220bbac..c4e50852c9f4 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -525,7 +525,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, d = find_dev_rcv_lists(dev); if (!d) { - printk(KERN_ERR "BUG: receive list not found for " + pr_err("BUG: receive list not found for " "dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); goto out; @@ -546,16 +546,13 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, } /* - * Check for bugs in CAN protocol implementations: - * If no matching list item was found, the list cursor variable next - * will be NULL, while r will point to the last item of the list. + * Check for bugs in CAN protocol implementations using af_can.c: + * 'r' will be NULL if no matching list item was found for removal. */ if (!r) { - printk(KERN_ERR "BUG: receive list entry not found for " - "dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); - r = NULL; + WARN(1, "BUG: receive list entry not found for dev %s, " + "id %03X, mask %03X\n", DNAME(dev), can_id, mask); goto out; } @@ -749,8 +746,7 @@ int can_proto_register(const struct can_proto *cp) int err = 0; if (proto < 0 || proto >= CAN_NPROTO) { - printk(KERN_ERR "can: protocol number %d out of range\n", - proto); + pr_err("can: protocol number %d out of range\n", proto); return -EINVAL; } @@ -761,8 +757,7 @@ int can_proto_register(const struct can_proto *cp) mutex_lock(&proto_tab_lock); if (proto_tab[proto]) { - printk(KERN_ERR "can: protocol %d already registered\n", - proto); + pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; } else RCU_INIT_POINTER(proto_tab[proto], cp); @@ -816,11 +811,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, /* create new dev_rcv_lists for this device */ d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) { - printk(KERN_ERR - "can: allocation of receive list failed\n"); + if (!d) return NOTIFY_DONE; - } BUG_ON(dev->ml_priv); 
dev->ml_priv = d; @@ -838,8 +830,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, dev->ml_priv = NULL; } } else - printk(KERN_ERR "can: notifier: receive list not " - "found for dev %s\n", dev->name); + pr_err("can: notifier: receive list not found for dev " + "%s\n", dev->name); spin_unlock(&can_rcvlists_lock); @@ -927,7 +919,7 @@ static __exit void can_exit(void) /* remove created dev_rcv_lists from still registered CAN devices */ rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv){ + if (dev->type == ARPHRD_CAN && dev->ml_priv) { struct dev_rcv_lists *d = dev->ml_priv; diff --git a/net/can/gw.c b/net/can/gw.c index 2d117dc5ebea..3ee690e8c7d3 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -466,7 +466,7 @@ static int cgw_notifier(struct notifier_block *nb, if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } } @@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, return 0; } -static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct rtcanmsg *r; struct cgw_job *gwj; @@ -864,11 +863,11 @@ static void cgw_remove_all_jobs(void) hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } -static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct cgw_job *gwj = NULL; struct hlist_node *nx; @@ -920,7 +919,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); err = 0; break; } diff --git a/net/can/raw.c b/net/can/raw.c index 
c1764e41ddaf..1085e65f848e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto free_skb; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 69bc4bf89e3e..4543b9aba40c 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -654,6 +654,24 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) return 0; } +static int __decode_pgid(void **p, void *end, struct ceph_pg *pg) +{ + u8 v; + + ceph_decode_need(p, end, 1+8+4+4, bad); + v = ceph_decode_8(p); + if (v != 1) + goto bad; + pg->pool = ceph_decode_64(p); + pg->seed = ceph_decode_32(p); + *p += 4; /* skip preferred */ + return 0; + +bad: + dout("error decoding pgid\n"); + return -EINVAL; +} + /* * decode a full map. 
*/ @@ -745,13 +763,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) for (i = 0; i < len; i++) { int n, j; struct ceph_pg pgid; - struct ceph_pg_v1 pgid_v1; struct ceph_pg_mapping *pg; - ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); - ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); - pgid.pool = le32_to_cpu(pgid_v1.pool); - pgid.seed = le16_to_cpu(pgid_v1.ps); + err = __decode_pgid(p, end, &pgid); + if (err) + goto bad; + ceph_decode_need(p, end, sizeof(u32), bad); n = ceph_decode_32(p); err = -EINVAL; if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) @@ -818,8 +835,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, u16 version; ceph_decode_16_safe(p, end, version, bad); - if (version > 6) { - pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6); + if (version != 6) { + pr_warning("got unknown v %d != 6 of inc osdmap\n", version); goto bad; } @@ -963,15 +980,14 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_mapping *pg; int j; - struct ceph_pg_v1 pgid_v1; struct ceph_pg pgid; u32 pglen; - ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); - ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); - pgid.pool = le32_to_cpu(pgid_v1.pool); - pgid.seed = le16_to_cpu(pgid_v1.ps); - pglen = ceph_decode_32(p); + err = __decode_pgid(p, end, &pgid); + if (err) + goto bad; + ceph_decode_need(p, end, sizeof(u32), bad); + pglen = ceph_decode_32(p); if (pglen) { ceph_decode_need(p, end, pglen*sizeof(u32), bad); diff --git a/net/core/datagram.c b/net/core/datagram.c index 368f9c3f9dc6..ebba65d7e0da 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/core/dev.c b/net/core/dev.c index 8f152f904f70..9e26b8d9eafe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -200,7 +200,7 @@ static inline void rps_unlock(struct softnet_data *sd) } /* Device list insertion */ -static int list_netdevice(struct net_device *dev) +static void list_netdevice(struct net_device *dev) { struct net *net = dev_net(dev); @@ -214,8 +214,6 @@ static int list_netdevice(struct net_device *dev) write_unlock_bh(&dev_base_lock); dev_base_seq_inc(net); - - return 0; } /* Device list removal @@ -1545,7 +1543,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1625,7 +1622,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_orphan(skb); - nf_reset(skb); if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); @@ -1641,6 +1637,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb->mark = 0; secpath_reset(skb); nf_reset(skb); + nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -2149,6 +2146,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb) struct net_device *dev = skb->dev; const char *driver = ""; + if (!net_ratelimit()) + return; + if (dev && dev->dev.parent) driver = dev_driver_string(dev->dev.parent); @@ -2208,30 +2208,40 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/** - * skb_mac_gso_segment - mac layer segmentation handler. 
- * @skb: buffer to segment - * @features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features) +__be16 skb_network_protocol(struct sk_buff *skb) { - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; __be16 type = skb->protocol; + int vlan_depth = ETH_HLEN; - while (type == htons(ETH_P_8021Q)) { - int vlan_depth = ETH_HLEN; + while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return ERR_PTR(-EINVAL); + return 0; vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } + return type; +} + +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + __be16 type = skb_network_protocol(skb); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + __skb_pull(skb, skb->mac_len); rcu_read_lock(); @@ -2398,24 +2408,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; - features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, 
skb)) { features &= ~NETIF_F_SG; } @@ -2431,20 +2429,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q)) { + if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { return harmonize_features(skb, protocol, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); - if (protocol != htons(ETH_P_8021Q)) { + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; return harmonize_features(skb, protocol, features); } } @@ -2485,8 +2485,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && - !(features & NETIF_F_HW_VLAN_TX)) { - skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + !vlan_hw_offload_capable(features, skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; @@ -2590,6 +2591,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) */ if (shinfo->gso_size) { unsigned int hdr_len; + u16 gso_segs = shinfo->gso_segs; /* mac layer + network layer */ hdr_len = skb_transport_header(skb) - skb_mac_header(skb); @@ -2599,7 +2601,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len += tcp_hdrlen(skb); else hdr_len += sizeof(struct udphdr); - qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; + + if (shinfo->gso_type & SKB_GSO_DODGY) + gso_segs = 
DIV_ROUND_UP(skb->len - hdr_len, + shinfo->gso_size); + + qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } } @@ -3315,6 +3322,7 @@ int netdev_rx_handler_register(struct net_device *dev, if (dev->rx_handler) return -EBUSY; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -3326,7 +3334,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register); * netdev_rx_handler_unregister - unregister receive handler * @dev: device to unregister a handler from * - * Unregister a receive hander from a device. + * Unregister a receive handler from a device. * * The caller must hold the rtnl_mutex. */ @@ -3335,6 +3343,11 @@ void netdev_rx_handler_unregister(struct net_device *dev) ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); + /* a reader seeing a non NULL rx_handler in a rcu_read_lock() + * section has a guarantee to see a non NULL rx_handler_data + * as well. + */ + synchronize_net(); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); @@ -3350,6 +3363,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) case __constant_htons(ETH_P_IP): case __constant_htons(ETH_P_IPV6): case __constant_htons(ETH_P_8021Q): + case __constant_htons(ETH_P_8021AD): return true; default: return false; @@ -3390,7 +3404,8 @@ another_round: __this_cpu_inc(softnet_data.processed); - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || + skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = vlan_untag(skb); if (unlikely(!skb)) goto unlock; @@ -3444,6 +3459,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: + ret = NET_RX_SUCCESS; goto unlock; case RX_HANDLER_ANOTHER: goto another_round; @@ -4057,6 +4073,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; + if (weight > 
NAPI_POLL_WEIGHT) + pr_err_once("netif_napi_add() called with weight %d on device %s\n", + weight, dev->name); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; @@ -4918,20 +4937,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } - /* Fix illegal SG+CSUM combinations. */ - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) { - netdev_dbg(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); - features &= ~NETIF_F_SG; - } - /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } + if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IP_CSUM)) { + netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO; + features &= ~NETIF_F_TSO_ECN; + } + + if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IPV6_CSUM)) { + netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO6; + } + /* TSO ECN requires that TSO is present as well. */ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) features &= ~NETIF_F_TSO_ECN; @@ -5162,7 +5186,8 @@ int register_netdevice(struct net_device *dev) } } - if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && + if (((dev->hw_features | dev->features) & + NETIF_F_HW_VLAN_CTAG_FILTER) && (!dev->netdev_ops->ndo_vlan_rx_add_vid || !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); @@ -5199,6 +5224,10 @@ int register_netdevice(struct net_device *dev) */ dev->vlan_features |= NETIF_F_HIGHDMA; + /* Make NETIF_F_SG inheritable to tunnel devices. 
+ */ + dev->hw_enc_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bd2eb9d3e369..c013f38482a1 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -22,7 +22,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, + bool sync) { struct netdev_hw_addr *ha; int alloc_size; @@ -37,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = false; + ha->synced = sync; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -46,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; @@ -63,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, else ha->global_use = true; } + if (sync) { + if (ha->synced) + return 0; + else + ha->synced = true; + } ha->refcount++; return 0; } } - return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); + return __hw_addr_create_ex(list, addr, addr_len, addr_type, global, + sync); } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, + struct netdev_hw_addr *ha, bool global, + bool sync) +{ + if (global && !ha->global_use) + return -ENOENT; + + if (sync && !ha->synced) + return -ENOENT; + + if 
(global) + ha->global_use = false; + + if (sync) + ha->synced = false; + + if (--ha->refcount) + return 0; + list_del_rcu(&ha->list); + kfree_rcu(ha, rcu_head); + list->count--; + return 0; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global) + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && - (ha->type == addr_type || !addr_type)) { - if (global) { - if (!ha->global_use) - break; - else - ha->global_use = false; - } - if (--ha->refcount) - return 0; - list_del_rcu(&ha->list); - kfree_rcu(ha, rcu_head); - list->count--; - return 0; - } + (ha->type == addr_type || !addr_type)) + return __hw_addr_del_entry(list, ha, global, sync); } return -ENOENT; } @@ -108,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); + return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return err; + ha->sync_cnt++; + ha->refcount++; + + return 0; +} + +static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + struct netdev_hw_addr *ha, + int addr_len) +{ + int err; + + err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, + false, true); + if (err) + return; + ha->sync_cnt--; + __hw_addr_del_entry(from_list, ha, false, true); +} + +static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len) +{ + int err = 0; + struct netdev_hw_addr *ha, *tmp; + + 
list_for_each_entry_safe(ha, tmp, &from_list->list, list) { + if (ha->sync_cnt == ha->refcount) { + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); + } else { + err = __hw_addr_sync_one(to_list, ha, addr_len); + if (err) + break; + } + } + return err; } int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, @@ -152,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_del_multiple); +/* This function only works where there is a strict 1-1 relationship + * between source and destionation of they synch. If you ever need to + * sync addresses to more then 1 destination, you need to use + * __hw_addr_sync_multiple(). + */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) @@ -160,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (!ha->synced) { - err = __hw_addr_add(to_list, ha->addr, - addr_len, ha->type); + if (!ha->sync_cnt) { + err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; - ha->synced = true; - ha->refcount++; - } else if (ha->refcount == 1) { - __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - __hw_addr_del(from_list, ha->addr, addr_len, ha->type); - } + } else if (ha->refcount == 1) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } @@ -183,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { - if (ha->synced) { - __hw_addr_del(to_list, ha->addr, - addr_len, ha->type); - ha->synced = false; - __hw_addr_del(from_list, ha->addr, - addr_len, ha->type); - } + if (ha->sync_cnt) + __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); @@ -406,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = 
__hw_addr_create_ex(&dev->uc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_UNICAST, true); + NETDEV_HW_ADDR_T_UNICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -469,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del); * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode - * function of layered software devices. + * function of layered software devices. This function assumes that + * addresses will only ever be synced to the @to devices and no other. */ int dev_uc_sync(struct net_device *to, struct net_device *from) { @@ -488,6 +554,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_uc_sync); /** + * dev_uc_sync_multiple - Synchronize device's unicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have been deleted from the source. The source device + * must be locked by netif_addr_lock_bh. + * + * This function is intended to be called from the dev->set_rx_mode + * function of layered software devices. It allows for a single source + * device to be synced to multiple destination devices. 
+ */ +int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_uc_sync_multiple); + +/** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device @@ -559,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) } } err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, true); + NETDEV_HW_ADDR_T_MULTICAST, true, false); if (!err) __dev_set_rx_mode(dev); out: @@ -575,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -615,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global); + NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); @@ -679,6 +775,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_mc_sync); /** + * dev_mc_sync_multiple - Synchronize device's unicast list to another + * device, but allow for multiple calls to sync to multiple devices. + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_addr_lock_bh. 
+ * + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. It allows for a single + * source device to be synced to multiple destination devices. + */ +int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) +{ + int err = 0; + + if (to->addr_len != from->addr_len) + return -EINVAL; + + netif_addr_lock_nested(to); + err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + netif_addr_unlock(to); + return err; +} +EXPORT_SYMBOL(dev_mc_sync_multiple); + +/** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device diff --git a/net/core/dst.c b/net/core/dst.c index 35fd12f1a69c..df9cc810ec8e 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) EXPORT_SYMBOL(__dst_destroy_metrics_generic); /** - * skb_dst_set_noref - sets skb dst, without a reference + * __skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer * @dst: dst entry + * @force: if force is set, use noref version even for DST_NOCACHE entries * * Sets skb dst, assuming a reference was not taken on dst * skb_dst_drop() should not dst_release() this dst */ -void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - if (unlikely(dst->flags & DST_NOCACHE)) { + if (unlikely((dst->flags & DST_NOCACHE) && !force)) { dst_hold(dst); skb_dst_set(skb, dst); } else { skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } } -EXPORT_SYMBOL(skb_dst_set_noref); +EXPORT_SYMBOL(__skb_dst_set_noref); /* Dirty hack. 
We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3e9b2c3e30f0..5a934ef90f8b 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -60,10 +60,13 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", [NETIF_F_HIGHDMA_BIT] = "highdma", [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", - [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert", + [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-ctag-hw-insert", - [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse", - [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter", + [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-ctag-hw-parse", + [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-ctag-filter", + [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", + [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", + [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", [NETIF_F_GSO_BIT] = "tx-generic-segmentation", [NETIF_F_LLTX_BIT] = "tx-lockless", @@ -78,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -266,18 +270,19 @@ static int ethtool_set_one_feature(struct net_device *dev, #define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) -#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ - NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) +#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ + NETIF_F_RXHASH) static u32 __ethtool_get_flags(struct net_device *dev) { u32 
flags = 0; - if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; + if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; return flags; } @@ -290,8 +295,8 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return -EINVAL; if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; - if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; + if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX; + if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX; if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba27dfe3..d5a9f8ead0d8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -266,7 +266,7 @@ errout: return err; } -static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -415,7 +415,7 @@ errout: return err; } -static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); diff --git a/net/core/filter.c b/net/core/filter.c index 2e20b55a7830..dad2a178f9f8 100644 --- 
a/net/core/filter.c +++ b/net/core/filter.c @@ -348,6 +348,9 @@ load_b: case BPF_S_ANC_VLAN_TAG_PRESENT: A = !!vlan_tx_tag_present(skb); continue; + case BPF_S_ANC_PAY_OFFSET: + A = __skb_get_poff(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(ALU_XOR_X); ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); + ANCILLARY(PAY_OFFSET); } /* ancillary operation unknown or unsupported */ @@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, diff --git a/net/core/flow.c b/net/core/flow.c index c56ea6f7f6c7..7102f166482d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -323,12 +323,30 @@ static void flow_cache_flush_tasklet(unsigned long data) complete(&info->completion); } +/* + * Return whether a cpu needs flushing. Conservatively, we assume + * the presence of any entries means the core may require flushing, + * since the flow_cache_ops.check() function may assume it's running + * on the same core as the per-cpu cache component. 
+ */ +static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) +{ + struct flow_cache_percpu *fcp; + int i; + + fcp = per_cpu_ptr(fc->percpu, cpu); + for (i = 0; i < flow_cache_hash_size(fc); i++) + if (!hlist_empty(&fcp->hash_table[i])) + return 0; + return 1; +} + static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; struct tasklet_struct *tasklet; - tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } @@ -337,22 +355,40 @@ void flow_cache_flush(void) { struct flow_flush_info info; static DEFINE_MUTEX(flow_flush_sem); + cpumask_var_t mask; + int i, self; + + /* Track which cpus need flushing to avoid disturbing all cores. */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return; + cpumask_clear(mask); /* Don't want cpus going down or up during this. */ get_online_cpus(); mutex_lock(&flow_flush_sem); info.cache = &flow_cache_global; - atomic_set(&info.cpuleft, num_online_cpus()); + for_each_online_cpu(i) + if (!flow_cache_percpu_empty(info.cache, i)) + cpumask_set_cpu(i, mask); + atomic_set(&info.cpuleft, cpumask_weight(mask)); + if (atomic_read(&info.cpuleft) == 0) + goto done; + init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 0); - flow_cache_flush_tasklet((unsigned long)&info); + self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); + on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); + if (self) + flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); wait_for_completion(&info.completion); + +done: mutex_unlock(&flow_flush_sem); put_online_cpus(); + free_cpumask_var(mask); } static void flow_cache_flush_task(struct work_struct *work) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9d4c7201400d..00ee068efc1c 100644 --- a/net/core/flow_dissector.c +++ 
b/net/core/flow_dissector.c @@ -5,6 +5,10 @@ #include <linux/if_vlan.h> #include <net/ip.h> #include <net/ipv6.h> +#include <linux/igmp.h> +#include <linux/icmp.h> +#include <linux/sctp.h> +#include <linux/dccp.h> #include <linux/if_tunnel.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> @@ -119,6 +123,17 @@ ipv6: nhoff += 4; if (hdr->flags & GRE_SEQ) nhoff += 4; + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = skb_header_pointer(skb, nhoff, + sizeof(_eth), &_eth); + if (!eth) + return false; + proto = eth->h_proto; + nhoff += sizeof(*eth); + } goto again; } break; @@ -140,6 +155,8 @@ ipv6: flow->ports = *ports; } + flow->thoff = (u16) nhoff; + return true; } EXPORT_SYMBOL(skb_flow_dissect); @@ -215,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, } EXPORT_SYMBOL(__skb_tx_hash); +/* __skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 __skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + u32 poff = 0; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + poff += keys.thoff; + switch (keys.ip_proto) { + case IPPROTO_TCP: { + const struct tcphdr *tcph; + struct tcphdr _tcph; + + tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + if (!tcph) + return poff; + + poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); + break; + } + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + poff += sizeof(struct udphdr); + break; + /* For the rest, we do not really care about header + * extensions at this point for now. 
+ */ + case IPPROTO_ICMP: + poff += sizeof(struct icmphdr); + break; + case IPPROTO_ICMPV6: + poff += sizeof(struct icmp6hdr); + break; + case IPPROTO_IGMP: + poff += sizeof(struct igmphdr); + break; + case IPPROTO_DCCP: + poff += sizeof(struct dccp_hdr); + break; + case IPPROTO_SCTP: + poff += sizeof(struct sctphdr); + break; + } + + return poff; +} + static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3863b8f639c5..89a3a07d85fb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -39,21 +39,13 @@ #include <linux/string.h> #include <linux/log2.h> +#define DEBUG #define NEIGH_DEBUG 1 - -#define NEIGH_PRINTK(x...) printk(x) -#define NEIGH_NOPRINTK(x...) do { ; } while(0) -#define NEIGH_PRINTK1 NEIGH_NOPRINTK -#define NEIGH_PRINTK2 NEIGH_NOPRINTK - -#if NEIGH_DEBUG >= 1 -#undef NEIGH_PRINTK1 -#define NEIGH_PRINTK1 NEIGH_PRINTK -#endif -#if NEIGH_DEBUG >= 2 -#undef NEIGH_PRINTK2 -#define NEIGH_PRINTK2 NEIGH_PRINTK -#endif +#define neigh_dbg(level, fmt, ...) 
\ +do { \ + if (level <= NEIGH_DEBUG) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) #define PNEIGH_HASHMASK 0xF @@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) n->nud_state = NUD_NOARP; else n->nud_state = NUD_NONE; - NEIGH_PRINTK2("neigh %p is stray.\n", n); + neigh_dbg(2, "neigh %p is stray\n", n); } write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -542,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, lockdep_is_held(&tbl->lock))); rcu_assign_pointer(nht->hash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); - NEIGH_PRINTK2("neigh %p is created.\n", n); + neigh_dbg(2, "neigh %p is created\n", n); rc = n; out: return rc; @@ -725,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh) dev_put(dev); neigh_parms_put(neigh->parms); - NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); + neigh_dbg(2, "neigh %p is destroyed\n", neigh); atomic_dec(&neigh->tbl->entries); kfree_rcu(neigh, rcu); @@ -739,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy); */ static void neigh_suspect(struct neighbour *neigh) { - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->output = neigh->ops->output; } @@ -751,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh) */ static void neigh_connect(struct neighbour *neigh) { - NEIGH_PRINTK2("neigh %p is connected.\n", neigh); + neigh_dbg(2, "neigh %p is connected\n", neigh); neigh->output = neigh->ops->connected_output; } @@ -852,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh) struct sk_buff *skb; NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); - NEIGH_PRINTK2("neigh %p is failed.\n", neigh); + neigh_dbg(2, "neigh %p is failed\n", neigh); neigh->updated = jiffies; /* It is very thin place. 
report_unreachable is very complicated @@ -904,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg) if (state & NUD_REACHABLE) { if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) { - NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, neigh->used + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_suspect(neigh); next = now + neigh->parms->delay_probe_time; } else { - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); @@ -923,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg) } else if (state & NUD_DELAY) { if (time_before_eq(now, neigh->confirmed + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); + neigh_dbg(2, "neigh %p is now reachable\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { - NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh_dbg(2, "neigh %p is probed\n", neigh); neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); @@ -997,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) return 1; } } else if (neigh->nud_state & NUD_STALE) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_add_timer(neigh, @@ -1320,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) out: return rc; discard: - NEIGH_PRINTK1("neigh_resolve_output: dst=%p 
neigh=%p\n", - dst, neigh); + neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1498,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } } write_unlock_bh(&tbl->lock); - NEIGH_PRINTK1("neigh_parms_release: not found\n"); + neigh_dbg(1, "%s: not found\n", __func__); } EXPORT_SYMBOL(neigh_parms_release); @@ -1613,7 +1604,7 @@ int neigh_table_clear(struct neigh_table *tbl) } EXPORT_SYMBOL(neigh_table_clear); -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1677,7 +1668,7 @@ out: return err; } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1955,7 +1946,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 3174f1998ee6..569d355fec3e 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -271,7 +271,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v) else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s %pF\n", + seq_printf(seq, " %-8s %pf\n", pt->dev ? 
pt->dev->name : "", pt->func); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fa32899006a2..209d84253dd5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -47,7 +47,7 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; -static struct srcu_struct netpoll_srcu; +DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 @@ -383,8 +383,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { if (vlan_tx_tag_present(skb) && - !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { - skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + !vlan_hw_offload_capable(netif_skb_features(skb), + skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); if (unlikely(!skb)) break; skb->vlan_tci = 0; @@ -1212,7 +1213,6 @@ EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); - init_srcu_struct(&netpoll_srcu); return 0; } core_initcall(netpoll_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b376410ff259..18af08a73f0a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -496,8 +496,10 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) } if (ops->fill_info) { data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) + if (data == NULL) { + err = -EMSGSIZE; goto err_cancel_link; + } err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; @@ -515,32 +517,6 @@ out: return err; } -static const int rtm_min[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), - [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), - [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTCLASS)] = 
NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -}; - -static const int rta_max[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, - [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, - [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, - [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, - [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, - [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, - [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, -}; - int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; @@ -979,6 +955,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, * report anything. */ ivi.spoofchk = -1; + memset(ivi.mac, 0, sizeof(ivi.mac)); if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = @@ -1069,7 +1046,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) @@ -1536,7 +1513,7 @@ errout: return err; } -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1577,7 +1554,7 @@ errout: return err; } -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1708,7 +1685,7 @@ static int rtnl_group_changelink(struct net *net, int group, return 0; } -static int 
rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1863,7 +1840,7 @@ out: } } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1919,7 +1896,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; - if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1954,8 +1931,11 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (rtnl_msg_handlers[idx] == NULL || rtnl_msg_handlers[idx][type].dumpit == NULL) continue; - if (idx > s_idx) + if (idx > s_idx) { memset(&cb->args[0], 0, sizeof(cb->args)); + cb->prev_seq = 0; + cb->seq = 0; + } if (rtnl_msg_handlers[idx][type].dumpit(skb, cb)) break; } @@ -2048,7 +2028,39 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry + */ +int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags) +{ + int err = -EINVAL; + + /* If aging addresses are supported device will need to + * implement its own handler for this. 
+ */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return err; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_add); + +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2079,7 +2091,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } addr = nla_data(tb[NDA_LLADDR]); - if (!is_valid_ether_addr(addr)) { + if (is_zero_ether_addr(addr)) { pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); return -EINVAL; } @@ -2100,10 +2112,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { - err = dev->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + if ((ndm->ndm_flags & NTF_SELF)) { + if (dev->netdev_ops->ndo_fdb_add) + err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); + else + err = ndo_dflt_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); if (!err) { rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); @@ -2114,7 +2129,36 @@ out: return err; } -static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry + */ +int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr) +{ + int err = -EOPNOTSUPP; + + /* If aging addresses are supported device will need to + * implement its own handler for this. 
+ */ + if (ndm->ndm_state & NUD_PERMANENT) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_del); + +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2171,8 +2215,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + if (ndm->ndm_flags & NTF_SELF) { + if (dev->netdev_ops->ndo_fdb_del) + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2217,7 +2264,7 @@ skip: * @dev: netdevice * * Default netdevice operation to dump the existing unicast address list. - * Returns zero on success. + * Returns number of addresses from list put in skb. 
*/ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -2257,6 +2304,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) if (dev->netdev_ops->ndo_fdb_dump) idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + else + idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); } rcu_read_unlock(); @@ -2408,8 +2457,7 @@ errout: return err; } -static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2479,8 +2527,7 @@ out: return err; } -static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2550,10 +2597,6 @@ out: return err; } -/* Protected by RTNL sempahore. */ -static struct rtattr **rta_buf; -static int rtattr_max; - /* Process one rtnetlink message. 
*/ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -2561,7 +2604,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); rtnl_doit_func doit; int sz_idx, kind; - int min_len; int family; int type; int err; @@ -2573,10 +2615,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) type -= RTM_BASE; /* All the messages must have at least 1 byte length */ - if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) + if (nlmsg_len(nlh) < sizeof(struct rtgenmsg)) return 0; - family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; + family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; sz_idx = type>>2; kind = type&3; @@ -2609,32 +2651,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); - - min_len = rtm_min[sz_idx]; - if (nlh->nlmsg_len < min_len) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type; - if (flavor) { - if (flavor > rta_max[sz_idx]) - return -EINVAL; - rta_buf[flavor-1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } - doit = rtnl_get_doit(family, type); if (doit == NULL) return -EOPNOTSUPP; - return doit(skb, nlh, (void *)&rta_buf[0]); + return doit(skb, nlh); } static void rtnetlink_rcv(struct sk_buff *skb) @@ -2704,16 +2725,6 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { - int i; - - rtattr_max = 0; - for (i = 0; i < ARRAY_SIZE(rta_max); i++) - if (rta_max[i] > rtattr_max) - rtattr_max = rta_max[i]; - rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); - if (!rta_buf) - panic("rtnetlink_init: cannot allocate rta_buf\n"); - if (register_pernet_subsys(&rtnetlink_net_ops)) 
panic("rtnetlink_init: cannot initialize rtnetlink\n"); diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6ad1e3..03795d0147f2 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || @@ -185,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->creds.uid = uid; p->creds.gid = gid; - - if (!p->cred || - !uid_eq(p->cred->euid, uid) || - !gid_eq(p->cred->egid, gid)) { - struct cred *cred; - err = -ENOMEM; - cred = prepare_creds(); - if (!cred) - goto error; - - cred->uid = cred->euid = uid; - cred->gid = cred->egid = gid; - if (p->cred) - put_cred(p->cred); - p->cred = cred; - } break; } default: @@ -304,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. 
*/ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk, current); - sock_update_classid(sock->sk, current); + sock_update_netprioidx(sock->sk); + sock_update_classid(sock->sk); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 33245ef54c3b..898cf5c566f9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -179,6 +179,33 @@ out: * */ +struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) +{ + struct sk_buff *skb; + + /* Get the HEAD */ + skb = kmem_cache_alloc_node(skbuff_head_cache, + gfp_mask & ~__GFP_DMA, node); + if (!skb) + goto out; + + /* + * Only clear those fields we need to clear, not those that we will + * actually initialise below. Hence, don't put any more fields after + * the tail pointer in struct sk_buff! + */ + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->data = NULL; + skb->truesize = sizeof(struct sk_buff); + atomic_set(&skb->users, 1); + +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->mac_header = ~0U; +#endif +out: + return skb; +} + /** * __alloc_skb - allocate a network buffer * @size: size to allocate @@ -584,7 +611,8 @@ static void skb_release_head_state(struct sk_buff *skb) static void skb_release_all(struct sk_buff *skb) { skb_release_head_state(skb); - skb_release_data(skb); + if (likely(skb->data)) + skb_release_data(skb); } /** @@ -673,6 +701,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; + new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; @@ -706,6 +735,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->tc_verd = old->tc_verd; #endif #endif + new->vlan_proto = old->vlan_proto; new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); @@ -867,6 
+897,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) } EXPORT_SYMBOL(skb_clone); +static void skb_headers_offset_update(struct sk_buff *skb, int off) +{ + /* {transport,network,mac}_header and tail are relative to skb->head */ + skb->transport_header += off; + skb->network_header += off; + if (skb_mac_header_was_set(skb)) + skb->mac_header += off; + skb->inner_transport_header += off; + skb->inner_network_header += off; + skb->inner_mac_header += off; +} + static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { #ifndef NET_SKBUFF_DATA_USES_OFFSET @@ -879,13 +921,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) __copy_skb_header(new, old); #ifndef NET_SKBUFF_DATA_USES_OFFSET - /* {transport,network,mac}_header are relative to skb->head */ - new->transport_header += offset; - new->network_header += offset; - if (skb_mac_header_was_set(new)) - new->mac_header += offset; - new->inner_transport_header += offset; - new->inner_network_header += offset; + skb_headers_offset_update(new, offset); #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -1077,14 +1113,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #else skb->end = skb->head + size; #endif - /* {transport,network,mac}_header and tail are relative to skb->head */ skb->tail += off; - skb->transport_header += off; - skb->network_header += off; - if (skb_mac_header_was_set(skb)) - skb->mac_header += off; - skb->inner_transport_header += off; - skb->inner_network_header += off; + skb_headers_offset_update(skb, off); /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1180,12 +1210,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, if (n->ip_summed == CHECKSUM_PARTIAL) n->csum_start += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET - n->transport_header += off; - 
n->network_header += off; - if (skb_mac_header_was_set(skb)) - n->mac_header += off; - n->inner_transport_header += off; - n->inner_network_header += off; + skb_headers_offset_update(n, off); #endif return n; @@ -2741,12 +2766,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; + __be16 proto; + bool csum; int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; + proto = skb_network_protocol(skb); + if (unlikely(!proto)) + return ERR_PTR(-EINVAL); + + csum = !!can_checksum_protocol(features, proto); __skb_push(skb, doffset); headroom = skb_headroom(skb); pos = skb_headlen(skb); @@ -2884,6 +2916,12 @@ skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; + + if (!csum) { + nskb->csum = skb_checksum(nskb, doffset, + nskb->len - doffset, 0); + nskb->ip_summed = CHECKSUM_NONE; + } } while ((offset += len) < skb->len); return segs; @@ -3361,6 +3399,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + start; skb->csum_offset = off; + skb_set_transport_header(skb, start); return true; } EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff --git a/net/core/sock.c b/net/core/sock.c index b261a7977746..d4f4cea726e7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -907,6 +907,10 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; + case SO_SELECT_ERR_QUEUE: + sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; + case SO_SELECT_ERR_QUEUE: + v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); + break; + default: return -ENOPROTOOPT; } @@ -1298,13 +1306,12 @@ static 
void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#ifdef CONFIG_CGROUPS #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk, struct task_struct *task) +void sock_update_classid(struct sock *sk) { u32 classid; - classid = task_cls_classid(task); + classid = task_cls_classid(current); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1312,16 +1319,15 @@ EXPORT_SYMBOL(sock_update_classid); #endif #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) -void sock_update_netprioidx(struct sock *sk, struct task_struct *task) +void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(task); + sk->sk_cgrp_prioidx = task_netprioidx(current); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif -#endif /** * sk_alloc - All socket objects are allocated here @@ -1347,8 +1353,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk, current); - sock_update_netprioidx(sk, current); + sock_update_classid(sk); + sock_update_netprioidx(sk); } return sk; diff --git a/net/core/utils.c b/net/core/utils.c index e3487e461939..3c7f5b51b979 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/ctype.h> #include <linux/inet.h> #include <linux/mm.h> #include <linux/net.h> @@ -348,9 +349,7 @@ int mac_pton(const char *s, u8 *mac) /* Don't dirty result unless string is valid MAC. 
*/ for (i = 0; i < ETH_ALEN; i++) { - if (!strchr("0123456789abcdefABCDEF", s[i * 3])) - return 0; - if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) + if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1])) return 0; if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') return 0; diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 1d9eb7c60a68..4f72fc40bf02 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/notifier.h> #include <linux/export.h> +#include <net/dcbevent.h> static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 1b588e23cf80..40d5829ed36a 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -284,6 +284,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getpermhwaddr) return -EOPNOTSUPP; + memset(perm_addr, 0, sizeof(perm_addr)); netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr); return nla_put(skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), perm_addr); @@ -1042,6 +1043,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getets) { struct ieee_ets ets; + memset(&ets, 0, sizeof(ets)); err = ops->ieee_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets)) @@ -1050,6 +1052,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getmaxrate) { struct ieee_maxrate maxrate; + memset(&maxrate, 0, sizeof(maxrate)); err = ops->ieee_getmaxrate(netdev, &maxrate); if (!err) { err = nla_put(skb, DCB_ATTR_IEEE_MAXRATE, @@ -1061,6 +1064,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_getpfc) { struct ieee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->ieee_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc)) @@ -1094,6 +1098,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) 
/* get peer info if available */ if (ops->ieee_peer_getets) { struct ieee_ets ets; + memset(&ets, 0, sizeof(ets)); err = ops->ieee_peer_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets)) @@ -1102,6 +1107,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->ieee_peer_getpfc) { struct ieee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->ieee_peer_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc)) @@ -1280,6 +1286,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* peer info if available */ if (ops->cee_peer_getpg) { struct cee_pg pg; + memset(&pg, 0, sizeof(pg)); err = ops->cee_peer_getpg(netdev, &pg); if (!err && nla_put(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg)) @@ -1288,6 +1295,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) if (ops->cee_peer_getpfc) { struct cee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); err = ops->cee_peer_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc)) @@ -1650,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = { [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, }; -static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct net_device *netdev; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4f9f5eb478f1..ebc54fef85a5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct 
sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e05981f271e..9c61f9c02fdb 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -213,8 +213,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c8da116d84a4..7d9197063ebb 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { .len = IFNAMSIZ - 1 }, }; -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -607,7 +607,7 @@ errout: return err; } -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index e36614eccc04..57dc159245ec 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi return NULL; } -__le16 
dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) +static int dn_fib_count_nhs(const struct nlattr *attr) { - while(RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(__le16*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - - return 0; -} - -static int dn_fib_count_nhs(struct rtattr *rta) -{ - int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhs = 0, nhlen = nla_len(attr); while(nhlen >= (int)sizeof(struct rtnexthop)) { if ((nhlen -= nhp->rtnh_len) < 0) @@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta) return nhs; } -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, + const struct rtmsg *r) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhlen = nla_len(attr); change_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons nh->nh_weight = nhp->rtnh_hops + 1; if (attrlen) { - nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + nh->nh_gw = gw_attr ? 
nla_get_le16(gw_attr) : 0; } nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); @@ -268,7 +260,8 @@ out: } -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) +struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], + const struct nlmsghdr *nlh, int *errp) { int err; struct dn_fib_info *fi = NULL; @@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) goto err_inval; - if (rta->rta_mp) { - nhs = dn_fib_count_nhs(rta->rta_mp); - if (nhs == 0) - goto err_inval; - } + if (attrs[RTA_MULTIPATH] && + (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) + goto err_inval; fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); err = -ENOBUFS; @@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta fi->fib_protocol = r->rtm_protocol; fi->fib_nhs = nhs; fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - while(RTA_OK(attr, attrlen)) { - unsigned int flavour = attr->rta_type; + if (attrs[RTA_PRIORITY]) + fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); + + if (attrs[RTA_METRICS]) { + struct nlattr *attr; + int rem; - if (flavour) { - if (flavour > RTAX_MAX) + nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { + int type = nla_type(attr); + + if (type) { + if (type > RTAX_MAX || nla_len(attr) < 4) goto err_inval; - fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); + + fi->fib_metrics[type-1] = nla_get_u32(attr); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2); - if (rta->rta_mp) { - if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) + if (attrs[RTA_PREFSRC]) + fi->fib_prefsrc = 
nla_get_le16(attrs[RTA_PREFSRC]); + + if (attrs[RTA_MULTIPATH]) { + if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + + if (attrs[RTA_OIF] && + fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF])) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) + + if (attrs[RTA_GATEWAY] && + fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY])) goto err_inval; } else { struct dn_fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 2); + + if (attrs[RTA_OIF]) + nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); + + if (attrs[RTA_GATEWAY]) + nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); + nh->nh_flags = r->rtm_flags; nh->nh_weight = 1; } if (r->rtm_type == RTN_NAT) { - if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF]) goto err_inval; - memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); + + fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); goto link_it; } if (dn_fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH]) goto err_inval; + goto link_it; } @@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) + if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || + fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST])) if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -486,39 +489,21 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) spin_unlock_bh(&dn_fib_multipath_lock); } - -static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) -{ - int i; - - for(i = 1; i <= RTA_MAX; i++) { - struct rtattr *attr = rta[i-1]; - if (attr) { - if 
(RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) - rta[i-1] = (struct rtattr *)RTA_DATA(attr); - } - } - - return 0; -} - -static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) { - if (rta[RTA_TABLE - 1]) - table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); + if (attrs[RTA_TABLE]) + table = nla_get_u32(attrs[RTA_TABLE]); return table; } -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -526,22 +511,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); - if (tb) - return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); + if (!tb) + return -ESRCH; - return -ESRCH; + return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -549,14 +536,15 @@ static int 
dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); - if (tb) - return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); + if (!tb) + return -ENOBUFS; - return -ENOBUFS; + return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb)); } static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) @@ -566,10 +554,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad struct nlmsghdr nlh; struct rtmsg rtm; } req; - struct dn_kern_rta rta; + struct { + struct nlattr hdr; + __le16 dst; + } dst_attr = { + .dst = dst, + }; + struct { + struct nlattr hdr; + __le16 prefsrc; + } prefsrc_attr = { + .prefsrc = ifa->ifa_local, + }; + struct { + struct nlattr hdr; + u32 oif; + } oif_attr = { + .oif = ifa->ifa_dev->dev->ifindex, + }; + struct nlattr *attrs[RTA_MAX+1] = { + [RTA_DST] = (struct nlattr *) &dst_attr, + [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, + [RTA_OIF] = (struct nlattr *) &oif_attr, + }; memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); if (type == RTN_UNICAST) tb = dn_fib_get_table(RT_MIN_TABLE, 1); @@ -591,14 +600,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad req.rtm.rtm_scope = (type != RTN_LOCAL ? 
RT_SCOPE_LINK : RT_SCOPE_HOST); req.rtm.rtm_type = type; - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; - if (cmd == RTM_NEWROUTE) - tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL); else - tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL); } static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5ac0e153ef83..fe32388ea24f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1613,23 +1613,41 @@ errout: return -EMSGSIZE; } +const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { + [RTA_DST] = { .type = NLA_U16 }, + [RTA_SRC] = { .type = NLA_U16 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U16 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U16 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .type = NLA_NESTED }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, +}; + /* * This is called by both endnodes and routers now. 
*/ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); - struct rtattr **rta = arg; struct rtmsg *rtm = nlmsg_data(nlh); struct dn_route *rt = NULL; struct dn_skb_cb *cb; int err; struct sk_buff *skb; struct flowidn fld; + struct nlattr *tb[RTA_MAX+1]; if (!net_eq(net, &init_net)) return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; + memset(&fld, 0, sizeof(fld)); fld.flowidn_proto = DNPROTO_NSP; @@ -1639,12 +1657,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb_reset_mac_header(skb); cb = DN_SKB_CB(skb); - if (rta[RTA_SRC-1]) - memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_IIF-1]) - memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + if (tb[RTA_SRC]) + fld.saddr = nla_get_le16(tb[RTA_SRC]); + + if (tb[RTA_DST]) + fld.daddr = nla_get_le16(tb[RTA_DST]); + + if (tb[RTA_IIF]) + fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]); if (fld.flowidn_iif) { struct net_device *dev; @@ -1669,10 +1689,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (!err && -rt->dst.error) err = rt->dst.error; } else { - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fld.flowidn_oif = oif; + if (tb[RTA_OIF]) + fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); + err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 6c2445bcaba1..86e3807052e9 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -19,7 +19,6 @@ #include <linux/sockios.h> #include <linux/init.h> #include <linux/skbuff.h> -#include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> #include 
<linux/netdevice.h> @@ -224,26 +223,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) } -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) +static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi) { struct rtnexthop *nhp; int nhlen; - if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) + if (attrs[RTA_PRIORITY] && + nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) + if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { + if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && + (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw)) return 0; return 1; } - if (rta->rta_mp == NULL) + if (!attrs[RTA_MULTIPATH]) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + nhp = nla_data(attrs[RTA_MULTIPATH]); + nhlen = nla_len(attrs[RTA_MULTIPATH]); for_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -254,7 +254,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) return 1; if (attrlen) { - gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + gw = gw_attr ? 
nla_get_le16(gw_attr) : 0; if (gw && gw != nh->nh_gw) return 1; @@ -488,7 +491,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!net_eq(net, &init_net)) return 0; - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED) return dn_cache_dump(skb, cb); @@ -517,7 +520,8 @@ out: return skb->len; } -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; struct dn_fib_node *new_f, *f, **fp, **del_fp; @@ -536,15 +540,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct return -ENOBUFS; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); } - if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) + if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL) return err; if (dz->dz_nent > (dz->dz_divisor << 2) && @@ -654,7 +657,8 @@ out: } -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash*)tb->data; struct dn_fib_node **fp, **del_fp, *f; @@ -671,9 +675,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct return -ESRCH; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + 
__le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); @@ -703,7 +706,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) && - dn_fib_nh_match(r, n, rta, fi) == 0) + dn_fib_nh_match(r, n, attrs, fi) == 0) del_fp = fp; } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index dfe42012a044..2a7efe388344 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -19,7 +19,7 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/spinlock.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter_decnet.h> #include <net/sock.h> @@ -39,21 +39,21 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp) unsigned char *ptr; struct nf_dn_rtmsg *rtm; - size = NLMSG_SPACE(rt_skb->len); - size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); - skb = alloc_skb(size, GFP_ATOMIC); + size = NLMSG_ALIGN(rt_skb->len) + + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); + skb = nlmsg_new(size, GFP_ATOMIC); if (!skb) { *errp = -ENOMEM; return NULL; } old_tail = skb->tail; - nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(*nlh), 0); + nlh = nlmsg_put(skb, 0, 0, 0, size, 0); if (!nlh) { kfree_skb(skb); *errp = -ENOMEM; return NULL; } - rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh); + rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh); rtm->nfdn_ifindex = rt_skb->dev->ifindex; ptr = NFDN_RTMSG(rtm); skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2bc62ea857c8..0eb5d5e76dfb 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1,6 +1,7 @@ /* * net/dsa/dsa.c - Hardware switch handling * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org> * * This program is free software; you can 
redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,6 +15,9 @@ #include <linux/slab.h> #include <linux/module.h> #include <net/dsa.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_platform.h> #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; @@ -287,34 +291,239 @@ static struct net_device *dev_to_net_device(struct device *dev) return NULL; } +#ifdef CONFIG_OF +static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, + struct device_node *link) +{ + int ret; + const __be32 *reg; + int link_port_addr; + int link_sw_addr; + struct device_node *parent_sw; + int len; + + parent_sw = of_get_parent(link); + if (!parent_sw) + return -EINVAL; + + reg = of_get_property(parent_sw, "reg", &len); + if (!reg || (len != sizeof(*reg) * 2)) + return -EINVAL; + + link_sw_addr = be32_to_cpup(reg + 1); + + if (link_sw_addr >= pd->nr_chips) + return -EINVAL; + + /* First time routing table allocation */ + if (!cd->rtable) { + cd->rtable = kmalloc(pd->nr_chips * sizeof(s8), GFP_KERNEL); + if (!cd->rtable) + return -ENOMEM; + + /* default to no valid uplink/downlink */ + memset(cd->rtable, -1, pd->nr_chips * sizeof(s8)); + } + + reg = of_get_property(link, "reg", NULL); + if (!reg) { + ret = -EINVAL; + goto out; + } + + link_port_addr = be32_to_cpup(reg); + + cd->rtable[link_sw_addr] = link_port_addr; + + return 0; +out: + kfree(cd->rtable); + return ret; +} + +static void dsa_of_free_platform_data(struct dsa_platform_data *pd) +{ + int i; + int port_index; + + for (i = 0; i < pd->nr_chips; i++) { + port_index = 0; + while (port_index < DSA_MAX_PORTS) { + if (pd->chip[i].port_names[port_index]) + kfree(pd->chip[i].port_names[port_index]); + port_index++; + } + kfree(pd->chip[i].rtable); + } + kfree(pd->chip); +} + +static int dsa_of_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct device_node *child, 
*mdio, *ethernet, *port, *link; + struct mii_bus *mdio_bus; + struct platform_device *ethernet_dev; + struct dsa_platform_data *pd; + struct dsa_chip_data *cd; + const char *port_name; + int chip_index, port_index; + const unsigned int *sw_addr, *port_reg; + int ret; + + mdio = of_parse_phandle(np, "dsa,mii-bus", 0); + if (!mdio) + return -EINVAL; + + mdio_bus = of_mdio_find_bus(mdio); + if (!mdio_bus) + return -EINVAL; + + ethernet = of_parse_phandle(np, "dsa,ethernet", 0); + if (!ethernet) + return -EINVAL; + + ethernet_dev = of_find_device_by_node(ethernet); + if (!ethernet_dev) + return -ENODEV; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + pdev->dev.platform_data = pd; + pd->netdev = &ethernet_dev->dev; + pd->nr_chips = of_get_child_count(np); + if (pd->nr_chips > DSA_MAX_SWITCHES) + pd->nr_chips = DSA_MAX_SWITCHES; + + pd->chip = kzalloc(pd->nr_chips * sizeof(struct dsa_chip_data), + GFP_KERNEL); + if (!pd->chip) { + ret = -ENOMEM; + goto out_free; + } + + chip_index = 0; + for_each_available_child_of_node(np, child) { + cd = &pd->chip[chip_index]; + + cd->mii_bus = &mdio_bus->dev; + + sw_addr = of_get_property(child, "reg", NULL); + if (!sw_addr) + continue; + + cd->sw_addr = be32_to_cpup(sw_addr); + if (cd->sw_addr > PHY_MAX_ADDR) + continue; + + for_each_available_child_of_node(child, port) { + port_reg = of_get_property(port, "reg", NULL); + if (!port_reg) + continue; + + port_index = be32_to_cpup(port_reg); + + port_name = of_get_property(port, "label", NULL); + if (!port_name) + continue; + + cd->port_names[port_index] = kstrdup(port_name, + GFP_KERNEL); + if (!cd->port_names[port_index]) { + ret = -ENOMEM; + goto out_free_chip; + } + + link = of_parse_phandle(port, "link", 0); + + if (!strcmp(port_name, "dsa") && link && + pd->nr_chips > 1) { + ret = dsa_of_setup_routing_table(pd, cd, + chip_index, link); + if (ret) + goto out_free_chip; + } + + if (port_index == DSA_MAX_PORTS) + break; + } + } + + return 0; + +out_free_chip: 
+ dsa_of_free_platform_data(pd); +out_free: + kfree(pd); + pdev->dev.platform_data = NULL; + return ret; +} + +static void dsa_of_remove(struct platform_device *pdev) +{ + struct dsa_platform_data *pd = pdev->dev.platform_data; + + if (!pdev->dev.of_node) + return; + + dsa_of_free_platform_data(pd); + kfree(pd); +} +#else +static inline int dsa_of_probe(struct platform_device *pdev) +{ + return 0; +} + +static inline void dsa_of_remove(struct platform_device *pdev) +{ +} +#endif + static int dsa_probe(struct platform_device *pdev) { static int dsa_version_printed; struct dsa_platform_data *pd = pdev->dev.platform_data; struct net_device *dev; struct dsa_switch_tree *dst; - int i; + int i, ret; if (!dsa_version_printed++) printk(KERN_NOTICE "Distributed Switch Architecture " "driver version %s\n", dsa_driver_version); + if (pdev->dev.of_node) { + ret = dsa_of_probe(pdev); + if (ret) + return ret; + + pd = pdev->dev.platform_data; + } + if (pd == NULL || pd->netdev == NULL) return -EINVAL; dev = dev_to_net_device(pd->netdev); - if (dev == NULL) - return -EINVAL; + if (dev == NULL) { + ret = -EINVAL; + goto out; + } if (dev->dsa_ptr != NULL) { dev_put(dev); - return -EEXIST; + ret = -EEXIST; + goto out; } dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (dst == NULL) { dev_put(dev); - return -ENOMEM; + ret = -ENOMEM; + goto out; } platform_set_drvdata(pdev, dst); @@ -366,6 +575,11 @@ static int dsa_probe(struct platform_device *pdev) } return 0; + +out: + dsa_of_remove(pdev); + + return ret; } static int dsa_remove(struct platform_device *pdev) @@ -385,6 +599,8 @@ static int dsa_remove(struct platform_device *pdev) dsa_switch_destroy(ds); } + dsa_of_remove(pdev); + return 0; } @@ -392,6 +608,12 @@ static void dsa_shutdown(struct platform_device *pdev) { } +static const struct of_device_id dsa_of_match_table[] = { + { .compatible = "marvell,dsa", }, + {} +}; +MODULE_DEVICE_TABLE(of, dsa_of_match_table); + static struct platform_driver dsa_driver = { .probe = dsa_probe, 
.remove = dsa_remove, @@ -399,6 +621,7 @@ static struct platform_driver dsa_driver = { .driver = { .name = "dsa", .owner = THIS_MODULE, + .of_match_table = dsa_of_match_table, }, }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a36c85eab5b4..5359560926bc 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (netdev_uses_trailer_tags(dev)) return htons(ETH_P_TRAILER); - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; /* diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 43b95ca61114..55e1fd5b3e56 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80}; struct lowpan_dev_info { struct net_device *real_dev; /* real WPAN device ptr */ struct mutex dev_list_mtx; /* mutex for list ops */ + unsigned short fragment_tag; }; struct lowpan_dev_record { @@ -120,7 +121,6 @@ struct lowpan_fragment { struct list_head list; /* fragments list */ }; -static unsigned short fragment_tag; static LIST_HEAD(lowpan_fragments); static DEFINE_SPINLOCK(flist_lock); @@ -284,6 +284,9 @@ lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) /* checksum is always inline */ memcpy(*hc06_ptr, &uh->check, 2); *hc06_ptr += 2; + + /* skip the UDP header */ + skb_pull(skb, sizeof(struct udphdr)); } static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) @@ -309,9 +312,8 @@ static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) } static int -lowpan_uncompress_udp_header(struct sk_buff *skb) +lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) { - struct udphdr *uh = udp_hdr(skb); u8 tmp; if (!uh) @@ -358,6 +360,14 @@ lowpan_uncompress_udp_header(struct sk_buff *skb) /* copy checksum */ memcpy(&uh->check, &skb->data[0], 2); skb_pull(skb, 2); + + /* + * UDP lenght needs to be infered from the 
lower layers + * here, we obtain the hint from the remaining size of the + * frame + */ + uh->len = htons(skb->len + sizeof(struct udphdr)); + pr_debug("uncompressed UDP length: src = %d", uh->len); } else { pr_debug("ERROR: unsupported NH format\n"); goto err; @@ -572,17 +582,31 @@ static int lowpan_header_create(struct sk_buff *skb, * this isn't implemented in mainline yet, so currently we assign 0xff */ { + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + /* prepare wpan address data */ sa.addr_type = IEEE802154_ADDR_LONG; - sa.pan_id = 0xff; - - da.addr_type = IEEE802154_ADDR_LONG; - da.pan_id = 0xff; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - memcpy(&(da.hwaddr), daddr, 8); memcpy(&(sa.hwaddr), saddr, 8); + /* intra-PAN communications */ + da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + /* + * if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast(daddr)) { + da.addr_type = IEEE802154_ADDR_SHORT; + da.short_addr = IEEE802154_ADDR_BROADCAST; + } else { + da.addr_type = IEEE802154_ADDR_LONG; + memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); + + /* request acknowledgment */ + mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + } return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, type, (void *)&da, (void *)&sa, skb->len); @@ -650,7 +674,7 @@ static void lowpan_fragment_timer_expired(unsigned long entry_addr) } static struct lowpan_fragment * -lowpan_alloc_new_frame(struct sk_buff *skb, u8 len, u16 tag) +lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag) { struct lowpan_fragment *frame; @@ -720,7 +744,7 @@ lowpan_process_data(struct sk_buff *skb) { struct lowpan_fragment *frame; /* slen stores the rightmost 8 bits of the 11 bits length */ - u8 slen, offset; + u8 slen, offset = 0; u16 len, tag; bool found = false; @@ -731,6 +755,18 @@ 
lowpan_process_data(struct sk_buff *skb) /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */ len = ((iphc0 & 7) << 8) | slen; + if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) { + pr_debug("%s received a FRAG1 packet (tag: %d, " + "size of the entire IP packet: %d)", + __func__, tag, len); + } else { /* FRAGN */ + if (lowpan_fetch_skb_u8(skb, &offset)) + goto unlock_and_drop; + pr_debug("%s received a FRAGN packet (tag: %d, " + "size of the entire IP packet: %d, " + "offset: %d)", __func__, tag, len, offset * 8); + } + /* * check if frame assembling with the same tag is * already in progress @@ -745,17 +781,13 @@ lowpan_process_data(struct sk_buff *skb) /* alloc new frame structure */ if (!found) { + pr_debug("%s first fragment received for tag %d, " + "begin packet reassembly", __func__, tag); frame = lowpan_alloc_new_frame(skb, len, tag); if (!frame) goto unlock_and_drop; } - if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) - goto unlock_and_drop; - - if (lowpan_fetch_skb_u8(skb, &offset)) /* fetch offset */ - goto unlock_and_drop; - /* if payload fits buffer, copy it */ if (likely((offset * 8 + skb->len) <= frame->length)) skb_copy_to_linear_data_offset(frame->skb, offset * 8, @@ -773,6 +805,9 @@ lowpan_process_data(struct sk_buff *skb) list_del(&frame->list); spin_unlock_bh(&flist_lock); + pr_debug("%s successfully reassembled fragment " + "(tag %d)", __func__, tag); + dev_kfree_skb(skb); skb = frame->skb; kfree(frame); @@ -918,10 +953,35 @@ lowpan_process_data(struct sk_buff *skb) } /* UDP data uncompression */ - if (iphc0 & LOWPAN_IPHC_NH_C) - if (lowpan_uncompress_udp_header(skb)) + if (iphc0 & LOWPAN_IPHC_NH_C) { + struct udphdr uh; + struct sk_buff *new; + if (lowpan_uncompress_udp_header(skb, &uh)) goto drop; + /* + * replace the compressed UDP head by the uncompressed UDP + * header + */ + new = skb_copy_expand(skb, sizeof(struct udphdr), + skb_tailroom(skb), GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return 
-ENOMEM; + + skb = new; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + + lowpan_raw_dump_table(__func__, "raw UDP header dump", + (u8 *)&uh, sizeof(uh)); + + hdr.nexthdr = UIP_PROTO_UDP; + } + /* Not fragmented package */ hdr.payload_len = htons(skb->len); @@ -969,13 +1029,13 @@ static int lowpan_get_mac_header_length(struct sk_buff *skb) static int lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, - int mlen, int plen, int offset) + int mlen, int plen, int offset, int type) { struct sk_buff *frag; int hlen, ret; - /* if payload length is zero, therefore it's a first fragment */ - hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE); + hlen = (type == LOWPAN_DISPATCH_FRAG1) ? + LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); @@ -1003,14 +1063,14 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, } static int -lowpan_skb_fragmentation(struct sk_buff *skb) +lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) { int err, header_length, payload_length, tag, offset = 0; u8 head[5]; header_length = lowpan_get_mac_header_length(skb); payload_length = skb->len - header_length; - tag = fragment_tag++; + tag = lowpan_dev_info(dev)->fragment_tag++; /* first fragment header */ head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7); @@ -1018,7 +1078,16 @@ lowpan_skb_fragmentation(struct sk_buff *skb) head[2] = tag >> 8; head[3] = tag & 0xff; - err = lowpan_fragment_xmit(skb, head, header_length, 0, 0); + err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE, + 0, LOWPAN_DISPATCH_FRAG1); + + if (err) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, tag); + goto exit; + } + + offset = LOWPAN_FRAG_SIZE; /* next fragment header */ head[0] &= ~LOWPAN_DISPATCH_FRAG1; @@ -1033,10 +1102,17 @@ lowpan_skb_fragmentation(struct sk_buff *skb) 
len = payload_length - offset; err = lowpan_fragment_xmit(skb, head, header_length, - len, offset); + len, offset, LOWPAN_DISPATCH_FRAGN); + if (err) { + pr_debug("%s unable to send a subsequent FRAGN packet " + "(tag: %d, offset: %d", __func__, tag, offset); + goto exit; + } + offset += len; } +exit: return err; } @@ -1059,14 +1135,14 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) } pr_debug("frame is too big, fragmentation is needed\n"); - err = lowpan_skb_fragmentation(skb); + err = lowpan_skb_fragmentation(skb, dev); error: dev_kfree_skb(skb); out: - if (err < 0) + if (err) pr_debug("ERROR: xmit failed\n"); - return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); + return (err < 0) ? NET_XMIT_DROP : err; } static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) @@ -1087,6 +1163,12 @@ static u16 lowpan_get_short_addr(const struct net_device *dev) return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); } +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -1100,6 +1182,7 @@ static struct ieee802154_mlme_ops lowpan_mlme = { .get_pan_id = lowpan_get_pan_id, .get_phy = lowpan_get_phy, .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, }; static void lowpan_setup(struct net_device *dev) @@ -1203,6 +1286,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, return -ENODEV; lowpan_dev_info(dev)->real_dev = real_dev; + lowpan_dev_info(dev)->fragment_tag = 0; mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index 8c2251fb0a3f..4b8f917658b5 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -84,7 +84,7 @@ (memcmp(addr1, 
addr2, length >> 3) == 0) /* local link, i.e. FE80::/10 */ -#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE) +#define is_addr_link_local(a) (((a)->s6_addr16[0]) == htons(0xFE80)) /* * check whether we can compress the IID to 16 bits, @@ -92,9 +92,10 @@ */ #define lowpan_is_iid_16_bit_compressable(a) \ ((((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr16[5]) == 0) && \ - (((a)->s6_addr16[6]) == 0) && \ - ((((a)->s6_addr[14]) & 0x80) == 0)) + (((a)->s6_addr[10]) == 0) && \ + (((a)->s6_addr[11]) == 0xff) && \ + (((a)->s6_addr[12]) == 0xfe) && \ + (((a)->s6_addr[13]) == 0)) /* multicast address */ #define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index e0da175f8e5b..581a59504bd5 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -291,6 +291,9 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; + struct sockaddr_ieee802154 *saddr; + + saddr = (struct sockaddr_ieee802154 *)msg->msg_name; skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -309,6 +312,13 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, sock_recv_ts_and_drops(msg, sk, skb); + if (saddr) { + saddr->family = AF_IEEE802154; + saddr->addr = mac_cb(skb)->sa; + } + if (addr_len) + *addr_len = sizeof(*saddr); + if (flags & MSG_TRUNC) copied = skb->len; done: diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 97351e1d07a4..7e49bbcc6967 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -64,8 +64,8 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req) int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) { - /* XXX: nlh is right at the start of msg */ - void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); + struct nlmsghdr *nlh = nlmsg_hdr(msg); + void *hdr = genlmsg_data(nlmsg_data(nlh)); if (genlmsg_end(msg, hdr) < 0) goto out; @@ -97,8 +97,8 @@ struct sk_buff 
*ieee802154_nl_new_reply(struct genl_info *info, int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) { - /* XXX: nlh is right at the start of msg */ - void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); + struct nlmsghdr *nlh = nlmsg_hdr(msg); + void *hdr = genlmsg_data(nlmsg_data(nlh)); if (genlmsg_end(msg, hdr) < 0) goto out; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 96bb08abece2..b0bdd8c51e9c 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, struct net_device *dev; struct ieee802154_addr addr; u8 page; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || @@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); +out: dev_put(dev); return ret; } @@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_STATUS] || !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || @@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->assoc_resp) + goto out; addr.addr_type = IEEE802154_ADDR_LONG; nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], @@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb, nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); 
+out: dev_put(dev); return ret; } @@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || @@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->disassoc_req) + goto out; if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { addr.addr_type = IEEE802154_ADDR_LONG; @@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb, ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); +out: dev_put(dev); return ret; } @@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) u8 channel, bcn_ord, sf_ord; u8 page; int pan_coord, blx, coord_realign; - int ret; + int ret = -EOPNOTSUPP; if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || @@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->start_req) + goto out; addr.addr_type = IEEE802154_ADDR_SHORT; addr.short_addr = nla_get_u16( @@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); +out: dev_put(dev); return ret; } @@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) { struct net_device *dev; - int ret; + int ret = -EOPNOTSUPP; u8 type; u32 channels; u8 duration; @@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct 
sk_buff *skb, struct genl_info *info) dev = ieee802154_nl_get_dev(info); if (!dev) return -ENODEV; + if (!ieee802154_mlme_ops(dev)->scan_req) + goto out; type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); @@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); +out: dev_put(dev); return ret; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7944df768454..8603ca827104 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,6 +166,7 @@ config IP_PNP_RARP config NET_IPIP tristate "IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX This is helper module to demultiplex GRE packets on GRE version field criteria. Required by ip_gre and pptp modules. 
+config NET_IP_TUNNEL + tristate + default n + config NET_IPGRE tristate "IP: GRE tunnels over IP" depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -313,6 +319,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL depends on INET_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15ca63ec604e..089cb9f36387 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o +obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94f7661..93824c57b108 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -111,7 +111,6 @@ #include <net/sock.h> #include <net/raw.h> #include <net/icmp.h> -#include <net/ipip.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/net_namespace.h> @@ -1283,9 +1282,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; unsigned int offset = 0; - - if (!(features & NETIF_F_V4_CSUM)) - features &= ~NETIF_F_SG; + bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -1293,6 +1290,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0))) goto out; @@ -1307,6 +1305,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, ihl))) goto out; + tunnel = !!skb->encapsulation; + __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); @@ 
-1326,15 +1326,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = ip_hdr(skb); - if (proto == IPPROTO_UDP) { + if (!tunnel && proto == IPPROTO_UDP) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fea4929f6200..247ec1951c35 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr += dev->addr_len; memcpy(arp_ptr, &src_ip, 4); arp_ptr += 4; - if (target_hw != NULL) - memcpy(arp_ptr, target_hw, dev->addr_len); - else - memset(arp_ptr, 0, dev->addr_len); - arp_ptr += dev->addr_len; + + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + if (target_hw != NULL) + memcpy(arp_ptr, target_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr += dev->addr_len; + } memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb) arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - arp_ptr += dev->addr_len; + switch (dev_type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + arp_ptr += dev->addr_len; + } memcpy(&tip, arp_ptr, 4); /* * Check for bad requests for 127.x.x.x and requests for multicast diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f678507bc829..dfc39d4d48b7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int 
inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -587,13 +587,16 @@ static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; + struct hlist_node *n; int i; now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - rcu_read_lock(); for (i = 0; i < IN4_ADDR_HSIZE; i++) { + bool change_needed = false; + + rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age; @@ -606,16 +609,7 @@ static void check_lifetime(struct work_struct *work) if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_valid_lft) { - struct in_ifaddr **ifap ; - - rtnl_lock(); - for (ifap = &ifa->ifa_dev->ifa_list; - *ifap != NULL; ifap = &ifa->ifa_next) { - if (*ifap == ifa) - inet_del_ifa(ifa->ifa_dev, - ifap, 1); - } - rtnl_unlock(); + change_needed = true; } else if (ifa->ifa_preferred_lft == INFINITY_LIFE_TIME) { continue; @@ -625,10 +619,8 @@ static void check_lifetime(struct work_struct *work) next = ifa->ifa_tstamp + ifa->ifa_valid_lft * HZ; - if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { - ifa->ifa_flags |= IFA_F_DEPRECATED; - rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); - } + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + change_needed = true; } else if (time_before(ifa->ifa_tstamp + ifa->ifa_preferred_lft * HZ, next)) { @@ -636,8 +628,42 @@ static void check_lifetime(struct work_struct *work) ifa->ifa_preferred_lft * HZ; } } + rcu_read_unlock(); + if (!change_needed) + continue; + rtnl_lock(); + hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. 
*/ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap; + + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &(*ifap)->ifa_next) { + if (*ifap == ifa) { + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + break; + } + } + } else if (ifa->ifa_preferred_lft != + INFINITY_LIFE_TIME && + age >= ifa->ifa_preferred_lft && + !(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } + rtnl_unlock(); } - rcu_read_unlock(); next_sec = round_jiffies_up(next); next_sched = next; @@ -775,7 +801,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) return NULL; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; @@ -802,8 +828,12 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) return -EEXIST; - - set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + ifa = ifa_existing; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); + blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } @@ -1499,6 +1529,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ + net->dev_base_seq; hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -1519,6 +1551,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_unlock(); goto done; } + 
nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } cont: idx++; @@ -1730,8 +1763,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -1791,6 +1823,77 @@ errout: return err; } +static int inet_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx, s_idx; + struct net_device *dev; + struct in_device *in_dev; + struct hlist_head *head; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ + net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + goto cont; + + if (inet_netconf_fill_devconf(skb, dev->ifindex, + &in_dev->cnf, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + -1) <= 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } + if (h == NETDEV_HASHENTRIES) { + if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } + if (h == NETDEV_HASHENTRIES + 1) { + if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -2195,6 
+2298,6 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - NULL, NULL); + inet_netconf_dump_devconf, NULL); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3b4f0cd2e63e..4cfe34d4cc96 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - err = -ENOMEM; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb4bb12b3eb4..c7629a209f9d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -604,7 +604,7 @@ errout: return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -626,7 +626,7 @@ errout: return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -957,8 +957,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || - nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) + if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + nlmsg_len(nlh) < sizeof(*frn)) return; skb = skb_clone(skb, GFP_KERNEL); @@ -966,7 +966,7 @@ static void nl_fib_input(struct 
sk_buff *skb) return; nlh = nlmsg_hdr(skb); - frn = (struct fib_result_nl *) NLMSG_DATA(nlh); + frn = (struct fib_result_nl *) nlmsg_data(nlh); tb = fib_get_table(net, frn->tb_id_in); nl_fib_lookup(frn, tb); diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 7a4c710c4cdd..d2d5a99fba09 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -27,11 +27,6 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); -struct gre_base_hdr { - __be16 flags; - __be16 protocol; -}; -#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7d1874be1df3..6acb541c9091 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) { - int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + int err = req->rsk_ops->rtx_syn_ack(parent, req); if (!err) req->num_retrans++; @@ -735,6 +735,7 @@ EXPORT_SYMBOL(inet_csk_destroy_sock); * tcp/dccp_create_openreq_child(). 
*/ void inet_csk_prepare_forced_close(struct sock *sk) + __releases(&sk->sk_lock.slock) { /* sk_clone_lock locked the socket and set refcnt to 2 */ bh_unlock_sock(sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7afa2c3c788f..5f648751fce2 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); @@ -322,7 +324,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s } err = sk_diag_fill(sk, rep, req, - sk_user_ns(NETLINK_CB(in_skb).ssk), + sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -628,7 +630,7 @@ static int inet_csk_diag_dump(struct sock *sk, return 0; return inet_csk_diag_fill(sk, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -803,7 +805,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } err = inet_diag_fill_req(skb, sk, req, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 245ae078a07f..e97d66a1fdde 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -21,7 +21,30 @@ #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <net/sock.h> #include <net/inet_frag.h> +#include <net/inet_ecn.h> + +/* Given the OR values of all fragments, apply RFC 3168 5.3 
requirements + * Value : 0xff if frame should be dropped. + * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field + */ +const u8 ip_frag_ecn_table[16] = { + /* at least one fragment had CE, and others ECT_0 or ECT_1 */ + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + + /* invalid combinations : drop frame */ + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, +}; +EXPORT_SYMBOL(ip_frag_ecn_table); static void inet_frag_secret_rebuild(unsigned long dummy) { @@ -29,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy) unsigned long now = jiffies; int i; + /* Per bucket lock NOT needed here, due to write lock protection */ write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; struct hlist_node *n; - hlist_for_each_entry_safe(q, n, &f->hash[i], list) { + hb = &f->hash[i]; + hlist_for_each_entry_safe(q, n, &hb->chain, list) { unsigned int hval = f->hashfn(q); if (hval != i) { + struct inet_frag_bucket *hb_dest; + hlist_del(&q->list); /* Relink to new hash chain. 
*/ - hlist_add_head(&q->list, &f->hash[hval]); + hb_dest = &f->hash[hval]; + hlist_add_head(&q->list, &hb_dest->chain); } } } @@ -55,9 +85,12 @@ void inet_frags_init(struct inet_frags *f) { int i; - for (i = 0; i < INETFRAGS_HASHSZ; i++) - INIT_HLIST_HEAD(&f->hash[i]); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_bucket *hb = &f->hash[i]; + spin_lock_init(&hb->chain_lock); + INIT_HLIST_HEAD(&hb->chain); + } rwlock_init(&f->lock); f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ @@ -99,10 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net); static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { - write_lock(&f->lock); + struct inet_frag_bucket *hb; + unsigned int hash; + + read_lock(&f->lock); + hash = f->hashfn(fq); + hb = &f->hash[hash]; + + spin_lock(&hb->chain_lock); hlist_del(&fq->list); - fq->net->nqueues--; - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + + read_unlock(&f->lock); inet_frag_lru_del(fq); } @@ -181,6 +222,9 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) q = list_first_entry(&nf->lru_list, struct inet_frag_queue, lru_list); atomic_inc(&q->refcnt); + /* Remove q from list to avoid several CPUs grabbing it */ + list_del_init(&q->lru_list); + spin_unlock(&nf->lru_lock); spin_lock(&q->lock); @@ -201,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) { + struct inet_frag_bucket *hb; struct inet_frag_queue *qp; #ifdef CONFIG_SMP #endif unsigned int hash; - write_lock(&f->lock); + read_lock(&f->lock); /* Protects against hash rebuild */ /* * While we stayed w/o the lock other CPU could update * the rnd seed, so we need to re-calculate the hash * chain. Fortunatelly the qp_in can be used to get one. 
*/ hash = f->hashfn(qp_in); + hb = &f->hash[hash]; + spin_lock(&hb->chain_lock); + #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we - * promoted read lock to write lock. + * released the hash bucket lock. */ - hlist_for_each_entry(qp, &f->hash[hash], list) { + hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); - write_unlock(&f->lock); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); qp_in->last_in |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; @@ -233,9 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &f->hash[hash]); - nf->nqueues++; - write_unlock(&f->lock); + hlist_add_head(&qp->list, &hb->chain); + spin_unlock(&hb->chain_lock); + read_unlock(&f->lock); inet_frag_lru_add(nf, qp); return qp; } @@ -276,17 +325,40 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock) { + struct inet_frag_bucket *hb; struct inet_frag_queue *q; + int depth = 0; + + hb = &f->hash[hash]; - hlist_for_each_entry(q, &f->hash[hash], list) { + spin_lock(&hb->chain_lock); + hlist_for_each_entry(q, &hb->chain, list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); return q; } + depth++; } + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - return inet_frag_create(nf, f, key); + if (depth <= INETFRAGS_MAXDEPTH) + return inet_frag_create(nf, f, key); + else + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); + +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix) +{ + static const char msg[] = "inet_frag_find: Fragment hash bucket" + " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) + ". 
Dropping fragment.\n"; + + if (PTR_ERR(q) == -ENOBUFS) + LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); +} +EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cc280a3f4f96..1975f52933c5 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/if_vlan.h> #include <linux/inet_lro.h> +#include <net/checksum.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); @@ -114,11 +115,9 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) *(p+2) = lro_desc->tcp_rcv_tsecr; } + csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len)); iph->tot_len = htons(lro_desc->ip_tot_len); - iph->check = 0; - iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); - tcph->check = 0; tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b6d30acb600c..b66910aaef4d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -79,40 +79,11 @@ struct ipq { struct inet_peer *peer; }; -/* RFC 3168 support : - * We want to check ECN values of all fragments, do detect invalid combinations. - * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. - */ -#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ -#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ -#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ -#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ - static inline u8 ip4_frag_ecn(u8 tos) { return 1 << (tos & INET_ECN_MASK); } -/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements - * Value : 0xff if frame should be dropped. 
- * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field - */ -static const u8 ip4_frag_ecn_table[16] = { - /* at least one fragment had CE, and others ECT_0 or ECT_1 */ - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - - /* invalid combinations : drop frame */ - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, -}; - static struct inet_frags ip4_frags; int ip_frag_nqueues(struct net *net) @@ -248,8 +219,7 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb dst is stale, drop it, and perform route lookup again */ - skb_dst_drop(head); + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -292,14 +262,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (q == NULL) - goto out_nomem; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct ipq, q); - -out_nomem: - LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); - return NULL; } /* Is the fragment too far ahead to be part of ipq? 
*/ @@ -526,9 +493,16 @@ found: qp->q.max_size = skb->len + ihl; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + qp->q.meat == qp->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + skb_dst_drop(skb); inet_frag_lru_move(&qp->q); return -EINPROGRESS; @@ -554,7 +528,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ipq_kill(qp); - ecn = ip4_frag_ecn_table[qp->ecn]; + ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { err = -EINVAL; goto out_fail; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0ef0e674ec5..987a4e5e07e2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -37,7 +37,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -108,15 +108,6 @@ fatal route to network, even if it were you who configured fatal static route: you are innocent. :-) - - - 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain - practically identical code. It would be good to glue them - together, but it is not very evident, how to make them modular. - sit is integral part of IPv6, ipip and gre are naturally modular. - We could extract common parts (hash table, ioctl etc) - to a separate module (ip_tunnel.c). - Alexey Kuznetsov. 
*/ @@ -126,400 +117,137 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); -static void ipgre_tunnel_setup(struct net_device *dev); -static int ipgre_tunnel_bind_dev(struct net_device *dev); - -/* Fallback tunnel: no source, no destination, no key, no options */ - -#define HASH_SIZE 16 static int ipgre_net_id __read_mostly; -struct ipgre_net { - struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; - - struct net_device *fb_tunnel_dev; -}; - -/* Tunnel hash table */ - -/* - 4 hash tables: - - 3: (remote,local) - 2: (remote,*) - 1: (*,local) - 0: (*,*) +static int gre_tap_net_id __read_mostly; - We require exact key match i.e. if a key is present in packet - it will match only tunnel with the same key; if it is not present, - it will match only keyless tunnel. - - All keysless packets, if not matched configured keyless tunnels - will match fallback tunnel. - */ +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); -#define tunnels_r_l tunnels[3] -#define tunnels_r tunnels[2] -#define tunnels_l tunnels[1] -#define tunnels_wc tunnels[0] + if (!csum) + break; + /* Fall through. 
*/ -static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; } - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; + return csum; } -/* Does key in tunnel parameters match packet */ -static bool ipgre_key_match(const struct ip_tunnel_parm *p, - __be16 flags, __be32 key) +static int ip_gre_calc_hlen(__be16 o_flags) { - if (p->i_flags & GRE_KEY) { - if (flags & GRE_KEY) - return key == p->i_key; - else - return false; /* key expected, none present */ - } else - return !(flags & GRE_KEY); -} + int addend = 4; -/* Given src, dst and key, find appropriate for input tunnel. 
*/ + if (o_flags&TUNNEL_CSUM) + addend += 4; + if (o_flags&TUNNEL_KEY) + addend += 4; + if (o_flags&TUNNEL_SEQ) + addend += 4; + return addend; +} -static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, - __be32 remote, __be32 local, - __be16 flags, __be32 key, - __be16 gre_proto) +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *hdr_len) { - struct net *net = dev_net(dev); - int link = dev->ifindex; - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(key); - struct ip_tunnel *t, *cand = NULL; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? - ARPHRD_ETHER : ARPHRD_IPGRE; - int score, cand_score = 4; - - for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { - if (local != t->parms.iph.saddr || - remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; - for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { - if (remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { - if ((local != 
t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { - if (t->parms.i_key != key || - !(t->dev->flags & IFF_UP)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + *hdr_len = ip_gre_calc_hlen(tpi->flags); - if (cand != NULL) - return cand; + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; - dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) - return netdev_priv(dev); + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - return NULL; -} + tpi->proto = greh->protocol; -static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - unsigned int h = HASH(key); - int prio = 0; - - if (local) - prio |= 1; - if (remote && !ipv4_is_multicast(remote)) { - prio |= 2; - h ^= HASH(remote); + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; } - return &ign->tunnels[prio][h]; -} - -static inline struct ip_tunnel 
__rcu **ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel *t) -{ - return __ipgre_bucket(ign, &t->parms); -} - -static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t); + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; -static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipgre_bucket(ign, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); - break; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + *hdr_len += 4; + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; } } -} - -static struct ip_tunnel *ipgre_tunnel_find(struct net *net, - struct ip_tunnel_parm *parms, - int type) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - int link = parms->link; - struct ip_tunnel *t; - struct ip_tunnel __rcu **tp; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - for (tp = __ipgre_bucket(ign, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && - key == t->parms.i_key && - link == t->parms.link && - type == t->dev->type) - break; - - return t; -} - -static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - struct ip_tunnel *t, *nt; - struct net_device *dev; - char name[IFNAMSIZ]; - 
struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); - if (t || !create) - return t; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "gre%d"); - - dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); - if (!dev) - return NULL; - - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - dev->rtnl_link_ops = &ipgre_link_ops; - - dev->mtu = ipgre_tunnel_bind_dev(dev); - if (register_netdevice(dev) < 0) - goto failed_free; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); - return nt; - -failed_free: - free_netdev(dev); - return NULL; -} - -static void ipgre_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - ipgre_tunnel_unlink(ign, netdev_priv(dev)); - dev_put(dev); + return 0; } - static void ipgre_err(struct sk_buff *skb, u32 info) { -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. + /* All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. - Moreover, Cisco "wise men" put GRE key to the third word - in GRE header. It makes impossible maintaining even soft state for keyed - GRE tunnels with enabled checksum. Tell them "thank you". - - Well, I wonder, rfc1812 was written by Cisco employee, - what the hell these idiots break standards established - by themselves??? - */ + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. It makes impossible maintaining even soft + state for keyed GRE tunnels with enabled checksum. Tell + them "thank you". 
+ Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standards established + by themselves??? + */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph = (const struct iphdr *)skb->data; - __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); - int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - __be16 flags; - __be32 key = 0; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { + if (!csum_err) /* ignore csum errors. */ return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) - return; - - if (flags & GRE_KEY) - key = *(((__be32 *)p) + (grehlen / 4) - 1); - switch (type) { default: case ICMP_PARAMETERPROB: @@ -548,8 +276,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags, key, p[1]); + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); + + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->daddr, iph->saddr, tpi.key); if (t == NULL) return; @@ -578,158 +311,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; } -static inline u8 -ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) -{ - u8 inner = 0; - if (skb->protocol == htons(ETH_P_IP)) - inner = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - return INET_ECN_encapsulate(tos, inner); -} - static int ipgre_rcv(struct sk_buff *skb) { + struct 
net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip_tunnel *tunnel; - int offset = 4; - __be16 gre_proto; - int err; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - if (!pskb_may_pull(skb, 16)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) goto drop; - iph = ip_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. - */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop; - - if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); - gre_proto = *(__be16 *)(h + 2); + iph = ip_hdr(skb); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->saddr, iph->daddr, tpi.key); - tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, flags, key, - gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; - - secpath_reset(skb); - - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. 
- * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } - - skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); - skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_NET_IPGRE_BROADCAST - if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; - tunnel->dev->stats.multicast++; - skb->pkt_type = PACKET_BROADCAST; - } -#endif - - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->dev->stats.rx_crc_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->dev->stats.rx_fifo_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - tunnel->i_seqno = seqno + 1; - } - - /* Warning: All skb pointers will be invalidated! 
*/ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - gro_cells_receive(&tunnel->gro_cells, skb); + ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - drop: kfree_skb(skb); return 0; @@ -746,7 +354,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; return skb; } else if (skb->ip_summed == CHECKSUM_PARTIAL && - tunnel->parms.o_flags&GRE_CSUM) { + tunnel->parms.o_flags&TUNNEL_CSUM) { err = skb_checksum_help(skb); if (unlikely(err)) goto error; @@ -760,497 +368,157 @@ error: return ERR_PTR(err); } -static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *gre_build_header(struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + int hdr_len) { - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph; - const struct iphdr *tiph; - struct flowi4 fl4; - u8 tos; - __be16 df; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ - 
unsigned int max_headroom; /* The extra header space needed */ - int gre_hlen; - __be32 dst; - int mtu; - u8 ttl; - int err; - int pkt_len; - - skb = handle_offloads(tunnel, skb); - if (IS_ERR(skb)) { - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } + struct gre_base_hdr *greh; - if (!skb->encapsulation) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb_push(skb, hdr_len); - old_iph = ip_hdr(skb); + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; - if (dev->type == ARPHRD_ETHER) - IPCB(skb)->flags = 0; + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (dev->header_ops && dev->type == ARPHRD_IPGRE) { - gre_hlen = 0; - if (skb->protocol == htons(ETH_P_IP)) - tiph = (const struct iphdr *)skb->data; - else - tiph = &tunnel->parms.iph; - } else { - gre_hlen = tunnel->hlen; - tiph = &tunnel->parms.iph; - } - - if ((dst = tiph->daddr) == 0) { - /* NBMA tunnel */ - - if (skb_dst(skb) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; } - - if (skb->protocol == htons(ETH_P_IP)) { - rt = skb_rtable(skb); - dst = rt_nexthop(rt, old_iph->daddr); + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - const struct in6_addr *addr6; - struct neighbour *neigh; - bool do_tx_error_icmp; - int addr_type; - - neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) - goto tx_error; - - addr6 = (const struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); - - if (addr_type == IPV6_ADDR_ANY) { - addr6 = &ipv6_hdr(skb)->daddr; - addr_type = ipv6_addr_type(addr6); - } - - if ((addr_type & IPV6_ADDR_COMPATv4) == 0) - do_tx_error_icmp = true; - else { - do_tx_error_icmp = false; - dst = addr6->s6_addr32[3]; - } - 
neigh_release(neigh); - if (do_tx_error_icmp) - goto tx_error_icmp; + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *(__sum16 *)ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); } -#endif - else - goto tx_error; } - ttl = tiph->ttl; - tos = tiph->tos; - if (tos & 0x1) { - tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) - tos = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - } + return skb; +} - rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, - tunnel->parms.o_key, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } - tdev = rt->dst.dev; +static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params, + __be16 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct tnl_ptk_info tpi; - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; - goto tx_error; + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - df = tiph->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (old_iph->frag_off&htons(IP_DF)); + tpi.flags = tunnel->parms.o_flags; + tpi.proto = proto; + tpi.key = tunnel->parms.o_key; + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + tpi.seq = htonl(tunnel->o_seqno); - if (!skb_is_gso(skb) && - (old_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + /* Push GRE header. 
*/ + skb = gre_build_header(skb, &tpi, tunnel->hlen); + if (unlikely(!skb)) { + dev->stats.tx_dropped++; + return; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - if (!skb_is_gso(skb) && - mtu >= IPV6_MIN_MTU && - mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } - } -#endif + ip_tunnel_xmit(skb, dev, tnl_params); +} - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; +static netdev_tx_t ipgre_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tnl_params; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; - - if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - /* Warning : tiph value might point to freed memory */ - } + if (dev->header_ops) { + /* Need space for new headers */ + if (skb_cow_head(skb, dev->needed_headroom - + (tunnel->hlen + sizeof(struct iphdr)))); + goto free_skb; - skb_push(skb, 
gre_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*iph)); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ + tnl_params = (const struct iphdr *)skb->data; - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = IPPROTO_GRE; - iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - - tunnel_ip_select_ident(skb, old_iph, &rt->dst); - - if (ttl == 0) { - if (skb->protocol == htons(ETH_P_IP)) - iph->ttl = old_iph->ttl; -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; -#endif - else - iph->ttl = ip4_dst_hoplimit(&rt->dst); - } - - ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; - ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; - - if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); + /* Pull skb since ip_tunnel_xmit() needs skb->data pointing + * to gre header. + */ + skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + } else { + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (tunnel->parms.o_flags&GRE_SEQ) { - ++tunnel->o_seqno; - *ptr = htonl(tunnel->o_seqno); - ptr--; - } - if (tunnel->parms.o_flags&GRE_KEY) { - *ptr = tunnel->parms.o_key; - ptr--; - } - /* Skip GRE checksum if skb is getting offloaded. 
*/ - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && - (tunnel->parms.o_flags&GRE_CSUM)) { - int offset = skb_transport_offset(skb); - - if (skb_has_shared_frag(skb)) { - err = __skb_linearize(skb); - if (err) - goto tx_error; - } - - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, - skb->len - offset, - 0)); - } + tnl_params = &tunnel->parms.iph; } - nf_reset(skb); + __gre_xmit(skb, dev, tnl_params, skb->protocol); - pkt_len = skb->len - skb_transport_offset(skb); - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } return NETDEV_TX_OK; -#if IS_ENABLED(CONFIG_IPV6) -tx_error_icmp: - dst_link_failure(skb); -#endif -tx_error: - dev->stats.tx_errors++; +free_skb: dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; return NETDEV_TX_OK; } -static int ipgre_tunnel_bind_dev(struct net_device *dev) +static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, + struct net_device *dev) { - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - int hlen = LL_MAX_HEADER; - int mtu = ETH_DATA_LEN; - int addend = sizeof(struct iphdr) + 4; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - /* Guess output device to choose reasonable mtu and needed_headroom */ - - if (iph->daddr) { - struct flowi4 fl4; - struct rtable *rt; - - rt = ip_route_output_gre(dev_net(dev), &fl4, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - - if (dev->type != ARPHRD_ETHER) - dev->flags |= IFF_POINTOPOINT; - } + struct ip_tunnel *tunnel = netdev_priv(dev); - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + 
goto out; - if (tdev) { - hlen = tdev->hard_header_len + tdev->needed_headroom; - mtu = tdev->mtu; - } - dev->iflink = tunnel->parms.link; - - /* Precalculate GRE options length */ - if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { - if (tunnel->parms.o_flags&GRE_CSUM) - addend += 4; - if (tunnel->parms.o_flags&GRE_KEY) - addend += 4; - if (tunnel->parms.o_flags&GRE_SEQ) - addend += 4; - } - dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len + addend; + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (mtu < 68) - mtu = 68; + __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); - tunnel->hlen = addend; - /* TCP offload with GRE SEQ is not supported. */ - if (!(tunnel->parms.o_flags & GRE_SEQ)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + return NETDEV_TX_OK; - return mtu; +free_skb: + dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; + return NETDEV_TX_OK; } -static int -ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipgre_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 
|| (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - if (!(p.i_flags&GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags&GRE_KEY)) - p.o_key = 0; - - t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - unsigned int nflags = 0; - - t = netdev_priv(dev); - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { - err = -EINVAL; - break; - } - ipgre_tunnel_unlink(ign, t); - synchronize_net(); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - t->parms.o_key = p.o_key; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - } - - if (t) { - err = 0; - if (cmd == SIOCCHGTUNNEL) { - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - if (t->parms.link != p.link) { - t->parms.link = p.link; - dev->mtu = ipgre_tunnel_bind_dev(dev); - netdev_state_change(dev); - } - } - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); - break; - - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ign->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t == netdev_priv(ign->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - default: - err = -EINVAL; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { + return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); + p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -done: - return err; -} + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; -static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) - return -EINVAL; - dev->mtu = new_mtu; + p.i_flags = tnl_flags_to_gre_flags(p.i_flags); + p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; return 0; } @@ -1280,25 +548,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) ... ftp fec0:6666:6666::193.233.7.65 ... 
- */ - static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ip_tunnel *t = netdev_priv(dev); - struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(iph+1); + struct iphdr *iph; + struct gre_base_hdr *greh; - memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); - p[0] = t->parms.o_flags; - p[1] = htons(type); + iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); + greh = (struct gre_base_hdr *)(iph+1); + greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); + greh->protocol = htons(type); - /* - * Set the source hardware address. - */ + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + /* Set the source hardware address. */ if (saddr) memcpy(&iph->saddr, saddr, 4); if (daddr) @@ -1306,7 +572,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (iph->daddr) return t->hlen; - return -t->hlen; + return -(t->hlen + sizeof(*iph)); } static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) @@ -1360,31 +626,21 @@ static int ipgre_close(struct net_device *dev) } return 0; } - #endif static const struct net_device_ops ipgre_netdev_ops = { .ndo_init = ipgre_tunnel_init, - .ndo_uninit = ipgre_tunnel_uninit, + .ndo_uninit = ip_tunnel_uninit, #ifdef CONFIG_NET_IPGRE_BROADCAST .ndo_open = ipgre_open, .ndo_stop = ipgre_close, #endif - .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_start_xmit = ipgre_xmit, .ndo_do_ioctl = ipgre_tunnel_ioctl, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipgre_dev_free(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - - gro_cells_destroy(&tunnel->gro_cells); - free_percpu(dev->tstats); - free_netdev(dev); -} - #define GRE_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -1393,35 
+649,48 @@ static void ipgre_dev_free(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; - dev->destructor = ipgre_dev_free; + ip_tunnel_setup(dev, ipgre_net_id); +} - dev->type = ARPHRD_IPGRE; - dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +static void __gre_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); + tunnel->parms.iph.protocol = IPPROTO_GRE; + + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->flags = IFF_NOARP; - dev->iflink = 0; - dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - dev->features |= GRE_FEATURES; + dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; dev->hw_features |= GRE_FEATURES; + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported. 
*/ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } } static int ipgre_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - struct iphdr *iph; - int err; + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; + __gre_tunnel_init(dev); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + memcpy(dev->dev_addr, &iph->saddr, 4); + memcpy(dev->broadcast, &iph->daddr, 4); - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + dev->type = ARPHRD_IPGRE; + dev->flags = IFF_NOARP; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->addr_len = 4; if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -1435,106 +704,30 @@ static int ipgre_tunnel_init(struct net_device *dev) } else dev->header_ops = &ipgre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - err = gro_cells_init(&tunnel->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; - } - - return 0; -} - -static void ipgre_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_GRE; - iph->ihl = 5; - tunnel->hlen = sizeof(struct iphdr) + 4; - - dev_hold(dev); + return ip_tunnel_init(dev); } - static const struct gre_protocol ipgre_protocol = { .handler = ipgre_rcv, .err_handler = ipgre_err, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) -{ - int prio; - - for (prio = 0; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = rtnl_dereference(ign->tunnels[prio][h]); - - 
while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipgre_init_net(struct net *net) { - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int err; - - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", - ipgre_tunnel_setup); - if (!ign->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ign->fb_tunnel_dev, net); - - ipgre_fb_tunnel_init(ign->fb_tunnel_dev); - ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; - - if ((err = register_netdev(ign->fb_tunnel_dev))) - goto err_reg_dev; - - rcu_assign_pointer(ign->tunnels_wc[0], - netdev_priv(ign->fb_tunnel_dev)); - return 0; - -err_reg_dev: - ipgre_dev_free(ign->fb_tunnel_dev); -err_alloc_dev: - return err; + return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } static void __net_exit ipgre_exit_net(struct net *net) { - struct ipgre_net *ign; - LIST_HEAD(list); - - ign = net_generic(net, ipgre_net_id); - rtnl_lock(); - ipgre_destroy_tunnels(ign, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, .id = &ipgre_net_id, - .size = sizeof(struct ipgre_net), + .size = sizeof(struct ip_tunnel_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1579,8 +772,8 @@ out: return ipgre_tunnel_validate(tb, data); } -static void ipgre_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) +static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], + struct ip_tunnel_parm *parms) { memset(parms, 0, sizeof(*parms)); @@ -1593,10 +786,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + 
parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1620,148 +813,46 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->iph.frag_off = htons(IP_DF); } -static int ipgre_tap_init(struct net_device *dev) +static int gre_tap_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + __gre_tunnel_init(dev); - ipgre_tunnel_bind_dev(dev); - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; + return ip_tunnel_init(dev); } -static const struct net_device_ops ipgre_tap_netdev_ops = { - .ndo_init = ipgre_tap_init, - .ndo_uninit = ipgre_tunnel_uninit, - .ndo_start_xmit = ipgre_tunnel_xmit, +static const struct net_device_ops gre_tap_netdev_ops = { + .ndo_init = gre_tap_init, + .ndo_uninit = ip_tunnel_uninit, + .ndo_start_xmit = gre_tap_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipgre_tap_setup(struct net_device *dev) { - ether_setup(dev); - - dev->netdev_ops = &ipgre_tap_netdev_ops; - dev->destructor = ipgre_dev_free; - - dev->iflink = 0; - dev->features |= NETIF_F_NETNS_LOCAL; - - dev->features |= GRE_FEATURES; - dev->hw_features |= GRE_FEATURES; + dev->netdev_ops = &gre_tap_netdev_ops; + ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr 
*tb[], struct nlattr *data[]) { - struct ip_tunnel *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int mtu; - int err; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &nt->parms); - - if (ipgre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; - - if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) - eth_hw_addr_random(dev); - - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - err = register_netdevice(dev); - if (err) - goto out; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); + struct ip_tunnel_parm p; -out: - return err; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); struct ip_tunnel_parm p; - int mtu; - - if (dev == ign->fb_tunnel_dev) - return -EINVAL; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &p); - - t = ipgre_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else { - t = nt; - - if (dev->type != ARPHRD_ETHER) { - unsigned int nflags = 0; - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags ^ nflags) & - (IFF_POINTOPOINT | IFF_BROADCAST)) - return -EINVAL; - } - ipgre_tunnel_unlink(ign, t); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - } - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - - t->parms.o_key = p.o_key; - t->parms.iph.ttl = p.iph.ttl; - 
t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - - if (t->parms.link != p.link) { - t->parms.link = p.link; - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - netdev_state_change(dev); - } - - return 0; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipgre_get_size(const struct net_device *dev) @@ -1796,8 +887,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || @@ -1835,6 +926,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .validate = ipgre_tunnel_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; @@ -1848,13 +940,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .validate = ipgre_tap_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; -/* - * And now the modules code and kernel interface. 
- */ +static int __net_init ipgre_tap_init_net(struct net *net) +{ + return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL); +} + +static void __net_exit ipgre_tap_exit_net(struct net *net) +{ + struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); + ip_tunnel_delete_net(itn); +} + +static struct pernet_operations ipgre_tap_net_ops = { + .init = ipgre_tap_init_net, + .exit = ipgre_tap_exit_net, + .id = &gre_tap_net_id, + .size = sizeof(struct ip_tunnel_net), +}; static int __init ipgre_init(void) { @@ -1866,6 +973,10 @@ static int __init ipgre_init(void) if (err < 0) return err; + err = register_pernet_device(&ipgre_tap_net_ops); + if (err < 0) + goto pnet_tap_faied; + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); @@ -1880,16 +991,17 @@ static int __init ipgre_init(void) if (err < 0) goto tap_ops_failed; -out: - return err; + return 0; tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: + unregister_pernet_device(&ipgre_tap_net_ops); +pnet_tap_faied: unregister_pernet_device(&ipgre_net_ops); - goto out; + return err; } static void __exit ipgre_fini(void) @@ -1898,6 +1010,7 @@ static void __exit ipgre_fini(void) rtnl_link_unregister(&ipgre_link_ops); if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) pr_info("%s: can't remove protocol\n", __func__); + unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } @@ -1907,3 +1020,4 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); MODULE_ALIAS_NETDEV("gre0"); +MODULE_ALIAS_NETDEV("gretap0"); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 310a3647c83d..ec7264514a82 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -370,7 +370,6 @@ int ip_options_compile(struct net *net, } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: - opt->ts 
= optptr - iph; if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; @@ -381,7 +380,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; if (rt) { spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); @@ -396,7 +394,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); @@ -429,12 +426,12 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 3; goto error; } - opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } + opt->ts = optptr - iph; break; case IPOPT_RA: if (optlen < 4) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5e12dca7b3dd..147abf5275aa 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) to->nf_trace = from->nf_trace; #endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c new file mode 100644 index 000000000000..e4147ec1665a --- /dev/null +++ b/net/ipv4/ip_tunnel.c @@ -0,0 +1,1035 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/rculist.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ip_tunnels.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, + __be32 key, __be32 remote) +{ + return hash_32((__force u32)key ^ (__force u32)remote, + IP_TNL_HASH_BITS); +} + +/* Often modified stats are per cpu, other are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = 
tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + tot->collisions = dev->stats.collisions; + + return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); + +static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, + __be16 flags, __be32 key) +{ + if (p->i_flags & TUNNEL_KEY) { + if (flags & TUNNEL_KEY) + return key == p->i_key; + else + /* key expected, none present */ + return false; + } else + return !(flags & TUNNEL_KEY); +} + +/* Fallback tunnel: no source, no destination, no key, no options + + Tunnel hash table: + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + Given src, dst and key, find appropriate for input tunnel. 
+*/ +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key) +{ + unsigned int hash; + struct ip_tunnel *t, *cand = NULL; + struct hlist_head *head; + + hash = ip_tunnel_hash(itn, key, remote); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local != t->parms.iph.saddr || + remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else + cand = t; + } + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + hash = ip_tunnel_hash(itn, key, 0); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if ((local != t->parms.iph.saddr && + (local != t->parms.iph.daddr || + !ipv4_is_multicast(local))) || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + if (flags & TUNNEL_NO_KEY) + goto skip_key_lookup; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + +skip_key_lookup: + if (cand) + return cand; + + if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) + return netdev_priv(itn->fb_tunnel_dev); + + + return NULL; +} +EXPORT_SYMBOL_GPL(ip_tunnel_lookup); + +static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + unsigned int h; + __be32 remote; + + if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) + remote = parms->iph.daddr; + else + remote = 0; + 
+ h = ip_tunnel_hash(itn, parms->i_key, remote); + return &itn->tunnels[h]; +} + +static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) +{ + struct hlist_head *head = ip_bucket(itn, &t->parms); + + hlist_add_head_rcu(&t->hash_node, head); +} + +static void ip_tunnel_del(struct ip_tunnel *t) +{ + hlist_del_init_rcu(&t->hash_node); +} + +static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms, + int type) +{ + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip_tunnel *t = NULL; + struct hlist_head *head = ip_bucket(itn, parms); + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + } + return t; +} + +static struct net_device *__ip_tunnel_create(struct net *net, + const struct rtnl_link_ops *ops, + struct ip_tunnel_parm *parms) +{ + int err; + struct ip_tunnel *tunnel; + struct net_device *dev; + char name[IFNAMSIZ]; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else { + if (strlen(ops->kind) > (IFNAMSIZ - 3)) { + err = -E2BIG; + goto failed; + } + strlcpy(name, ops->kind, IFNAMSIZ); + strncat(name, "%d", 2); + } + + ASSERT_RTNL(); + dev = alloc_netdev(ops->priv_size, name, ops->setup); + if (!dev) { + err = -ENOMEM; + goto failed; + } + dev_net_set(dev, net); + + dev->rtnl_link_ops = ops; + + tunnel = netdev_priv(dev); + tunnel->parms = *parms; + + err = register_netdevice(dev); + if (err) + goto failed_free; + + return dev; + +failed_free: + free_netdev(dev); +failed: + return ERR_PTR(err); +} + +static inline struct rtable *ip_route_output_tunnel(struct net *net, + struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = oif; + fl4->daddr = daddr; + 
fl4->saddr = saddr; + fl4->flowi4_tos = tos; + fl4->flowi4_proto = proto; + fl4->fl4_gre_key = key; + return ip_route_output_key(net, fl4); +} + +static int ip_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = ETH_DATA_LEN; + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + iph = &tunnel->parms.iph; + + /* Guess output device to choose reasonable mtu and needed_headroom */ + if (iph->daddr) { + struct flowi4 fl4; + struct rtable *rt; + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); + if (!IS_ERR(rt)) { + tdev = rt->dst.dev; + ip_rt_put(rt); + } + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len + tdev->needed_headroom; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + dev->needed_headroom = t_hlen + hlen; + mtu -= (dev->hard_header_len + t_hlen); + + if (mtu < 68) + mtu = 68; + + return mtu; +} + +static struct ip_tunnel *ip_tunnel_create(struct net *net, + struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + struct ip_tunnel *nt, *fbt; + struct net_device *dev; + + BUG_ON(!itn->fb_tunnel_dev); + fbt = netdev_priv(itn->fb_tunnel_dev); + dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + if (IS_ERR(dev)) + return NULL; + + dev->mtu = ip_tunnel_bind_dev(dev); + + nt = netdev_priv(dev); + ip_tunnel_add(itn, nt); + return nt; +} + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error) +{ + struct pcpu_tstats *tstats; + const struct iphdr *iph = ip_hdr(skb); + int err; + + secpath_reset(skb); + + skb->protocol = tpi->proto; + + 
skb->mac_header = skb->network_header; + __pskb_pull(skb, tunnel->hlen); + skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(iph->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + tunnel->dev->stats.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || + ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + if (tunnel->parms.i_flags&TUNNEL_SEQ) { + if (!(tpi->flags&TUNNEL_SEQ) || + (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_rcv); + +void 
ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *inner_iph; + struct iphdr *iph; + struct flowi4 fl4; + u8 tos, ttl; + __be16 df; + struct rtable *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + unsigned int max_headroom; /* The extra header space needed */ + __be32 dst; + int mtu; + + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + dst = tnl_params->daddr; + if (dst == 0) { + /* NBMA tunnel */ + + if (skb_dst(skb) == NULL) { + dev->stats.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == htons(ETH_P_IP)) { + rt = skb_rtable(skb); + dst = rt_nexthop(rt, inner_iph->daddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + const struct in6_addr *addr6; + struct neighbour *neigh; + bool do_tx_error_icmp; + int addr_type; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (neigh == NULL) + goto tx_error; + + addr6 = (const struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &ipv6_hdr(skb)->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + do_tx_error_icmp = true; + else { + do_tx_error_icmp = false; + dst = addr6->s6_addr32[3]; + } + neigh_release(neigh); + if (do_tx_error_icmp) + goto tx_error_icmp; + } +#endif + else + goto tx_error; + } + + tos = tnl_params->tos; + if (tos & 0x1) { + tos &= ~0x1; + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + dst, tnl_params->saddr, + tunnel->parms.o_key, + RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; 
+ } + tdev = rt->dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + + df = tnl_params->frag_off; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr); + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + df |= (inner_iph->frag_off&htons(IP_DF)); + + if (!skb_is_gso(skb) && + (inner_iph->frag_off&htons(IP_DF)) && + mtu < ntohs(inner_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < skb->len) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + ttl = tnl_params->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; +#endif + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + + rt->dst.header_len; + if (max_headroom > dev->needed_headroom) { + dev->needed_headroom = max_headroom; + if (skb_cow_head(skb, dev->needed_headroom)) { + 
dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; + } + } + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. */ + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = tnl_params->protocol; + iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + + iptunnel_xmit(skb, dev); + return; + +#if IS_ENABLED(CONFIG_IPV6) +tx_error_icmp: + dst_link_failure(skb); +#endif +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_tunnel_xmit); + +static void ip_tunnel_update(struct ip_tunnel_net *itn, + struct ip_tunnel *t, + struct net_device *dev, + struct ip_tunnel_parm *p, + bool set_mtu) +{ + ip_tunnel_del(t); + t->parms.iph.saddr = p->iph.saddr; + t->parms.iph.daddr = p->iph.daddr; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->iph.saddr, 4); + memcpy(dev->broadcast, &p->iph.daddr, 4); + } + ip_tunnel_add(itn, t); + + t->parms.iph.ttl = p->iph.ttl; + t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; + + if (t->parms.link != p->link) { + int mtu; + + t->parms.link = p->link; + mtu = ip_tunnel_bind_dev(dev); + if (set_mtu) + dev->mtu = mtu; + } + netdev_state_change(dev); +} + +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + int err = 0; + struct ip_tunnel *t; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + BUG_ON(!itn->fb_tunnel_dev); + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if 
(dev == itn->fb_tunnel_dev) + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + if (!(p->i_flags&TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags&TUNNEL_KEY)) + p->o_key = 0; + + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + + if (!t && (cmd == SIOCADDTUNNEL)) + t = ip_tunnel_create(net, itn, p); + + if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { + err = -EINVAL; + break; + } + + t = netdev_priv(dev); + } + } + + if (t) { + err = 0; + ip_tunnel_update(itn, t, dev, p, true); + } else + err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + + if (dev == itn->fb_tunnel_dev) { + err = -ENOENT; + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(itn->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); + +static void ip_tunnel_dev_free(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + gro_cells_destroy(&tunnel->gro_cells); + free_percpu(dev->tstats); + free_netdev(dev); +} + +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (itn->fb_tunnel_dev != dev) { + ip_tunnel_del(netdev_priv(dev)); + unregister_netdevice_queue(dev, head); + } +} +EXPORT_SYMBOL_GPL(ip_tunnel_dellink); + +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname) +{ + struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); + struct ip_tunnel_parm parms; + + itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); + if (!itn->tunnels) + return -ENOMEM; + + if (!ops) { + itn->fb_tunnel_dev = NULL; + return 0; + } + memset(&parms, 0, sizeof(parms)); + if (devname) + strlcpy(parms.name, devname, IFNAMSIZ); + + rtnl_lock(); + itn->fb_tunnel_dev = 
__ip_tunnel_create(net, ops, &parms); + rtnl_unlock(); + if (IS_ERR(itn->fb_tunnel_dev)) { + kfree(itn->tunnels); + return PTR_ERR(itn->fb_tunnel_dev); + } + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init_net); + +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +{ + int h; + + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { + struct ip_tunnel *t; + struct hlist_node *n; + struct hlist_head *thead = &itn->tunnels[h]; + + hlist_for_each_entry_safe(t, n, thead, hash_node) + unregister_netdevice_queue(t->dev, head); + } + if (itn->fb_tunnel_dev) + unregister_netdevice_queue(itn->fb_tunnel_dev, head); +} + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +{ + LIST_HEAD(list); + + rtnl_lock(); + ip_tunnel_destroy(itn, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); + kfree(itn->tunnels); +} +EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); + +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ip_tunnel_net *itn; + int mtu; + int err; + + nt = netdev_priv(dev); + itn = net_generic(net, nt->ip_tnl_net_id); + + if (ip_tunnel_find(itn, p, dev->type)) + return -EEXIST; + + nt->parms = *p; + err = register_netdevice(dev); + if (err) + goto out; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + mtu = ip_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + ip_tunnel_add(itn, nt); + +out: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_newlink); + +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (dev == itn->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + + t = ip_tunnel_find(itn, p, dev->type); + + if (t) { + if (t->dev != 
dev) + return -EEXIST; + } else { + t = nt; + + if (dev->type != ARPHRD_ETHER) { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + } + } + + ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_changelink); + +int ip_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; + int err; + + dev->destructor = ip_tunnel_dev_free; + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&tunnel->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + iph->version = 4; + iph->ihl = 5; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init); + +void ip_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + /* fb_tunnel_dev will be unregisted in net-exit call. 
*/ + if (itn->fb_tunnel_dev != dev) + ip_tunnel_del(netdev_priv(dev)); +} +EXPORT_SYMBOL_GPL(ip_tunnel_uninit); + +/* Do least required initialization, rest of init is done in tunnel_init call */ +void ip_tunnel_setup(struct net_device *dev, int net_id) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + tunnel->ip_tnl_net_id = net_id; +} +EXPORT_SYMBOL_GPL(ip_tunnel_setup); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c3a4233c0ac2..9d2bdb2c1d3f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -38,7 +38,7 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> @@ -82,44 +82,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev); } while (0) -static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_errors = dev->stats.rx_errors; - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = 
dev->stats.tx_errors; - - return tot; -} - static struct ip_tunnel *vti_tunnel_lookup(struct net *net, __be32 remote, __be32 local) { @@ -597,7 +559,7 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_start_xmit = vti_tunnel_xmit, .ndo_do_ioctl = vti_tunnel_ioctl, .ndo_change_mtu = vti_tunnel_change_mtu, - .ndo_get_stats64 = vti_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void vti_dev_free(struct net_device *dev) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index f01d1b1aff7f..59cb8c769056 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + t->props.extra_flags = x->props.extra_flags; memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 98cbc6877019..efa1138fa523 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -206,7 +206,7 @@ static int __init ic_open_devs(void) struct ic_device *d, **last; struct net_device *dev; unsigned short oflags; - unsigned long start; + unsigned long start, next_msg; last = &ic_first_dev; rtnl_lock(); @@ -263,12 +263,23 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; + next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { + int wait, elapsed; + for_each_netdev(&init_net, dev) if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) goto have_carrier; msleep(1); + + if time_before(jiffies, next_msg) + continue; + + elapsed = jiffies_to_msecs(jiffies - start); + wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000; + pr_info("Waiting up to %d more seconds for network.\n", wait); + next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); } have_carrier: rtnl_unlock(); @@ -1522,7 +1533,8 @@ 
static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 8f024d41eefa..77bfcce64fe5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -111,227 +111,21 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> -#define HASH_SIZE 16 -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) - static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip_net_id __read_mostly; -struct ipip_net { - struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_wc[1]; - struct ip_tunnel __rcu **tunnels[4]; - - struct net_device *fb_tunnel_dev; -}; static int ipip_tunnel_init(struct net_device *dev); -static void ipip_tunnel_setup(struct net_device *dev); -static void ipip_dev_free(struct net_device *dev); static struct rtnl_link_ops ipip_link_ops __read_mostly; -static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - 
tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - tot->collisions = dev->stats.collisions; - - return tot; -} - -static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, - __be32 remote, __be32 local) -{ - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(local); - struct ip_tunnel *t; - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1]) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) - return t; - - for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0]) - if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) - return t; - - for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1]) - if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) - return t; - - t = rcu_dereference(ipn->tunnels_wc[0]); - if (t && (t->dev->flags&IFF_UP)) - return t; - return NULL; -} - -static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - unsigned int h = 0; - int prio = 0; - - if (remote) { - prio |= 2; - h ^= HASH(remote); - } - if (local) { - prio |= 1; - h ^= HASH(local); - } - return &ipn->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn, - struct ip_tunnel *t) -{ - return __ipip_bucket(ipn, &t->parms); -} - -static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipip_bucket(ipn, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); 
- break; - } - } -} - -static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} - -static int ipip_tunnel_create(struct net_device *dev) -{ - struct ip_tunnel *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - int err; - - err = ipip_tunnel_init(dev); - if (err < 0) - goto out; - - err = register_netdevice(dev); - if (err < 0) - goto out; - - strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ipip_link_ops; - - dev_hold(dev); - ipip_tunnel_link(ipn, t); - return 0; - -out: - return err; -} - -static struct ip_tunnel *ipip_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - struct ip_tunnel *t, *nt; - struct ip_tunnel __rcu **tp; - struct net_device *dev; - char name[IFNAMSIZ]; - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - for (tp = __ipip_bucket(ipn, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) { - if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) - return t; - } - if (!create) - return NULL; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "tunl%d"); - - dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); - if (dev == NULL) - return NULL; - - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - - if (ipip_tunnel_create(dev) < 0) - goto failed_free; - - return nt; - -failed_free: - ipip_dev_free(dev); - return NULL; -} - -/* called with RTNL */ -static void ipip_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - if (dev == ipn->fb_tunnel_dev) - RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); - else - ipip_tunnel_unlink(ipn, netdev_priv(dev)); - 
dev_put(dev); -} - static int ipip_err(struct sk_buff *skb, u32 info) { @@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; int err; - - switch (type) { - default: - case ICMP_PARAMETERPROB: - return 0; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe they are just ether pollution. --ANK - */ - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - case ICMP_REDIRECT: - break; - } + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; err = -ENOENT; - t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); if (t == NULL) goto out; @@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info) else t->err_count = 1; t->err_time = jiffies; -out: +out: return err; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. 
*/ + .proto = htons(ETH_P_IP), +}; + static int ipip_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - int err; - - tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); - if (tunnel != NULL) { - struct pcpu_tstats *tstats; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - - secpath_reset(skb); - - skb->mac_header = skb->network_header; - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->pkt_type = PACKET_HOST; - - __skb_tunnel_rx(skb, tunnel->dev); - - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - netif_rx(skb); - return 0; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); } return -1; @@ -463,329 +209,64 @@ drop: * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. 
*/ - static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - u8 tos = tunnel->parms.iph.tos; - __be16 df = tiph->frag_off; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph; - struct iphdr *iph; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ - __be32 dst = tiph->daddr; - struct flowi4 fl4; - int mtu; - - if (skb->protocol != htons(ETH_P_IP)) - goto tx_error; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) + if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - old_iph = ip_hdr(skb); - - if (tos & 1) - tos = old_iph->tos; - - if (!dst) { - /* NBMA tunnel */ - if ((rt = skb_rtable(skb)) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; - } - dst = rt_nexthop(rt, old_iph->daddr); + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, - dst, tiph->saddr, - 0, 0, - IPPROTO_IPIP, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } - tdev = rt->dst.dev; - - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; - goto tx_error; - } - - df |= old_iph->frag_off & htons(IP_DF); - - if (df) { - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - - if (mtu < 68) { - dev->stats.collisions++; - ip_rt_put(rt); - goto tx_error; - } - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if ((old_iph->frag_off & htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } - } - - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; - 
dst_link_failure(skb); - } else - tunnel->err_count = 0; - } - - /* - * Okay, now see if we can stuff it in the buffer as-is. - */ - max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); - - if (skb_headroom(skb) < max_headroom || skb_shared(skb) || - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - } - - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPIP; - iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = old_iph->ttl; - - iptunnel_xmit(skb, dev); + ip_tunnel_xmit(skb, dev, tiph); return NETDEV_TX_OK; -tx_error_icmp: - dst_link_failure(skb); tx_error: dev->stats.tx_errors++; dev_kfree_skb(skb); return NETDEV_TX_OK; } -static void ipip_tunnel_bind_dev(struct net_device *dev) -{ - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - if (iph->daddr) { - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, - iph->daddr, iph->saddr, - 0, 0, - IPPROTO_IPIP, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - dev->flags |= 
IFF_POINTOPOINT; - } - - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); - - if (tdev) { - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); - dev->mtu = tdev->mtu - sizeof(struct iphdr); - } - dev->iflink = tunnel->parms.link; -} - -static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) -{ - struct net *net = dev_net(t->dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - ipip_tunnel_unlink(ipn, t); - synchronize_net(); - t->parms.iph.saddr = p->iph.saddr; - t->parms.iph.daddr = p->iph.daddr; - memcpy(t->dev->dev_addr, &p->iph.saddr, 4); - memcpy(t->dev->broadcast, &p->iph.daddr, 4); - ipip_tunnel_link(ipn, t); - t->parms.iph.ttl = p->iph.ttl; - t->parms.iph.tos = p->iph.tos; - t->parms.iph.frag_off = p->iph.frag_off; - if (t->parms.link != p->link) { - t->parms.link = p->link; - ipip_tunnel_bind_dev(t->dev); - } - netdev_state_change(t->dev); -} - static int -ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ipn->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipip_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || 
(p.iph.frag_off&htons(~IP_DF))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { - err = -EINVAL; - break; - } - t = netdev_priv(dev); - } - - ipip_tunnel_update(t, &p); - } - - if (t) { - err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; - - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ipn->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t->dev == ipn->fb_tunnel_dev) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - default: - err = -EINVAL; - } - -done: - return err; -} + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; -static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + if (p.i_key || p.o_key || p.i_flags || p.o_flags) return -EINVAL; - dev->mtu = new_mtu; + if (p.iph.ttl) + p.iph.frag_off |= htons(IP_DF); + + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; + return 0; } static const struct net_device_ops ipip_netdev_ops = { - .ndo_uninit = ipip_tunnel_uninit, + .ndo_init = ipip_tunnel_init, + .ndo_uninit = ip_tunnel_uninit, 
.ndo_start_xmit = ipip_tunnel_xmit, .ndo_do_ioctl = ipip_tunnel_ioctl, - .ndo_change_mtu = ipip_tunnel_change_mtu, - .ndo_get_stats64 = ipip_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipip_dev_free(struct net_device *dev) -{ - free_percpu(dev->tstats); - free_netdev(dev); -} - #define IPIP_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev) static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; - dev->destructor = ipip_dev_free; dev->type = ARPHRD_TUNNEL; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); - dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; @@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->features |= IPIP_FEATURES; dev->hw_features |= IPIP_FEATURES; + ip_tunnel_setup(dev, ipip_net_id); } static int ipip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - tunnel->dev = dev; - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); - ipip_tunnel_bind_dev(dev); - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; -} - -static int __net_init ipip_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_IPIP; - iph->ihl = 5; - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - dev_hold(dev); - rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); - return 0; + tunnel->hlen = 0; + tunnel->parms.iph.protocol = IPPROTO_IPIP; + return ip_tunnel_init(dev); } 
static void ipip_netlink_parms(struct nlattr *data[], @@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[], static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct net *net = dev_net(dev); - struct ip_tunnel *nt; - - nt = netdev_priv(dev); - ipip_netlink_parms(data, &nt->parms); - - if (ipip_tunnel_locate(net, &nt->parms, 0)) - return -EEXIST; + struct ip_tunnel_parm p; - return ipip_tunnel_create(dev); + ipip_netlink_parms(data, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t; struct ip_tunnel_parm p; - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - if (dev == ipn->fb_tunnel_dev) - return -EINVAL; ipip_netlink_parms(data, &p); @@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) return -EINVAL; - t = ipip_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else - t = netdev_priv(dev); - - ipip_tunnel_update(t, &p); - return 0; + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipip_get_size(const struct net_device *dev) @@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .setup = ipip_tunnel_setup, .newlink = ipip_newlink, .changelink = ipip_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, }; @@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { .priority = 1, }; -static const char banner[] __initconst = - KERN_INFO "IPv4 over IPv4 tunneling driver\n"; - -static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) -{ - int prio; - - for (prio = 1; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = 
rtnl_dereference(ipn->tunnels[prio][h]); - while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipip_init_net(struct net *net) { - struct ipip_net *ipn = net_generic(net, ipip_net_id); - struct ip_tunnel *t; - int err; - - ipn->tunnels[0] = ipn->tunnels_wc; - ipn->tunnels[1] = ipn->tunnels_l; - ipn->tunnels[2] = ipn->tunnels_r; - ipn->tunnels[3] = ipn->tunnels_r_l; - - ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), - "tunl0", - ipip_tunnel_setup); - if (!ipn->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ipn->fb_tunnel_dev, net); - - err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev); - if (err) - goto err_reg_dev; - - if ((err = register_netdev(ipn->fb_tunnel_dev))) - goto err_reg_dev; - - t = netdev_priv(ipn->fb_tunnel_dev); - - strcpy(t->parms.name, ipn->fb_tunnel_dev->name); - return 0; - -err_reg_dev: - ipip_dev_free(ipn->fb_tunnel_dev); -err_alloc_dev: - /* nothing */ - return err; + return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } static void __net_exit ipip_exit_net(struct net *net) { - struct ipip_net *ipn = net_generic(net, ipip_net_id); - LIST_HEAD(list); - - rtnl_lock(); - ipip_destroy_tunnels(ipn, &list); - unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, .exit = ipip_exit_net, .id = &ipip_net_id, - .size = sizeof(struct ipip_net), + .size = sizeof(struct ip_tunnel_net), }; static int __init ipip_init(void) { int err; - printk(banner); + pr_info("ipip: IPv4 over IPv4 tunneling driver\n"); err = register_pernet_device(&ipip_net_ops); if (err < 0) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f95b3aa579e..9d9610ae7855 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -61,7 +61,7 @@ 
#include <linux/netfilter_ipv4.h> #include <linux/compat.h> #include <linux/export.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/checksum.h> #include <net/netlink.h> #include <net/fib_rules.h> @@ -626,9 +626,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - e = NLMSG_DATA(nlh); + e = nlmsg_data(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); @@ -910,14 +910,14 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - e = NLMSG_DATA(nlh); + e = nlmsg_data(nlh); e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4c0cf63dd92e..c3e0adea9c27 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,4 +1,9 @@ -/* IPv4 specific functions of netfilter core */ +/* + * IPv4 specific functions of netfilter core + * + * Rusty Russell (C) 2000 -- This code is GPL. 
+ * Patrick McHardy (C) 2006-2012 + */ #include <linux/kernel.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -40,14 +45,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) - return -1; + return PTR_ERR(rt); /* Drop old route. */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); if (skb_dst(skb)->error) - return -1; + return skb_dst(skb)->error; #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -56,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); if (IS_ERR(dst)) - return -1; + return PTR_ERR(dst);; skb_dst_set(skb, dst); } #endif @@ -66,7 +71,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) - return -1; + return -ENOMEM; return 0; } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ce2d43e1f09f..e7916c193932 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" - depends on NETFILTER_ADVANCED - help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. 
- config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n @@ -84,7 +71,7 @@ config IP_NF_MATCH_ECN config IP_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED + depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW) ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7dc6a9743592..85a4f21aac1a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -6,6 +6,7 @@ * Some ARP specific bits are: * * Copyright (C) 2002 David S. Miller (davem@redhat.com) + * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net> * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 79ca5e70d497..eadab1ed6500 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net) net->ipv4.arptable_filter = arpt_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv4.arptable_filter)) - return PTR_ERR(net->ipv4.arptable_filter); - return 0; + return PTR_RET(net->ipv4.arptable_filter); } static void __net_exit arptable_filter_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3efcf87400c3..d23118d95ff9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -3,6 +3,7 @@ * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. 
Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -182,8 +183,7 @@ ipt_get_target_c(const struct ipt_entry *e) return ipt_get_target((struct ipt_entry *)e); } -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", @@ -259,6 +259,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -271,7 +272,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } @@ -361,8 +362,7 @@ ipt_do_table(struct sk_buff *skb, t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(skb, hook, in, out, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 7d168dcbd135..f8a222cb6448 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -4,6 +4,7 @@ * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> * (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team 
<coreteam@netfilter.org> + * (C) 2005-2007 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -37,7 +38,7 @@ #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> #include <linux/mm.h> #include <linux/moduleparam.h> @@ -45,6 +46,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_ULOG.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <linux/bitops.h> #include <asm/unaligned.h> @@ -78,15 +80,23 @@ typedef struct { struct timer_list timer; /* the timer function */ } ulog_buff_t; -static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ +static int ulog_net_id __read_mostly; +struct ulog_net { + unsigned int nlgroup[ULOG_MAXNLGROUPS]; + ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; + struct sock *nflognl; + spinlock_t lock; +}; -static struct sock *nflognl; /* our socket */ -static DEFINE_SPINLOCK(ulog_lock); /* spinlock */ +static struct ulog_net *ulog_pernet(struct net *net) +{ + return net_generic(net, ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroupnum) +static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum) { - ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; + ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum]; pr_debug("ulog_send: timer is deleting\n"); del_timer(&ub->timer); @@ -103,7 +113,8 @@ static void ulog_send(unsigned int nlgroupnum) NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; pr_debug("throwing %d packets to netlink group %u\n", ub->qlen, nlgroupnum + 1); - netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); + netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, + GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; @@ -114,13 +125,16 @@ static void 
ulog_send(unsigned int nlgroupnum) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { + struct ulog_net *ulog = container_of((void *)data, + struct ulog_net, + nlgroup[*(unsigned int *)data]); pr_debug("timer function called, calling ulog_send\n"); /* lock to protect against somebody modifying our structure * from ipt_ulog_target at the same time */ - spin_lock_bh(&ulog_lock); - ulog_send(data); - spin_unlock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); + ulog_send(ulog, data); + spin_unlock_bh(&ulog->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -160,6 +174,8 @@ static void ipt_ulog_packet(unsigned int hooknum, size_t size, copy_len; struct nlmsghdr *nlh; struct timeval tv; + struct net *net = dev_net(in ? in : out); + struct ulog_net *ulog = ulog_pernet(net); /* ffs == find first bit set, necessary because userspace * is already shifting groupnumber, but we need unshifted. @@ -172,11 +188,11 @@ static void ipt_ulog_packet(unsigned int hooknum, else copy_len = loginfo->copy_range; - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); - ub = &ulog_buffers[groupnum]; + ub = &ulog->ulog_buffers[groupnum]; - spin_lock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); if (!ub->skb) { if (!(ub->skb = ulog_alloc_skb(size))) @@ -186,7 +202,7 @@ static void ipt_ulog_packet(unsigned int hooknum, /* either the queue len is too high or we don't have * enough room in nlskb left. send it to userspace. 
*/ - ulog_send(groupnum); + ulog_send(ulog, groupnum); if (!(ub->skb = ulog_alloc_skb(size))) goto alloc_failure; @@ -260,16 +276,16 @@ static void ipt_ulog_packet(unsigned int hooknum, if (ub->qlen >= loginfo->qthreshold) { if (loginfo->qthreshold > 1) nlh->nlmsg_type = NLMSG_DONE; - ulog_send(groupnum); + ulog_send(ulog, groupnum); } out_unlock: - spin_unlock_bh(&ulog_lock); + spin_unlock_bh(&ulog->lock); return; alloc_failure: pr_debug("Error building netlink message\n"); - spin_unlock_bh(&ulog_lock); + spin_unlock_bh(&ulog->lock); } static unsigned int @@ -376,54 +392,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = { .me = THIS_MODULE, }; -static int __init ulog_tg_init(void) +static int __net_init ulog_tg_net_init(struct net *net) { - int ret, i; + int i; + struct ulog_net *ulog = ulog_pernet(net); struct netlink_kernel_cfg cfg = { .groups = ULOG_MAXNLGROUPS, }; - pr_debug("init module\n"); - - if (nlbufsiz > 128*1024) { - pr_warning("Netlink buffer has to be <= 128kB\n"); - return -EINVAL; - } - + spin_lock_init(&ulog->lock); /* initialize ulog_buffers */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) - setup_timer(&ulog_buffers[i].timer, ulog_timer, i); + setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); - if (!nflognl) + ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ulog->nflognl) return -ENOMEM; - ret = xt_register_target(&ulog_tg_reg); - if (ret < 0) { - netlink_kernel_release(nflognl); - return ret; - } if (nflog) - nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger); return 0; } -static void __exit ulog_tg_exit(void) +static void __net_exit ulog_tg_net_exit(struct net *net) { ulog_buff_t *ub; int i; - - pr_debug("cleanup_module\n"); + struct ulog_net *ulog = ulog_pernet(net); if (nflog) - nf_log_unregister(&ipt_ulog_logger); - xt_unregister_target(&ulog_tg_reg); - netlink_kernel_release(nflognl); + 
nf_log_unset(net, &ipt_ulog_logger); + + netlink_kernel_release(ulog->nflognl); /* remove pending timers and free allocated skb's */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; + ub = &ulog->ulog_buffers[i]; pr_debug("timer is deleting\n"); del_timer(&ub->timer); @@ -434,5 +439,50 @@ static void __exit ulog_tg_exit(void) } } +static struct pernet_operations ulog_tg_net_ops = { + .init = ulog_tg_net_init, + .exit = ulog_tg_net_exit, + .id = &ulog_net_id, + .size = sizeof(struct ulog_net), +}; + +static int __init ulog_tg_init(void) +{ + int ret; + pr_debug("init module\n"); + + if (nlbufsiz > 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB\n"); + return -EINVAL; + } + + ret = register_pernet_subsys(&ulog_tg_net_ops); + if (ret) + goto out_pernet; + + ret = xt_register_target(&ulog_tg_reg); + if (ret < 0) + goto out_target; + + if (nflog) + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ulog_tg_net_ops); +out_pernet: + return ret; +} + +static void __exit ulog_tg_exit(void) +{ + pr_debug("cleanup_module\n"); + if (nflog) + nf_log_unregister(&ipt_ulog_logger); + xt_unregister_target(&ulog_tg_reg); + unregister_pernet_subsys(&ulog_tg_net_ops); +} + module_init(ulog_tg_init); module_exit(ulog_tg_exit); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c30130062cd6..c49dcd0284a0 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, return dev_match; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; @@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; 
invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 85d88f206447..cba5658ec82c 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -44,6 +44,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) u_int8_t tos; __be32 saddr, daddr; u_int32_t mark; + int err; /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || @@ -66,9 +67,11 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || - iph->tos != tos) - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; + iph->tos != tos) { + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } } return ret; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index eeaff7e4acb5..6383273d54e1 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -176,6 +176,7 @@ nf_nat_ipv4_out(unsigned int hooknum, #ifdef CONFIG_XFRM const struct nf_conn *ct; enum ip_conntrack_info ctinfo; + int err; #endif unsigned int ret; @@ -195,9 +196,11 @@ nf_nat_ipv4_out(unsigned int hooknum, ct->tuplehash[!dir].tuple.dst.u3.ip) || (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) - if (nf_xfrm_me_harder(skb, AF_INET) < 0) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } } #endif return ret; @@ -213,6 +216,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int ret; + int err; /* root is playing with raw sockets. 
*/ if (skb->len < sizeof(struct iphdr) || @@ -226,16 +230,19 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); } #ifdef CONFIG_XFRM else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (nf_xfrm_me_harder(skb, AF_INET) < 0) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } #endif } return ret; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2820aa18b542..567d84168bd2 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,6 +1,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index f2ca12794081..4c48e434bb1f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -2,6 +2,7 @@ * * (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c 
b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 5241d997ab75..a338dad41b7d 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -1,5 +1,6 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -187,8 +188,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: short packet "); + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, + NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -196,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; } @@ -209,7 +210,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 9c3db10b22d3..9eea059dd621 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -2,6 +2,7 @@ * H.323 extension for NAT alteration. 
* * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net> * * This source code is licensed under General Public License version 2. * diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index a06d7d74817d..657d2307f031 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -13,6 +13,8 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * * TODO: - NAT to a unique tuple, not to TCP source port * (needs netfilter tuple reservation) */ diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index ea44f02563b5..690d890111bb 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -21,6 +21,8 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * */ #include <linux/module.h> diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index bac712293fd6..5f011cc89cd9 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -38,6 +38,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Author: James Morris <jmorris@intercode.com.au> + * + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2e91006d6076..7d93d62cd5fd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { err = 
ip_cmsg_send(sock_net(sk), msg, &ipc); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 32030a24e776..6da51d55d03a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -224,6 +224,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), + SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), + SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), @@ -267,6 +269,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), + SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e2851464f8f..550781a17b34 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2311,7 +2311,7 @@ nla_put_failure: return -EMSGSIZE; } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377fb11c..b05c96e7af8b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp 
= tcp_sk(sk); @@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; @@ -349,8 +348,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 960fd29d9b8e..fa2f63fc453b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,7 +28,7 @@ static int zero; static int one = 1; -static int two = 2; +static int four = 4; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -592,13 +592,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_frto_response", - .data = &sysctl_tcp_frto_response, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), @@ -733,13 +726,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "tcp_cookie_size", - .data = &sysctl_tcp_cookie_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_thin_linear_timeouts", .data = &sysctl_tcp_thin_linear_timeouts, .maxlen = sizeof(int), @@ -760,7 +746,7 @@ static struct 
ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, - .extra2 = &two, + .extra2 = &four, }, { .procname = "udp_mem", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 47e854fcae24..dcb116dde216 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } /* Presumed zeroed, in order of appearance: * cookie_in_always, cookie_out_never, * s_data_constant, s_data_in, s_data_out @@ -775,7 +766,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb->avail_size = size; + skb->reserved_tailroom = skb->end - skb->tail - size; return skb; } __kfree_skb(skb); @@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = NULL; - - if (sizeof(ctd) > optlen) - return -EINVAL; - if (copy_from_user(&ctd, optval, sizeof(ctd))) - return -EFAULT; - - if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || - ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) - return -EINVAL; - - if (ctd.tcpct_cookie_desired == 0) { - /* default to global value */ - } else if ((0x1 & ctd.tcpct_cookie_desired) || - ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || - ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { - return -EINVAL; - } - - if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { - /* Supercedes all other values */ - lock_sock(sk); - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = 
NULL; - } - tp->rx_opt.cookie_in_always = 0; /* false */ - tp->rx_opt.cookie_out_never = 1; /* true */ - release_sock(sk); - return err; - } - - /* Allocate ancillary memory before locking. - */ - if (ctd.tcpct_used > 0 || - (tp->cookie_values == NULL && - (sysctl_tcp_cookie_size > 0 || - ctd.tcpct_cookie_desired > 0 || - ctd.tcpct_s_data_desired > 0))) { - cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, - GFP_KERNEL); - if (cvp == NULL) - return -ENOMEM; - - kref_init(&cvp->kref); - } - lock_sock(sk); - tp->rx_opt.cookie_in_always = - (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); - tp->rx_opt.cookie_out_never = 0; /* false */ - - if (tp->cookie_values != NULL) { - if (cvp != NULL) { - /* Changed values are recorded by a changed - * pointer, ensuring the cookie will differ, - * without separately hashing each value later. - */ - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - } else { - cvp = tp->cookie_values; - } - } - - if (cvp != NULL) { - cvp->cookie_desired = ctd.tcpct_cookie_desired; - - if (ctd.tcpct_used > 0) { - memcpy(cvp->s_data_payload, ctd.tcpct_value, - ctd.tcpct_used); - cvp->s_data_desired = ctd.tcpct_used; - cvp->s_data_constant = 1; /* true */ - } else { - /* No constant payload data. */ - cvp->s_data_desired = ctd.tcpct_s_data_desired; - cvp->s_data_constant = 0; /* false */ - } - - tp->cookie_values = cvp; - } - release_sock(sk); - return err; - } default: /* fallthru */ break; @@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = tp->cookie_values; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < sizeof(ctd)) - return -EINVAL; - - memset(&ctd, 0, sizeof(ctd)); - ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? - TCP_COOKIE_IN_ALWAYS : 0) - | (tp->rx_opt.cookie_out_never ? - TCP_COOKIE_OUT_NEVER : 0); - - if (cvp != NULL) { - ctd.tcpct_flags |= (cvp->s_data_in ? 
- TCP_S_DATA_IN : 0) - | (cvp->s_data_out ? - TCP_S_DATA_OUT : 0); - - ctd.tcpct_cookie_desired = cvp->cookie_desired; - ctd.tcpct_s_data_desired = cvp->s_data_desired; - - memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], - cvp->cookie_pair_size); - ctd.tcpct_used = cvp->cookie_pair_size; - } - - if (put_user(sizeof(ctd), optlen)) - return -EFAULT; - if (copy_to_user(optval, &ctd, sizeof(ctd))) - return -EFAULT; - return 0; - } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -3015,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, __be32 delta; unsigned int oldlen; unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -3044,6 +2916,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -3064,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + do { th->fin = th->psh = 0; + th->check = newcheck; - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb_transport_header(skb), @@ -3082,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, th->cwr = 0; } while (skb->next); + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (gso_skb->destructor == tcp_wfree) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + delta = htonl(oldlen + (skb->tail - skb->transport_header) + skb->data_len); th->check = ~csum_fold((__force 
__wsum)((__force u32)th->check + @@ -3408,134 +3294,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif -/* Each Responder maintains up to two secret values concurrently for - * efficient secret rollover. Each secret value has 4 states: - * - * Generating. (tcp_secret_generating != tcp_secret_primary) - * Generates new Responder-Cookies, but not yet used for primary - * verification. This is a short-term state, typically lasting only - * one round trip time (RTT). - * - * Primary. (tcp_secret_generating == tcp_secret_primary) - * Used both for generation and primary verification. - * - * Retiring. (tcp_secret_retiring != tcp_secret_secondary) - * Used for verification, until the first failure that can be - * verified by the newer Generating secret. At that time, this - * cookie's state is changed to Secondary, and the Generating - * cookie's state is changed to Primary. This is a short-term state, - * typically lasting only one round trip time (RTT). - * - * Secondary. (tcp_secret_retiring == tcp_secret_secondary) - * Used for secondary verification, after primary verification - * failures. This state lasts no more than twice the Maximum Segment - * Lifetime (2MSL). Then, the secret is discarded. - */ -struct tcp_cookie_secret { - /* The secret is divided into two parts. The digest part is the - * equivalent of previously hashing a secret and saving the state, - * and serves as an initialization vector (IV). The message part - * serves as the trailing secret. - */ - u32 secrets[COOKIE_WORKSPACE_WORDS]; - unsigned long expires; -}; - -#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) -#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) -#define TCP_SECRET_LIFE (HZ * 600) - -static struct tcp_cookie_secret tcp_secret_one; -static struct tcp_cookie_secret tcp_secret_two; - -/* Essentially a circular list, without dynamic allocation. 
*/ -static struct tcp_cookie_secret *tcp_secret_generating; -static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; -static struct tcp_cookie_secret *tcp_secret_secondary; - -static DEFINE_SPINLOCK(tcp_secret_locker); - -/* Select a pseudo-random word in the cookie workspace. - */ -static inline u32 tcp_cookie_work(const u32 *ws, const int n) -{ - return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; -} - -/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. - * Called in softirq context. - * Returns: 0 for success. - */ -int tcp_cookie_generator(u32 *bakery) -{ - unsigned long jiffy = jiffies; - - if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { - spin_lock_bh(&tcp_secret_locker); - if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { - /* refreshed by another */ - memcpy(bakery, - &tcp_secret_generating->secrets[0], - COOKIE_WORKSPACE_WORDS); - } else { - /* still needs refreshing */ - get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); - - /* The first time, paranoia assumes that the - * randomization function isn't as strong. But, - * this secret initialization is delayed until - * the last possible moment (packet arrival). - * Although that time is observable, it is - * unpredictably variable. Mash in the most - * volatile clock bits available, and expire the - * secret extra quickly. 
- */ - if (unlikely(tcp_secret_primary->expires == - tcp_secret_secondary->expires)) { - struct timespec tv; - - getnstimeofday(&tv); - bakery[COOKIE_DIGEST_WORDS+0] ^= - (u32)tv.tv_nsec; - - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_1MSL - + (0x0f & tcp_cookie_work(bakery, 0)); - } else { - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_LIFE - + (0xff & tcp_cookie_work(bakery, 1)); - tcp_secret_primary->expires = jiffy - + TCP_SECRET_2MSL - + (0x1f & tcp_cookie_work(bakery, 2)); - } - memcpy(&tcp_secret_secondary->secrets[0], - bakery, COOKIE_WORKSPACE_WORDS); - - rcu_assign_pointer(tcp_secret_generating, - tcp_secret_secondary); - rcu_assign_pointer(tcp_secret_retiring, - tcp_secret_primary); - /* - * Neither call_rcu() nor synchronize_rcu() needed. - * Retiring data is not freed. It is replaced after - * further (locked) pointer updates, and a quiet time - * (minimum 1MSL, maximum LIFE - 2MSL). - */ - } - spin_unlock_bh(&tcp_secret_locker); - } else { - rcu_read_lock_bh(); - memcpy(bakery, - &rcu_dereference(tcp_secret_generating)->secrets[0], - COOKIE_WORKSPACE_WORDS); - rcu_read_unlock_bh(); - } - return 0; -} -EXPORT_SYMBOL(tcp_cookie_generator); - void tcp_done(struct sock *sk) { struct request_sock *req = tcp_sk(sk)->fastopen_rsk; @@ -3590,7 +3348,6 @@ void __init tcp_init(void) unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3666,13 +3423,5 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); - memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); - memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); - tcp_secret_one.expires = jiffy; /* past due */ - tcp_secret_two.expires = jiffy; /* past due */ - tcp_secret_generating = &tcp_secret_one; - tcp_secret_primary = &tcp_secret_one; - tcp_secret_retiring = &tcp_secret_two; - tcp_secret_secondary = &tcp_secret_two; 
tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99f..aafd052865ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,12 +93,11 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 2; +int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -108,17 +107,16 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) -#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -1159,10 +1157,8 @@ static u8 tcp_sacktag_one(struct sock *sk, 
tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); - - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(end_seq, tp->frto_highmark)) - state->flag |= FLAG_ONLY_ORIG_SACKED; + if (!after(end_seq, tp->high_seq)) + state->flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_LOST) { @@ -1555,7 +1551,6 @@ static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); @@ -1728,12 +1723,6 @@ walk: start_seq, end_seq, dup_sack); advance_sp: - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct - * due to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) - state.flag &= ~FLAG_ONLY_ORIG_SACKED; - i++; } @@ -1750,8 +1739,7 @@ advance_sp: tcp_verify_left_out(tp); if ((state.reord < tp->fackets_out) && - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1825,197 +1813,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static int tcp_is_sackfrto(const struct tcp_sock *tp) -{ - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); -} - -/* F-RTO can only be used if TCP has never retransmitted anything other than - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) - */ -bool tcp_use_frto(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - - if (!sysctl_tcp_frto) - return false; - - /* MTU probe and F-RTO won't really play nicely along currently */ - if (icsk->icsk_mtup.probe_size) - return false; - - if (tcp_is_sackfrto(tp)) - return true; - - /* 
Avoid expensive walking of rexmit queue if possible */ - if (tp->retrans_out > 1) - return false; - - skb = tcp_write_queue_head(sk); - if (tcp_skb_is_last(sk, skb)) - return true; - skb = tcp_write_queue_next(sk, skb); /* Skips head */ - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - return false; - /* Short-circuit when first non-SACKed skb has been checked */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - break; - } - return true; -} - -/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO - * recovery a bit and use heuristics in tcp_process_frto() to detect if - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to - * keep retrans_out counting accurate (with SACK F-RTO, other than head - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS - * bits are handled if the Loss state is really to be entered (in - * tcp_enter_frto_loss). - * - * Do like tcp_enter_loss() would; when RTO expires the second time it - * does: - * "Reduce ssthresh if it has not yet been made inside this window." - */ -void tcp_enter_frto(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || - tp->snd_una == tp->high_seq || - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && - !icsk->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(sk); - /* Our state is too optimistic in ssthresh() call because cwnd - * is not reduced until tcp_enter_frto_loss() when previous F-RTO - * recovery has not yet completed. Pattern would be this: RTO, - * Cumulative ACK, RTO (2xRTO for the same segment does not end - * up here twice). 
- * RFC4138 should be more specific on what to do, even though - * RTO is quite unlikely to occur after the first Cumulative ACK - * due to back-off and complexity of triggering events ... - */ - if (tp->frto_counter) { - u32 stored_cwnd; - stored_cwnd = tp->snd_cwnd; - tp->snd_cwnd = 2; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = stored_cwnd; - } else { - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - } - /* ... in theory, cong.control module could do "any tricks" in - * ssthresh(), which means that ca_state, lost bits and lost_out - * counter would have to be faked before the call occurs. We - * consider that too expensive, unlikely and hacky, so modules - * using these in ssthresh() must deal these incompatibility - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 - */ - tcp_ca_event(sk, CA_EVENT_FRTO); - } - - tp->undo_marker = tp->snd_una; - tp->undo_retrans = 0; - - skb = tcp_write_queue_head(sk); - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_verify_left_out(tp); - - /* Too bad if TCP was application limited */ - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - - /* Earlier loss recovery underway (see RFC4138; Appendix B). - * The last condition is necessary at least in tp->frto_counter case. - */ - if (tcp_is_sackfrto(tp) && (tp->frto_counter || - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && - after(tp->high_seq, tp->snd_una)) { - tp->frto_highmark = tp->high_seq; - } else { - tp->frto_highmark = tp->snd_nxt; - } - tcp_set_ca_state(sk, TCP_CA_Disorder); - tp->high_seq = tp->snd_nxt; - tp->frto_counter = 1; -} - -/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, - * which indicates that we should follow the traditional RTO recovery, - * i.e. 
mark everything lost and do go-back-N retransmission. - */ -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - tp->lost_out = 0; - tp->retrans_out = 0; - if (tcp_is_reno(tp)) - tcp_reset_reno_sack(tp); - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - /* - * Count the retransmission made on RTO correctly (only when - * waiting for the first ACK and did not get it)... - */ - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { - /* For some reason this R-bit might get cleared? */ - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out += tcp_skb_pcount(skb); - /* ...enter this if branch just for the first segment */ - flag |= FLAG_DATA_ACKED; - } else { - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - } - - /* Marking forward transmissions that were made after RTO lost - * can cause unnecessary retransmissions in some scenarios, - * SACK blocks will mitigate that in some but not in all cases. - * We used to not mark them but it was causing break-ups with - * receivers that do only in-order receival. - * - * TODO: we could detect presence of such receiver and select - * different behavior per flow. 
- */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; - } - } - tcp_verify_left_out(tp); - - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - tp->frto_counter = 0; - - tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); - tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->snd_nxt; - TCP_ECN_queue_cwr(tp); - - tcp_clear_all_retrans_hints(tp); -} - static void tcp_clear_retrans_partial(struct tcp_sock *tp) { tp->retrans_out = 0; @@ -2042,10 +1839,13 @@ void tcp_enter_loss(struct sock *sk, int how) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + bool new_recovery = false; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || + !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + new_recovery = true; tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); @@ -2059,11 +1859,8 @@ void tcp_enter_loss(struct sock *sk, int how) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. 
*/ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } @@ -2090,8 +1887,14 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - /* Abort F-RTO algorithm if one is in progress */ - tp->frto_counter = 0; + + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + */ + tp->frto = sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2150,15 +1953,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples * available, or RTO is scheduled to fire first. */ - if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) + if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || + (flag & FLAG_ECE) || !tp->srtt) return false; delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); - tp->early_retrans_delayed = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, + TCP_RTO_MAX); return true; } @@ -2274,10 +2078,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; - /* Do not perform any recovery during F-RTO algorithm */ - if (tp->frto_counter) - return false; - /* Trick#1: The loss is proven. */ if (tp->lost_out) return true; @@ -2321,7 +2121,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) * interval if appropriate. 
*/ if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && + (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && !tcp_may_send_now(sk)) return !tcp_pause_early_retransmit(sk, flag); @@ -2638,12 +2438,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) return failed; } -/* Undo during loss recovery after partial ACK. */ -static bool tcp_try_undo_loss(struct sock *sk) +/* Undo during loss recovery after partial ACK or using F-RTO. */ +static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_may_undo(tp)) { + if (frto_undo || tcp_may_undo(tp)) { struct sk_buff *skb; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -2657,9 +2457,12 @@ static bool tcp_try_undo_loss(struct sock *sk) tp->lost_out = 0; tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + if (frto_undo) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; } @@ -2681,6 +2484,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; + tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -2758,7 +2562,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) tcp_verify_left_out(tp); - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -2875,6 +2679,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) tcp_set_ca_state(sk, TCP_CA_Recovery); } +/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are + * recovered or spurious. Otherwise retransmits more on partial ACKs. 
+ */ +static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + bool recovered = !before(tp->snd_una, tp->high_seq); + + if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ + if (flag & FLAG_ORIG_SACK_ACKED) { + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + tcp_try_undo_loss(sk, true); + return; + } + if (after(tp->snd_nxt, tp->high_seq) && + (flag & FLAG_DATA_SACKED || is_dupack)) { + tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ + } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { + tp->high_seq = tp->snd_nxt; + __tcp_push_pending_frames(sk, tcp_current_mss(sk), + TCP_NAGLE_OFF); + if (after(tp->snd_nxt, tp->high_seq)) + return; /* Step 2.b */ + tp->frto = 0; + } + } + + if (recovered) { + /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ + icsk->icsk_retransmits = 0; + tcp_try_undo_recovery(sk); + return; + } + if (flag & FLAG_DATA_ACKED) + icsk->icsk_retransmits = 0; + if (tcp_is_reno(tp)) { + /* A Reno DUPACK means new data in F-RTO step 2.b above are + * delivered. Lower inflight to clock out (re)tranmissions. + */ + if (after(tp->snd_nxt, tp->high_seq) && is_dupack) + tcp_add_reno_sack(sk); + else if (flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); + } + if (tcp_try_undo_loss(sk, false)) + return; + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. 
* Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2921,12 +2777,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { - case TCP_CA_Loss: - icsk->icsk_retransmits = 0; - if (tcp_try_undo_recovery(sk)) - return; - break; - case TCP_CA_CWR: /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ @@ -2957,18 +2807,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: - if (flag & FLAG_DATA_ACKED) - icsk->icsk_retransmits = 0; - if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) - tcp_reset_reno_sack(tp); - if (!tcp_try_undo_loss(sk)) { - tcp_moderate_cwnd(tp); - tcp_xmit_retransmit_queue(sk); - return; - } + tcp_process_loss(sk, flag, is_dupack); if (icsk->icsk_ca_state != TCP_CA_Open) return; - /* Loss is undone; fall through to processing in Open state. */ + /* Fall through to processing in Open state. 
*/ default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -3081,6 +2923,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ void tcp_rearm_rto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* If the retrans timer is currently being used by Fast Open @@ -3094,12 +2937,13 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (tp->early_retrans_delayed) { + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked - * when the delayed ER timer fires and is rescheduled. + * when the retrans timer fires and is rescheduled. */ if (delta > 0) rto = delta; @@ -3107,7 +2951,6 @@ void tcp_rearm_rto(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } - tp->early_retrans_delayed = 0; } /* This function is called when the delayed ER timer fires. 
TCP enters @@ -3195,8 +3038,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; last_ackt = skb->tstamp; @@ -3205,6 +3046,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_SACKED_ACKED) @@ -3405,165 +3248,74 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } -/* A very conservative spurious RTO response algorithm: reduce cwnd and - * continue in congestion avoidance. - */ -static void tcp_conservative_spur_to_response(struct tcp_sock *tp) +/* RFC 5961 7 [ACK Throttling] */ +static void tcp_send_challenge_ack(struct sock *sk) { - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_cnt = 0; - TCP_ECN_queue_cwr(tp); - tcp_moderate_cwnd(tp); -} + /* unprotected vars, we dont care of overwrites */ + static u32 challenge_timestamp; + static unsigned int challenge_count; + u32 now = jiffies / HZ; -/* A conservative spurious RTO response algorithm: reduce cwnd using - * PRR and continue in congestion avoidance. 
- */ -static void tcp_cwr_spur_to_response(struct sock *sk) -{ - tcp_enter_cwr(sk, 0); + if (now != challenge_timestamp) { + challenge_timestamp = now; + challenge_count = 0; + } + if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } } -static void tcp_undo_spur_to_response(struct sock *sk, int flag) +static void tcp_store_ts_recent(struct tcp_sock *tp) { - if (flag & FLAG_ECE) - tcp_cwr_spur_to_response(sk); - else - tcp_undo_cwr(sk, true); + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); } -/* F-RTO spurious RTO detection algorithm (RFC4138) - * - * F-RTO affects during two new ACKs following RTO (well, almost, see inline - * comments). State (ACK number) is kept in frto_counter. When ACK advances - * window (but not to or beyond highest sequence sent before RTO): - * On First ACK, send two new segments out. - * On Second ACK, RTO was likely spurious. Do spurious response (response - * algorithm is not part of the F-RTO detection algorithm - * given in RFC4138 but can be selected separately). - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding - * of Nagle, this is done using frto_counter states 2 and 3, when a new data - * segment of any size sent during F-RTO, state 2 is upgraded to 3. - * - * Rationale: if the RTO was spurious, new ACKs should arrive from the - * original window even after we transmit two new data segments. - * - * SACK version: - * on first step, wait until first cumulative ACK arrives, then move to - * the second step. In second step, the next ACK decides. 
- * - * F-RTO is implemented (mainly) in four functions: - * - tcp_use_frto() is used to determine if TCP is can use F-RTO - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is - * called when tcp_use_frto() showed green light - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm - * - tcp_enter_frto_loss() is called if there is not enough evidence - * to prove that the RTO is indeed spurious. It transfers the control - * from F-RTO to the conventional RTO recovery - */ -static bool tcp_process_frto(struct sock *sk, int flag) +static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) { - struct tcp_sock *tp = tcp_sk(sk); - - tcp_verify_left_out(tp); - - /* Duplicate the behavior from Loss state (fastretrans_alert) */ - if (flag & FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; - - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) - tp->undo_marker = 0; - - if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); - return true; - } - - if (!tcp_is_sackfrto(tp)) { - /* RFC4138 shortcoming in step 2; should also have case c): - * ACK isn't duplicate nor advances window, e.g., opposite dir - * data, winupdate + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. */ - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) - return true; - - if (!(flag & FLAG_DATA_ACKED)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), - flag); - return true; - } - } else { - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { - if (!tcp_packets_in_flight(tp)) { - tcp_enter_frto_loss(sk, 2, flag); - return true; - } - - /* Prevent sending of new data. 
*/ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)); - return true; - } - - if ((tp->frto_counter >= 2) && - (!(flag & FLAG_FORWARD_PROGRESS) || - ((flag & FLAG_DATA_SACKED) && - !(flag & FLAG_ONLY_ORIG_SACKED)))) { - /* RFC4138 shortcoming (see comment above) */ - if (!(flag & FLAG_FORWARD_PROGRESS) && - (flag & FLAG_NOT_DUP)) - return true; - - tcp_enter_frto_loss(sk, 3, flag); - return true; - } - } - - if (tp->frto_counter == 1) { - /* tcp_may_send_now needs to see updated state */ - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; - tp->frto_counter = 2; - - if (!tcp_may_send_now(sk)) - tcp_enter_frto_loss(sk, 2, flag); - return true; - } else { - switch (sysctl_tcp_frto_response) { - case 2: - tcp_undo_spur_to_response(sk, flag); - break; - case 1: - tcp_conservative_spur_to_response(tp); - break; - default: - tcp_cwr_spur_to_response(sk); - break; - } - tp->frto_counter = 0; - tp->undo_marker = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); } - return false; } -/* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +/* This routine deals with acks during a TLP episode. + * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. + */ +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { - /* unprotected vars, we dont care of overwrites */ - static u32 challenge_timestamp; - static unsigned int challenge_count; - u32 now = jiffies / HZ; + struct tcp_sock *tp = tcp_sk(sk); + bool is_tlp_dupack = (ack == tp->tlp_high_seq) && + !(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED)); - if (now != challenge_timestamp) { - challenge_timestamp = now; - challenge_count = 0; + /* Mark the end of TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. 
+ */ + if (is_tlp_dupack) { + tp->tlp_high_seq = 0; + return; } - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); - tcp_send_ack(sk); + + if (after(ack, tp->tlp_high_seq)) { + tp->tlp_high_seq = 0; + /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ + if (!(flag & FLAG_DSACKING_ACK)) { + tcp_init_cwnd_reduction(sk, true); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_Open); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } } } @@ -3581,7 +3333,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - bool frto_cwnd = false; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3601,7 +3352,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (tp->early_retrans_delayed) + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) @@ -3610,6 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + if (flag & FLAG_UPDATE_TS_RECENT) + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. 
@@ -3654,30 +3412,29 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) pkts_acked = prior_packets - tp->packets_out; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); - /* Guarantee sacktag reordering detection against wrap-arounds */ - if (before(tp->frto_highmark, tp->snd_una)) - tp->frto_highmark = 0; - if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && - tcp_may_raise_cwnd(sk, flag)) + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) + if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); } + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) dst_confirm(dst); } + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); return 1; no_queue: @@ -3691,6 +3448,9 @@ no_queue: */ if (tcp_send_head(sk)) tcp_ack_probe(sk); + + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); return 1; invalid_ack: @@ -3715,8 +3475,8 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab, +void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; @@ -3800,31 +3560,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o */ break; #endif - case TCPOPT_COOKIE: - /* This option is variable length. 
- */ - switch (opsize) { - case TCPOLEN_COOKIE_BASE: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_PAIR: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_MIN+0: - case TCPOLEN_COOKIE_MIN+2: - case TCPOLEN_COOKIE_MIN+4: - case TCPOLEN_COOKIE_MIN+6: - case TCPOLEN_COOKIE_MAX: - /* 16-bit multiple */ - opt_rx->cookie_plus = opsize; - *hvpp = ptr; - break; - default: - /* ignore option */ - break; - } - break; - case TCPOPT_EXP: /* Fast Open option shares code 254 using a * 16 bits magic number. It's valid only in @@ -3870,8 +3605,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr * If it is wrong it falls back on tcp_parse_options(). */ static bool tcp_fast_parse_options(const struct sk_buff *skb, - const struct tcphdr *th, - struct tcp_sock *tp, const u8 **hvpp) + const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3885,7 +3619,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -3930,27 +3664,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif -static inline void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. 
- */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5266,12 +4979,10 @@ out: static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { - const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp, &hash_location) && - tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5546,14 +5257,9 @@ slow_path: return 0; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. 
*/ @@ -5625,12 +5331,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; - const u8 *hash_location; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, &hash_location, 0, NULL); + tcp_parse_options(synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5661,14 +5366,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) { - const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5765,30 +5468,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; - if (cvp != NULL && - cvp->cookie_pair_size > 0 && - tp->rx_opt.cookie_plus > 0) { - int cookie_size = tp->rx_opt.cookie_plus - - TCPOLEN_COOKIE_BASE; - int cookie_pair_size = cookie_size - + cvp->cookie_desired; - - /* A cookie extension option was sent and returned. - * Note that each incoming SYNACK replaces the - * Responder cookie. The initial exchange is most - * fragile, as protection against spoofing relies - * entirely upon the sequence and timestamp (above). - * This replacement strategy allows the correct pair to - * pass through, while any others will be filtered via - * Responder verification later. 
- */ - if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { - memcpy(&cvp->cookie_pair[cvp->cookie_desired], - hash_location, cookie_size); - cvp->cookie_pair_size = cookie_pair_size; - } - } - smp_mb(); tcp_finish_connect(sk, skb); @@ -5989,7 +5668,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -6140,11 +5820,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - /* step 6: check the URG bit */ tcp_urg(sk, skb, th); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a8ec457310f..2278669b1d85 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 mtu = tcp_sk(sk)->mtu_info; - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). - */ - if (sk->sk_state == TCP_LISTEN) - return; - dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -408,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). 
+ */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = info; if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); @@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping, bool nocache) { @@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, 0, false); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sk_buff *skb, struct sk_buff *skb_synack, - struct request_sock *req, - struct request_values *rvp) + struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 
tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, - want_cookie ? NULL : &foc); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_release; - - /* Secret recipe starts with IP addresses */ - *mess++ ^= (__force u32)daddr; - *mess++ ^= (__force u32)saddr; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_release; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * of tcp_v4_send_synack()->tcp_select_initial_window(). */ skb_synack = tcp_make_synack(sk, dst, req, - (struct request_values *)&tmp_ext, fastopen_cookie_present(&valid_foc) ? 
&valid_foc : NULL); if (skb_synack) { @@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (fastopen_cookie_present(&foc) && foc.len != 0) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, - (struct request_values *)&tmp_ext)) + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) goto drop_and_free; return 0; @@ -1950,6 +1908,50 @@ void tcp_v4_early_demux(struct sk_buff *skb) } } +/* Packet is added to VJ-style prequeue for processing in process + * context, if a reader task is waiting. Apparently, this exciting + * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) + * failed somewhere. Latency? Burstiness? Well, at least now we will + * see, why it failed. 8)8) --ANK + * + */ +bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sysctl_tcp_low_latency || !tp->ucopy.task) + return false; + + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + + __skb_queue_tail(&tp->ucopy.prequeue, skb); + tp->ucopy.memory += skb->truesize; + if (tp->ucopy.memory > sk->sk_rcvbuf) { + struct sk_buff *skb1; + + BUG_ON(sock_owned_by_user(sk)); + + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + sk_backlog_rcv(sk, skb1); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPPREQUEUEDROPPED); + } + + tp->ucopy.memory = 0; + } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { + wake_up_interruptible_sync_poll(sk_sleep(sk), + POLLIN | POLLRDNORM | POLLRDBAND); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * tcp_rto_min(sk)) / 4, + TCP_RTO_MAX); + } + return true; +} +EXPORT_SYMBOL(tcp_prequeue); + /* * From tcp_input.c */ @@ -2197,12 +2199,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* TCP Cookie Transactions */ - if (tp->cookie_values != NULL) 
{ - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ @@ -2659,7 +2655,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) __u16 srcp = ntohs(inet->inet_sport); int rx_queue; - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index b6f3583ddfe8..da14436c1735 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) { struct cg_proto *cg_proto; struct tcp_memcontrol *tcp; - u64 val; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) @@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) tcp = tcp_from_cgproto(cg_proto); percpu_counter_destroy(&tcp->tcp_sockets_allocated); - - val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); } EXPORT_SYMBOL(tcp_destroy_cgroup); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b83a49cc3816..05eaf8904613 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; @@ -388,32 +387,6 
@@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); - struct tcp_sock *oldtp = tcp_sk(sk); - struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - - /* TCP Cookie Transactions require space for the cookie pair, - * as it differs for each connection. There is no need to - * copy any s_data_payload stored at the original socket. - * Failure will prevent resuming the connection. - * - * Presumed copied, in order of appearance: - * cookie_in_always, cookie_out_never - */ - if (oldcvp != NULL) { - struct tcp_cookie_values *newcvp = - kzalloc(sizeof(*newtp->cookie_values), - GFP_ATOMIC); - - if (newcvp != NULL) { - kref_init(&newcvp->kref); - newcvp->cookie_desired = - oldcvp->cookie_desired; - newtp->cookie_values = newcvp; - } else { - /* Not Yet Implemented */ - newtp->cookie_values = NULL; - } - } /* Now setup tcp_sock */ newtp->pred_flags = 0; @@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); @@ -440,6 +412,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); + newtp->tlp_high_seq = 0; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -449,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->frto_counter = 0; - newtp->frto_highmark = 0; - if (newicsk->icsk_ca_ops 
!= &tcp_init_congestion_ops && !try_module_get(newicsk->icsk_ca_ops->owner)) newicsk->icsk_ca_ops = &tcp_init_congestion_ops; @@ -459,8 +429,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = newtp->pushed_seq = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; @@ -537,7 +506,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, bool fastopen) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); @@ -547,7 +515,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -647,7 +615,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != - tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) + tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461074da..b735c23a961d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,27 +65,22 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. 
*/ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ -EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinitely with F-RTO */ - if (tp->frto_counter == 2) - tp->frto_counter = 3; - tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || tp->early_retrans_delayed) + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); } @@ -384,7 +379,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) -#define OPTION_COOKIE_EXTENSION (1 << 4) #define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { @@ -398,36 +392,6 @@ struct tcp_out_options { struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; -/* The sysctl int routines are generic, so check consistency here. 
- */ -static u8 tcp_cookie_size_check(u8 desired) -{ - int cookie_size; - - if (desired > 0) - /* previously specified */ - return desired; - - cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); - if (cookie_size <= 0) - /* no default specified */ - return 0; - - if (cookie_size <= TCP_COOKIE_MIN) - /* value too small, specify minimum */ - return TCP_COOKIE_MIN; - - if (cookie_size >= TCP_COOKIE_MAX) - /* value too large, specify maximum */ - return TCP_COOKIE_MAX; - - if (cookie_size & 1) - /* 8-bit multiple, illegal, fix it */ - cookie_size++; - - return (u8)cookie_size; -} - /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -446,27 +410,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, { u16 options = opts->options; /* mungable copy */ - /* Having both authentication and cookies for security is redundant, - * and there's certainly not enough room. Instead, the cookie-less - * extension variant is proposed. - * - * Consider the pessimal case with authentication. The options - * could look like: - * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 - */ if (unlikely(OPTION_MD5 & options)) { - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - *ptr++ = htonl((TCPOPT_COOKIE << 24) | - (TCPOLEN_COOKIE_BASE << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } else { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } - options &= ~OPTION_COOKIE_EXTENSION; + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; @@ -495,44 +441,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - /* Specification requires after timestamp, so do it now. - * - * Consider the pessimal case without authentication. 
The options - * could look like: - * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 - */ - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - __u8 *cookie_copy = opts->hash_location; - u8 cookie_size = opts->hash_size; - - /* 8-bit multiple handled in tcp_cookie_size_check() above, - * and elsewhere. - */ - if (0x2 & cookie_size) { - __u8 *p = (__u8 *)ptr; - - /* 16-bit multiple */ - *p++ = TCPOPT_COOKIE; - *p++ = TCPOLEN_COOKIE_BASE + cookie_size; - *p++ = *cookie_copy++; - *p++ = *cookie_copy++; - ptr++; - cookie_size -= 2; - } else { - /* 32-bit multiple */ - *ptr++ = htonl(((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_COOKIE << 8) | - TCPOLEN_COOKIE_BASE) + - cookie_size); - } - - if (cookie_size > 0) { - memcpy(ptr, cookie_copy, cookie_size); - ptr += (cookie_size / 4); - } - } - if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -591,11 +499,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? - tcp_cookie_size_check(cvp->cookie_desired) : - 0; struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG @@ -647,52 +551,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, tp->syn_fastopen = 1; } } - /* Note that timestamps are required by the specification. - * - * Odd numbers of bytes are prohibited by the specification, ensuring - * that the cookie is 16-bit aligned, and the resulting cookie pair is - * 32-bit aligned. 
- */ - if (*md5 == NULL && - (OPTION_TS & opts->options) && - cookie_size > 0) { - int need = TCPOLEN_COOKIE_BASE + cookie_size; - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - - if (need > remaining) { - /* try shrinking cookie to fit */ - cookie_size -= 2; - need -= 4; - } - } - while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { - cookie_size -= 4; - need -= 4; - } - if (TCP_COOKIE_MIN <= cookie_size) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; - opts->hash_size = cookie_size; - - /* Remember for future incarnations. */ - cvp->cookie_desired = cookie_size; - - if (cvp->cookie_desired != cvp->cookie_pair_size) { - /* Currently use random bytes as a nonce, - * assuming these are completely unpredictable - * by hostile users of the same system. - */ - get_random_bytes(&cvp->cookie_pair[0], - cookie_size); - cvp->cookie_pair_size = cookie_size; - } - remaining -= need; - } - } return MAX_TCP_OPTION_SPACE - remaining; } @@ -702,14 +561,10 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? - xvp->cookie_plus : - 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -757,28 +612,7 @@ static unsigned int tcp_synack_options(struct sock *sk, remaining -= need; } } - /* Similar rationale to tcp_syn_options() applies here, too. - * If the <SYN> options fit, the same options should fit now! 
- */ - if (*md5 == NULL && - ireq->tstamp_ok && - cookie_plus > TCPOLEN_COOKIE_BASE) { - int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - } - if (need <= remaining) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; - remaining -= need; - } else { - /* There's no error return, so flag it. */ - xvp->cookie_out_never = 1; /* true */ - opts->hash_size = 0; - } - } + return MAX_TCP_OPTION_SPACE - remaining; } @@ -953,7 +787,7 @@ void __init tcp_tasklet_init(void) * We cant xmit new skbs from this context, as we might already * hold qdisc lock. */ -static void tcp_wfree(struct sk_buff *skb) +void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); @@ -1012,6 +846,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, __net_timestamp(skb); if (likely(clone_it)) { + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else @@ -1298,7 +1139,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); - skb->avail_size -= eat; len -= eat; if (!len) return; @@ -1633,11 +1473,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (nonagle & TCP_NAGLE_PUSH) return true; - /* Don't use the nagle rule for urgent data (or for the final FIN). - * Nagle can be ignored during F-RTO too (see RFC4138). - */ - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) + /* Don't use the nagle rule for urgent data (or for the final FIN). 
*/ + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1810,8 +1647,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. + * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return true; @@ -1959,6 +1799,9 @@ static int tcp_mtu_probe(struct sock *sk) * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * + * Send at most one packet when push_one > 0. Temporarily ignore + * cwnd limit to force at most one packet out when push_one == 2. + * Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. */ @@ -1994,8 +1837,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, goto repair; /* Skip network transmission */ cwnd_quota = tcp_cwnd_test(tp, skb); - if (!cwnd_quota) - break; + if (!cwnd_quota) { + if (push_one == 2) + /* Force out a loss probe pkt. */ + cwnd_quota = 1; + else + break; + } if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; @@ -2049,10 +1897,129 @@ repair: if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; + + /* Send one loss probe per tail loss episode. 
*/ + if (push_one != 2) + tcp_schedule_loss_probe(sk); tcp_cwnd_validate(sk); return false; } - return !tp->packets_out && tcp_send_head(sk); + return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +} + +bool tcp_schedule_loss_probe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 timeout, tlp_time_stamp, rto_time_stamp; + u32 rtt = tp->srtt >> 3; + + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) + return false; + /* No consecutive loss probes. */ + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { + tcp_rearm_rto(sk); + return false; + } + /* Don't do any loss probe on a Fast Open connection before 3WHS + * finishes. + */ + if (sk->sk_state == TCP_SYN_RECV) + return false; + + /* TLP is only scheduled when next timer event is RTO. */ + if (icsk->icsk_pending != ICSK_TIME_RETRANS) + return false; + + /* Schedule a loss probe in 2*RTT for SACK capable connections + * in Open state, that are either limited by cwnd or application. + */ + if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + return false; + + if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && + tcp_send_head(sk)) + return false; + + /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + * for delayed ack when there's one outstanding packet. + */ + timeout = rtt << 1; + if (tp->packets_out == 1) + timeout = max_t(u32, timeout, + (rtt + (rtt >> 1) + TCP_DELACK_MAX)); + timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + + /* If RTO is shorter, just schedule TLP in its place. 
*/ + tlp_time_stamp = tcp_time_stamp + timeout; + rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; + if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { + s32 delta = rto_time_stamp - tcp_time_stamp; + if (delta > 0) + timeout = delta; + } + + inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, + TCP_RTO_MAX); + return true; +} + +/* When probe timeout (PTO) fires, send a new segment if one exists, else + * retransmit the last segment. + */ +void tcp_send_loss_probe(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int pcount; + int mss = tcp_current_mss(sk); + int err = -1; + + if (tcp_send_head(sk) != NULL) { + err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + goto rearm_timer; + } + + /* At most one outstanding TLP retransmission. */ + if (tp->tlp_high_seq) + goto rearm_timer; + + /* Retransmit last segment. */ + skb = tcp_write_queue_tail(sk); + if (WARN_ON(!skb)) + goto rearm_timer; + + pcount = tcp_skb_pcount(skb); + if (WARN_ON(!pcount)) + goto rearm_timer; + + if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + goto rearm_timer; + skb = tcp_write_queue_tail(sk); + } + + if (WARN_ON(!skb || !tcp_skb_pcount(skb))) + goto rearm_timer; + + /* Probe with zero data doesn't trigger fast recovery. */ + if (skb->len > 0) + err = __tcp_retransmit_skb(sk, skb); + + /* Record snd_nxt for loss detection. 
*/ + if (likely(!err)) + tp->tlp_high_seq = tp->snd_nxt; + +rearm_timer: + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); + + if (likely(!err)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBES); + return; } /* Push out any pending frames which were held back due to @@ -2386,8 +2353,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - /* make sure skb->data is aligned on arches that require it */ - if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + /* make sure skb->data is aligned on arches that require it + * and check if ack-trimming & collapsing extended the headroom + * beyond what csum_start can cover. + */ + if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || + skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : @@ -2673,32 +2644,24 @@ int tcp_send_synack(struct sock *sk) * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer - * rvp: request_values pointer * * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. 
*/ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; - struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); - const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; - int s_data_desired = 0; - if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) - s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, - sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2707,6 +2670,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); + security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) @@ -2740,9 +2704,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp, foc) - + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2760,40 +2723,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPHDR_SYN | TCPHDR_ACK); - if (OPTION_COOKIE_EXTENSION & opts.options) { - if (s_data_desired) { - u8 *buf = skb_put(skb, s_data_desired); - - /* copy data directly from the listening socket. 
*/ - memcpy(buf, cvp->s_data_payload, s_data_desired); - TCP_SKB_CB(skb)->end_seq += s_data_desired; - } - - if (opts.hash_size > 0) { - __u32 workspace[SHA_WORKSPACE_WORDS]; - u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; - u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; - - /* Secret recipe depends on the Timestamp, (future) - * Sequence and Acknowledgment Numbers, Initiator - * Cookie, and others handled by IP variant caller. - */ - *tail-- ^= opts.tsval; - *tail-- ^= tcp_rsk(req)->rcv_isn + 1; - *tail-- ^= TCP_SKB_CB(skb)->seq + 1; - - /* recommended */ - *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); - *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ - - sha_transform((__u32 *)&xvp->cookie_bakery[0], - (char *)mess, - &workspace[0]); - opts.hash_location = - (__u8 *)&xvp->cookie_bakery[0]; - } - } - th->seq = htonl(TCP_SKB_CB(skb)->seq); /* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b78aac30c498..4b85e6f636c9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - if (tp->early_retrans_delayed) { - tcp_resume_early_retransmit(sk); - return; - } if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); @@ -360,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk) WARN_ON(tcp_write_queue_empty(sk)); + tp->tlp_high_seq = 0; + if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. 
Our retransmits @@ -418,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), mib_idx); } - if (tcp_use_frto(sk)) { - tcp_enter_frto(sk); - } else { - tcp_enter_loss(sk, 0); - } + tcp_enter_loss(sk, 0); if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, @@ -495,13 +489,20 @@ void tcp_write_timer_handler(struct sock *sk) } event = icsk->icsk_pending; - icsk->icsk_pending = 0; switch (event) { + case ICSK_TIME_EARLY_RETRANS: + tcp_resume_early_retransmit(sk); + break; + case ICSK_TIME_LOSS_PROBE: + tcp_send_loss_probe(sk); + break; case ICSK_TIME_RETRANS: + icsk->icsk_pending = 0; tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: + icsk->icsk_pending = 0; tcp_probe_timer(sk); break; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 1b91bf48e277..76a1e23259e1 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); break; - case CA_EVENT_FRTO: + case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 265c42cf963c..2722db024a0b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); + if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) @@ -1762,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); + if 
(static_key_false(&udp_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } } /* @@ -2272,31 +2279,88 @@ void __init udp_init(void) int udp4_ufo_send_check(struct sk_buff *skb) { - const struct iphdr *iph; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) + if (!pskb_may_pull(skb, sizeof(struct udphdr))) return -EINVAL; - iph = ip_hdr(skb); - uh = udp_hdr(skb); + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } return 0; } +static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + int mac_len = skb->mac_len; + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + int outer_hlen; + netdev_features_t enc_features; + + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) + goto out; + + skb->encapsulation = 0; + __skb_pull(skb, tnl_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. 
*/ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + outer_hlen = skb_tnl_header_len(skb); + skb = segs; + do { + struct udphdr *uh; + int udp_offset = outer_hlen - tnl_hlen; + + skb->mac_len = mac_len; + + skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, udp_offset); + uh = udp_hdr(skb); + uh->len = htons(skb->len - udp_offset); + + /* csum segment if tunnel sets skb with csum. */ + if (unlikely(uh->check)) { + struct iphdr *iph = ip_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - udp_offset, + IPPROTO_UDP, 0); + uh->check = csum_fold(skb_checksum(skb, udp_offset, + skb->len - udp_offset, 0)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + } + skb->ip_summed = CHECKSUM_NONE; + } while ((skb = skb->next)); +out: + return segs; +} + struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; - int offset; - __wsum csum; - mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) goto out; @@ -2306,6 +2370,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int type = skb_shinfo(skb)->gso_type; if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; @@ -2316,20 +2381,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - /* Fragment the skb. 
IP headers of the fragments are updated in * inet_gso_segment() */ - segs = skb_segment(skb, features); + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } out: return segs; } - diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 505b30ad9182..7927db0a9279 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -25,7 +25,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return 0; return inet_sk_diag_fill(sk, NULL, skb, req, - sk_user_ns(NETLINK_CB(cb->skb).ssk), + sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -64,14 +64,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - 64)), GFP_KERNEL); + rep = nlmsg_new(sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + 64, + GFP_KERNEL); if (!rep) goto out; err = inet_sk_diag_fill(sk, NULL, rep, req, - sk_user_ns(NETLINK_CB(in_skb).ssk), + sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index fe5189e2e114..eb1dd4d643f2 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->protocol = 
xfrm_af2proto(skb_dst(skb)->ops->family); - /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, + /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + top_iph->tos = 0; + else + top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + top_iph->tos = INET_ECN_encapsulate(top_iph->tos, XFRM_MODE_SKB_CB(skb)->tos); flags = x->props.flags; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ed0b9e2e797a..11b13ea69db4 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -156,6 +156,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION config IPV6_SIT tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" select INET_TUNNEL + select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE default y ---help--- @@ -201,6 +202,7 @@ config IPV6_TUNNEL config IPV6_GRE tristate "IPv6: GRE tunnel" select IPV6_TUNNEL + select NET_IP_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2c7e615f902..d1ab6ab29a55 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -70,6 +70,7 @@ #include <net/snmp.h> #include <net/af_ieee802154.h> +#include <net/firewire.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> @@ -168,8 +169,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev); -static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); - static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -421,6 +420,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_regen_rndid((unsigned long) ndev); } #endif + ndev->token = in6addr_any; if (netif_running(dev) && addrconf_qdisc_ok(dev)) ndev->if_flags |= IF_READY; @@ -544,8 +544,7 @@ static const struct 
nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -605,6 +604,77 @@ errout: return err; } +static int inet6_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx, s_idx; + struct net_device *dev; + struct inet6_dev *idev; + struct hlist_head *head; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ + net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + idev = __in6_dev_get(dev); + if (!idev) + goto cont; + + if (inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + -1) <= 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } + if (h == NETDEV_HASHENTRIES) { + if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } + if (h == NETDEV_HASHENTRIES + 1) { + if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -806,6 +876,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->prefix_len = 
pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; ifa->cstamp = ifa->tstamp = jiffies; + ifa->tokenized = false; ifa->rt = rt; @@ -837,7 +908,7 @@ out2: rcu_read_unlock_bh(); if (likely(err == 0)) - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); ifa = ERR_PTR(err); @@ -927,7 +998,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_DELADDR, ifp); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); /* * Purge or update corresponding prefix @@ -1668,6 +1739,20 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) return 0; } +static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) +{ + union fwnet_hwaddr *ha; + + if (dev->addr_len != FWNET_ALEN) + return -1; + + ha = (union fwnet_hwaddr *)dev->dev_addr; + + memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); + eui[0] ^= 2; + return 0; +} + static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) { /* XXX: inherit EUI-64 from other interface -- yoshfuji */ @@ -1732,6 +1817,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_gre(eui, dev); case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); + case ARPHRD_IEEE1394: + return addrconf_ifid_ieee1394(eui, dev); } return -1; } @@ -2046,11 +2133,19 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) struct inet6_ifaddr *ifp; struct in6_addr addr; int create = 0, update_lft = 0; + bool tokenized = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && - ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { + + if (!ipv6_addr_any(&in6_dev->token)) { + read_lock_bh(&in6_dev->lock); + memcpy(addr.s6_addr + 8, + in6_dev->token.s6_addr + 8, 8); + read_unlock_bh(&in6_dev->lock); + tokenized = true; + } else if (ipv6_generate_eui64(addr.s6_addr + 
8, dev) && + ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); return; } @@ -2091,6 +2186,7 @@ ok: update_lft = create = 1; ifp->cstamp = jiffies; + ifp->tokenized = tokenized; addrconf_dad_start(ifp); } @@ -2529,6 +2625,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; + struct net_device *sp_dev; + struct inet6_ifaddr *sp_ifa; + struct rt6_info *sp_rt; /* ::1 */ @@ -2540,6 +2639,30 @@ static void init_loopback(struct net_device *dev) } add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + + /* Add routes to other interface's IPv6 addresses */ + for_each_netdev(dev_net(dev), sp_dev) { + if (!strcmp(sp_dev->name, dev->name)) + continue; + + idev = __in6_dev_get(sp_dev); + if (!idev) + continue; + + read_lock_bh(&idev->lock); + list_for_each_entry(sp_ifa, &idev->addr_list, if_list) { + + if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) + continue; + + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + + /* Failure cases are ignored */ + if (!IS_ERR(sp_rt)) + ip6_ins_rt(sp_rt); + } + read_unlock_bh(&idev->lock); + } } static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) @@ -2573,7 +2696,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && - (dev->type != ARPHRD_IEEE802154)) { + (dev->type != ARPHRD_IEEE802154) && + (dev->type != ARPHRD_IEEE1394)) { /* Alas, we support only Ethernet autoconfiguration. 
*/ return; } @@ -2961,7 +3085,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } in6_ifa_put(ifa); @@ -3510,7 +3634,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { }; static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3576,7 +3700,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, } static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3807,6 +3931,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, NLM_F_MULTI); if (err <= 0) break; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } break; } @@ -3864,6 +3989,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_ip_idx = ip_idx = cb->args[2]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -3915,8 +4041,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, - void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; @@ -4049,7 +4174,8 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(sizeof(struct ifla_cacheinfo)) + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + nla_total_size(IPSTATS_MIB_MAX * 8) /* 
IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */ } static inline size_t inet6_if_nlmsg_size(void) @@ -4136,6 +4262,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); + if (nla == NULL) + goto nla_put_failure; + read_lock_bh(&idev->lock); + memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); + read_unlock_bh(&idev->lock); + return 0; nla_put_failure: @@ -4163,6 +4296,80 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return 0; } +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +{ + struct inet6_ifaddr *ifp; + struct net_device *dev = idev->dev; + bool update_rs = false; + + if (token == NULL) + return -EINVAL; + if (ipv6_addr_any(token)) + return -EINVAL; + if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + return -EINVAL; + if (!ipv6_accept_ra(idev)) + return -EINVAL; + if (idev->cnf.rtr_solicits <= 0) + return -EINVAL; + + write_lock_bh(&idev->lock); + + BUILD_BUG_ON(sizeof(token->s6_addr) != 16); + memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8); + + write_unlock_bh(&idev->lock); + + if (!idev->dead && (idev->if_flags & IF_READY)) { + struct in6_addr ll_addr; + + ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | + IFA_F_OPTIMISTIC); + + /* If we're not ready, then normal ifup will take care + * of this. Otherwise, we need to request our rs here. + */ + ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters); + update_rs = true; + } + + write_lock_bh(&idev->lock); + + if (update_rs) + idev->if_flags |= IF_RS_SENT; + + /* Well, that's kinda nasty ... 
*/ + list_for_each_entry(ifp, &idev->addr_list, if_list) { + spin_lock(&ifp->lock); + if (ifp->tokenized) { + ifp->valid_lft = 0; + ifp->prefered_lft = 0; + } + spin_unlock(&ifp->lock); + } + + write_unlock_bh(&idev->lock); + return 0; +} + +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + int err = -EINVAL; + struct inet6_dev *idev = __in6_dev_get(dev); + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (!idev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) + BUG(); + + if (tb[IFLA_INET6_TOKEN]) + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + + return err; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 portid, u32 seq, int event, unsigned int flags) { @@ -4341,6 +4548,8 @@ errout: static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { + struct net *net = dev_net(ifp->idev->dev); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -4366,6 +4575,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) dst_free(&ifp->rt->dst); break; } + atomic_inc(&net->ipv6.dev_addr_genid); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4784,26 +4994,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be 
inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; @@ -4848,26 +5052,11 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -/* - * Device notifier - */ - -int register_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(register_inet6addr_notifier); - -int unregister_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(unregister_inet6addr_notifier); - static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .set_link_af = inet6_set_link_af, }; /* @@ -4940,7 +5129,7 @@ int __init addrconf_init(void) __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, NULL); __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - NULL, NULL); + inet6_netconf_dump_devconf, NULL); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d051e5f4bf34..72104562c864 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -78,3 +78,22 @@ int __ipv6_addr_type(const struct in6_addr *addr) } EXPORT_SYMBOL(__ipv6_addr_type); +static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); + +int register_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_notifier); + +int unregister_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_notifier); + +int 
inet6addr_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_notifier_call_chain); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index aad64352cb60..f083a583a05c 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_LABEL] = { .len = sizeof(u32), }, }; -static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; @@ -436,10 +435,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, if (!tb[IFAL_ADDRESS]) return -EINVAL; - pfx = nla_data(tb[IFAL_ADDRESS]); - if (!pfx) - return -EINVAL; if (!tb[IFAL_LABEL]) return -EINVAL; @@ -533,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; @@ -561,10 +556,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (!tb[IFAL_ADDRESS]) return -EINVAL; - addr = nla_data(tb[IFAL_ADDRESS]); - if (!addr) - return -EINVAL; rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6b793bfc0e10..ab5c7ad482cd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,7 +49,6 @@ #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> -#include <net/ipip.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> @@ -323,7 +322,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev = NULL; 
rcu_read_lock(); - if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another one @@ -471,8 +470,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_port = inet->inet_sport; } - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = sk->sk_bound_dev_if; + sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, + sk->sk_bound_dev_if); *uaddr_len = sizeof(*sin); return 0; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index f5a54782a340..4b56cbbc7890 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -124,7 +124,7 @@ ipv4_connected: goto out; } - if (addr_type&IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { if (sk->sk_bound_dev_if && @@ -355,18 +355,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = serr->port; - sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), struct ipv6hdr, daddr); sin->sin6_addr = ip6h->daddr; if (np->sndflow) sin->sin6_flowinfo = ip6_flowinfo(ip6h); - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = IP6CB(skb)->iif; + sin->sin6_scope_id = + ipv6_iface_scope_id(&sin->sin6_addr, + IP6CB(skb)->iif); } else { ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset), &sin->sin6_addr); + sin->sin6_scope_id = 0; } } @@ -376,18 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; - sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if 
(np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = IP6CB(skb)->iif; + sin->sin6_scope_id = + ipv6_iface_scope_id(&sin->sin6_addr, + IP6CB(skb)->iif); } else { struct inet_sock *inet = inet_sk(sk); ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin->sin6_addr); + sin->sin6_scope_id = 0; if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } @@ -592,7 +594,9 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; sin6.sin6_flowinfo = 0; - sin6.sin6_scope_id = 0; + sin6.sin6_scope_id = + ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr, + opt->iif); put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fff5bdd8b680..71b900c3f4ff 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -434,7 +434,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Source addr check */ - if (addr_type & IPV6_ADDR_LINKLOCAL) + if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; /* diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 9bfab19ff3c0..e4311cbc8b4e 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,6 +54,10 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (ipv6_rcv_saddr_equal(sk, sk2)) break; } + if (!relax && reuse && sk2->sk_reuse && + sk2->sk_state != TCP_LISTEN && + ipv6_rcv_saddr_equal(sk, sk2)) + break; } } @@ -169,10 +173,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (sk->sk_bound_dev_if && - ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sk->sk_bound_dev_if; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + 
sk->sk_bound_dev_if); } EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b973ed3d06cf..46e88433ec7d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy) spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net) spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; - struct ipv6_fl_socklist *sfl, **sflp; + struct ipv6_fl_socklist *sfl; + struct ipv6_fl_socklist __rcu **sflp; struct ip6_flowlabel *fl, *fl1 = NULL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e4efffe2522e..d3ddd8400354 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -38,6 +38,7 @@ #include <net/sock.h> #include <net/ip.h> +#include <net/ip_tunnels.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/addrconf.h> @@ -110,46 +111,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr) #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] -static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = 
u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, @@ -667,7 +628,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &tunnel->dev->stats; int err = -1; u8 proto; - int pkt_len; struct sk_buff *new_skb; if (dev->type == ARPHRD_ETHER) @@ -801,23 +761,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } } - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } - + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); - return 0; tx_err_link_failure: stats->tx_carrier_errors++; @@ -1271,7 +1217,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_do_ioctl = ip6gre_tunnel_ioctl, .ndo_change_mtu = ip6gre_tunnel_change_mtu, - 
.ndo_get_stats64 = ip6gre_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ip6gre_dev_free(struct net_device *dev) @@ -1520,7 +1466,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6gre_tunnel_change_mtu, - .ndo_get_stats64 = ip6gre_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ip6gre_tap_setup(struct net_device *dev) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index b1876e52091e..2bab2aa59745 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,18 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 Errata ID: 3480 + * Interface-Local scope spans only a single interface on a + * node and is useful only for loopback transmission of + * multicast. Packets with interface-local scope received + * from another node must be discarded. 
+ */ + if (!(skb->pkt_type == PACKET_LOOPBACK || + dev->flags & IFF_LOOPBACK) && + ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) + goto err; + /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope * field contains the reserved value 0; if such a packet is received, it @@ -281,7 +293,8 @@ int ip6_mc_input(struct sk_buff *skb) * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && - !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&hdr->daddr) & + (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 8234c1dcdf72..71b766ee821d 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -92,14 +92,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u8 *prevhdr; int offset = 0; - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 155eccfa7760..d2eedf192330 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) { - err = sock_tx_timestamp(sk, &tx_flags); - if (err) - goto error; - } + if (sk->sk_type == SOCK_DGRAM) + sock_tx_timestamp(sk, &tx_flags); /* * Let's try using as much space as possible. 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fff83cbc197f..1e55866cead7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -47,6 +47,7 @@ #include <net/icmp.h> #include <net/ip.h> +#include <net/ip_tunnels.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> @@ -955,7 +956,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; int err = -1; - int pkt_len; if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); @@ -1035,19 +1035,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 96bfb4e4b820..241fb8ad9fcf 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -842,9 +842,9 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); @@ -1100,13 +1100,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); - if (__ip6mr_fill_mroute(mrt, 
skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; + ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 76ef4353d518..2712ab22a174 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -610,8 +610,6 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } } #endif - if (!dev->addr_len) - send_sllao = 0; if (send_sllao) optlen += ndisc_opt_addr_space(dev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 429089cb073d..72836f40b730 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -1,3 +1,9 @@ +/* + * IPv6 specific functions of netfilter core + * + * Rusty Russell (C) 2000 -- This code is GPL. + * Patrick McHardy (C) 2006-2012 + */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/ipv6.h> @@ -29,7 +35,7 @@ int ip6_route_me_harder(struct sk_buff *skb) IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return -EINVAL; + return dst->error; } /* Drop old route. 
*/ @@ -43,7 +49,7 @@ int ip6_route_me_harder(struct sk_buff *skb) skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0); if (IS_ERR(dst)) - return -1; + return PTR_ERR(dst); skb_dst_set(skb, dst); } #endif @@ -53,7 +59,7 @@ int ip6_route_me_harder(struct sk_buff *skb) if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) - return -1; + return -ENOMEM; return 0; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index c72532a60d88..4433ab40e7de 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -105,7 +105,7 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED + depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW) ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 341b54ade72c..44400c216dc6 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -3,6 +3,7 @@ * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -284,6 +285,7 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; + struct net *net = dev_net(in ? 
in : out); table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); @@ -296,7 +298,7 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, + nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 83acc1405a18..590f767db5d4 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -18,9 +18,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; - __wsum src_sum = 0, dst_sum = 0; struct in6_addr pfx; - unsigned int i; + __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; @@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { - src_sum = csum_add(src_sum, - (__force __wsum)npt->src_pfx.in6.s6_addr16[i]); - dst_sum = csum_add(dst_sum, - (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); - } + src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); + dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; @@ -57,7 +52,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, if (pfx_len - i >= 32) mask = 0; else - mask = htonl(~((1 << (pfx_len - i)) - 1)); + mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; @@ -114,6 +109,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", + .table = "mangle", .target = ip6t_snpt_tg, .targetsize = 
sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, @@ -124,6 +120,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { }, { .name = "DNPT", + .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index ed3b427b2841..70f9abc0efe9 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -7,6 +7,8 @@ * Authors: * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> * + * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net> + * * Based on net/ipv4/netfilter/ipt_REJECT.c * * This program is free software; you can redistribute it and/or diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5060d54199ab..e0983f3648a6 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -71,6 +71,12 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, return ret; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *) skb_dst(skb); + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; @@ -78,7 +84,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ipv6_hdr(skb); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 6134a1ebfb1b..e075399d8b72 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -38,7 +38,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) struct in6_addr saddr, daddr; u_int8_t hop_limit; u_int32_t 
flowlabel, mark; - + int err; #if 0 /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || @@ -65,8 +65,11 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { + err = ip6_route_me_harder(skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } return ret; } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index e0e788d25b14..6383f90efda8 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -179,6 +179,7 @@ nf_nat_ipv6_out(unsigned int hooknum, #ifdef CONFIG_XFRM const struct nf_conn *ct; enum ip_conntrack_info ctinfo; + int err; #endif unsigned int ret; @@ -197,9 +198,11 @@ nf_nat_ipv6_out(unsigned int hooknum, &ct->tuplehash[!dir].tuple.dst.u3) || (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) - if (nf_xfrm_me_harder(skb, AF_INET6) < 0) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } } #endif return ret; @@ -215,6 +218,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int ret; + int err; /* root is playing with raw sockets. 
*/ if (skb->len < sizeof(struct ipv6hdr)) @@ -227,16 +231,19 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - if (ip6_route_me_harder(skb)) - ret = NF_DROP; + err = ip6_route_me_harder(skb); + if (err < 0) + ret = NF_DROP_ERR(err); } #ifdef CONFIG_XFRM else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (nf_xfrm_me_harder(skb, AF_INET6)) - ret = NF_DROP; + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } #endif } return ret; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 2b6c226f5198..97bcf2bae857 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) sizeof(sin6.sin6_addr)); nf_ct_put(ct); - - if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6.sin6_scope_id = sk->sk_bound_dev_if; - else - sin6.sin6_scope_id = 0; - + sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, + sk->sk_bound_dev_if); return copy_to_user(user, &sin6, sizeof(sin6)) ? 
-EFAULT : 0; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 24df3dde0076..b3807c5cb888 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, + NULL, NULL, "nf_ct_icmpv6: invalid new with type %d ", type + 128); return false; @@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } @@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed "); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54087e96d7b8..dffdc1a389c5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,6 +14,8 @@ * 2 of the License, or (at your option) any later version. 
*/ +#define pr_fmt(fmt) "IPv6-nf: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -39,6 +41,7 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> +#include <net/inet_ecn.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #include <linux/sysctl.h> #include <linux/netfilter.h> @@ -136,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} + static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct frag_queue *nq; @@ -164,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. */ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst) + struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -174,19 +182,18 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); - if (q == NULL) - goto oom; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct frag_queue, q); - -oom: - return NULL; } @@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; unsigned int payload_len; int offset, end; + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) { pr_debug("Already completed\n"); @@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, 
@@ -317,6 +327,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; add_frag_mem_limit(&fq->q, skb->truesize); @@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) { struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; + u8 ecn; inet_frag_kill(&fq->q, &nf_frags); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - @@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 
8) */ @@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); local_bh_enable(); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 330b5e7b7df6..eedff8ccded5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -263,7 +263,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another @@ -498,9 +498,8 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_port = 0; sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } sock_recv_ts_and_drops(msg, sk, skb); @@ -802,7 +801,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) + __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3c6a77290c6e..790d9f4b8b0b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. 
*/ + +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -55,6 +58,7 @@ #include <net/ndisc.h> #include <net/addrconf.h> #include <net/inet_frag.h> +#include <net/inet_ecn.h> struct ip6frag_skb_cb { @@ -64,6 +68,10 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} static struct inet_frags ip6_frags; @@ -116,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) fq->user = arg->user; fq->saddr = *arg->src; fq->daddr = *arg->dst; + fq->ecn = arg->ecn; } EXPORT_SYMBOL(ip6_frag_init); @@ -170,7 +179,8 @@ static void ip6_frag_expire(unsigned long data) } static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, + const struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -180,14 +190,16 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (q == NULL) + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; - + } return container_of(q, struct frag_queue, q); } @@ -198,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net_device *dev; int offset, end; struct net *net = dev_net(skb_dst(skb)->dev); + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -215,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const 
unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -315,6 +330,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + fq->ecn |= ecn; add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. @@ -326,9 +342,17 @@ found: } if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } + skb_dst_drop(skb); inet_frag_lru_move(&fq->q); return -1; @@ -358,9 +382,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int payload_len; unsigned int nhoff; int sum_truesize; + u8 ecn; inet_frag_kill(&fq->q, &ip6_frags); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Make the one we just received the head. */ if (prev) { head = prev->next; @@ -459,6 +488,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; /* Yes, and fold redundant checksum back. 
8) */ @@ -522,7 +552,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq != NULL) { int ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e5fe0041adfa..ad0aa6b0b86a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2355,7 +2355,7 @@ beginning: return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a return ip6_route_del(&cfg); } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, 0, NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 02f96dcbcf02..335363478bbf 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -49,7 +49,7 @@ #include <net/ip.h> #include <net/udp.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/dsfield.h> @@ -87,41 +87,6 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; -static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = 
per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->rx_errors = dev->stats.rx_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - /* * Must be invoked with rcu_read_lock */ @@ -899,6 +864,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, skb_dst(skb), NULL); iptunnel_xmit(skb, dev); return NETDEV_TX_OK; @@ -1200,7 +1167,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_start_xmit = ipip6_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, - .ndo_get_stats64= ipip6_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8a0848b60b35..d5dda20bd717 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -177,7 +176,7 @@ struct sock 
*cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b6460055df5..e51bd1a58264 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -386,9 +386,17 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst) dst->ops->redirect(dst, sk, skb); + goto out; } if (type == ICMPV6_PKT_TOOBIG) { + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); @@ -454,7 +462,6 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); @@ -466,7 +473,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -481,13 +488,12 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) { struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); return res; @@ -940,9 +946,7 @@ static struct sock *tcp_v6_hnd_req(struct 
sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -980,50 +984,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *d; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_free; - - /* Secret recipe starts with IP addresses */ - d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. 
*/ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_free; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1101,7 +1062,6 @@ have_isn: goto drop_and_release; if (tcp_v6_send_synack(sk, dst, &fl6, req, - (struct request_values *)&tmp_ext, skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 599e1ba6d1ce..da6019b63730 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -450,15 +450,16 @@ try_again: sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (is_udp4) + if (is_udp4) { ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin6->sin6_addr); - else { + sin6->sin6_scope_id = 0; + } else { sin6->sin6_addr = ipv6_hdr(skb)->saddr; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } } @@ -1118,7 +1119,7 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) + __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) @@ -1285,10 +1286,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index cf05cf073c51..3bb3a891a424 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c 
@@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb) const struct ipv6hdr *ipv6h; struct udphdr *uh; + /* UDP Tunnel offload on ipv6 is not yet supported. */ + if (skb->encapsulation) + return -EINVAL; + if (!pskb_may_pull(skb, sizeof(*uh))) return -EINVAL; @@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + if (unlikely(type & ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9bf6a74a71d2..4770d515c2c8 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) sizeof(top_iph->flow_lbl)); top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); - dsfield = XFRM_MODE_SKB_CB(skb)->tos; - dsfield = INET_ECN_encapsulate(dsfield, dsfield); + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + dsfield = 0; + else + dsfield = XFRM_MODE_SKB_CB(skb)->tos; + dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index d07e3a626446..0578d4fa00a9 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self) IRDA_DEBUG(2, "%s()\n", __func__); - skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, - GFP_ATOMIC); + skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL); if (skb == NULL) { IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n", __func__); @@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, } /* Allocate networking socket */ 
- sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto); + sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); if (sk == NULL) return -ENOMEM; @@ -1386,6 +1385,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -2583,8 +2584,10 @@ bed: NULL, NULL, NULL); /* Check if the we got some results */ - if (!self->cachedaddr) - return -EAGAIN; /* Didn't find any devices */ + if (!self->cachedaddr) { + err = -EAGAIN; /* Didn't find any devices */ + goto out; + } daddr = self->cachedaddr; /* Cleanup */ self->cachedaddr = 0; diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 52079f19bbbe..b797daac063c 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line) IRDA_ASSERT(ircomm != NULL, return NULL;); - self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC); + self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL); if (self == NULL) return NULL; diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 29340a9a6fb9..e1b37f5a2691 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -303,7 +303,8 @@ static void iriap_disconnect_indication(void *instance, void *sap, { struct iriap_cb *self; - IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(4, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); self = instance; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 6115a44c0a24..1064621da6f6 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -66,8 +66,15 @@ const char *irlmp_reasons[] = { "LM_LAP_RESET", "LM_INIT_DISCONNECT", "ERROR, NOT USED", + "UNKNOWN", }; +const char *irlmp_reason_str(LM_REASON reason) +{ + reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1); + return irlmp_reasons[reason]; 
+} + /* * Function irlmp_init (void) * @@ -747,7 +754,8 @@ void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, { struct lsap_cb *lsap; - IRDA_DEBUG(1, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(1, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a7d11ffe4284..ae691651b721 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -49,12 +49,6 @@ static const u8 iprm_shutdown[8] = #define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) -/* macros to set/get socket control buffer at correct offset */ -#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ -#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) -#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ -#define CB_TRGCLS_LEN (TRGCLS_SIZE) - #define __iucv_sock_wait(sk, condition, timeo, ret) \ do { \ DEFINE_WAIT(__wait); \ @@ -1141,7 +1135,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + IUCV_SKB_CB(skb)->tag = txmsg.tag; if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_sent); @@ -1224,7 +1218,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) return -ENOMEM; /* copy target class to control buffer of new skb */ - memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class; /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); @@ -1256,7 +1250,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, /* store msg target class in the second 4 bytes of skb ctrl buffer */ /* Note: the first 4 bytes are reserved for msg tag */ - memcpy(CB_TRGCLS(skb), 
&msg->class, CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = msg->class; /* check for special IPRM messages (e.g. iucv_sock_shutdown) */ if ((msg->flags & IUCV_IPRMDATA) && len > 7) { @@ -1292,6 +1286,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } } + IUCV_SKB_CB(skb)->offset = 0; if (sock_queue_rcv_skb(sk, skb)) skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); } @@ -1327,6 +1322,9 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; + u32 offset; + + msg->msg_namelen = 0; if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && @@ -1346,13 +1344,14 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - rlen = skb->len; /* real length of skb */ + offset = IUCV_SKB_CB(skb)->offset; + rlen = skb->len - offset; /* real length of skb */ copied = min_t(unsigned int, rlen, len); if (!rlen) sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN; cskb = skb; - if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { + if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1370,7 +1369,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, * get the trgcls from the control buffer of the skb due to * fragmentation of original iucv message. 
*/ err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, - CB_TRGCLS_LEN, CB_TRGCLS(skb)); + sizeof(IUCV_SKB_CB(skb)->class), + (void *)&IUCV_SKB_CB(skb)->class); if (err) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); @@ -1382,9 +1382,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, /* SOCK_STREAM: re-queue skb if it contains unreceived data */ if (sk->sk_type == SOCK_STREAM) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); + if (copied < rlen) { + IUCV_SKB_CB(skb)->offset = offset + copied; goto done; } } @@ -1403,6 +1402,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, spin_lock_bh(&iucv->message_q.lock); rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { + IUCV_SKB_CB(rskb)->offset = 0; if (sock_queue_rcv_skb(sk, rskb)) { skb_queue_head(&iucv->backlog_skb_q, rskb); @@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, return iucv_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP; @@ -1830,7 +1831,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { + if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } @@ -2091,6 +2092,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); + IUCV_SKB_CB(skb)->offset = 0; spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { if (sock_queue_rcv_skb(sk, skb)) { @@ -2195,8 +2197,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, /* fall through and receive zero length data */ case 0: /* plain data frame */ - memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class, - CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class; err = afiucv_hs_callback_rx(sk, skb); break; default: diff --git a/net/key/af_key.c b/net/key/af_key.c index 556fdafdd1ea..5b1e5af25713 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2201,7 +2201,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->priority = pol->sadb_x_policy_priority; - sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); if (!xp->family) { err = -EINVAL; @@ -2214,7 +2214,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (xp->selector.sport) xp->selector.sport_mask = htons(0xffff); - sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); xp->selector.prefixlen_d = sa->sadb_address_prefixlen; @@ -2315,7 +2315,7 @@ static int pfkey_spddelete(struct sock *sk, struct 
sk_buff *skb, const struct sa memset(&sel, 0, sizeof(sel)); - sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2323,7 +2323,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (sel.sport) sel.sport_mask = htons(0xffff); - sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2693,6 +2693,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d36875f3427e..6984c3a353cd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -114,7 +114,6 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); static inline struct l2tp_net *l2tp_pernet(struct net *net) { @@ -192,6 +191,7 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) } else { /* Socket is owned by kernelspace */ sk = tunnel->sock; + sock_hold(sk); } out: @@ -210,6 +210,7 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } + sock_put(sk); } EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); @@ -373,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; - 
struct l2tp_stats *sstats; spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); @@ -384,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, L2TP_SKB_CB(skbp)->ns, skb_queue_len(&session->reorder_q)); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_oos_packets++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } @@ -403,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; - struct l2tp_stats *tstats, *sstats; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). */ skb_orphan(skb); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += length; - sstats->rx_packets++; - sstats->rx_bytes += length; - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&tunnel->stats.rx_packets); + atomic_long_add(length, &tunnel->stats.rx_bytes); + atomic_long_inc(&session->stats.rx_packets); + atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ @@ -450,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; - struct l2tp_stats *sstats; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other @@ -458,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ start: spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; 
skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); + atomic_long_inc(&session->stats.rx_errors); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -623,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; int offset; u32 ns, nr; - struct l2tp_stats *sstats = &session->stats; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -640,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: cookie mismatch (%u/%u). Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_cookie_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; @@ -711,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -732,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. 
Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } @@ -788,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * packets */ if (L2TP_SKB_CB(skb)->ns != session->nr) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -816,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, return; discard: - u64_stats_update_begin(&sstats->syncp); - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); if (session->deref) @@ -828,6 +803,23 @@ discard: } EXPORT_SYMBOL(l2tp_recv_common); +/* Drop skbs from the session's reorder_q + */ +int l2tp_session_queue_purge(struct l2tp_session *session) +{ + struct sk_buff *skb = NULL; + BUG_ON(!session); + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + } + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); + /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. * Returns 0 if the packet was a data packet and was successfully passed on. 
@@ -843,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u32 tunnel_id, session_id; u16 version; int length; - struct l2tp_stats *tstats; if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) goto discard_bad_csum; @@ -932,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, discard_bad_csum: LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - tstats->rx_errors++; - u64_stats_update_end(&tstats->syncp); + atomic_long_inc(&tunnel->stats.rx_errors); kfree_skb(skb); return 0; @@ -1062,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; int error; - struct l2tp_stats *tstats, *sstats; /* Debug */ if (session->send_seq) @@ -1091,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, error = ip_queue_xmit(skb, fl); /* Update stats */ - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); if (error >= 0) { - tstats->tx_packets++; - tstats->tx_bytes += len; - sstats->tx_packets++; - sstats->tx_bytes += len; + atomic_long_inc(&tunnel->stats.tx_packets); + atomic_long_add(len, &tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); } else { - tstats->tx_errors++; - sstats->tx_errors++; + atomic_long_inc(&tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); } - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); return 0; } @@ -1282,6 +1263,7 @@ static void l2tp_tunnel_destruct(struct sock *sk) /* No longer an encapsulation socket. 
See net/ipv4/udp.c */ (udp_sk(sk))->encap_type = 0; (udp_sk(sk))->encap_rcv = NULL; + (udp_sk(sk))->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; @@ -1311,7 +1293,7 @@ end: /* When the tunnel is closed, all the attached sessions need to go too. */ -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1334,25 +1316,13 @@ again: hlist_del_init(&session->hlist); - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. - */ if (session->ref != NULL) (*session->ref)(session); write_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); @@ -1360,6 +1330,8 @@ again: if (session->deref != NULL) (*session->deref)(session); + l2tp_session_dec_refcount(session); + write_lock_bh(&tunnel->hlist_lock); /* Now restart from the beginning of this hash @@ -1372,6 +1344,17 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); + +/* Tunnel socket destroy hook for UDP encapsulation */ +static void l2tp_udp_encap_destroy(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } +} /* Really kill the tunnel. * Come here only when all sessions have been cleared from the tunnel. 
@@ -1397,19 +1380,21 @@ static void l2tp_tunnel_del_work(struct work_struct *work) return; sock = sk->sk_socket; - BUG_ON(!sock); - /* If the tunnel socket was created directly by the kernel, use the - * sk_* API to release the socket now. Otherwise go through the - * inet_* layer to shut the socket down, and let userspace close it. + /* If the tunnel socket was created by userspace, then go through the + * inet layer to shut the socket down, and let userspace close it. + * Otherwise, if we created the socket directly within the kernel, use + * the sk API to release it here. * In either case the tunnel resources are freed in the socket * destructor when the tunnel socket goes away. */ - if (sock->file == NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + if (tunnel->fd >= 0) { + if (sock) + inet_shutdown(sock, 2); } else { - inet_shutdown(sock, 2); + if (sock) + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); } l2tp_tunnel_sock_put(sk); @@ -1668,6 +1653,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. 
See net/ipv4/udp.c */ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) udpv6_encap_enable(); @@ -1723,6 +1709,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_closeall(tunnel); return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); @@ -1731,62 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); */ void l2tp_session_free(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = session->tunnel; BUG_ON(atomic_read(&session->ref_count) != 0); - tunnel = session->tunnel; - if (tunnel != NULL) { + if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + sock_put(tunnel->sock); + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); - /* Delete the session from the hash */ + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +/* Remove an l2tp session from l2tp_core's hash lists. + * Provides a tidyup interface for pseudowire code which can't just route all + * shutdown via. l2tp_session_delete and a pseudowire-specific session_close + * callback. 
+ */ +void __l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; + + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ write_lock_bh(&tunnel->hlist_lock); hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); - /* Unlink from the global hash if not L2TPv2 */ + /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); synchronize_rcu(); } - - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - - sock_put(tunnel->sock); - - /* This will delete the tunnel context if this - * is the last session on the tunnel. - */ - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); } - - kfree(session); - - return; } -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { + if (session->ref) + (*session->ref)(session); + __l2tp_session_unhash(session); + l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); - + if (session->deref) + (*session->deref)(session); l2tp_session_dec_refcount(session); - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); - /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. 
*/ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8eb8f1d47f3a..485a490fd990 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -36,16 +36,15 @@ enum { struct sk_buff; struct l2tp_stats { - u64 tx_packets; - u64 tx_bytes; - u64 tx_errors; - u64 rx_packets; - u64 rx_bytes; - u64 rx_seq_discards; - u64 rx_oos_packets; - u64 rx_errors; - u64 rx_cookie_discards; - struct u64_stats_sync syncp; + atomic_long_t tx_packets; + atomic_long_t tx_bytes; + atomic_long_t tx_errors; + atomic_long_t rx_packets; + atomic_long_t rx_bytes; + atomic_long_t rx_seq_discards; + atomic_long_t rx_oos_packets; + atomic_long_t rx_errors; + atomic_long_t rx_cookie_discards; }; struct l2tp_tunnel; @@ -240,11 +239,14 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void __l2tp_session_unhash(struct l2tp_session *session); extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_session_queue_purge(struct l2tp_session *session); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c 
index c3813bc84552..072d7202e182 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); if (tunnel->show != NULL) tunnel->show(m, tunnel); @@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } - seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (session->show != NULL) session->show(m, session); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 
7f41b7051269..571db8dd2292 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + sk_refcnt_debug_dec(sk); } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 41f2f8126ebc..b8a6039314e8 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -241,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + inet6_destroy_sock(sk); } @@ -683,6 +690,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_addr = ipv6_hdr(skb)->saddr; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = IP6CB(skb)->iif; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c1bab22db85e..0825ff26e113 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -246,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np = NULL; #endif - struct l2tp_stats stats; - unsigned int start; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); @@ -265,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&tunnel->stats.syncp); - stats.tx_packets = tunnel->stats.tx_packets; - stats.tx_bytes = 
tunnel->stats.tx_bytes; - stats.tx_errors = tunnel->stats.tx_errors; - stats.rx_packets = tunnel->stats.rx_packets; - stats.rx_bytes = tunnel->stats.rx_bytes; - stats.rx_errors = tunnel->stats.rx_errors; - stats.rx_seq_discards = tunnel->stats.rx_seq_discards; - stats.rx_oos_packets = tunnel->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&tunnel->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&tunnel->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&tunnel->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); @@ -612,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; struct sock *sk = NULL; - struct l2tp_stats stats; - unsigned int start; sk = tunnel->sock; @@ -656,28 +646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nest == NULL) goto nla_put_failure; - do { - start = 
u64_stats_fetch_begin(&session->stats.syncp); - stats.tx_packets = session->stats.tx_packets; - stats.tx_bytes = session->stats.tx_bytes; - stats.tx_errors = session->stats.tx_errors; - stats.rx_packets = session->stats.rx_packets; - stats.rx_bytes = session->stats.rx_bytes; - stats.rx_errors = session->stats.rx_errors; - stats.rx_seq_discards = session->stats.rx_seq_discards; - stats.rx_oos_packets = session->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&session->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&session->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&session->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6a53371dba1f..637a341c1e2d 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,6 +97,7 @@ #include <net/ip.h> #include <net/udp.h> #include <net/xfrm.h> +#include <net/inet_common.h> #include <asm/byteorder.h> 
#include <linux/atomic.h> @@ -259,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int session->name); /* Not bound. Nothing we can do, so discard. */ - session->stats.rx_errors++; + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } @@ -447,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); struct sock *sk = ps->sock; - struct sk_buff *skb; + struct socket *sock = sk->sk_socket; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (session->session_id == 0) - goto out; - - if (sk != NULL) { - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - release_sock(sk); + if (sock) { + inet_shutdown(sock, 2); + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } - -out: return; } @@ -483,19 +466,12 @@ out: */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session; - - if (sk->sk_user_data != NULL) { - session = sk->sk_user_data; - if (session == NULL) - goto out; - + struct l2tp_session *session = sk->sk_user_data; + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - -out: return; } @@ -525,16 +501,13 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } + __l2tp_session_unhash(session); + 
l2tp_session_queue_purge(session); sock_put(sk); } + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); release_sock(sk); @@ -880,18 +853,6 @@ out: return error; } -/* Called when deleting sessions via the netlink interface. - */ -static int pppol2tp_session_delete(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock == NULL) - l2tp_session_dec_refcount(session); - - return 0; -} - #endif /* CONFIG_L2TP_V3 */ /* getname() support. @@ -1025,14 +986,14 @@ end: static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, struct l2tp_stats *stats) { - dest->tx_packets = stats->tx_packets; - dest->tx_bytes = stats->tx_bytes; - dest->tx_errors = stats->tx_errors; - dest->rx_packets = stats->rx_packets; - dest->rx_bytes = stats->rx_bytes; - dest->rx_seq_discards = stats->rx_seq_discards; - dest->rx_oos_packets = stats->rx_oos_packets; - dest->rx_errors = stats->rx_errors; + dest->tx_packets = atomic_long_read(&stats->tx_packets); + dest->tx_bytes = atomic_long_read(&stats->tx_bytes); + dest->tx_errors = atomic_long_read(&stats->tx_errors); + dest->rx_packets = atomic_long_read(&stats->rx_packets); + dest->rx_bytes = atomic_long_read(&stats->rx_bytes); + dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards); + dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets); + dest->rx_errors = atomic_long_read(&stats->rx_errors); } /* Session ioctl helper. @@ -1666,14 +1627,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 
'Y' : 'N', atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); } static void pppol2tp_seq_session_show(struct seq_file *m, void *v) @@ -1708,14 +1669,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (po) seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); @@ -1839,7 +1800,7 @@ static const struct pppox_proto pppol2tp_proto = { static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { .session_create = pppol2tp_session_create, - .session_delete = pppol2tp_session_delete, + 
.session_delete = l2tp_session_delete, }; #endif /* CONFIG_L2TP_V3 */ diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 88709882c464..48aaa89253e0 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,6 +720,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; + msg->msg_namelen = 0; + lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4a5fbf83cd1e..9972e07a2f96 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2031,7 +2031,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index a4dcaf1dd4b6..5c9e021994ba 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -88,8 +88,6 @@ struct mac802154_sub_if_data { #define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw) -#define MAC802154_MAX_XMIT_ATTEMPTS 3 - #define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */ extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced; @@ -114,5 +112,6 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev); u16 mac802154_dev_get_pan_id(const struct net_device *dev); void mac802154_dev_set_pan_id(struct net_device *dev, u16 val); void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); +u8 mac802154_dev_get_dsn(const struct net_device *dev); #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index d8d277006089..a99910d4d52f 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -73,4 +73,5 @@ struct 
ieee802154_mlme_ops mac802154_mlme_wpan = { .start_req = mac802154_mlme_start_req, .get_pan_id = mac802154_dev_get_pan_id, .get_short_addr = mac802154_dev_get_short_addr, + .get_dsn = mac802154_dev_get_dsn, }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f47781ab0ccc..8ded97cf1c33 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -159,6 +159,15 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val) } } +u8 mac802154_dev_get_dsn(const struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + return priv->dsn++; +} + static void phy_chan_notify(struct work_struct *work) { struct phy_chan_notify_work *nw = container_of(work, @@ -167,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work) struct mac802154_sub_if_data *priv = netdev_priv(nw->dev); int res; + mutex_lock(&priv->hw->phy->pib_lock); res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan); if (res) pr_debug("set_channel failed\n"); + else { + priv->hw->phy->current_channel = priv->chan; + priv->hw->phy->current_page = priv->page; + } + mutex_unlock(&priv->hw->phy->pib_lock); kfree(nw); } @@ -186,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) priv->chan = chan; spin_unlock_bh(&priv->mib_lock); + mutex_lock(&priv->hw->phy->pib_lock); if (priv->hw->phy->current_channel != priv->chan || priv->hw->phy->current_page != priv->page) { + mutex_unlock(&priv->hw->phy->pib_lock); + work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; @@ -195,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) INIT_WORK(&work->work, phy_chan_notify); work->dev = dev; queue_work(priv->hw->dev_workqueue, &work->work); - } + } else + mutex_unlock(&priv->hw->phy->pib_lock); } diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 4e09d070995a..6d1647399d4f 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -25,6 
+25,7 @@ #include <linux/if_arp.h> #include <linux/crc-ccitt.h> +#include <net/ieee802154_netdev.h> #include <net/mac802154.h> #include <net/wpan-phy.h> @@ -39,12 +40,12 @@ struct xmit_work { struct mac802154_priv *priv; u8 chan; u8 page; - u8 xmit_attempts; }; static void mac802154_xmit_worker(struct work_struct *work) { struct xmit_work *xw = container_of(work, struct xmit_work, work); + struct mac802154_sub_if_data *sdata; int res; mutex_lock(&xw->priv->phy->pib_lock); @@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work) pr_debug("set_channel failed\n"); goto out; } + + xw->priv->phy->current_channel = xw->chan; + xw->priv->phy->current_page = xw->page; } res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb); + if (res) + pr_debug("transmission failed\n"); out: mutex_unlock(&xw->priv->phy->pib_lock); - if (res) { - if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) { - queue_work(xw->priv->dev_workqueue, &xw->work); - return; - } else - pr_debug("transmission failed for %d times", - MAC802154_MAX_XMIT_ATTEMPTS); - } + /* Restart the netif queue on each sub_if_data object. */ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &xw->priv->slaves, list) + netif_wake_queue(sdata->dev); + rcu_read_unlock(); dev_kfree_skb(xw->skb); @@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, u8 page, u8 chan) { struct xmit_work *work; + struct mac802154_sub_if_data *sdata; if (!(priv->phy->channels_supported[page] & (1 << chan))) { WARN_ON(1); @@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* Stop the netif queue on each sub_if_data object. 
*/ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &priv->slaves, list) + netif_stop_queue(sdata->dev); + rcu_read_unlock(); + INIT_WORK(&work->work, mac802154_xmit_worker); work->skb = skb; work->priv = priv; work->page = page; work->chan = chan; - work->xmit_attempts = 0; queue_work(priv->dev_workqueue, &work->work); diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index d20c6d3c247d..2ca2f4dceab7 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -145,6 +145,8 @@ static int mac802154_header_create(struct sk_buff *skb, head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ fc = mac_cb_type(skb); + if (mac_cb_is_ackreq(skb)) + fc |= IEEE802154_FC_ACK_REQ; if (!saddr) { spin_lock_bh(&priv->mib_lock); @@ -358,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev) dev->header_ops = &mac802154_header_ops; dev->needed_tailroom = 2; /* FCS */ dev->mtu = IEEE802154_MTU; - dev->tx_queue_len = 10; + dev->tx_queue_len = 300; dev->type = ARPHRD_IEEE802154; dev->flags = IFF_NOARP | IFF_BROADCAST; dev->watchdog_timeo = 0; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a9c488b6c50d..07c865a31a3d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -5,6 +5,7 @@ * way. * * Rusty Russell (C)2000 -- This code is GPL. 
+ * Patrick McHardy (c) 2006-2012 */ #include <linux/kernel.h> #include <linux/netfilter.h> @@ -276,10 +277,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif +static int __net_init netfilter_net_init(struct net *net) +{ #ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); + net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", + net->proc_net); + if (!net->nf.proc_netfilter) { + if (!net_eq(net, &init_net)) + pr_err("cannot create netfilter proc entry"); + + return -ENOMEM; + } #endif + return 0; +} + +static void __net_exit netfilter_net_exit(struct net *net) +{ + remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_net_ops = { + .init = netfilter_net_init, + .exit = netfilter_net_exit, +}; void __init netfilter_init(void) { @@ -289,11 +310,8 @@ void __init netfilter_init(void) INIT_LIST_HEAD(&nf_hooks[i][h]); } -#ifdef CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); - if (!proc_net_netfilter) + if (register_pernet_subsys(&netfilter_net_ops) < 0) panic("cannot create netfilter proc entry"); -#endif if (netfilter_log_init() < 0) panic("cannot initialize nf_log"); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 0f92dc24cb89..d7df6ac2c6f1 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -339,7 +339,11 @@ bitmap_ipmac_tlist(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); ipset_nest_end(skb, atd); - return -EMSGSIZE; + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; } static int diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 1ba9dbc0e107..86f5e26f39d3 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -15,7 +15,6 @@ 
#include <linux/ip.h> #include <linux/skbuff.h> #include <linux/spinlock.h> -#include <linux/netlink.h> #include <linux/rculist.h> #include <net/netlink.h> @@ -1085,7 +1084,7 @@ static int dump_init(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; @@ -1301,7 +1300,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct sk_buff *skb2; struct nlmsgerr *errmsg; size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *cmdattr; u32 *errline; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f2627226a087..10a30b4fc7db 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -104,6 +104,15 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) { @@ -414,6 +423,15 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_net.c 
b/net/netfilter/ipset/ip_set_hash_net.c index 4b677cf6bf7d..d6a59154d710 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -87,7 +87,16 @@ hash_net4_data_copy(struct hash_net4_elem *dst, static inline void hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net4_data_reset_flags(struct hash_net4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -308,7 +317,16 @@ hash_net6_data_copy(struct hash_net6_elem *dst, static inline void hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net6_data_reset_flags(struct hash_net6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 6ba985f1c96f..f2b0a3c30130 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -198,7 +198,16 @@ hash_netiface4_data_copy(struct hash_netiface4_elem *dst, static inline void hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netiface4_data_reset_flags(struct hash_netiface4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -494,7 +503,7 @@ hash_netiface6_data_copy(struct hash_netiface6_elem *dst, static inline void hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & 
IPSET_FLAG_NOMATCH); } static inline int @@ -504,6 +513,15 @@ hash_netiface6_data_match(const struct hash_netiface6_elem *elem) } static inline void +hash_netiface6_data_reset_flags(struct hash_netiface6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + +static inline void hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) { elem->elem = 0; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index af20c0c5ced2..349deb672a2d 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -104,6 +104,15 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport4_data_reset_flags(struct hash_netport4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport4_data_match(const struct hash_netport4_elem *elem) { @@ -375,6 +384,15 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport6_data_reset_flags(struct hash_netport6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport6_data_match(const struct hash_netport6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 8371c2bac2e4..09c744aa8982 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -174,9 +174,13 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, { const struct set_elem *e = list_set_elem(map, i); - if (i == map->size - 1 && e->id != IPSET_INVALID_ID) - /* Last element replaced: e.g. 
add new,before,last */ - ip_set_put_byindex(e->id); + if (e->id != IPSET_INVALID_ID) { + const struct set_elem *x = list_set_elem(map, map->size - 1); + + /* Last element replaced or pushed off */ + if (x->id != IPSET_INVALID_ID) + ip_set_put_byindex(x->id); + } if (with_timeout(map->timeout)) list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 0b779d7df881..dfd7b65b3d2a 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) module_put(app->module); } +static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) +{ + kfree(inc->timeout_table); + kfree(inc); +} + +static void ip_vs_app_inc_rcu_free(struct rcu_head *head) +{ + struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); + + ip_vs_app_inc_destroy(inc); +} /* * Allocate/initialize app incarnation and register it in proto apps. 
@@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, return 0; out: - kfree(inc->timeout_table); - kfree(inc); + ip_vs_app_inc_destroy(inc); return ret; } @@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) list_del(&inc->a_list); - kfree(inc->timeout_table); - kfree(inc); + call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); } @@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) { int result; - atomic_inc(&inc->usecnt); - if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) - atomic_dec(&inc->usecnt); + result = ip_vs_app_get(inc->app); + if (result) + atomic_inc(&inc->usecnt); return result; } @@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc) */ void ip_vs_app_inc_put(struct ip_vs_app *inc) { - ip_vs_app_put(inc->app); atomic_dec(&inc->usecnt); + ip_vs_app_put(inc->app); } @@ -218,6 +228,7 @@ out_unlock: /* * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services + * Caller should use synchronize_rcu() or rcu_barrier() */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { @@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, unsigned int flag, __u32 seq, int diff) { /* spinlock is to keep updating cp->flags atomic */ - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { vseq->previous_delta = vseq->delta; vseq->delta += diff; vseq->init_seq = seq; cp->flags |= flag; } - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 704e514e02ab..de6475894a39 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly; struct ip_vs_aligned_lock { - rwlock_t l; + 
spinlock_t l; } __attribute__((__aligned__(SMP_CACHE_BYTES))); /* lock array for conn table */ static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; -static inline void ct_read_lock(unsigned int key) -{ - read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock(unsigned int key) -{ - read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_lock(unsigned int key) -{ - write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_unlock(unsigned int key) -{ - write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_lock_bh(unsigned int key) -{ - read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock_bh(unsigned int key) -{ - read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - static inline void ct_write_lock_bh(unsigned int key) { - write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } static inline void ct_write_unlock_bh(unsigned int key) { - write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } @@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey_conn(cp); - ct_write_lock(hash); + ct_write_lock_bh(hash); spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); + hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pF\n", @@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) } 
spin_unlock(&cp->lock); - ct_write_unlock(hash); + ct_write_unlock_bh(hash); return ret; } @@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) /* * UNhashes ip_vs_conn from ip_vs_conn_tab. - * returns bool success. + * returns bool success. Caller should hold conn reference. */ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) { @@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) /* unhash it and decrease its reference counter */ hash = ip_vs_conn_hashkey_conn(cp); - ct_write_lock(hash); + ct_write_lock_bh(hash); spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - hlist_del(&cp->c_list); + hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; @@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) ret = 0; spin_unlock(&cp->lock); - ct_write_unlock(hash); + ct_write_unlock_bh(hash); + + return ret; +} + +/* Try to unlink ip_vs_conn from ip_vs_conn_tab. + * returns bool success. + */ +static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) +{ + unsigned int hash; + bool ret; + + hash = ip_vs_conn_hashkey_conn(cp); + + ct_write_lock_bh(hash); + spin_lock(&cp->lock); + + if (cp->flags & IP_VS_CONN_F_HASHED) { + ret = false; + /* Decrease refcnt and unlink conn only if we are last user */ + if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { + hlist_del_rcu(&cp->c_list); + cp->flags &= ~IP_VS_CONN_F_HASHED; + ret = true; + } + } else + ret = atomic_read(&cp->refcnt) ? 
false : true; + + spin_unlock(&cp->lock); + ct_write_unlock_bh(hash); return ret; } @@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->cport == cp->cport && p->vport == cp->vport && + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (p->cport == cp->cport && p->vport == cp->vport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); return cp; } } - ct_read_unlock(hash); + rcu_read_unlock(); return NULL; } @@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, false); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (!ip_vs_conn_net_eq(cp, p->net)) - continue; - if (p->pe_data && p->pe->ct_match) { - if (p->pe == cp->pe && p->pe->ct_match(p, cp)) - goto out; + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (unlikely(p->pe_data && p->pe->ct_match)) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { + if (__ip_vs_conn_get(cp)) + goto out; + } continue; } @@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) * p->vaddr is a fwmark */ ip_vs_addr_equal(p->protocol == IPPROTO_IP ? 
AF_UNSPEC : p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && + p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) - goto out; + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { + if (__ip_vs_conn_get(cp)) + goto out; + } } cp = NULL; out: - if (cp) - atomic_inc(&cp->refcnt); - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) */ hash = ip_vs_conn_hashkey_param(p, true); - ct_read_lock(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == p->af && - p->vport == cp->cport && p->cport == cp->dport && + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { + if (p->vport == cp->cport && p->cport == cp->dport && + cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && ip_vs_conn_net_eq(cp, p->net)) { + if (!__ip_vs_conn_get(cp)) + continue; /* HIT */ - atomic_inc(&cp->refcnt); ret = cp; break; } } - ct_read_unlock(hash); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), @@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { if (ip_vs_conn_unhash(cp)) { - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (cp->flags & IP_VS_CONN_F_NO_CPORT) { atomic_dec(&ip_vs_conn_no_cport_cnt); cp->flags &= ~IP_VS_CONN_F_NO_CPORT; cp->cport = cport; } - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); /* hash on new dport */ ip_vs_conn_hash(cp); @@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) return; /* Increase the refcnt counter of the dest */ - atomic_inc(&dest->refcnt); + 
ip_vs_dest_hold(dest); conn_flags = atomic_read(&dest->conn_flags); if (cp->protocol != IPPROTO_UDP) @@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) * Check if there is a destination for the connection, if so * bind the connection to the destination. */ -struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +void ip_vs_try_bind_dest(struct ip_vs_conn *cp) { struct ip_vs_dest *dest; + rcu_read_lock(); dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); if (dest) { struct ip_vs_proto_data *pd; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if (cp->dest) { - spin_unlock(&cp->lock); - return dest; + spin_unlock_bh(&cp->lock); + rcu_read_unlock(); + return; } /* Applications work depending on the forwarding method @@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) ip_vs_unbind_app(cp); ip_vs_bind_dest(cp, dest); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); /* Update its packet transmitter */ cp->packet_xmit = NULL; @@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } - return dest; + rcu_read_unlock(); } @@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) dest->flags &= ~IP_VS_DEST_F_OVERLOAD; } - /* - * Simply decrease the refcnt of the dest, because the - * dest will be either in service's destination list - * or in the trash. - */ - atomic_dec(&dest->refcnt); + ip_vs_dest_put(dest); } static int expire_quiescent_template(struct netns_ipvs *ipvs, @@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct) * Simply decrease the refcnt of the template, * don't restart its timer. 
*/ - atomic_dec(&ct->refcnt); + __ip_vs_conn_put(ct); return 0; } return 1; } +static void ip_vs_conn_rcu_free(struct rcu_head *head) +{ + struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn, + rcu_head); + + ip_vs_pe_put(cp->pe); + kfree(cp->pe_data); + kmem_cache_free(ip_vs_conn_cachep, cp); +} + static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; struct net *net = ip_vs_conn_net(cp); struct netns_ipvs *ipvs = net_ipvs(net); - cp->timeout = 60*HZ; - - /* - * hey, I'm using it - */ - atomic_inc(&cp->refcnt); - /* * do I control anybody? */ if (atomic_read(&cp->n_control)) goto expire_later; - /* - * unhash it if it is hashed in the conn table - */ - if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) - goto expire_later; - - /* - * refcnt==1 implies I'm the only one referrer - */ - if (likely(atomic_read(&cp->refcnt) == 1)) { + /* Unlink conn if not referenced anymore */ + if (likely(ip_vs_conn_unlink(cp))) { /* delete the timer if it is activated by other users */ del_timer(&cp->timer); @@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_conn_drop_conntrack(cp); } - ip_vs_pe_put(cp->pe); - kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); + call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); atomic_dec(&ipvs->conn_count); - - kmem_cache_free(ip_vs_conn_cachep, cp); return; } - /* hash it back to the table */ - ip_vs_conn_hash(cp); - expire_later: - IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", - atomic_read(&cp->refcnt)-1, + IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", + atomic_read(&cp->refcnt), atomic_read(&cp->n_control)); + atomic_inc(&cp->refcnt); + cp->timeout = 60*HZ; + if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } - +/* Modify 
timer, so that it expires as soon as possible. + * Can be called without reference only if under RCU lock. + */ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) { - if (del_timer(&cp->timer)) - mod_timer(&cp->timer, jiffies); + /* Using mod_timer_pending will ensure the timer is not + * modified after the final del_timer in ip_vs_conn_expire. + */ + if (timer_pending(&cp->timer) && + time_after(cp->timer.expires, jiffies)) + mod_timer_pending(&cp->timer, jiffies); } @@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, p->protocol); - cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); + cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { IP_VS_ERR_RL("%s(): no memory\n", __func__); return NULL; @@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; - ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); + ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; - ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); + ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr); cp->vport = p->vport; /* proto should only be IPPROTO_IP if d_addr is a fwmark */ - ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, - &cp->daddr, daddr); + ip_vs_addr_set(p->protocol == IPPROTO_IP ? 
AF_UNSPEC : p->af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; @@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, cp->pe = p->pe; cp->pe_data = p->pe_data; cp->pe_data_len = p->pe_data_len; + } else { + cp->pe = NULL; + cp->pe_data = NULL; + cp->pe_data_len = 0; } spin_lock_init(&cp->lock); @@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, */ atomic_set(&cp->refcnt, 1); + cp->control = NULL; atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); + cp->packet_xmit = NULL; + cp->app = NULL; + cp->app_data = NULL; + /* reset struct ip_vs_seq */ + cp->in_seq.delta = 0; + cp->out_seq.delta = 0; + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); /* Bind the connection with a destination server */ + cp->dest = NULL; ip_vs_bind_dest(cp, dest); /* Set its state and timeout */ cp->state = 0; + cp->old_state = 0; cp->timeout = 3*HZ; cp->sync_endtime = jiffies & ~3UL; @@ -952,14 +966,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + /* __ip_vs_conn_get() is not needed by + * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show + */ if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; } } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } return NULL; @@ -977,6 +994,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; + struct hlist_node *e; struct hlist_head *l = iter->l; int idx; @@ -985,19 +1003,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? 
*/ - if (cp->c_list.next) - return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); + e = rcu_dereference(hlist_next_rcu(&cp->c_list)); + if (e) + return hlist_entry(e, struct ip_vs_conn, c_list); + rcu_read_unlock(); idx = l - ip_vs_conn_tab; - ct_read_unlock_bh(idx); - while (++idx < ip_vs_conn_tab_size) { - ct_read_lock_bh(idx); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } - ct_read_unlock_bh(idx); + rcu_read_unlock(); } iter->l = NULL; return NULL; @@ -1009,7 +1027,7 @@ static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) struct hlist_head *l = iter->l; if (l) - ct_read_unlock_bh(l - ip_vs_conn_tab); + rcu_read_unlock(); } static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) @@ -1188,7 +1206,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) void ip_vs_random_dropentry(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; /* * Randomly scan 1/32 of the whole table every second @@ -1199,9 +1217,9 @@ void ip_vs_random_dropentry(struct net *net) /* * Lock is actually needed in this loop. 
*/ - ct_write_lock_bh(hash); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1228,12 +1246,15 @@ void ip_vs_random_dropentry(struct net *net) IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(hash); + rcu_read_unlock(); } } @@ -1244,7 +1265,7 @@ void ip_vs_random_dropentry(struct net *net) static void ip_vs_conn_flush(struct net *net) { int idx; - struct ip_vs_conn *cp; + struct ip_vs_conn *cp, *cp_c; struct netns_ipvs *ipvs = net_ipvs(net); flush_again: @@ -1252,19 +1273,22 @@ flush_again: /* * Lock is actually needed in this loop. 
*/ - ct_write_lock_bh(idx); + rcu_read_lock(); - hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); - if (cp->control) { + cp_c = cp->control; + /* cp->control is valid only with reference to cp */ + if (cp_c && __ip_vs_conn_get(cp)) { IP_VS_DBG(4, "del conn template\n"); - ip_vs_conn_expire_now(cp->control); + ip_vs_conn_expire_now(cp_c); + __ip_vs_conn_put(cp); } } - ct_write_unlock_bh(idx); + rcu_read_unlock(); } /* the counter may be not NULL, because maybe some conn entries @@ -1331,7 +1355,7 @@ int __init ip_vs_conn_init(void) INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { - rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); + spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l); } /* calculate the random value for connection hash */ @@ -1342,6 +1366,8 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { + /* Wait all ip_vs_conn_rcu_free() callbacks to complete */ + rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a40a59..f26fe3353a30 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -int ip_vs_net_id __read_mostly; -#ifdef IP_VS_GENERIC_NETNS -EXPORT_SYMBOL(ip_vs_net_id); -#endif +static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); @@ -206,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, { ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, vport, p); - p->pe = svc->pe; + p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) return 
p->pe->fill_param(p, skb); @@ -299,12 +296,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(&param); if (!ct || !ip_vs_check_template(ct)) { + struct ip_vs_scheduler *sched; + /* * No template found or the dest of the connection * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -394,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; + struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; @@ -449,7 +450,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } - dest = svc->scheduler->schedule(svc, skb); + sched = rcu_dereference(svc->scheduler); + dest = sched->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; @@ -507,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) { - ip_vs_service_put(svc); return NF_DROP; } @@ -533,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; - ip_vs_service_put(svc); - /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { @@ -571,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. 
*/ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { - ip_vs_service_put(svc); + if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) return NF_ACCEPT; - } - - ip_vs_service_put(svc); /* * Notify the client that the destination is unreachable, and @@ -643,8 +638,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - int err = ip_defrag(skb, user); + int err; + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); if (!err) ip_send_check(ip_hdr(skb)); @@ -1164,9 +1162,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(net, af, iph.protocol, - &iph.saddr, - pptr[0])) { + if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST @@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - struct net *net = - dev_net(skb_dst(skb)->dev); - if (!skb->dev) skb->dev = net->loopback_dev; icmpv6_send(skb, @@ -1226,13 +1220,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_out(hooknum, skb, AF_INET); - local_bh_enable(); - return verdict; + return ip_vs_out(hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1259,13 +1247,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_out(hooknum, 
skb, AF_INET6); - local_bh_enable(); - return verdict; + return ip_vs_out(hooknum, skb, AF_INET6); } #endif @@ -1394,19 +1376,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - rcu_read_lock(); ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); - rcu_read_unlock(); /* Client uses PMTUD? */ if (!(cih->frag_off & htons(IP_DF))) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { - spin_lock(&dest->dst_lock); - if (dest->dst_cache) - mtu = dst_mtu(dest->dst_cache); - spin_unlock(&dest->dst_lock); + struct ip_vs_dest_dst *dest_dst; + + rcu_read_lock(); + dest_dst = rcu_dereference(dest->dest_dst); + if (dest_dst) + mtu = dst_mtu(dest_dst->dst_cache); + rcu_read_unlock(); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); @@ -1577,7 +1560,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? 
*/ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, &iph); @@ -1654,7 +1638,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1722,13 +1705,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_in(hooknum, skb, AF_INET); - local_bh_enable(); - return verdict; + return ip_vs_in(hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1787,13 +1764,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - unsigned int verdict; - - /* Disable BH in LOCAL_OUT until all places are fixed */ - local_bh_disable(); - verdict = ip_vs_in(hooknum, skb, AF_INET6); - local_bh_enable(); - return verdict; + return ip_vs_in(hooknum, skb, AF_INET6); } #endif @@ -1815,13 +1786,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? 
*/ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1835,6 +1808,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); @@ -1843,7 +1817,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c68198bf9128..9e4074c26dc2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -55,9 +55,6 @@ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ static DEFINE_MUTEX(__ip_vs_mutex); -/* lock for service table */ -static DEFINE_RWLOCK(__ip_vs_svc_lock); - /* sysctl variables */ #ifdef CONFIG_IP_VS_DEBUG @@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void) /* Protos */ -static void __ip_vs_del_service(struct ip_vs_service *svc); +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); #ifdef CONFIG_IP_VS_IPV6 @@ -257,9 +254,9 @@ ip_vs_use_count_dec(void) #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) /* the service table hashed by <protocol, addr, port> */ -static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ -static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* @@ -271,16 +268,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, { register unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; 
+ __u32 ahash; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - addr_fold ^= ((size_t)net>>8); + ahash = ntohl(addr_fold); + ahash ^= ((size_t) net >> 8); - return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) - & IP_VS_SVC_TAB_MASK; + return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & + IP_VS_SVC_TAB_MASK; } /* @@ -312,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) */ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, &svc->addr, svc->port); - list_add(&svc->s_list, &ip_vs_svc_table[hash]); + hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); - list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } svc->flags |= IP_VS_SVC_F_HASHED; @@ -342,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* Remove it from the svc_table table */ - list_del(&svc->s_list); + hlist_del_rcu(&svc->s_list); } else { /* Remove it from the svc_fwm_table table */ - list_del(&svc->f_list); + hlist_del_rcu(&svc->f_list); } svc->flags &= ~IP_VS_SVC_F_HASHED; @@ -367,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); - list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) @@ -394,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(net, fwmark); - list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark 
== fwmark && svc->af == af && net_eq(svc->net, net)) { /* HIT */ @@ -405,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) return NULL; } +/* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, - const union nf_inet_addr *vaddr, __be16 vport) +ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; struct netns_ipvs *ipvs = net_ipvs(net); - read_lock(&__ip_vs_svc_lock); - /* * Check the table hashed by fwmark first */ @@ -449,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, } out: - if (svc) - atomic_inc(&svc->usecnt); - read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", fwmark, ip_vs_proto_name(protocol), IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), @@ -469,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) dest->svc = svc; } +static void ip_vs_service_free(struct ip_vs_service *svc) +{ + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); + kfree(svc); +} + static void __ip_vs_unbind_svc(struct ip_vs_dest *dest) { @@ -476,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) dest->svc = NULL; if (atomic_dec_and_test(&svc->refcnt)) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + ip_vs_service_free(svc); } } @@ -506,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af, & IP_VS_RTAB_MASK; } -/* - * Hashes ip_vs_dest in rs_table by <proto,addr,port>. - * should be called with locked tables. 
- */ -static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) +/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */ +static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned int hash; - if (!list_empty(&dest->d_list)) { - return 0; - } + if (dest->in_rs_table) + return; /* * Hash by proto,addr,port, @@ -524,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ipvs->rs_table[hash]); - - return 1; + hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); + dest->in_rs_table = 1; } -/* - * UNhashes ip_vs_dest from rs_table. - * should be called with locked tables. - */ -static int ip_vs_rs_unhash(struct ip_vs_dest *dest) +/* Unhash ip_vs_dest from rs_table. */ +static void ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* * Remove it from the rs_table table. */ - if (!list_empty(&dest->d_list)) { - list_del_init(&dest->d_list); + if (dest->in_rs_table) { + hlist_del_rcu(&dest->d_list); + dest->in_rs_table = 0; } - - return 1; } -/* - * Lookup real service by <proto,addr,port> in the real service table. 
- */ -struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, - const union nf_inet_addr *daddr, - __be16 dport) +/* Check if real service by <proto,addr,port> is present */ +bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport) { struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; - /* - * Check for "full" addressed entries - * Return the first found entry - */ + /* Check for "full" addressed entries */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&ipvs->rs_lock); - list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { - if ((dest->af == af) - && ip_vs_addr_equal(af, &dest->addr, daddr) - && (dest->port == dport) - && ((dest->protocol == protocol) || - dest->vfwmark)) { + rcu_read_lock(); + hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { + if (dest->port == dport && + dest->af == af && + ip_vs_addr_equal(af, &dest->addr, daddr) && + (dest->protocol == protocol || dest->vfwmark)) { /* HIT */ - read_unlock(&ipvs->rs_lock); - return dest; + rcu_read_unlock(); + return true; } } - read_unlock(&ipvs->rs_lock); + rcu_read_unlock(); - return NULL; + return false; } -/* - * Lookup destination by {addr,port} in the given service +/* Lookup destination by {addr,port} in the given service + * Called under RCU lock. 
*/ static struct ip_vs_dest * ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, @@ -592,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find the destination for the given service */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->af == svc->af) && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && (dest->port == dport)) { @@ -606,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, /* * Find destination by {daddr,dport,vaddr,protocol} - * Cretaed to be used in ip_vs_process_message() in + * Created to be used in ip_vs_process_message() in * the backup synchronization daemon. It finds the * destination to be bound to the received connection * on the backup. - * - * ip_vs_lookup_real_service() looked promissing, but - * seems not working as expected. + * Called under RCU lock, no refcnt is returned. 
*/ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, @@ -625,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -633,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, dest = ip_vs_lookup_dest(svc, daddr, port); if (!dest) dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); - if (dest) - atomic_inc(&dest->refcnt); - ip_vs_service_put(svc); return dest; } +void ip_vs_dest_dst_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest_dst *dest_dst = container_of(head, + struct ip_vs_dest_dst, + rcu_head); + + dst_release(dest_dst->dst_cache); + kfree(dest_dst); +} + +/* Release dest_dst and dst_cache for dest in user context */ +static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) +{ + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, 1); + if (old) { + RCU_INIT_POINTER(dest->dest_dst, NULL); + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); + } +} + /* * Lookup dest by {svc,addr,port} in the destination trash. 
* The destination trash is used to hold the destinations that are removed @@ -653,19 +653,25 @@ static struct ip_vs_dest * ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { - struct ip_vs_dest *dest, *nxt; + struct ip_vs_dest *dest; struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { + spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); + /* We can not reuse dest while in grace period + * because conns still can use dest->svc + */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -675,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ - return dest; - } - - /* - * Try to purge the destination from trash if not referenced - */ - if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " - "from trash\n", - dest->vfwmark, - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port)); - list_del(&dest->n_list); - ip_vs_dst_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree(dest); + list_del(&dest->t_list); + ip_vs_dest_hold(dest); + goto out; } } - return NULL; + dest = NULL; + +out: + spin_unlock_bh(&ipvs->dest_trash_lock); + + return dest; } +static void ip_vs_dest_free(struct ip_vs_dest *dest) +{ + __ip_vs_dst_cache_reset(dest); + __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); + kfree(dest); +} /* * Clean up all the destinations in the trash @@ -706,19 +710,18 @@ 
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * When the ip_vs_control_clearup is activated by ipvs module exit, * the service tables must have been flushed and all the connections * are expired, and the refcnt of each destination in the trash must - * be 1, so we simply release them here. + * be 0, so we simply release them here. */ static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { - list_del(&dest->n_list); - ip_vs_dst_reset(dest); - __ip_vs_unbind_svc(dest); - free_percpu(dest->stats.cpustats); - kfree(dest); + del_timer_sync(&ipvs->dest_trash_timer); + /* No need to use dest_trash_lock */ + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { + list_del(&dest->t_list); + ip_vs_dest_free(dest); } } @@ -768,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_scheduler *sched; int conn_flags; /* set the weight and the flags */ @@ -783,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! 
*/ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -809,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->l_threshold = udest->l_threshold; spin_lock_bh(&dest->dst_lock); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - if (add) - ip_vs_start_estimator(svc->net, &dest->stats); - - write_lock_bh(&__ip_vs_svc_lock); - - /* Wait until all other svc users go away */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - + sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { - list_add(&dest->n_list, &svc->destinations); + ip_vs_start_estimator(svc->net, &dest->stats); + list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; + if (sched->add_dest) + sched->add_dest(svc, dest); + } else { + if (sched->upd_dest) + sched->upd_dest(svc, dest); } - - /* call the update_service, because server weight may be changed */ - if (svc->scheduler->update_service) - svc->scheduler->update_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); } @@ -881,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->persistconns, 0); atomic_set(&dest->refcnt, 1); - INIT_LIST_HEAD(&dest->d_list); + INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); spin_lock_init(&dest->stats.lock); __ip_vs_update_dest(svc, dest, udest, 1); @@ -923,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Check if the dest already exists in the list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest != NULL) { IP_VS_DBG(1, "%s(): dest already exists\n", __func__); @@ -948,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 
IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); - /* - * Get the destination from the trash - */ - list_del(&dest->n_list); - __ip_vs_update_dest(svc, dest, udest, 1); ret = 0; } else { @@ -992,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ip_vs_addr_copy(svc->af, &daddr, &udest->addr); - /* - * Lookup the destination list - */ + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &daddr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); @@ -1008,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } +static void ip_vs_dest_wait_readers(struct rcu_head *head) +{ + struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, + rcu_head); + + /* End of grace period after unlinking */ + clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); +} + /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, + bool cleanup) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1021,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) /* * Remove it from the d-linked list with the real services. */ - write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&ipvs->rs_lock); - /* - * Decrease the refcnt of the dest, and free the dest - * if nobody refers to it (refcnt=0). Otherwise, throw - * the destination into the trash. - */ - if (atomic_dec_and_test(&dest->refcnt)) { - IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n", - dest->vfwmark, - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port)); - ip_vs_dst_reset(dest); - /* simply decrease svc->refcnt here, let the caller check - and release the service if nobody refers to it. 
- Only user context can release destination and service, - and only one user context can update virtual service at a - time, so the operation here is OK */ - atomic_dec(&dest->svc->refcnt); - free_percpu(dest->stats.cpustats); - kfree(dest); - } else { - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " - "dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), - ntohs(dest->port), - atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ipvs->dest_trash); - atomic_inc(&dest->refcnt); + if (!cleanup) { + set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); + call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); } + + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + /* dest lives in trash without reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_dest_put(dest); } @@ -1068,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, /* * Remove it from the d-linked destination list. 
*/ - list_del(&dest->n_list); + list_del_rcu(&dest->n_list); svc->num_dests--; - /* - * Call the update_service function of its scheduler - */ - if (svcupd && svc->scheduler->update_service) - svc->scheduler->update_service(svc); + if (svcupd) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched->del_dest) + sched->del_dest(svc, dest); + } } @@ -1090,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); + /* We use function that requires RCU lock */ + rcu_read_lock(); dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + rcu_read_unlock(); if (dest == NULL) { IP_VS_DBG(1, "%s(): destination not found!\n", __func__); return -ENOENT; } - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - /* * Unlink dest from the service */ __ip_vs_unlink_dest(svc, dest, 1); - write_unlock_bh(&__ip_vs_svc_lock); - /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, false); LeaveFunction(2); return 0; } +static void ip_vs_dest_trash_expire(unsigned long data) +{ + struct net *net = (struct net *) data; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest *dest, *next; + + spin_lock(&ipvs->dest_trash_lock); + list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { + /* Skip if dest is in grace period */ + if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) + continue; + if (atomic_read(&dest->refcnt) > 0) + continue; + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + ntohs(dest->port)); + list_del(&dest->t_list); + ip_vs_dest_free(dest); + } + if (!list_empty(&ipvs->dest_trash)) + mod_timer(&ipvs->dest_trash_timer, + jiffies + IP_VS_DEST_TRASH_PERIOD); + spin_unlock(&ipvs->dest_trash_lock); +} /* * Add a service into the service hash table 
@@ -1176,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, } /* I'm the first user of the service */ - atomic_set(&svc->usecnt, 0); atomic_set(&svc->refcnt, 0); svc->af = u->af; @@ -1190,7 +1196,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->net = net; INIT_LIST_HEAD(&svc->destinations); - rwlock_init(&svc->sched_lock); + spin_lock_init(&svc->sched_lock); spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ @@ -1200,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, sched = NULL; /* Bind the ct retriever */ - ip_vs_bind_pe(svc, pe); + RCU_INIT_POINTER(svc->pe, pe); pe = NULL; /* Update the virtual service counters */ @@ -1216,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ipvs->num_services++; /* Hash the service into the service table */ - write_lock_bh(&__ip_vs_svc_lock); ip_vs_svc_hash(svc); - write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; /* Now there is a service - full throttle */ @@ -1228,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, out_err: if (svc != NULL) { - ip_vs_unbind_scheduler(svc); - if (svc->inc) { - local_bh_disable(); - ip_vs_app_inc_put(svc->inc); - local_bh_enable(); - } - if (svc->stats.cpustats) - free_percpu(svc->stats.cpustats); - kfree(svc); + ip_vs_unbind_scheduler(svc, sched); + ip_vs_service_free(svc); } ip_vs_scheduler_put(sched); ip_vs_pe_put(pe); @@ -1286,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } #endif - write_lock_bh(&__ip_vs_svc_lock); - - /* - * Wait until all other svc users go away. 
- */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched != old_sched) { + /* Bind the new scheduler */ + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + old_sched = sched; + goto out; + } + /* Unbind the old scheduler on success */ + ip_vs_unbind_scheduler(svc, old_sched); + } /* * Set the flags and timeout value @@ -1300,57 +1302,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - old_sched = svc->scheduler; - if (sched != old_sched) { - /* - * Unbind the old scheduler - */ - if ((ret = ip_vs_unbind_scheduler(svc))) { - old_sched = sched; - goto out_unlock; - } - - /* - * Bind the new scheduler - */ - if ((ret = ip_vs_bind_scheduler(svc, sched))) { - /* - * If ip_vs_bind_scheduler fails, restore the old - * scheduler. - * The main reason of failure is out of memory. - * - * The question is if the old scheduler can be - * restored all the time. TODO: if it cannot be - * restored some time, we must delete the service, - * otherwise the system may crash. - */ - ip_vs_bind_scheduler(svc, old_sched); - old_sched = sched; - goto out_unlock; - } - } - - old_pe = svc->pe; - if (pe != old_pe) { - ip_vs_unbind_pe(svc); - ip_vs_bind_pe(svc, pe); - } + old_pe = rcu_dereference_protected(svc->pe, 1); + if (pe != old_pe) + rcu_assign_pointer(svc->pe, pe); -out_unlock: - write_unlock_bh(&__ip_vs_svc_lock); out: ip_vs_scheduler_put(old_sched); ip_vs_pe_put(old_pe); return ret; } +static void ip_vs_service_rcu_free(struct rcu_head *head) +{ + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} /* * Delete a service from the service list * - The service must be unlinked, unlocked and not referenced! 
* - We are called under _bh lock */ -static void __ip_vs_del_service(struct ip_vs_service *svc) +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) { struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; @@ -1366,27 +1341,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ - old_sched = svc->scheduler; - ip_vs_unbind_scheduler(svc); + old_sched = rcu_dereference_protected(svc->scheduler, 1); + ip_vs_unbind_scheduler(svc, old_sched); ip_vs_scheduler_put(old_sched); - /* Unbind persistence engine */ - old_pe = svc->pe; - ip_vs_unbind_pe(svc); + /* Unbind persistence engine, keep svc->pe */ + old_pe = rcu_dereference_protected(svc->pe, 1); ip_vs_pe_put(old_pe); - /* Unbind app inc */ - if (svc->inc) { - ip_vs_app_inc_put(svc->inc); - svc->inc = NULL; - } - /* * Unlink the whole destination list */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest); + __ip_vs_del_dest(svc->net, dest, cleanup); } /* @@ -1400,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Free the service if nobody refers to it */ - if (atomic_read(&svc->refcnt) == 0) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", + if (atomic_dec_and_test(&svc->refcnt)) { + IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port), atomic_read(&svc->usecnt)); - free_percpu(svc->stats.cpustats); - kfree(svc); + ntohs(svc->port)); + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); } /* decrease the module use count */ @@ -1416,23 +1383,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) /* * Unlink a service from list and try to delete it if its refcnt reached 0 */ -static void ip_vs_unlink_service(struct ip_vs_service *svc) +static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) { + /* 
Hold svc to avoid double release from dest_trash */ + atomic_inc(&svc->refcnt); /* * Unhash it from the service table */ - write_lock_bh(&__ip_vs_svc_lock); - ip_vs_svc_unhash(svc); - /* - * Wait until all the svc users go away. - */ - IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - - __ip_vs_del_service(svc); - - write_unlock_bh(&__ip_vs_svc_lock); + __ip_vs_del_service(svc, cleanup); } /* @@ -1442,7 +1402,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) { if (svc == NULL) return -EEXIST; - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, false); return 0; } @@ -1451,19 +1411,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net) +static int ip_vs_flush(struct net *net, bool cleanup) { int idx; - struct ip_vs_service *svc, *nxt; + struct ip_vs_service *svc; + struct hlist_node *n; /* * Flush the service table hashed by <netns,protocol,addr,port> */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], - s_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], + s_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1471,10 +1432,10 @@ static int ip_vs_flush(struct net *net) * Flush the service table hashed by fwmark */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, - &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net)) - ip_vs_unlink_service(svc); + ip_vs_unlink_service(svc, cleanup); } } @@ -1490,32 +1451,29 @@ void ip_vs_service_net_cleanup(struct net *net) EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net); + ip_vs_flush(net, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } -/* - * Release dst hold by dst_cache - */ + +/* Put all references for device 
(dst_cache) */ static inline void -__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) { spin_lock_bh(&dest->dst_lock); - if (dest->dst_cache && dest->dst_cache->dev == dev) { + if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) { IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", dev->name, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - ip_vs_dst_reset(dest); + __ip_vs_dst_cache_reset(dest); } spin_unlock_bh(&dest->dst_lock); } -/* - * Netdev event receiver - * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to - * a device that is "unregister" it must be released. +/* Netdev event receiver + * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -1527,35 +1485,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, struct ip_vs_dest *dest; unsigned int idx; - if (event != NETDEV_UNREGISTER || !ipvs) + if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); EnterFunction(2); mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) { list_for_each_entry(dest, &svc->destinations, n_list) { - __ip_vs_dev_reset(dest, dev); + ip_vs_forget_dev(dest, dev); } } } } - list_for_each_entry(dest, &ipvs->dest_trash, n_list) { - __ip_vs_dev_reset(dest, dev); + 
spin_lock_bh(&ipvs->dest_trash_lock); + list_for_each_entry(dest, &ipvs->dest_trash, t_list) { + ip_vs_forget_dev(dest, dev); } + spin_unlock_bh(&ipvs->dest_trash_lock); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); return NOTIFY_DONE; @@ -1568,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) { struct ip_vs_dest *dest; - write_lock_bh(&__ip_vs_svc_lock); list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_zero_stats(&dest->stats); } ip_vs_zero_stats(&svc->stats); - write_unlock_bh(&__ip_vs_svc_lock); return 0; } @@ -1583,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { if (net_eq(svc->net, net)) ip_vs_zero_service(svc); } @@ -1808,6 +1766,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "backup_only", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -1912,7 +1876,7 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - struct list_head *table; + struct hlist_head *table; int bucket; }; @@ -1945,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = 
idx; @@ -1956,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], + f_list) { if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; @@ -1969,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) -__acquires(__ip_vs_svc_lock) { - read_lock_bh(&__ip_vs_svc_lock); + rcu_read_lock(); return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct list_head *e; + struct hlist_node *e; struct ip_vs_iter *iter; struct ip_vs_service *svc; @@ -1992,13 +1956,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (iter->table == ip_vs_svc_table) { /* next service in table hashed by protocol */ - if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, s_list); - + e = rcu_dereference(hlist_next_rcu(&svc->s_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, s_list); while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], - s_list) { + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_table[iter->bucket], + s_list) { return svc; } } @@ -2009,13 +1974,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } /* next service in hashed by fwmark */ - if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) - return list_entry(e, struct ip_vs_service, f_list); + e = rcu_dereference(hlist_next_rcu(&svc->f_list)); + if (e) + return hlist_entry(e, struct ip_vs_service, f_list); scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - 
list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], - f_list) + hlist_for_each_entry_rcu(svc, + &ip_vs_svc_fwm_table[iter->bucket], + f_list) return svc; } @@ -2023,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) -__releases(__ip_vs_svc_lock) { - read_unlock_bh(&__ip_vs_svc_lock); + rcu_read_unlock(); } @@ -2043,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -2051,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - svc->scheduler->name); + sched->name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name, + sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, svc->scheduler->name, + svc->fwmark, sched->name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2073,7 +2040,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) else seq_putc(seq, '\n'); - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, @@ -2383,7 +2350,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -2418,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, 
int cmd, void __user *user, unsigned int len) } /* Lookup the exact service by <protocol, addr, port> or fwmark */ + rcu_read_lock(); if (usvc.fwmark == 0) svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2474,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2497,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net, int ret = 0; for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2516,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net, } for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; @@ -2545,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; + rcu_read_lock(); if (get->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else svc = 
__ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); + rcu_read_unlock(); if (svc) { int count = 0; @@ -2732,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; + rcu_read_lock(); if (entry->fwmark) svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else svc = __ip_vs_service_find(net, AF_INET, entry->protocol, &addr, entry->port); + rcu_read_unlock(); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2894,6 +2870,7 @@ nla_put_failure: static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_service *svc) { + struct ip_vs_scheduler *sched; struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; @@ -2914,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, goto nla_put_failure; } - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || + sched = rcu_dereference_protected(svc->scheduler, 1); + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || (svc->pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || @@ -2965,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -2976,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3036,11 
+3014,13 @@ static int ip_vs_genl_parse_service(struct net *net, usvc->fwmark = 0; } + rcu_read_lock(); if (usvc->fwmark) svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); + rcu_read_unlock(); *ret_svc = svc; /* If a full entry was requested, check for the additional fields */ @@ -3392,7 +3372,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net); + ret = ip_vs_flush(net, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(net, info->attrs); @@ -3741,6 +3721,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; + tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); @@ -3783,13 +3764,14 @@ int __net_init ip_vs_control_net_init(struct net *net) int idx; struct netns_ipvs *ipvs = net_ipvs(net); - rwlock_init(&ipvs->rs_lock); - /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); + INIT_HLIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); + spin_lock_init(&ipvs->dest_trash_lock); + setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, + (unsigned long) net); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3819,6 +3801,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + /* Some dest can be in grace period even before cleanup, we have to + * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. 
+ */ + rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); @@ -3864,10 +3850,10 @@ int __init ip_vs_control_init(void) EnterFunction(2); - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); + INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); } smp_wmb(); /* Do we really need it now ? */ diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 7f3b0cc00b7a..ccab120df45e 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -51,7 +51,7 @@ * IPVS DH bucket */ struct ip_vs_dh_bucket { - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* @@ -64,6 +64,10 @@ struct ip_vs_dh_bucket { #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) +struct ip_vs_dh_state { + struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE]; + struct rcu_head rcu_head; +}; /* * Returns hash value for IPVS DH entry @@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, - const union nf_inet_addr *addr) +ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr) { - return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; + return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest); } @@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, * Assign all the hash buckets of the specified table with the service. 
*/ static int -ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_dh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; + bool empty; - b = tbl; + b = &s->buckets[0]; p = &svc->destinations; + empty = list_empty(p); for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { - if (list_empty(p)) { - b->dest = NULL; - } else { + dest = rcu_dereference_protected(b->dest, 1); + if (dest) + ip_vs_dest_put(dest); + if (empty) + RCU_INIT_POINTER(b->dest, NULL); + else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); - atomic_inc(&dest->refcnt); - b->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(b->dest, dest); p = p->next; } @@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) /* * Flush all the hash buckets of the specified table. */ -static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) +static void ip_vs_dh_flush(struct ip_vs_dh_state *s) { int i; struct ip_vs_dh_bucket *b; + struct ip_vs_dest *dest; - b = tbl; + b = &s->buckets[0]; for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { - if (b->dest) { - atomic_dec(&b->dest->refcnt); - b->dest = NULL; + dest = rcu_dereference_protected(b->dest, 1); + if (dest) { + ip_vs_dest_put(dest); + RCU_INIT_POINTER(b->dest, NULL); } b++; } @@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) static int ip_vs_dh_init_svc(struct ip_vs_service *svc) { - struct ip_vs_dh_bucket *tbl; + struct ip_vs_dh_state *s; /* allocate the DH table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, - GFP_KERNEL); - if (tbl == NULL) + s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL); + if (s == NULL) return -ENOMEM; - svc->sched_data = tbl; + svc->sched_data = s; IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - /* 
assign the hash buckets with the updated service */ - ip_vs_dh_assign(tbl, svc); + /* assign the hash buckets with current dests */ + ip_vs_dh_reassign(s, svc); return 0; } -static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +static void ip_vs_dh_done_svc(struct ip_vs_service *svc) { - struct ip_vs_dh_bucket *tbl = svc->sched_data; + struct ip_vs_dh_state *s = svc->sched_data; /* got to clean up hash buckets here */ - ip_vs_dh_flush(tbl); + ip_vs_dh_flush(s); /* release the table itself */ - kfree(svc->sched_data); + kfree_rcu(s, rcu_head); IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - - return 0; } -static int ip_vs_dh_update_svc(struct ip_vs_service *svc) +static int ip_vs_dh_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { - struct ip_vs_dh_bucket *tbl = svc->sched_data; - - /* got to clean up hash buckets here */ - ip_vs_dh_flush(tbl); + struct ip_vs_dh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ - ip_vs_dh_assign(tbl, svc); + ip_vs_dh_reassign(s, svc); return 0; } @@ -212,19 +217,20 @@ static struct ip_vs_dest * ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; - struct ip_vs_dh_bucket *tbl; + struct ip_vs_dh_state *s; struct ip_vs_iphdr iph; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - tbl = (struct ip_vs_dh_bucket *)svc->sched_data; - dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); + s = (struct ip_vs_dh_state *) svc->sched_data; + dest = ip_vs_dh_get(svc->af, s, &iph.daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { + ip_vs_scheduler_err(svc, "no destination available"); return NULL; } @@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), .init_service = ip_vs_dh_init_svc, .done_service = 
ip_vs_dh_done_svc, - .update_service = ip_vs_dh_update_svc, + .add_dest = ip_vs_dh_dest_changed, + .del_dest = ip_vs_dh_dest_changed, .schedule = ip_vs_dh_schedule, }; @@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void) static void __exit ip_vs_dh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 0fac6017b6fb..6bee6d0c73a5 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -56,7 +56,7 @@ * Make a summary from each cpu */ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, - struct ip_vs_cpu_stats *stats) + struct ip_vs_cpu_stats __percpu *stats) { int i; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4f53a5f04437..77c173282f38 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * hopefully it will succeed on the retransmitted * packet. 
*/ + rcu_read_lock(); ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, iph->ihl * 4, start-data, end-start, buf, buf_len); + rcu_read_unlock(); if (ret) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); @@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void) int rv; rv = register_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns on error */ return rv; } @@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void) static void __exit ip_vs_ftp_exit(void) { unregister_pernet_subsys(&ip_vs_ftp_ops); + /* rcu_barrier() is called by netns */ } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index fdd89b9564ea..b2cc2528a4df 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -90,11 +90,12 @@ * IP address and its destination server */ struct ip_vs_lblc_entry { - struct list_head list; + struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ + struct rcu_head rcu_head; }; @@ -102,12 +103,14 @@ struct ip_vs_lblc_entry { * IPVS lblc hash table */ struct ip_vs_lblc_table { - struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ + struct rcu_head rcu_head; + struct hlist_head __rcu bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ + struct timer_list periodic_timer; /* collect stale entries */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ - struct timer_list periodic_timer; /* collect stale entries */ int rover; /* rover for expire check */ int counter; /* counter for no expire */ + bool dead; }; @@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = { static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { - list_del(&en->list); + struct ip_vs_dest *dest; + + hlist_del_rcu(&en->list); /* * We don't kfree dest because it 
is referred either by its service * or the trash dest list. */ - atomic_dec(&en->dest->refcnt); - kfree(en); + dest = rcu_dereference_protected(en->dest, 1); + ip_vs_dest_put(dest); + kfree_rcu(en, rcu_head); } @@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr); - list_add(&en->list, &tbl->bucket[hash]); + hlist_add_head_rcu(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); } -/* - * Get ip_vs_lblc_entry associated with supplied parameters. Called under read - * lock - */ +/* Get ip_vs_lblc_entry associated with supplied parameters. */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, const union nf_inet_addr *addr) @@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, unsigned int hash = ip_vs_lblc_hashkey(af, addr); struct ip_vs_lblc_entry *en; - list_for_each_entry(en, &tbl->bucket[hash], list) + hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list) if (ip_vs_addr_equal(af, &en->addr, addr)) return en; @@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, /* * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP - * address to a server. Called under write lock. + * address to a server. Called under spin lock. 
*/ static inline struct ip_vs_lblc_entry * ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, @@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, ip_vs_addr_copy(dest->af, &en->addr, daddr); en->lastuse = jiffies; - atomic_inc(&dest->refcnt); - en->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(en->dest, dest); ip_vs_lblc_hash(tbl, en); - } else if (en->dest != dest) { - atomic_dec(&en->dest->refcnt); - atomic_inc(&dest->refcnt); - en->dest = dest; + } else { + struct ip_vs_dest *old_dest; + + old_dest = rcu_dereference_protected(en->dest, 1); + if (old_dest != dest) { + ip_vs_dest_put(old_dest); + ip_vs_dest_hold(dest); + /* No ordering constraints for refcnt */ + RCU_INIT_POINTER(en->dest, dest); + } } return en; @@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, /* * Flush all the entries of the specified table. */ -static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) +static void ip_vs_lblc_flush(struct ip_vs_service *svc) { - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_table *tbl = svc->sched_data; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; int i; + spin_lock_bh(&svc->sched_lock); + tbl->dead = 1; for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { + hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } } + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblc_expiration(struct ip_vs_service *svc) @@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc) static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; unsigned long now = jiffies; int i, j; for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 
1) & IP_VS_LBLC_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + sysctl_lblc_expiration(svc))) @@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data) unsigned long now = jiffies; int goal; int i, j; - struct ip_vs_lblc_entry *en, *nxt; + struct ip_vs_lblc_entry *en; + struct hlist_node *next; if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ @@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) * Initialize the hash buckets */ for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - INIT_LIST_HEAD(&tbl->bucket[i]); + INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; + tbl->dead = 0; /* * Hook periodic timer for garbage collection @@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblc_done_svc(struct ip_vs_service *svc) { struct 
ip_vs_lblc_table *tbl = svc->sched_data; @@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) del_timer_sync(&tbl->periodic_timer); /* got to clean up table entries here */ - ip_vs_lblc_flush(tbl); + ip_vs_lblc_flush(svc); /* release the table itself */ - kfree(tbl); + kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } @@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) * The server with weight=0 is quiesced and will not receive any * new connection. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; if (atomic_read(&dest->weight) > 0) { @@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) * Find the destination with the least load. */ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { struct ip_vs_dest *d; - list_for_each_entry(d, &svc->destinations, n_list) { + list_for_each_entry_rcu(d, &svc->destinations, n_list) { if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight)) { return 1; @@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - read_lock(&svc->sched_lock); en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); if (en) { /* We only hold a read lock, but this is atomic */ @@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * free up entries from the trash at any time. 
*/ - if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) - dest = en->dest; + dest = rcu_dereference(en->dest); + if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && + atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) + goto out; } - read_unlock(&svc->sched_lock); - - /* If the destination has a weight and is not overloaded, use it */ - if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) - goto out; /* No cache entry or it is invalid, time to schedule */ dest = __ip_vs_lblc_schedule(svc); @@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); - ip_vs_lblc_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_lblc_new(tbl, &iph.daddr, dest); + spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", @@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_pernet_subsys(&ip_vs_lblc_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c03b6a3ade2f..feb9656eac58 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -89,40 +89,44 @@ */ struct ip_vs_dest_set_elem { struct list_head list; /* list link */ - struct ip_vs_dest *dest; /* destination server */ + struct ip_vs_dest __rcu *dest; /* destination server */ + struct rcu_head rcu_head; }; struct ip_vs_dest_set { atomic_t size; /* set size */ unsigned long lastmod; /* last modified time */ struct list_head list; /* destination list */ - rwlock_t lock; /* lock for this list */ }; -static struct ip_vs_dest_set_elem * -ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) +static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, + struct ip_vs_dest 
*dest, bool check) { struct ip_vs_dest_set_elem *e; - list_for_each_entry(e, &set->list, list) { - if (e->dest == dest) - /* already existed */ - return NULL; + if (check) { + list_for_each_entry(e, &set->list, list) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); + if (d == dest) + /* already existed */ + return; + } } e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e == NULL) - return NULL; + return; - atomic_inc(&dest->refcnt); - e->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(e->dest, dest); - list_add(&e->list, &set->list); + list_add_rcu(&e->list, &set->list); atomic_inc(&set->size); set->lastmod = jiffies; - return e; } static void @@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) struct ip_vs_dest_set_elem *e; list_for_each_entry(e, &set->list, list) { - if (e->dest == dest) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); + if (d == dest) { /* HIT */ atomic_dec(&set->size); set->lastmod = jiffies; - atomic_dec(&e->dest->refcnt); - list_del(&e->list); - kfree(e); + ip_vs_dest_put(dest); + list_del_rcu(&e->list); + kfree_rcu(e, rcu_head); break; } } @@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) { struct ip_vs_dest_set_elem *e, *ep; - write_lock(&set->lock); list_for_each_entry_safe(e, ep, &set->list, list) { + struct ip_vs_dest *d; + + d = rcu_dereference_protected(e->dest, 1); /* * We don't kfree dest because it is referred either * by its service or by the trash dest list. 
*/ - atomic_dec(&e->dest->refcnt); - list_del(&e->list); - kfree(e); + ip_vs_dest_put(d); + list_del_rcu(&e->list); + kfree_rcu(e, rcu_head); } - write_unlock(&set->lock); } /* get weighted least-connection node in the destination set */ @@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) return NULL; /* select the first destination server, whose weight > 0 */ - list_for_each_entry(e, &set->list, list) { - least = e->dest; + list_for_each_entry_rcu(e, &set->list, list) { + least = rcu_dereference(e->dest); if (least->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* find the destination with the weighted least load */ nextstage: - list_for_each_entry(e, &set->list, list) { - dest = e->dest; + list_for_each_entry_continue_rcu(e, &set->list, list) { + dest = rcu_dereference(e->dest); if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* select the first destination server, whose weight > 0 */ list_for_each_entry(e, &set->list, list) { - most = e->dest; + most = rcu_dereference_protected(e->dest, 1); if (atomic_read(&most->weight) > 0) { moh = ip_vs_dest_conn_overhead(most); goto nextstage; @@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* find the destination with the weighted most load */ nextstage: - list_for_each_entry(e, &set->list, list) { - dest = e->dest; + list_for_each_entry_continue(e, &set->list, list) { + dest = rcu_dereference_protected(e->dest, 1); doh = ip_vs_dest_conn_overhead(dest); /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ if ((moh * atomic_read(&dest->weight) < @@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) * IP address and its destination server set */ struct ip_vs_lblcr_entry { - struct 
list_head list; + struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ struct ip_vs_dest_set set; /* destination server set */ unsigned long lastuse; /* last used time */ + struct rcu_head rcu_head; }; @@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry { * IPVS lblcr hash table */ struct ip_vs_lblcr_table { - struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ + struct rcu_head rcu_head; + struct hlist_head __rcu bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ struct timer_list periodic_timer; /* collect stale entries */ int rover; /* rover for expire check */ int counter; /* counter for no expire */ + bool dead; }; @@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = { static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { - list_del(&en->list); + hlist_del_rcu(&en->list); ip_vs_dest_set_eraseall(&en->set); - kfree(en); + kfree_rcu(en, rcu_head); } @@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr); - list_add(&en->list, &tbl->bucket[hash]); + hlist_add_head_rcu(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); } -/* - * Get ip_vs_lblcr_entry associated with supplied parameters. Called under - * read lock. - */ +/* Get ip_vs_lblcr_entry associated with supplied parameters. 
*/ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *addr) @@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, unsigned int hash = ip_vs_lblcr_hashkey(af, addr); struct ip_vs_lblcr_entry *en; - list_for_each_entry(en, &tbl->bucket[hash], list) + hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list) if (ip_vs_addr_equal(af, &en->addr, addr)) return en; @@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, /* * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. Called under write lock. + * IP address to a server. Called under spin lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, @@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* initialize its dest set */ atomic_set(&(en->set.size), 0); INIT_LIST_HEAD(&en->set.list); - rwlock_init(&en->set.lock); + + ip_vs_dest_set_insert(&en->set, dest, false); ip_vs_lblcr_hash(tbl, en); + return en; } - write_lock(&en->set.lock); - ip_vs_dest_set_insert(&en->set, dest); - write_unlock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest, true); return en; } @@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* * Flush all the entries of the specified table. */ -static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) +static void ip_vs_lblcr_flush(struct ip_vs_service *svc) { + struct ip_vs_lblcr_table *tbl = svc->sched_data; int i; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; - /* No locking required, only called during cleanup. 
*/ + spin_lock_bh(&svc->sched_lock); + tbl->dead = 1; for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { + hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); } } + spin_unlock_bh(&svc->sched_lock); } static int sysctl_lblcr_expiration(struct ip_vs_service *svc) @@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int i, j; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_after(en->lastuse + sysctl_lblcr_expiration(svc), now)) continue; @@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data) unsigned long now = jiffies; int goal; int i, j; - struct ip_vs_lblcr_entry *en, *nxt; + struct ip_vs_lblcr_entry *en; + struct hlist_node *next; if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ @@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&svc->sched_lock); - list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { + spin_lock(&svc->sched_lock); + hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long 
data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&svc->sched_lock); + spin_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) * Initialize the hash buckets */ for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - INIT_LIST_HEAD(&tbl->bucket[i]); + INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; + tbl->dead = 0; /* * Hook periodic timer for garbage collection @@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc) { struct ip_vs_lblcr_table *tbl = svc->sched_data; @@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) del_timer_sync(&tbl->periodic_timer); /* got to clean up table entries here */ - ip_vs_lblcr_flush(tbl); + ip_vs_lblcr_flush(svc); /* release the table itself */ - kfree(tbl); + kfree_rcu(tbl, rcu_head); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", sizeof(*tbl)); - - return 0; } @@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) * The server with weight=0 is quiesced and will not receive any * new connection. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) * Find the destination with the least load. 
*/ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { struct ip_vs_dest *d; - list_for_each_entry(d, &svc->destinations, n_list) { + list_for_each_entry_rcu(d, &svc->destinations, n_list) { if (atomic_read(&d->activeconns)*2 < atomic_read(&d->weight)) { return 1; @@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_lblcr_table *tbl = svc->sched_data; struct ip_vs_iphdr iph; - struct ip_vs_dest *dest = NULL; + struct ip_vs_dest *dest; struct ip_vs_lblcr_entry *en; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); @@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { - /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; /* Get the least loaded destination */ - read_lock(&en->set.lock); dest = ip_vs_dest_set_min(&en->set); - read_unlock(&en->set.lock); /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && - time_after(jiffies, en->set.lastmod + + time_after(jiffies, en->set.lastmod + sysctl_lblcr_expiration(svc))) { - struct ip_vs_dest *m; + spin_lock_bh(&svc->sched_lock); + if (atomic_read(&en->set.size) > 1) { + struct ip_vs_dest *m; - write_lock(&en->set.lock); - m = ip_vs_dest_set_max(&en->set); - if (m) - ip_vs_dest_set_erase(&en->set, m); - write_unlock(&en->set.lock); + m = ip_vs_dest_set_max(&en->set); + if (m) + ip_vs_dest_set_erase(&en->set, m); + } + spin_unlock_bh(&svc->sched_lock); } /* If the destination is not overloaded, use 
it */ - if (dest && !is_overloaded(dest, svc)) { - read_unlock(&svc->sched_lock); + if (dest && !is_overloaded(dest, svc)) goto out; - } /* The cache entry is invalid, time to schedule */ dest = __ip_vs_lblcr_schedule(svc); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); - read_unlock(&svc->sched_lock); return NULL; } /* Update our cache entry */ - write_lock(&en->set.lock); - ip_vs_dest_set_insert(&en->set, dest); - write_unlock(&en->set.lock); - } - read_unlock(&svc->sched_lock); - - if (dest) + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_dest_set_insert(&en->set, dest, true); + spin_unlock_bh(&svc->sched_lock); goto out; + } /* No cache entry, time to schedule */ dest = __ip_vs_lblcr_schedule(svc); @@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* If we fail to create a cache entry, we'll just use the valid dest */ - write_lock(&svc->sched_lock); - ip_vs_lblcr_new(tbl, &iph.daddr, dest); - write_unlock(&svc->sched_lock); + spin_lock_bh(&svc->sched_lock); + if (!tbl->dead) + ip_vs_lblcr_new(tbl, &iph.daddr, dest); + spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", @@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_pernet_subsys(&ip_vs_lblcr_ops); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index f391819c0cca..5128e338a749 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * served, but no new connection is assigned to the server. 
*/ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || atomic_read(&dest->weight) == 0) continue; @@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void) static void __exit ip_vs_lc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); + synchronize_rcu(); } module_init(ip_vs_lc_init); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 984d9c137d84..646cfd4baa73 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD || !atomic_read(&dest->weight)) @@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void) static void __exit ip_vs_nq_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); + synchronize_rcu(); } module_init(ip_vs_nq_init); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 5cf859ccb31b..1a82b29ce8ea 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -13,20 +13,8 @@ /* IPVS pe list */ static LIST_HEAD(ip_vs_pe); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_pe_lock); - -/* Bind a service with a pe */ -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe) -{ - svc->pe = pe; -} - -/* Unbind a service from its pe */ -void ip_vs_unbind_pe(struct ip_vs_service *svc) -{ - svc->pe = NULL; -} +/* semaphore for IPVS PEs. 
*/ +static DEFINE_MUTEX(ip_vs_pe_mutex); /* Get pe in the pe list by name */ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) @@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, pe_name); - spin_lock_bh(&ip_vs_pe_lock); - - list_for_each_entry(pe, &ip_vs_pe, n_list) { + rcu_read_lock(); + list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) { /* Test and get the modules atomically */ if (pe->module && !try_module_get(pe->module)) { @@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) } if (strcmp(pe_name, pe->name)==0) { /* HIT */ - spin_unlock_bh(&ip_vs_pe_lock); + rcu_read_unlock(); return pe; } if (pe->module) module_put(pe->module); } + rcu_read_unlock(); - spin_unlock_bh(&ip_vs_pe_lock); return NULL; } @@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) /* increase the module use count */ ip_vs_use_count_inc(); - spin_lock_bh(&ip_vs_pe_lock); - - if (!list_empty(&pe->n_list)) { - spin_unlock_bh(&ip_vs_pe_lock); - ip_vs_use_count_dec(); - pr_err("%s(): [%s] pe already linked\n", - __func__, pe->name); - return -EINVAL; - } - + mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist * in the pe list. 
*/ list_for_each_entry(tmp, &ip_vs_pe, n_list) { if (strcmp(tmp->name, pe->name) == 0) { - spin_unlock_bh(&ip_vs_pe_lock); + mutex_unlock(&ip_vs_pe_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] pe already existed " "in the system\n", __func__, pe->name); @@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) } } /* Add it into the d-linked pe list */ - list_add(&pe->n_list, &ip_vs_pe); - spin_unlock_bh(&ip_vs_pe_lock); + list_add_rcu(&pe->n_list, &ip_vs_pe); + mutex_unlock(&ip_vs_pe_mutex); pr_info("[%s] pe registered.\n", pe->name); @@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe); /* Unregister a pe from the pe list */ int unregister_ip_vs_pe(struct ip_vs_pe *pe) { - spin_lock_bh(&ip_vs_pe_lock); - if (list_empty(&pe->n_list)) { - spin_unlock_bh(&ip_vs_pe_lock); - pr_err("%s(): [%s] pe is not in the list. failed\n", - __func__, pe->name); - return -EINVAL; - } - + mutex_lock(&ip_vs_pe_mutex); /* Remove it from the d-linked pe list */ - list_del(&pe->n_list); - spin_unlock_bh(&ip_vs_pe_lock); + list_del_rcu(&pe->n_list); + mutex_unlock(&ip_vs_pe_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 12475ef88daf..00cc0241ed87 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -172,6 +172,7 @@ static int __init ip_vs_sip_init(void) static void __exit ip_vs_sip_cleanup(void) { unregister_ip_vs_pe(&ip_vs_sip_pe); + synchronize_rcu(); } module_init(ip_vs_sip_init); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index ae8ec6f27688..6e14a7b5602f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + rcu_read_lock(); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(net, af, 
skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -906,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; - int ihl; + int ihl, cofs; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? 
ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -914,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), - sizeof(_sctpch), &_sctpch); + cofs = ihl + sizeof(sctp_sctphdr_t); + sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -933,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, */ if ((sch->type == SCTP_CID_COOKIE_ECHO) || (sch->type == SCTP_CID_COOKIE_ACK)) { - sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + - sch->length), sizeof(_sctpch), &_sctpch); - if (sch) { - if (sch->type == SCTP_CID_ABORT) + int clen = ntohs(sch->length); + + if (clen >= sizeof(sctp_chunkhdr_t)) { + sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), + sizeof(_sctpch), &_sctpch); + if (sch && sch->type == SCTP_CID_ABORT) chunk_type = sch->type; } } @@ -992,9 +994,9 @@ static void sctp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); set_sctp_state(pd, cp, direction, skb); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline __u16 sctp_app_hashkey(__be16 port) @@ -1014,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc) hash = sctp_app_hashkey(port); - spin_lock_bh(&ipvs->sctp_app_lock); list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); - spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); - 
list_del(&inc->p_list); - spin_unlock_bh(&ipvs->sctp_app_lock); + list_del_rcu(&inc->p_list); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) @@ -1053,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&ipvs->sctp_app_lock); - list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1074,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->sctp_app_lock); + rcu_read_unlock(); out: return result; } @@ -1088,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); - spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9af653a75825..50a15944c6c1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ + rcu_read_lock(); if (th->syn && - (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, th->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * 
It seems that we are very loaded. * We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, if (th == NULL) return; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); set_tcp_state(pd, cp, direction, th); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } static inline __u16 tcp_app_hashkey(__be16 port) @@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) hash = tcp_app_hashkey(port); - spin_lock_bh(&ipvs->tcp_app_lock); list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->tcp_app_lock); + list_del_rcu(&inc->p_list); } @@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&ipvs->tcp_app_lock); - list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, 
&ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->tcp_app_lock); + rcu_read_unlock(); out: return result; @@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] : tcp_timeouts[IP_VS_TCP_S_LISTEN]); - spin_unlock(&cp->lock); + spin_unlock_bh(&cp->lock); } /* --------------------------------------------- @@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); - spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 503a842c90d2..b62a3c0ff9bf 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); - svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + rcu_read_lock(); + svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, + &iph->daddr, uh->dest); if (svc) { int ignored; @@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * It seems that we are very loaded. 
* We have to drop this packet :( */ - ip_vs_service_put(svc); + rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd, iph); - else { - ip_vs_service_put(svc); + else *verdict = NF_DROP; - } + rcu_read_unlock(); return 0; } - ip_vs_service_put(svc); } + rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) hash = udp_app_hashkey(port); - - spin_lock_bh(&ipvs->udp_app_lock); list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &ipvs->udp_apps[hash]); + list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]); atomic_inc(&pd->appcnt); out: - spin_unlock_bh(&ipvs->udp_app_lock); return ret; } @@ -380,12 +377,9 @@ static void udp_unregister_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(net); - spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); - list_del(&inc->p_list); - spin_unlock_bh(&ipvs->udp_app_lock); + list_del_rcu(&inc->p_list); } @@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&ipvs->udp_app_lock); - list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { + rcu_read_lock(); + list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&ipvs->udp_app_lock); + rcu_read_unlock(); out: 
return result; @@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); - spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c49b388d1085..c35986c793d9 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_rr_update_svc(struct ip_vs_service *svc) +static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) { - svc->sched_data = &svc->destinations; + struct list_head *p; + + spin_lock_bh(&svc->sched_lock); + p = (struct list_head *) svc->sched_data; + /* dest is already unlinked, so p->prev is not valid but + * p->next is valid, use it to reach previous entry. 
+ */ + if (p == &dest->n_list) + svc->sched_data = p->next->prev; + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct list_head *p, *q; - struct ip_vs_dest *dest; + struct list_head *p; + struct ip_vs_dest *dest, *last; + int pass = 0; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - write_lock(&svc->sched_lock); - p = (struct list_head *)svc->sched_data; - p = p->next; - q = p; + spin_lock_bh(&svc->sched_lock); + p = (struct list_head *) svc->sched_data; + last = dest = list_entry(p, struct ip_vs_dest, n_list); + do { - /* skip list head */ - if (q == &svc->destinations) { - q = q->next; - continue; + list_for_each_entry_continue_rcu(dest, + &svc->destinations, + n_list) { + if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && + atomic_read(&dest->weight) > 0) + /* HIT */ + goto out; + if (dest == last) + goto stop; } - - dest = list_entry(q, struct ip_vs_dest, n_list); - if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && - atomic_read(&dest->weight) > 0) - /* HIT */ - goto out; - q = q->next; - } while (q != p); - write_unlock(&svc->sched_lock); + pass++; + /* Previous dest could be unlinked, do not loop forever. + * If we stay at head there is no need for 2nd pass. 
+ */ + } while (pass < 2 && p != &svc->destinations); + +stop: + spin_unlock_bh(&svc->sched_lock); ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: - svc->sched_data = q; - write_unlock(&svc->sched_lock); + svc->sched_data = &dest->n_list; + spin_unlock_bh(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), @@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, - .update_service = ip_vs_rr_update_svc, + .add_dest = NULL, + .del_dest = ip_vs_rr_del_dest, .schedule = ip_vs_rr_schedule, }; @@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void) static void __exit ip_vs_rr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); + synchronize_rcu(); } module_init(ip_vs_rr_init); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index d6bf20d6cdbe..4dbcda6258bc 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err); */ static LIST_HEAD(ip_vs_schedulers); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_sched_lock); +/* semaphore for schedulers */ +static DEFINE_MUTEX(ip_vs_sched_mutex); /* @@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, { int ret; - svc->scheduler = scheduler; - if (scheduler->init_service) { ret = scheduler->init_service(svc); if (ret) { @@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, return ret; } } - + rcu_assign_pointer(svc->scheduler, scheduler); return 0; } @@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -int ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc, + struct ip_vs_scheduler *sched) { - struct 
ip_vs_scheduler *sched = svc->scheduler; + struct ip_vs_scheduler *cur_sched; - if (!sched) - return 0; + cur_sched = rcu_dereference_protected(svc->scheduler, 1); + /* This check proves that old 'sched' was installed */ + if (!cur_sched) + return; - if (sched->done_service) { - if (sched->done_service(svc) != 0) { - pr_err("%s(): done error\n", __func__); - return -EINVAL; - } - } - - svc->scheduler = NULL; - return 0; + if (sched->done_service) + sched->done_service(svc); + /* svc->scheduler can not be set to NULL */ } @@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); list_for_each_entry(sched, &ip_vs_schedulers, n_list) { /* @@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) } if (strcmp(sched_name, sched->name)==0) { /* HIT */ - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); return sched; } if (sched->module) module_put(sched->module); } - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); return NULL; } @@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { + struct ip_vs_scheduler *sched; + + sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - svc->scheduler->name, svc->fwmark, - svc->fwmark, msg); + sched->name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - svc->scheduler->name, - ip_vs_proto_name(svc->protocol), + sched->name, ip_vs_proto_name(svc->protocol), 
&svc->addr.ip, ntohs(svc->port), msg); } } @@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) /* increase the module use count */ ip_vs_use_count_inc(); - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); if (!list_empty(&scheduler->n_list)) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already linked\n", __func__, scheduler->name); @@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) */ list_for_each_entry(sched, &ip_vs_schedulers, n_list) { if (strcmp(scheduler->name, sched->name) == 0) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] scheduler already existed " "in the system\n", __func__, scheduler->name); @@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) * Add it into the d-linked scheduler list */ list_add(&scheduler->n_list, &ip_vs_schedulers); - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); pr_info("[%s] scheduler registered.\n", scheduler->name); @@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) return -EINVAL; } - spin_lock_bh(&ip_vs_sched_lock); + mutex_lock(&ip_vs_sched_mutex); if (list_empty(&scheduler->n_list)) { - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); pr_err("%s(): [%s] scheduler is not in the list. 
failed\n", __func__, scheduler->name); return -EINVAL; @@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) * Remove it from the d-linked scheduler list */ list_del(&scheduler->n_list); - spin_unlock_bh(&ip_vs_sched_lock); + mutex_unlock(&ip_vs_sched_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 89ead246ed3d..f3205925359a 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; @@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Find the destination with the least load. 
*/ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_sed_dest_overhead(dest); @@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void) static void __exit ip_vs_sed_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); + synchronize_rcu(); } module_init(ip_vs_sed_init); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e33126994628..0df269d7c99f 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -53,7 +53,7 @@ * IPVS SH bucket */ struct ip_vs_sh_bucket { - struct ip_vs_dest *dest; /* real server (cache) */ + struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* @@ -66,6 +66,10 @@ struct ip_vs_sh_bucket { #define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) +struct ip_vs_sh_state { + struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; + struct rcu_head rcu_head; +}; /* * Returns hash value for IPVS SH entry @@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, - const union nf_inet_addr *addr) +ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) { - return (tbl[ip_vs_sh_hashkey(af, addr)]).dest; + return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); } @@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, * Assign all the hash buckets of the specified table with the service. 
*/ static int -ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; int d_count; + bool empty; - b = tbl; + b = &s->buckets[0]; p = &svc->destinations; + empty = list_empty(p); d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { - if (list_empty(p)) { - b->dest = NULL; - } else { + dest = rcu_dereference_protected(b->dest, 1); + if (dest) + ip_vs_dest_put(dest); + if (empty) + RCU_INIT_POINTER(b->dest, NULL); + else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); - atomic_inc(&dest->refcnt); - b->dest = dest; + ip_vs_dest_hold(dest); + RCU_INIT_POINTER(b->dest, dest); IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", i, IP_VS_DBG_ADDR(svc->af, &dest->addr), @@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) /* * Flush all the hash buckets of the specified table. 
*/ -static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) +static void ip_vs_sh_flush(struct ip_vs_sh_state *s) { int i; struct ip_vs_sh_bucket *b; + struct ip_vs_dest *dest; - b = tbl; + b = &s->buckets[0]; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { - if (b->dest) { - atomic_dec(&b->dest->refcnt); - b->dest = NULL; + dest = rcu_dereference_protected(b->dest, 1); + if (dest) { + ip_vs_dest_put(dest); + RCU_INIT_POINTER(b->dest, NULL); } b++; } @@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) static int ip_vs_sh_init_svc(struct ip_vs_service *svc) { - struct ip_vs_sh_bucket *tbl; + struct ip_vs_sh_state *s; /* allocate the SH table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, - GFP_KERNEL); - if (tbl == NULL) + s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL); + if (s == NULL) return -ENOMEM; - svc->sched_data = tbl; + svc->sched_data = s; IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - /* assign the hash buckets with the updated service */ - ip_vs_sh_assign(tbl, svc); + /* assign the hash buckets with current dests */ + ip_vs_sh_reassign(s, svc); return 0; } -static int ip_vs_sh_done_svc(struct ip_vs_service *svc) +static void ip_vs_sh_done_svc(struct ip_vs_service *svc) { - struct ip_vs_sh_bucket *tbl = svc->sched_data; + struct ip_vs_sh_state *s = svc->sched_data; /* got to clean up hash buckets here */ - ip_vs_sh_flush(tbl); + ip_vs_sh_flush(s); /* release the table itself */ - kfree(svc->sched_data); + kfree_rcu(s, rcu_head); IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - - return 0; } -static int ip_vs_sh_update_svc(struct ip_vs_service *svc) +static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { - struct ip_vs_sh_bucket *tbl = svc->sched_data; - - /* got to clean up hash buckets here */ - 
ip_vs_sh_flush(tbl); + struct ip_vs_sh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ - ip_vs_sh_assign(tbl, svc); + ip_vs_sh_reassign(s, svc); return 0; } @@ -225,15 +230,15 @@ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; - struct ip_vs_sh_bucket *tbl; + struct ip_vs_sh_state *s; struct ip_vs_iphdr iph; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_sh_bucket *)svc->sched_data; - dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr); + s = (struct ip_vs_sh_state *) svc->sched_data; + dest = ip_vs_sh_get(svc->af, s, &iph.saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, - .update_service = ip_vs_sh_update_svc, + .add_dest = ip_vs_sh_dest_changed, + .del_dest = ip_vs_sh_dest_changed, + .upd_dest = ip_vs_sh_dest_changed, .schedule = ip_vs_sh_schedule, }; @@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void) static void __exit ip_vs_sh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); + synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 44fd10c539ac..8e57077e5540 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) return; - spin_lock(&ipvs->sync_buff_lock); + spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return; } @@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn 
*cp, if (!buff) { buff = ip_vs_sync_buff_create_v0(ipvs); if (!buff) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; } - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ cp = cp->control; @@ -641,9 +641,9 @@ sloop: pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } - spin_lock(&ipvs->sync_buff_lock); + spin_lock_bh(&ipvs->sync_buff_lock); if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return; } @@ -683,7 +683,7 @@ sloop: if (!buff) { buff = ip_vs_sync_buff_create(ipvs); if (!buff) { - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -750,7 +750,7 @@ sloop: } } - spin_unlock(&ipvs->sync_buff_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ @@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, kfree(param->pe_data); dest = cp->dest; - spin_lock(&cp->lock); + spin_lock_bh(&cp->lock); if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { if (flags & IP_VS_CONN_F_INACTIVE) { @@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; cp->flags = flags; - spin_unlock(&cp->lock); - if (!dest) { - dest = ip_vs_try_bind_dest(cp); - if (dest) - atomic_dec(&dest->refcnt); - } + spin_unlock_bh(&cp->lock); + if (!dest) + ip_vs_try_bind_dest(cp); } else { /* * Find the appropriate destination for the connection. 
* If it is not found the connection will remain unbound * but still handled. */ + rcu_read_lock(); dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); - if (dest) - atomic_dec(&dest->refcnt); + rcu_read_unlock(); if (!cp) { if (param->pe_data) kfree(param->pe_data); @@ -1692,11 +1689,7 @@ static int sync_thread_backup(void *data) break; } - /* disable bottom half, because it accesses the data - shared by softirq while getting/creating conns */ - local_bh_disable(); ip_vs_process_message(tinfo->net, tinfo->buf, len); - local_bh_enable(); } } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index bc1bfc48a17f..c60a81c4ce9a 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * new connections. */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; @@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) * Find the destination with the least load. 
*/ nextstage: - list_for_each_entry_continue(dest, &svc->destinations, n_list) { + list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); @@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void) static void __exit ip_vs_wlc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); + synchronize_rcu(); } module_init(ip_vs_wlc_init); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 231be7dd547a..0e68555bceb9 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -29,14 +29,45 @@ #include <net/ip_vs.h> +/* The WRR algorithm depends on some caclulations: + * - mw: maximum weight + * - di: weight step, greatest common divisor from all weights + * - cw: current required weight + * As result, all weights are in the [di..mw] range with a step=di. + * + * First, we start with cw = mw and select dests with weight >= cw. + * Then cw is reduced with di and all dests are checked again. + * Last pass should be with cw = di. We have mw/di passes in total: + * + * pass 1: cw = max weight + * pass 2: cw = max weight - di + * pass 3: cw = max weight - 2 * di + * ... + * last pass: cw = di + * + * Weights are supposed to be >= di but we run in parallel with + * weight changes, it is possible some dest weight to be reduced + * below di, bad if it is the only available dest. + * + * So, we modify how mw is calculated, now it is reduced with (di - 1), + * so that last cw is 1 to catch such dests with weight below di: + * pass 1: cw = max weight - (di - 1) + * pass 2: cw = max weight - di - (di - 1) + * pass 3: cw = max weight - 2 * di - (di - 1) + * ... 
+ * last pass: cw = 1 + * + */ + /* * current destination pointer for weighted round-robin scheduling */ struct ip_vs_wrr_mark { - struct list_head *cl; /* current list head */ + struct ip_vs_dest *cl; /* current dest or head */ int cw; /* current weight */ int mw; /* maximum weight */ int di; /* decreasing interval */ + struct rcu_head rcu_head; }; @@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) if (mark == NULL) return -ENOMEM; - mark->cl = &svc->destinations; - mark->cw = 0; - mark->mw = ip_vs_wrr_max_weight(svc); + mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); + mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); + mark->cw = mark->mw; svc->sched_data = mark; return 0; } -static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) +static void ip_vs_wrr_done_svc(struct ip_vs_service *svc) { + struct ip_vs_wrr_mark *mark = svc->sched_data; + /* * Release the mark variable */ - kfree(svc->sched_data); - - return 0; + kfree_rcu(mark, rcu_head); } -static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) +static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, + struct ip_vs_dest *dest) { struct ip_vs_wrr_mark *mark = svc->sched_data; - mark->cl = &svc->destinations; - mark->mw = ip_vs_wrr_max_weight(svc); + spin_lock_bh(&svc->sched_lock); + mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); - if (mark->cw > mark->mw) - mark->cw = 0; + mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); + if (mark->cw > mark->mw || !mark->cw) + mark->cw = mark->mw; + else if (mark->di > 1) + mark->cw = (mark->cw / mark->di) * mark->di + 1; + spin_unlock_bh(&svc->sched_lock); return 0; } @@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; + struct ip_vs_dest *dest, *last, 
*stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; - struct list_head *p; + bool last_pass = false, restarted = false; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); - /* - * This loop will always terminate, because mark->cw in (0, max_weight] - * and at least one server has its weight equal to max_weight. - */ - write_lock(&svc->sched_lock); - p = mark->cl; + spin_lock_bh(&svc->sched_lock); + dest = mark->cl; + /* No available dests? */ + if (mark->mw == 0) + goto err_noavail; + last = dest; + /* Stop only after all dests were checked for weight >= 1 (last pass) */ while (1) { - if (mark->cl == &svc->destinations) { - /* it is at the head of the destination list */ - - if (mark->cl == mark->cl->next) { - /* no dest entry */ - ip_vs_scheduler_err(svc, - "no destination available: " - "no destinations present"); - dest = NULL; - goto out; - } - - mark->cl = svc->destinations.next; - mark->cw -= mark->di; - if (mark->cw <= 0) { - mark->cw = mark->mw; - /* - * Still zero, which means no available servers. - */ - if (mark->cw == 0) { - mark->cl = &svc->destinations; - ip_vs_scheduler_err(svc, - "no destination available"); - dest = NULL; - goto out; - } - } - } else - mark->cl = mark->cl->next; - - if (mark->cl != &svc->destinations) { - /* not at the head of the list */ - dest = list_entry(mark->cl, struct ip_vs_dest, n_list); + list_for_each_entry_continue_rcu(dest, + &svc->destinations, + n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && - atomic_read(&dest->weight) >= mark->cw) { - /* got it */ - break; - } + atomic_read(&dest->weight) >= mark->cw) + goto found; + if (dest == stop) + goto err_over; } - - if (mark->cl == p && mark->cw == mark->di) { - /* back to the start, and no dest is found. 
- It is only possible when all dests are OVERLOADED */ - dest = NULL; - ip_vs_scheduler_err(svc, - "no destination available: " - "all destinations are overloaded"); - goto out; + mark->cw -= mark->di; + if (mark->cw <= 0) { + mark->cw = mark->mw; + /* Stop if we tried last pass from first dest: + * 1. last_pass: we started checks when cw > di but + * then all dests were checked for w >= 1 + * 2. last was head: the first and only traversal + * was for weight >= 1, for all dests. + */ + if (last_pass || + &last->n_list == &svc->destinations) + goto err_over; + restarted = true; + } + last_pass = mark->cw <= mark->di; + if (last_pass && restarted && + &last->n_list != &svc->destinations) { + /* First traversal was for w >= 1 but only + * for dests after 'last', now do the same + * for all dests up to 'last'. + */ + stop = last; } } +found: IP_VS_DBG_BUF(6, "WRR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + mark->cl = dest; out: - write_unlock(&svc->sched_lock); + spin_unlock_bh(&svc->sched_lock); return dest; + +err_noavail: + mark->cl = dest; + dest = NULL; + ip_vs_scheduler_err(svc, "no destination available"); + goto out; + +err_over: + mark->cl = dest; + dest = NULL; + ip_vs_scheduler_err(svc, "no destination available: " + "all destinations are overloaded"); + goto out; } @@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, - .update_service = ip_vs_wrr_update_svc, + .add_dest = ip_vs_wrr_dest_changed, + .del_dest = ip_vs_wrr_dest_changed, + .upd_dest = ip_vs_wrr_dest_changed, .schedule = ip_vs_wrr_schedule, }; @@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void) static void __exit ip_vs_wrr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); 
+ synchronize_rcu(); } module_init(ip_vs_wrr_init); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index ee6b7a9f1ec2..b75ff6429a04 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,8 @@ * - not all connections have destination server, for example, * connections in backup server when fwmark is used * - bypass connections use daddr from packet + * - we can use dst without ref while sending in RCU section, we use + * ref when returning NF_ACCEPT for NAT-ed packet via loopback * LOCAL_OUT rules: * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) * - skb->pkt_type is not set yet @@ -51,39 +53,54 @@ enum { */ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ + IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ }; +static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) +{ + return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); +} + +static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) +{ + kfree(dest_dst); +} + /* * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, - u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, + struct dst_entry *dst, u32 dst_cookie) { - struct dst_entry *old_dst; + struct ip_vs_dest_dst *old; + + old = rcu_dereference_protected(dest->dest_dst, + lockdep_is_held(&dest->dst_lock)); - old_dst = dest->dst_cache; - dest->dst_cache = dst; - dest->dst_rtos = rtos; - dest->dst_cookie = dst_cookie; - dst_release(old_dst); + if (dest_dst) { + dest_dst->dst_cache = dst; + dest_dst->dst_cookie = dst_cookie; + } + rcu_assign_pointer(dest->dest_dst, dest_dst); + + if (old) + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); } -static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) +static inline 
struct ip_vs_dest_dst * +__ip_vs_dst_check(struct ip_vs_dest *dest) { - struct dst_entry *dst = dest->dst_cache; + struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); + struct dst_entry *dst; - if (!dst) + if (!dest_dst) return NULL; - if ((dst->obsolete || rtos != dest->dst_rtos) && - dst->ops->check(dst, dest->dst_cookie) == NULL) { - dest->dst_cache = NULL; - dst_release(dst); + dst = dest_dst->dst_cache; + if (dst->obsolete && + dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; - } - dst_hold(dst); - return dst; + return dest_dst; } static inline bool @@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) /* Get route to daddr, update *saddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - u32 rtos, int rt_mode, __be32 *saddr) + int rt_mode, __be32 *saddr) { struct flowi4 fl4; struct rtable *rt; @@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; - fl4.flowi4_tos = rtos; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 
FLOWI_FLAG_KNOWN_NH : 0; @@ -124,7 +140,7 @@ retry: if (PTR_ERR(rt) == -EINVAL && *saddr && rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { *saddr = 0; - flowi4_update_output(&fl4, 0, rtos, daddr, 0); + flowi4_update_output(&fl4, 0, 0, daddr, 0); goto retry; } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -132,7 +148,7 @@ retry: } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); *saddr = fl4.saddr; - flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); loop++; goto retry; } @@ -141,113 +157,140 @@ retry: } /* Get route to destination or remote server */ -static struct rtable * +static int __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, - __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) + __be32 daddr, int rt_mode, __be32 *ret_saddr) { struct net *net = dev_net(skb_dst(skb)->dev); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ struct rtable *ort; /* Original route */ - int local; + struct iphdr *iph; + __be16 df; + int mtu; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - if (!(rt = (struct rtable *) - __ip_vs_dst_check(dest, rtos))) { - rt = do_output_route4(net, dest->addr.ip, rtos, - rt_mode, &dest->dst_saddr.ip); + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rtable *) dest_dst->dst_cache; + else { + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock_bh(&dest->dst_lock); + if (!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + goto err_unreach; + } + rt = do_output_route4(net, dest->addr.ip, rt_mode, + &dest_dst->dst_saddr.ip); if (!rt) { - spin_unlock(&dest->dst_lock); - return NULL; + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); + goto err_unreach; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - 
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " - "rtos=%X\n", - &dest->addr.ip, &dest->dst_saddr.ip, - atomic_read(&rt->dst.__refcnt), rtos); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); + spin_unlock_bh(&dest->dst_lock); + IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", + &dest->addr.ip, &dest_dst->dst_saddr.ip, + atomic_read(&rt->dst.__refcnt)); } daddr = dest->addr.ip; if (ret_saddr) - *ret_saddr = dest->dst_saddr.ip; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.ip; } else { __be32 saddr = htonl(INADDR_ANY); + noref = 0; + /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, &saddr); if (!rt) - return NULL; + goto err_unreach; if (ret_saddr) *ret_saddr = saddr; } - local = rt->rt_flags & RTCF_LOCAL; + local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", (rt->rt_flags & RTCF_LOCAL) ? 
"local":"non-local", &daddr); - ip_rt_put(rt); - return NULL; - } - if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && - !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { - IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " - "requires NAT method, dest: %pI4\n", - &ip_hdr(skb)->daddr, &daddr); - ip_rt_put(rt); - return NULL; + goto err_put; } - if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " - "to non-local address, dest: %pI4\n", - &ip_hdr(skb)->saddr, &daddr); - ip_rt_put(rt); - return NULL; + iph = ip_hdr(skb); + if (likely(!local)) { + if (unlikely(ipv4_is_loopback(iph->saddr))) { + IP_VS_DBG_RL("Stopping traffic from loopback address " + "%pI4 to non-local address, dest: %pI4\n", + &iph->saddr, &daddr); + goto err_put; + } + } else { + ort = skb_rtable(skb); + if (!(rt_mode & IP_VS_RT_MODE_RDR) && + !(ort->rt_flags & RTCF_LOCAL)) { + IP_VS_DBG_RL("Redirect from non-local address %pI4 to " + "local requires NAT method, dest: %pI4\n", + &iph->daddr, &daddr); + goto err_put; + } + /* skb to local stack, preserve old route */ + if (!noref) + ip_rt_put(rt); + return local; } - return rt; -} - -/* Reroute packet to local IPv4 stack after DNAT */ -static int -__ip_vs_reroute_locally(struct sk_buff *skb) -{ - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; - struct net *net = dev_net(dev); - struct iphdr *iph = ip_hdr(skb); - - if (rt_is_input_route(rt)) { - unsigned long orefdst = skb->_skb_refdst; - - if (ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) - return 0; - refdst_drop(orefdst); + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { + mtu = dst_mtu(&rt->dst); + df = iph->frag_off & htons(IP_DF); } else { - struct flowi4 fl4 = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowi4_tos = RT_TOS(iph->tos), - .flowi4_mark = skb->mark, - }; - - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return 0; - if 
(!(rt->rt_flags & RTCF_LOCAL)) { - ip_rt_put(rt); - return 0; + struct sock *sk = skb->sk; + + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); + if (mtu < 68) { + IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); + goto err_put; } - /* Drop old route. */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + ort = skb_rtable(skb); + if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + /* MTU check allowed? */ + df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0; } - return 1; + + /* MTU checking */ + if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr); + goto err_put; + } + + skb_dst_drop(skb); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); + + return local; + +err_put: + if (!noref) + ip_rt_put(rt); + return -1; + +err_unreach: + dst_link_failure(skb); + return -1; } #ifdef CONFIG_IP_VS_IPV6 @@ -294,44 +337,57 @@ out_err: /* * Get route to destination or remote server */ -static struct rt6_info * +static int __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, - int do_xfrm, int rt_mode) + struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { struct net *net = dev_net(skb_dst(skb)->dev); + struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct rt6_info *ort; /* Original route */ struct dst_entry *dst; - int local; + int mtu; + int local, noref = 1; if (dest) { - spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); - if (!rt) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) + rt = (struct rt6_info *) dest_dst->dst_cache; + else { u32 cookie; + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock_bh(&dest->dst_lock); + if 
(!dest_dst) { + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + goto err_unreach; + } dst = __ip_vs_route_output_v6(net, &dest->addr.in6, - &dest->dst_saddr.in6, + &dest_dst->dst_saddr.in6, do_xfrm); if (!dst) { - spin_unlock(&dest->dst_lock); - return NULL; + __ip_vs_dst_set(dest, NULL, NULL, 0); + spin_unlock_bh(&dest->dst_lock); + ip_vs_dest_dst_free(dest_dst); + goto err_unreach; } rt = (struct rt6_info *) dst; cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; - __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", - &dest->addr.in6, &dest->dst_saddr.in6, + &dest->addr.in6, &dest_dst->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); } if (ret_saddr) - *ret_saddr = dest->dst_saddr.in6; - spin_unlock(&dest->dst_lock); + *ret_saddr = dest_dst->dst_saddr.in6; } else { + noref = 0; dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); if (!dst) - return NULL; + goto err_unreach; rt = (struct rt6_info *) dst; } @@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", local ? 
"local":"non-local", daddr); - dst_release(&rt->dst); - return NULL; + goto err_put; } - if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && - !((ort = (struct rt6_info *) skb_dst(skb)) && - __ip_vs_is_local_route6(ort))) { - IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " - "requires NAT method, dest: %pI6c\n", - &ipv6_hdr(skb)->daddr, daddr); - dst_release(&rt->dst); - return NULL; + if (likely(!local)) { + if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && + ipv6_addr_type(&ipv6_hdr(skb)->saddr) & + IPV6_ADDR_LOOPBACK)) { + IP_VS_DBG_RL("Stopping traffic from loopback address " + "%pI6c to non-local address, " + "dest: %pI6c\n", + &ipv6_hdr(skb)->saddr, daddr); + goto err_put; + } + } else { + ort = (struct rt6_info *) skb_dst(skb); + if (!(rt_mode & IP_VS_RT_MODE_RDR) && + !__ip_vs_is_local_route6(ort)) { + IP_VS_DBG_RL("Redirect from non-local address %pI6c " + "to local requires NAT method, " + "dest: %pI6c\n", + &ipv6_hdr(skb)->daddr, daddr); + goto err_put; + } + /* skb to local stack, preserve old route */ + if (!noref) + dst_release(&rt->dst); + return local; } - if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - ipv6_addr_type(&ipv6_hdr(skb)->saddr) & - IPV6_ADDR_LOOPBACK)) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " - "to non-local address, dest: %pI6c\n", - &ipv6_hdr(skb)->saddr, daddr); - dst_release(&rt->dst); - return NULL; + + /* MTU checking */ + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) + mtu = dst_mtu(&rt->dst); + else { + struct sock *sk = skb->sk; + + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); + if (mtu < IPV6_MIN_MTU) { + IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, + IPV6_MIN_MTU); + goto err_put; + } + ort = (struct rt6_info *) skb_dst(skb); + if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } - return rt; + if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { + if (!skb->dev) + skb->dev = 
net->loopback_dev; + /* only send ICMP too big on first fragment */ + if (!ipvsh->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); + goto err_put; + } + + skb_dst_drop(skb); + if (noref) { + if (!local) + skb_dst_set_noref_force(skb, &rt->dst); + else + skb_dst_set(skb, dst_clone(&rt->dst)); + } else + skb_dst_set(skb, &rt->dst); + + return local; + +err_put: + if (!noref) + dst_release(&rt->dst); + return -1; + +err_unreach: + dst_link_failure(skb); + return -1; } #endif -/* - * Release dest->dst_cache before a dest is removed - */ -void -ip_vs_dst_reset(struct ip_vs_dest *dest) +/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ +static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, + struct ip_vs_conn *cp) { - struct dst_entry *old_dst; + int ret = NF_ACCEPT; + + skb->ipvs_property = 1; + if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) + ret = ip_vs_confirm_conntrack(skb); + if (ret == NF_ACCEPT) { + nf_reset(skb); + skb_forward_csum(skb); + } + return ret; +} + +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, + struct ip_vs_conn *cp, int local) +{ + int ret = NF_STOLEN; - old_dst = dest->dst_cache; - dest->dst_cache = NULL; - dst_release(old_dst); - dest->dst_saddr.ip = 0; + skb->ipvs_property = 1; + if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) + ip_vs_notrack(skb); + else + ip_vs_update_conntrack(skb, cp, 1); + if (!local) { + skb_forward_csum(skb); + NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, + dst_output); + } else + ret = NF_ACCEPT; + return ret; } -#define IP_VS_XMIT_TUNNEL(skb, cp) \ -({ \ - int __ret = NF_ACCEPT; \ - \ - (skb)->ipvs_property = 1; \ - if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ - __ret = ip_vs_confirm_conntrack(skb); \ - if (__ret == NF_ACCEPT) { \ - nf_reset(skb); \ - skb_forward_csum(skb); \ - } \ - __ret; \ -}) - -#define 
IP_VS_XMIT_NAT(pf, skb, cp, local) \ -do { \ - (skb)->ipvs_property = 1; \ - if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - ip_vs_notrack(skb); \ - else \ - ip_vs_update_conntrack(skb, cp, 1); \ - if (local) \ - return NF_ACCEPT; \ - skb_forward_csum(skb); \ - NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - skb_dst(skb)->dev, dst_output); \ -} while (0) - -#define IP_VS_XMIT(pf, skb, cp, local) \ -do { \ - (skb)->ipvs_property = 1; \ - if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - ip_vs_notrack(skb); \ - if (local) \ - return NF_ACCEPT; \ - skb_forward_csum(skb); \ - NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - skb_dst(skb)->dev, dst_output); \ -} while (0) +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, + struct ip_vs_conn *cp, int local) +{ + int ret = NF_STOLEN; + + skb->ipvs_property = 1; + if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) + ip_vs_notrack(skb); + if (!local) { + skb_forward_csum(skb); + NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, + dst_output); + } else + ret = NF_ACCEPT; + return ret; +} /* @@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { /* we do not touch skb and do not need pskb ptr */ - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } @@ -443,52 +550,29 @@ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); - int mtu; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), - IP_VS_RT_MODE_NON_LOCAL, NULL))) - goto tx_error_icmp; - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - ip_rt_put(rt); - icmp_send(skb, 
ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, + NULL) < 0) goto tx_error; - } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { - ip_rt_put(rt); - return NF_STOLEN; - } - ip_send_check(ip_hdr(skb)); - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); + ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rt6_info *rt; /* Route to the other host */ - int mtu; - EnterFunction(10); - rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, - IP_VS_RT_MODE_NON_LOCAL); - if (!rt) - goto tx_error_icmp; - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, + ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; - } - - /* - * Call ip_send_check because we are 
not sure it is called - * after ip_defrag. Is copy-on-write needed? - */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(skb == NULL)) { - dst_release(&rt->dst); - return NF_STOLEN; - } - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ - int mtu; - struct iphdr *iph = ip_hdr(skb); - int local; + int local, rc, was_input; EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; - p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); + + p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), - IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR, NULL))) - goto tx_error_icmp; - local = rt->rt_flags & RTCF_LOCAL; + was_input = rt_is_input_route(skb_rtable(skb)); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR, NULL); + if (local < 0) + goto tx_error; + rt = skb_rtable(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -600,57 +652,31 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 
"stopping DNAT to local address"); - goto tx_error_put; + goto tx_error; } } #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(cp->daddr.ip) && - rt_is_input_route(skb_rtable(skb))) { + if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, - "ip_vs_nat_xmit(): frag needed for"); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct iphdr))) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) - goto tx_error_put; + goto tx_error; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); - if (!local) { - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - ip_rt_put(rt); - /* - * Some IPv4 replies get local address from routes, - * not from iph, so while we DNAT after routing - * we need this second input/output route. 
- */ - if (!__ip_vs_reroute_locally(skb)) - goto tx_error; - } - IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length @@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); - return NF_STOLEN; + return rc; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ - int mtu; - int local; + int local, rc; EnterFunction(10); + rcu_read_lock(); /* check if it is a connection of no-client-port */ - if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; - p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); + p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR)))) - goto tx_error_icmp; - local = __ip_vs_is_local_route6(rt); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR); + if (local < 0) + goto tx_error; + rt = (struct rt6_info *) skb_dst(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address 
when connection is sync-ed @@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); - goto tx_error_put; + goto tx_error; } } #endif @@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, - "ip_vs_nat_xmit_v6(): frag needed for"); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; - if (!local || !skb->dev) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* destined to loopback, do we need to change route? 
*/ - dst_release(&rt->dst); - } - IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length @@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); LeaveFunction(10); - return NF_STOLEN; + return rc; -tx_error_icmp: - dst_link_failure(skb); tx_error: LeaveFunction(10); kfree_skb(skb); + rcu_read_unlock(); return NF_STOLEN; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif @@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, __be16 df; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ - int mtu; - int ret; + int ret, local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_CONNECT, - &saddr))) - goto tx_error_icmp; - if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT | + IP_VS_RT_MODE_TUNNEL, &saddr); + if (local < 0) + goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } + rt = skb_rtable(skb); tdev = rt->dst.dev; - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - if (mtu < 68) { - IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); - goto tx_error_put; - } - if (rt_is_output_route(skb_rtable(skb))) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - /* Copy DF, reset fragment offset and MF */ df = sysctl_pmtu_disc(ipvs) ? 
old_iph->frag_off & htons(IP_DF) : 0; - if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; - } - /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); - if (skb_headroom(skb) < max_headroom - || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - ip_rt_put(rt); - kfree_skb(skb); - IP_VS_ERR_RL("%s(): no memory\n", __func__); - return NF_STOLEN; - } + + if (!new_skb) + goto tx_error; consume_skb(skb); skb = new_skb; old_iph = ip_hdr(skb); @@ -890,10 +871,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* * Push down and install the IPIP header. 
*/ @@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ret = IP_VS_XMIT_TUNNEL(skb, cp); + ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) ip_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 @@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ipv6hdr *old_iph = ipv6_hdr(skb); struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ - int mtu; - int ret; + int ret, local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, - &saddr, 1, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL)))) - goto tx_error_icmp; - if (__ip_vs_is_local_route6(rt)) { - dst_release(&rt->dst); - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, + &saddr, ipvsh, 1, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_TUNNEL); + if (local < 0) + goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); } + rt = (struct rt6_info *) skb_dst(skb); tdev = rt->dst.dev; - mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); - if (mtu < IPV6_MIN_MTU) { - IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, - IPV6_MIN_MTU); - goto tx_error_put; - } - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - /* MTU checking: Notice that 'mtu' have been adjusted before hand */ - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too 
big on first fragment */ - if (!ipvsh->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; - } - /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); - if (skb_headroom(skb) < max_headroom - || skb_cloned(skb) || skb_shared(skb)) { + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - dst_release(&rt->dst); - kfree_skb(skb); - IP_VS_ERR_RL("%s(): no memory\n", __func__); - return NF_STOLEN; - } + + if (!new_skb) + goto tx_error; consume_skb(skb); skb = new_skb; old_iph = ipv6_hdr(skb); @@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* * Push down and install the IPIP header. 
*/ @@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ret = IP_VS_XMIT_TUNNEL(skb, cp); + ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) ip6_local_out(skb); else if (ret == NF_DROP) kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif @@ -1060,59 +1004,36 @@ int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rtable *rt; /* Route to the other host */ - struct iphdr *iph = ip_hdr(skb); - int mtu; + int local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), - IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_KNOWN_NH, NULL))) - goto tx_error_icmp; - if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_KNOWN_NH, NULL); + if (local < 0) goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? 
- */ - if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { - ip_rt_put(rt); - return NF_STOLEN; - } ip_send_check(ip_hdr(skb)); - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; - tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct rt6_info *rt; /* Route to the other host */ - int mtu; + int local; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL)))) - goto tx_error_icmp; - if (__ip_vs_is_local_route6(rt)) { - dst_release(&rt->dst); - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL); + if (local < 0) goto tx_error; + if (local) { + rcu_read_unlock(); + return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); } - /* - * Call ip_send_check because we are not sure it is called - * after ip_defrag. Is copy-on-write needed? 
- */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(skb == NULL)) { - dst_release(&rt->dst); - return NF_STOLEN; - } - - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); + ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; -tx_error_icmp: - dst_link_failure(skb); tx_error: kfree_skb(skb); + rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct rtable *rt; /* Route to the other host */ - int mtu; int rc; int local; - int rt_mode; + int rt_mode, was_input; EnterFunction(10); @@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* * mangle and send the packet here (only for VS/NAT) */ + was_input = rt_is_input_route(skb_rtable(skb)); /* LOCALNODE from FORWARD hook is not supported */ rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(ip_hdr(skb)->tos), - rt_mode, NULL))) - goto tx_error_icmp; - local = rt->rt_flags & RTCF_LOCAL; + rcu_read_lock(); + local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); + if (local < 0) + goto tx_error; + rt = skb_rtable(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic @@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); - goto tx_error_put; + goto tx_error; } } #endif /* From world but DNAT to loopback address? 
*/ - if (local && ipv4_is_loopback(cp->daddr.ip) && - rt_is_input_route(skb_rtable(skb))) { + if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && - !skb_is_gso(skb)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; ip_vs_nat_icmp(skb, pp, cp, 0); - if (!local) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - ip_rt_put(rt); - /* - * Some IPv4 replies get local address from routes, - * not from iph, so while we DNAT after routing - * we need this second input/output route. 
- */ - if (!__ip_vs_reroute_locally(skb)) - goto tx_error; - } - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); - - rc = NF_STOLEN; + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); + rcu_read_unlock(); goto out; - tx_error_icmp: - dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); return rc; - tx_error_put: - ip_rt_put(rt); - goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, - struct ip_vs_iphdr *iph) + struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ - int mtu; int rc; int local; int rt_mode; @@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) - rc = cp->packet_xmit(skb, cp, pp, iph); + rc = cp->packet_xmit(skb, cp, pp, ipvsh); else rc = NF_ACCEPT; /* do not touch skb anymore */ @@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? 
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, rt_mode))) - goto tx_error_icmp; - - local = __ip_vs_is_local_route6(rt); + rcu_read_lock(); + local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + ipvsh, 0, rt_mode); + if (local < 0) + goto tx_error; + rt = (struct rt6_info *) skb_dst(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); - goto tx_error_put; + goto tx_error; } } #endif @@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI6\n", __func__, &cp->daddr.in6); - goto tx_error_put; - } - - /* MTU checking */ - mtu = dst_mtu(&rt->dst); - if (__mtu_check_toobig_v6(skb, mtu)) { - if (!skb->dev) { - struct net *net = dev_net(skb_dst(skb)->dev); - - skb->dev = net->loopback_dev; - } - /* only send ICMP too big on first fragment */ - if (!iph->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error_put; + goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) - goto tx_error_put; + goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) - goto tx_error_put; + goto tx_error; ip_vs_nat_icmp_v6(skb, pp, cp, 0); - if (!local || !skb->dev) { - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* destined to loopback, do we need to change route? 
*/ - dst_release(&rt->dst); - } - /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); - - rc = NF_STOLEN; + rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); + rcu_read_unlock(); goto out; -tx_error_icmp: - dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); + rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); return rc; -tx_error_put: - dst_release(&rt->dst); - goto tx_error; } #endif diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index dbdaa1149260..b8b95f4027ca 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -2,6 +2,7 @@ * * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> * based on HW's ip_conntrack_irc.c as well as other modules + * (C) 2006 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c8e001a9c45b..ebb81d64436c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -5,6 +5,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2005-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -48,6 +49,7 @@ #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_helper.h> #define NF_CONNTRACK_VERSION "0.5.0" @@ -1259,7 +1261,7 @@ void nf_ct_iterate_cleanup(struct net *net, EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); struct __nf_ct_flush_report { - u32 pid; + u32 portid; int report; }; @@ -1274,7 
+1276,7 @@ static int kill_report(struct nf_conn *i, void *data) /* If we fail to deliver the event, death_by_timeout() will retry */ if (nf_conntrack_event_report(IPCT_DESTROY, i, - fr->pid, fr->report) < 0) + fr->portid, fr->report) < 0) return 1; /* Avoid the delivery of the destroy event in death_by_timeout(). */ @@ -1297,10 +1299,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 portid, int report) { struct __nf_ct_flush_report fr = { - .pid = pid, + .portid = portid, .report = report, }; nf_ct_iterate_cleanup(net, kill_report, &fr); @@ -1364,30 +1366,48 @@ void nf_conntrack_cleanup_end(void) */ void nf_conntrack_cleanup_net(struct net *net) { + LIST_HEAD(single); + + list_add(&net->exit_list, &single); + nf_conntrack_cleanup_net_list(&single); +} + +void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) +{ + int busy; + struct net *net; + /* * This makes sure all current packets have passed through * netfilter framework. Roll on, two-stage module * delete... 
*/ synchronize_net(); - i_see_dead_people: - nf_ct_iterate_cleanup(net, kill_all, NULL); - nf_ct_release_dying_list(net); - if (atomic_read(&net->ct.count) != 0) { +i_see_dead_people: + busy = 0; + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_iterate_cleanup(net, kill_all, NULL); + nf_ct_release_dying_list(net); + if (atomic_read(&net->ct.count) != 0) + busy = 1; + } + if (busy) { schedule(); goto i_see_dead_people; } - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_proto_pernet_fini(net); - nf_conntrack_helper_pernet_fini(net); - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_tstamp_pernet_fini(net); - nf_conntrack_acct_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); - kmem_cache_destroy(net->ct.nf_conntrack_cachep); - kfree(net->ct.slabname); - free_percpu(net->ct.stat); + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); + free_percpu(net->ct.stat); + } } void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index b5d2eb8bf0d5..1df176146567 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -1,8 +1,10 @@ /* Event cache for netfilter. 
*/ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> +/* + * (C) 2005 Harald Welte <laforge@gnumonks.org> + * (C) 2005 Patrick McHardy <kaber@trash.net> + * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 8c10e3db3d9b..c63b618cd619 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (c) 2005-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -40,7 +41,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, - u32 pid, int report) + u32 portid, int report) { struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); @@ -54,7 +55,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, hlist_del(&exp->lnode); master_help->expecting[exp->class]--; - nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); + nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report); nf_ct_expect_put(exp); NF_CT_STAT_INC(net, expect_delete); @@ -412,7 +413,7 @@ out: } int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report) + u32 portid, int report) { int ret; @@ -425,7 +426,7 @@ int nf_ct_expect_related_report(struct 
nf_conntrack_expect *expect, if (ret < 0) goto out; spin_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); + nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); return ret; out: spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 62fb8faedb80..6b217074237b 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 7df7b36d2e24..bdebd03bc8cd 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -2,6 +2,7 @@ * H.323 connection tracking helper * * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net> * * This source code is licensed under General Public License version 2. 
* diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index a9740bd6fe54..974a2a4adefa 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -339,6 +340,13 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, { const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; /* Called from the helper function, this call never fails */ help = nfct_help(ct); @@ -346,8 +354,10 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, - "nf_ct_%s: dropping packet: %s ", helper->name, fmt); + nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, + "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); + + va_end(args); } EXPORT_SYMBOL_GPL(nf_ct_helper_log); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 70985c5d0ffa..0fd2976db7ee 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -1,6 +1,7 @@ /* IRC extension for IP connection tracking, Version 1.21 * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> * based on RR's ip_conntrack_ftp.c + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git 
a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9904b15f600e..6d0f8a17c5b7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2409,6 +2409,92 @@ out: return skb->len; } +static int +ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_expect *exp, *last; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nf_conn *ct = cb->data; + struct nf_conn_help *help = nfct_help(ct); + u_int8_t l3proto = nfmsg->nfgen_family; + + if (cb->args[0]) + return 0; + + rcu_read_lock(); + last = (struct nf_conntrack_expect *)cb->args[1]; +restart: + hlist_for_each_entry(exp, &help->expectations, lnode) { + if (l3proto && exp->tuple.src.l3num != l3proto) + continue; + if (cb->args[1]) { + if (exp != last) + continue; + cb->args[1] = 0; + } + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + exp) < 0) { + if (!atomic_inc_not_zero(&exp->use)) + continue; + cb->args[1] = (unsigned long)exp; + goto out; + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } + cb->args[0] = 1; +out: + rcu_read_unlock(); + if (last) + nf_ct_expect_put(last); + + return skb->len; +} + +static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + int err; + struct net *net = sock_net(ctnl); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; + u16 zone = 0; + struct netlink_dump_control c = { + .dump = ctnetlink_exp_ct_dump_table, + .done = ctnetlink_exp_done, + }; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + if (err < 0) + return err; + + if (cda[CTA_EXPECT_ZONE]) { + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + } + + h = 
nf_conntrack_find_get(net, zone, &tuple); + if (!h) + return -ENOENT; + + ct = nf_ct_tuplehash_to_ctrack(h); + c.data = ct; + + err = netlink_dump_start(ctnl, skb, nlh, &c); + nf_ct_put(ct); + + return err; +} + static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, @@ -2439,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ctnetlink_exp_dump_table, - .done = ctnetlink_exp_done, - }; - return netlink_dump_start(ctnl, skb, nlh, &c); + if (cda[CTA_EXPECT_MASTER]) + return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda); + else { + struct netlink_dump_control c = { + .dump = ctnetlink_exp_dump_table, + .done = ctnetlink_exp_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index e6678d2b624e..7bd03decd36c 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -11,6 +11,8 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * * Limitations: * - We blindly assume that control connections are always * established in PNS->PAC direction. 
This is a violation diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 58ab4050830c..0ab9636ac57e 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -3,6 +3,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 432f95780003..a99b6c3427b0 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, + NULL, msg); return false; } @@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; } @@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } @@ -969,6 +970,10 @@ static int __init 
nf_conntrack_proto_dccp_init(void) { int ret; + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&dccp_proto4); if (ret < 0) goto out_dccp4; @@ -977,16 +982,12 @@ static int __init nf_conntrack_proto_dccp_init(void) if (ret < 0) goto out_dccp6; - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&dccp_proto6); out_dccp6: nf_ct_l4proto_unregister(&dccp_proto4); out_dccp4: + unregister_pernet_subsys(&dccp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index bd7d01d9c7e7..9d9c0dade602 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -21,6 +21,7 @@ * * Development of this code funded by Astaro AG (http://www.astaro.com/) * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> @@ -420,18 +421,18 @@ static int __init nf_ct_proto_gre_init(void) { int ret; - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); out_gre4: + unregister_pernet_subsys(&proto_gre_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 480f616d5936..1314d33f6bcf 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -1,6 +1,9 @@ /* * Connection tracking protocol helper module for SCTP. * + * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com> + * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net> + * * SCTP is defined in RFC 2960. 
References to various sections in this code * are to this RFC. * @@ -888,6 +891,10 @@ static int __init nf_conntrack_proto_sctp_init(void) { int ret; + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); if (ret < 0) goto out_sctp4; @@ -896,16 +903,12 @@ static int __init nf_conntrack_proto_sctp_init(void) if (ret < 0) goto out_sctp6; - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); out_sctp6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out_sctp4: + unregister_pernet_subsys(&sctp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 83876e9877f1..4d4d8f1d01fc 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1,5 +1,7 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -720,7 +722,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? 
@@ -772,7 +774,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -780,7 +782,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -793,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -802,7 +804,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -949,7 +951,7 @@ static int tcp_packet(struct nf_conn *ct, } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored in " "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; @@ -959,7 +961,7 @@ static int tcp_packet(struct nf_conn *ct, dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - 
nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: @@ -969,8 +971,8 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid RST */ spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid RST "); + nf_log_packet(net, pf, 0, skb, NULL, NULL, + NULL, "nf_ct_tcp: invalid RST "); return -NF_ACCEPT; } if (index == TCP_RST_SET diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 59623cc56e8d..9d7721cbce4b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -1,5 +1,6 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -119,7 +120,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; } @@ -127,7 +128,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -143,7 +144,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, 
dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 157489581c31..2750e6c69f82 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; } @@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; } @@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* UDPLITE mandates checksums */ if (!hdr->check) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } @@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; } @@ -371,6 +371,10 @@ static int __init nf_conntrack_proto_udplite_init(void) { int ret; + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + 
goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); if (ret < 0) goto out_udplite4; @@ -379,16 +383,12 @@ static int __init nf_conntrack_proto_udplite_init(void) if (ret < 0) goto out_udplite6; - ret = register_pernet_subsys(&udplite_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); out_udplite6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); out_udplite4: + unregister_pernet_subsys(&udplite_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0e7d423324c3..e0c4373b4747 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1593,10 +1593,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, end += strlen("\r\n\r\n") + clen; msglen = origlen = end - dptr; - if (msglen > datalen) { - nf_ct_helper_log(skb, ct, "incomplete/bad SIP message"); - return NF_DROP; - } + if (msglen > datalen) + return NF_ACCEPT; ret = process_sip_msg(skb, ct, protoff, dataoff, &dptr, &msglen); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6bcce401fd1c..bd700b4013c1 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1,5 +1,6 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -545,16 +546,20 @@ out_init: return ret; } -static void nf_conntrack_pernet_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { - nf_conntrack_standalone_fini_sysctl(net); - nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup_net(net); + struct net *net; + + list_for_each_entry(net, 
net_exit_list, exit_list) { + nf_conntrack_standalone_fini_sysctl(net); + nf_conntrack_standalone_fini_proc(net); + } + nf_conntrack_cleanup_net_list(net_exit_list); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_pernet_init, - .exit = nf_conntrack_pernet_exit, + .init = nf_conntrack_pernet_init, + .exit_batch = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) @@ -568,6 +573,7 @@ static int __init nf_conntrack_standalone_init(void) register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); if (!nf_ct_netfilter_header) { pr_err("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; goto out_sysctl; } #endif diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index e9936c830208..e68ab4fbd71f 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -1,5 +1,5 @@ /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> - * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9e312695c818..388656d5a9ec 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,6 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); @@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } +void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +{ + const struct nf_logger *log; + + if (pf == NFPROTO_UNSPEC) + return; + + mutex_lock(&nf_log_mutex); + log = rcu_dereference_protected(net->nf.nf_loggers[pf], + lockdep_is_held(&nf_log_mutex)); + if (log == NULL) + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); + + mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_set); + +void nf_log_unset(struct net *net, const struct nf_logger *logger) +{ + int i; + const struct nf_logger *log; + + mutex_lock(&nf_log_mutex); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = rcu_dereference_protected(net->nf.nf_loggers[i], + lockdep_is_held(&nf_log_mutex)); + if (log == logger) + RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); + } + mutex_unlock(&nf_log_mutex); + synchronize_rcu(); +} +EXPORT_SYMBOL(nf_log_unset); + /* return EEXIST if the same logger is registered, 0 on success. 
*/ int nf_log_register(u_int8_t pf, struct nf_logger *logger) { - const struct nf_logger *llog; int i; - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(logger->list); i++) @@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) } else { /* register at end of list to honor first register win */ list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); - llog = rcu_dereference_protected(nf_loggers[pf], - lockdep_is_held(&nf_log_mutex)); - if (llog == NULL) - rcu_assign_pointer(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); @@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { - const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - c_logger = rcu_dereference_protected(nf_loggers[i], - lockdep_is_held(&nf_log_mutex)); - if (c_logger == logger) - RCU_INIT_POINTER(nf_loggers[i], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) list_del(&logger->list[i]); - } mutex_unlock(&nf_log_mutex); - - synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unregister); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +int nf_log_bind_pf(struct net *net, u_int8_t pf, + const struct nf_logger *logger) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[pf], logger); + rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } EXPORT_SYMBOL(nf_log_bind_pf); -void nf_log_unbind_pf(u_int8_t pf) +void nf_log_unbind_pf(struct net *net, u_int8_t pf) { - if (pf >= ARRAY_SIZE(nf_loggers)) + if (pf >= ARRAY_SIZE(net->nf.nf_loggers)) return; mutex_lock(&nf_log_mutex); - RCU_INIT_POINTER(nf_loggers[pf], NULL); + 
RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); -void nf_log_packet(u_int8_t pf, +void nf_log_packet(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf, const struct nf_logger *logger; rcu_read_lock(); - logger = rcu_dereference(nf_loggers[pf]); + logger = rcu_dereference(net->nf.nf_loggers[pf]); if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); + mutex_lock(&nf_log_mutex); - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos) static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { + struct net *net = seq_file_net(s); + (*pos)++; - if (*pos >= ARRAY_SIZE(nf_loggers)) + if (*pos >= ARRAY_SIZE(net->nf.nf_loggers)) return NULL; return pos; @@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v) const struct nf_logger *logger; struct nf_logger *t; int ret; + struct net *net = seq_file_net(s); - logger = rcu_dereference_protected(nf_loggers[*pos], + logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], lockdep_is_held(&nf_log_mutex)); if (!logger) @@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = { static int nflog_open(struct inode *inode, struct file *file) { - return seq_open(file, &nflog_seq_ops); + return seq_open_net(inode, file, &nflog_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations nflog_file_ops = { @@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = { .open = nflog_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = 
seq_release_net, }; @@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; + struct net *net = current->nsproxy->net_ns; if (write) { if (size > sizeof(buf)) @@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return -EFAULT; if (!strcmp(buf, "NONE")) { - nf_log_unbind_pf(tindex); + nf_log_unbind_pf(net, tindex); return 0; } mutex_lock(&nf_log_mutex); @@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[tindex], logger); + rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = rcu_dereference_protected(nf_loggers[tindex], + logger = rcu_dereference_protected(net->nf.nf_loggers[tindex], lockdep_is_held(&nf_log_mutex)); if (!logger) table->data = "NONE"; @@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write, return r; } -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { int i; - - for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { - snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); - nf_log_sysctl_table[i].procname = - nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; - nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); - nf_log_sysctl_table[i].mode = 0644; - nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; - 
nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; + struct ctl_table *table; + + table = nf_log_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_log_sysctl_table, + sizeof(nf_log_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } else { + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + snprintf(nf_log_sysctl_fnames[i], + 3, "%d", i); + nf_log_sysctl_table[i].procname = + nf_log_sysctl_fnames[i]; + nf_log_sysctl_table[i].data = NULL; + nf_log_sysctl_table[i].maxlen = + NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].mode = 0644; + nf_log_sysctl_table[i].proc_handler = + nf_log_proc_dostring; + nf_log_sysctl_table[i].extra1 = + (void *)(unsigned long) i; + } } - nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log", - nf_log_sysctl_table); - if (!nf_log_dir_header) - return -ENOMEM; + net->nf.nf_log_dir_header = register_net_sysctl(net, + "net/netfilter/nf_log", + table); + if (!net->nf.nf_log_dir_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void netfilter_log_sysctl_exit(struct net *net) +{ + struct ctl_table *table; + + table = net->nf.nf_log_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_log_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); } #else -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net) { return 0; } + +static void netfilter_log_sysctl_exit(struct net *net) +{ +} #endif /* CONFIG_SYSCTL */ -int __init netfilter_log_init(void) +static int __net_init nf_log_net_init(struct net *net) { - int i, r; + int ret = -ENOMEM; + #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, - proc_net_netfilter, &nflog_file_ops)) - return -1; + net->nf.proc_netfilter, &nflog_file_ops)) + return ret; #endif + ret = netfilter_log_sysctl_init(net); + if (ret < 0) + goto out_sysctl; + + return 0; - /* Errors will 
trigger panic, unroll on error is unnecessary. */ - r = netfilter_log_sysctl_init(); - if (r < 0) - return r; +out_sysctl: + /* For init_net: errors will trigger panic, don't unroll on error. */ + if (!net_eq(net, &init_net)) + remove_proc_entry("nf_log", net->nf.proc_netfilter); + + return ret; +} + +static void __net_exit nf_log_net_exit(struct net *net) +{ + netfilter_log_sysctl_exit(net); + remove_proc_entry("nf_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nf_log_net_ops = { + .init = nf_log_net_init, + .exit = nf_log_net_exit, +}; + +int __init netfilter_log_init(void) +{ + int i, ret; + + ret = register_pernet_subsys(&nf_log_net_ops); + if (ret < 0) + return ret; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&(nf_loggers_l[i])); diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 3b67c9d11273..eb772380a202 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -1,6 +1,7 @@ /* Amanda extension for TCP NAT alteration. * (C) 2002 by Brian J. 
Murrell <netfilter@interlinx.bc.ca> * based on a copy of HW's ip_nat_irc.c as well as other modules + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 8d5769c6d16e..2e469ca2ca55 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -87,9 +87,10 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) struct flowi fl; unsigned int hh_len; struct dst_entry *dst; + int err; - if (xfrm_decode_session(skb, &fl, family) < 0) - return -1; + err = xfrm_decode_session(skb, &fl, family); + if (err < 0) + return err; dst = skb_dst(skb); if (dst->xfrm) @@ -98,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); if (IS_ERR(dst)) - return -1; + return PTR_ERR(dst); skb_dst_drop(skb); skb_dst_set(skb, dst); @@ -107,7 +108,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) - return -1; + return -ENOMEM; return 0; } EXPORT_SYMBOL(nf_xfrm_me_harder); @@ -467,33 +468,22 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; - bool hash; }; -/* Clear NAT section of all conntracks, in case we're loaded again. 
*/ -static int nf_nat_proto_clean(struct nf_conn *i, void *data) +/* kill conntracks with affected NAT section */ +static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; struct nf_conn_nat *nat = nfct_nat(i); if (!nat) return 0; - if (!(i->status & IPS_SRC_NAT_DONE)) - return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; - if (clean->hash) { - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); - } else { - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | - IPS_SEQ_ADJUST); - } - return 0; + return i->status & IPS_NAT_MASK ? 1 : 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) @@ -505,16 +495,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - - /* Step 2 - clean NAT section */ - clean.hash = false; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -526,16 +508,9 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - /* Step 2 - clean NAT section */ - clean.hash = false; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -773,7 +748,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); 
synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 23c2b38676a6..5fea563afe30 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -2,6 +2,7 @@ * * (C) 2000-2002 Harald Welte <laforge@netfilter.org> * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2007-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d812c1235b30..5ccf01e35390 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,3 +1,8 @@ +/* + * Rusty Russell (C)2000 -- This code is GPL. + * Patrick McHardy (c) 2006-2012 + */ + #include <linux/kernel.h> #include <linux/slab.h> #include <linux/init.h> diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index d578ec251712..572d87dc116f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -24,10 +24,9 @@ #include <linux/skbuff.h> #include <asm/uaccess.h> #include <net/sock.h> -#include <net/netlink.h> #include <linux/init.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> MODULE_LICENSE("GPL"); @@ -62,11 +61,6 @@ void nfnl_unlock(__u8 subsys_id) } EXPORT_SYMBOL_GPL(nfnl_unlock); -static struct mutex *nfnl_get_lock(__u8 subsys_id) -{ - return &table[subsys_id].mutex; -} - int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { nfnl_lock(n->subsys_id); @@ -118,22 +112,30 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, +struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) +{ + return 
netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); +} +EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); + +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { - return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) { - return netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, portid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, + int flags) { - return netlink_unicast(net->nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, portid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); @@ -149,7 +151,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; /* All the messages must at least contain nfgenmsg */ - if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg))) + if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; type = nlh->nlmsg_type; @@ -177,7 +179,7 @@ replay: } { - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; @@ -199,7 +201,7 @@ replay: rcu_read_unlock(); nfnl_lock(subsys_id); if (rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(nfnl_get_lock(subsys_id))) != ss || + lockdep_is_held(&table[subsys_id].mutex)) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 
589d686f0b4c..dc3fd5d44464 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,6 +49,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); + if (strlen(acct_name) == 0) + return -EINVAL; list_for_each_entry(nfacct, &nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f248db572972..faf1e9300d8a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -3,6 +3,7 @@ * nfetlink. * * (C) 2005 by Harald Welte <laforge@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * Based on the old ipv4-only ipt_ULOG.c: * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> @@ -19,7 +20,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/netfilter.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_log.h> #include <linux/spinlock.h> @@ -32,6 +33,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/netfilter/nfnetlink_log.h> #include <linux/atomic.h> @@ -56,6 +58,7 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; + struct net *net; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ int peer_portid; /* PORTID of the peer process */ @@ -71,25 +74,34 @@ struct nfulnl_instance { struct rcu_head rcu; }; -static DEFINE_SPINLOCK(instances_lock); -static atomic_t global_seq; - #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS]; static unsigned int hash_init; +static int nfnl_log_net_id __read_mostly; + +struct nfnl_log_net { + spinlock_t instances_lock; + struct hlist_head instance_table[INSTANCE_BUCKETS]; + 
atomic_t global_seq; +}; + +static struct nfnl_log_net *nfnl_log_pernet(struct net *net) +{ + return net_generic(net, nfnl_log_net_id); +} + static inline u_int8_t instance_hashfn(u_int16_t group_num) { return ((group_num & 0xff) % INSTANCE_BUCKETS); } static struct nfulnl_instance * -__instance_lookup(u_int16_t group_num) +__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num) { struct hlist_head *head; struct nfulnl_instance *inst; - head = &instance_table[instance_hashfn(group_num)]; + head = &log->instance_table[instance_hashfn(group_num)]; hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->group_num == group_num) return inst; @@ -104,12 +116,12 @@ instance_get(struct nfulnl_instance *inst) } static struct nfulnl_instance * -instance_lookup_get(u_int16_t group_num) +instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num) { struct nfulnl_instance *inst; rcu_read_lock_bh(); - inst = __instance_lookup(group_num); + inst = __instance_lookup(log, group_num); if (inst && !atomic_inc_not_zero(&inst->use)) inst = NULL; rcu_read_unlock_bh(); @@ -119,7 +131,11 @@ instance_lookup_get(u_int16_t group_num) static void nfulnl_instance_free_rcu(struct rcu_head *head) { - kfree(container_of(head, struct nfulnl_instance, rcu)); + struct nfulnl_instance *inst = + container_of(head, struct nfulnl_instance, rcu); + + put_net(inst->net); + kfree(inst); module_put(THIS_MODULE); } @@ -133,13 +149,15 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) +instance_create(struct net *net, u_int16_t group_num, + int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; + struct nfnl_log_net *log = nfnl_log_pernet(net); int err; - spin_lock_bh(&instances_lock); - if (__instance_lookup(group_num)) { + spin_lock_bh(&log->instances_lock); + if (__instance_lookup(log, group_num)) { err = -EEXIST; 
goto out_unlock; } @@ -163,6 +181,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); + inst->net = get_net(net); inst->peer_user_ns = user_ns; inst->peer_portid = portid; inst->group_num = group_num; @@ -174,14 +193,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) inst->copy_range = NFULNL_COPY_RANGE_MAX; hlist_add_head_rcu(&inst->hlist, - &instance_table[instance_hashfn(group_num)]); + &log->instance_table[instance_hashfn(group_num)]); + - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); return inst; out_unlock: - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); return ERR_PTR(err); } @@ -210,11 +230,12 @@ __instance_destroy(struct nfulnl_instance *inst) } static inline void -instance_destroy(struct nfulnl_instance *inst) +instance_destroy(struct nfnl_log_net *log, + struct nfulnl_instance *inst) { - spin_lock_bh(&instances_lock); + spin_lock_bh(&log->instances_lock); __instance_destroy(inst); - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); } static int @@ -298,7 +319,7 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) } static struct sk_buff * -nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -307,13 +328,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) * message. 
WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = alloc_skb(n, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = alloc_skb(pkt_size, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, pkt_size, + peer_portid, GFP_ATOMIC); if (!skb) pr_err("nfnetlink_log: can't even alloc %u bytes\n", pkt_size); @@ -336,7 +358,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, + status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -370,7 +392,8 @@ nfulnl_timer(unsigned long data) /* This is an inline function, we don't really care about a long * list of arguments */ static inline int -__build_packet_message(struct nfulnl_instance *inst, +__build_packet_message(struct nfnl_log_net *log, + struct nfulnl_instance *inst, const struct sk_buff *skb, unsigned int data_len, u_int8_t pf, @@ -536,7 +559,7 @@ __build_packet_message(struct nfulnl_instance *inst, /* global sequence number */ if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) && nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, - htonl(atomic_inc_return(&global_seq)))) + htonl(atomic_inc_return(&log->global_seq)))) goto nla_put_failure; if (data_len) { @@ -592,13 +615,15 @@ nfulnl_log_packet(u_int8_t pf, const struct nf_loginfo *li; unsigned int qthreshold; unsigned int plen; + struct net *net = dev_net(in ? in : out); + struct nfnl_log_net *log = nfnl_log_pernet(net); if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; else li = &default_loginfo; - inst = instance_lookup_get(li->u.ulog.group); + inst = instance_lookup_get(log, li->u.ulog.group); if (!inst) return; @@ -609,7 +634,7 @@ nfulnl_log_packet(u_int8_t pf, /* FIXME: do we want to make the size calculation conditional based on * what is actually present? 
way more branches and checks, but more * memory efficient... */ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -673,14 +698,15 @@ nfulnl_log_packet(u_int8_t pf, } if (!inst->skb) { - inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size); + inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, + size); if (!inst->skb) goto alloc_failure; } inst->qlen++; - __build_packet_message(inst, skb, data_len, pf, + __build_packet_message(log, inst, skb, data_len, pf, hooknum, in, out, prefix, plen); if (inst->qlen >= qthreshold) @@ -709,24 +735,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nfnl_log_net *log = nfnl_log_pernet(n->net); if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this portid */ - spin_lock_bh(&instances_lock); + spin_lock_bh(&log->instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *t2; struct nfulnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &log->instance_table[i]; hlist_for_each_entry_safe(inst, t2, head, hlist) { - if ((net_eq(n->net, &init_net)) && - (n->portid == inst->peer_portid)) + if (n->portid == inst->peer_portid) __instance_destroy(inst); } } - spin_unlock_bh(&instances_lock); + spin_unlock_bh(&log->instances_lock); } return NOTIFY_DONE; } @@ -767,6 +793,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); + struct nfnl_log_net *log = nfnl_log_pernet(net); int ret = 0; if (nfula[NFULA_CFG_CMD]) { @@ -776,14 +804,14 @@ nfulnl_recv_config(struct sock *ctnl, struct 
sk_buff *skb, /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: - return nf_log_bind_pf(pf, &nfulnl_logger); + return nf_log_bind_pf(net, pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: - nf_log_unbind_pf(pf); + nf_log_unbind_pf(net, pf); return 0; } } - inst = instance_lookup_get(group_num); + inst = instance_lookup_get(log, group_num); if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; @@ -797,9 +825,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out_put; } - inst = instance_create(group_num, + inst = instance_create(net, group_num, NETLINK_CB(skb).portid, - sk_user_ns(NETLINK_CB(skb).ssk)); + sk_user_ns(NETLINK_CB(skb).sk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); goto out; @@ -811,7 +839,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out; } - instance_destroy(inst); + instance_destroy(log, inst); goto out_put; default: ret = -ENOTSUPP; @@ -894,55 +922,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = { #ifdef CONFIG_PROC_FS struct iter_state { + struct seq_net_private p; unsigned int bucket; }; -static struct hlist_node *get_first(struct iter_state *st) +static struct hlist_node *get_first(struct net *net, struct iter_state *st) { + struct nfnl_log_net *log; if (!st) return NULL; + log = nfnl_log_pernet(net); + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); + struct hlist_head *head = &log->instance_table[st->bucket]; + + if (!hlist_empty(head)) + return rcu_dereference_bh(hlist_first_rcu(head)); } return NULL; } -static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) +static struct hlist_node *get_next(struct net *net, struct iter_state *st, + struct hlist_node *h) { h = rcu_dereference_bh(hlist_next_rcu(h)); while (!h) { + struct nfnl_log_net *log; + struct 
hlist_head *head; + if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); + log = nfnl_log_pernet(net); + head = &log->instance_table[st->bucket]; + h = rcu_dereference_bh(hlist_first_rcu(head)); } return h; } -static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) +static struct hlist_node *get_idx(struct net *net, struct iter_state *st, + loff_t pos) { struct hlist_node *head; - head = get_first(st); + head = get_first(net, st); if (head) - while (pos && (head = get_next(st, head))) + while (pos && (head = get_next(net, st, head))) pos--; return pos ? NULL : head; } -static void *seq_start(struct seq_file *seq, loff_t *pos) +static void *seq_start(struct seq_file *s, loff_t *pos) __acquires(rcu_bh) { rcu_read_lock_bh(); - return get_idx(seq->private, *pos); + return get_idx(seq_file_net(s), s->private, *pos); } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; - return get_next(s->private, v); + return get_next(seq_file_net(s), s->private, v); } static void seq_stop(struct seq_file *s, void *v) @@ -971,8 +1012,8 @@ static const struct seq_operations nful_seq_ops = { static int nful_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &nful_seq_ops, - sizeof(struct iter_state)); + return seq_open_net(inode, file, &nful_seq_ops, + sizeof(struct iter_state)); } static const struct file_operations nful_file_ops = { @@ -980,17 +1021,43 @@ static const struct file_operations nful_file_ops = { .open = nful_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* PROC_FS */ -static int __init nfnetlink_log_init(void) +static int __net_init nfnl_log_net_init(struct net *net) { - int i, status = -ENOMEM; + unsigned int i; + struct nfnl_log_net *log = nfnl_log_pernet(net); for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); + 
INIT_HLIST_HEAD(&log->instance_table[i]); + spin_lock_init(&log->instances_lock); + +#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_log", 0440, + net->nf.proc_netfilter, &nful_file_ops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit nfnl_log_net_exit(struct net *net) +{ + remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_log_net_ops = { + .init = nfnl_log_net_init, + .exit = nfnl_log_net_exit, + .id = &nfnl_log_net_id, + .size = sizeof(struct nfnl_log_net), +}; + +static int __init nfnetlink_log_init(void) +{ + int status = -ENOMEM; /* it's not really all that important to have a random value, so * we can do this from the init function, even if there hasn't @@ -1000,29 +1067,25 @@ static int __init nfnetlink_log_init(void) netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { - printk(KERN_ERR "log: failed to create netlink socket\n"); + pr_err("log: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); if (status < 0) { - printk(KERN_ERR "log: failed to register logger\n"); + pr_err("log: failed to register logger\n"); goto cleanup_subsys; } -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_log", 0440, - proc_net_netfilter, &nful_file_ops)) { - status = -ENOMEM; + status = register_pernet_subsys(&nfnl_log_net_ops); + if (status < 0) { + pr_err("log: failed to register pernet ops\n"); goto cleanup_logger; } -#endif return status; -#ifdef CONFIG_PROC_FS cleanup_logger: nf_log_unregister(&nfulnl_logger); -#endif cleanup_subsys: nfnetlink_subsys_unregister(&nfulnl_subsys); cleanup_netlink_notifier: @@ -1032,10 +1095,8 @@ cleanup_netlink_notifier: static void __exit nfnetlink_log_fini(void) { + unregister_pernet_subsys(&nfnl_log_net_ops); nf_log_unregister(&nfulnl_logger); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_log", 
proc_net_netfilter); -#endif nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 858fd52c1040..ef3cdb4bfeea 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -30,6 +30,7 @@ #include <linux/list.h> #include <net/sock.h> #include <net/netfilter/nf_queue.h> +#include <net/netns/generic.h> #include <net/netfilter/nfnetlink_queue.h> #include <linux/atomic.h> @@ -66,23 +67,31 @@ struct nfqnl_instance { typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static DEFINE_SPINLOCK(instances_lock); +static int nfnl_queue_net_id __read_mostly; #define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; +struct nfnl_queue_net { + spinlock_t instances_lock; + struct hlist_head instance_table[INSTANCE_BUCKETS]; +}; + +static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net) +{ + return net_generic(net, nfnl_queue_net_id); +} static inline u_int8_t instance_hashfn(u_int16_t queue_num) { - return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; + return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS; } static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) +instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num) { struct hlist_head *head; struct nfqnl_instance *inst; - head = &instance_table[instance_hashfn(queue_num)]; + head = &q->instance_table[instance_hashfn(queue_num)]; hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->queue_num == queue_num) return inst; @@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int portid) +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, + int portid) { struct nfqnl_instance *inst; unsigned int h; int err; - spin_lock(&instances_lock); - if (instance_lookup(queue_num)) { + 
spin_lock(&q->instances_lock); + if (instance_lookup(q, queue_num)) { err = -EEXIST; goto out_unlock; } @@ -112,7 +122,7 @@ instance_create(u_int16_t queue_num, int portid) inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; + inst->copy_range = 0xffff; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); @@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid) } h = instance_hashfn(queue_num); - hlist_add_head_rcu(&inst->hlist, &instance_table[h]); + hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]); - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); return inst; out_free: kfree(inst); out_unlock: - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); return ERR_PTR(err); } @@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst) } static void -instance_destroy(struct nfqnl_instance *inst) +instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst) { - spin_lock(&instances_lock); + spin_lock(&q->instances_lock); __instance_destroy(inst); - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); } static inline void @@ -217,14 +227,59 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } +static void +nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, 
page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} + static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nf_queue_entry *entry, __be32 **packet_id_ptr) { - sk_buff_data_t old_tail; size_t size; size_t data_len = 0, cap_len = 0; + int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -236,7 +291,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -246,8 +301,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) - + nla_total_size(sizeof(u_int32_t))); /* cap_len */ + + nla_total_size(sizeof(u_int32_t)); /* cap_len */ + + if (entskb->tstamp.tv64) + size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); outdev = entry->outdev; @@ -265,7 +322,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (data_len == 0 || data_len > entskb->len) data_len = entskb->len; - size += nla_total_size(data_len); + + if (!entskb->head_frag || + skb_headlen(entskb) < L1_CACHE_BYTES || + skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) + hlen = 
skb_headlen(entskb); + + if (skb_has_frag_list(entskb)) + hlen = entskb->len; + hlen = min_t(int, data_len, hlen); + size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; } @@ -273,11 +339,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); - skb = alloc_skb(size, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, + GFP_ATOMIC); if (!skb) return NULL; - old_tail = skb->tail; nlh = nlmsg_put(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg), 0); @@ -382,31 +448,26 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, goto nla_put_failure; } + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + goto nla_put_failure; + + if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + goto nla_put_failure; + if (data_len) { struct nlattr *nla; - int sz = nla_attr_size(data_len); - if (skb_tailroom(skb) < nla_total_size(data_len)) { - printk(KERN_WARNING "nf_queue: no tailroom!\n"); - kfree_skb(skb); - return NULL; - } + if (skb_tailroom(skb) < sizeof(*nla) + hlen) + goto nla_put_failure; - nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla = (struct nlattr *)skb_put(skb, sizeof(*nla)); nla->nla_type = NFQA_PAYLOAD; - nla->nla_len = sz; + nla->nla_len = nla_attr_size(data_len); - if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) - BUG(); + nfqnl_zcopy(skb, entskb, data_len, hlen); } - if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) - goto nla_put_failure; - - if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) - goto nla_put_failure; - - nlh->nlmsg_len = skb->tail - old_tail; + nlh->nlmsg_len = skb->len; return skb; nla_put_failure: @@ -423,9 +484,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) int err = -ENOBUFS; __be32 *packet_id_ptr; int failopen = 0; + struct net *net = dev_net(entry->indev ? 
+ entry->indev : entry->outdev); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); /* rcu_read_lock()ed by nf_hook_slow() */ - queue = instance_lookup(queuenum); + queue = instance_lookup(q, queuenum); if (!queue) { err = -ESRCH; goto err_out; @@ -462,7 +526,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -575,15 +639,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) /* drop all packets with either indev or outdev == ifindex from all queue * instances */ static void -nfqnl_dev_drop(int ifindex) +nfqnl_dev_drop(struct net *net, int ifindex) { int i; + struct nfnl_queue_net *q = nfnl_queue_pernet(net); rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &q->instance_table[i]; hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, dev_cmp, ifindex); @@ -600,12 +665,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) - nfqnl_dev_drop(dev->ifindex); + nfqnl_dev_drop(dev_net(dev), dev->ifindex); return NOTIFY_DONE; } @@ -618,24 +680,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; + struct nfnl_queue_net *q = nfnl_queue_pernet(n->net); if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this portid */ - spin_lock(&instances_lock); + spin_lock(&q->instances_lock); for (i = 0; i < 
INSTANCE_BUCKETS; i++) { struct hlist_node *t2; struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; + struct hlist_head *head = &q->instance_table[i]; hlist_for_each_entry_safe(inst, t2, head, hlist) { - if ((n->net == &init_net) && - (n->portid == inst->peer_portid)) + if (n->portid == inst->peer_portid) __instance_destroy(inst); } } - spin_unlock(&instances_lock); + spin_unlock(&q->instances_lock); } return NOTIFY_DONE; } @@ -656,11 +718,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) +static struct nfqnl_instance * +verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid) { struct nfqnl_instance *queue; - queue = instance_lookup(queue_num); + queue = instance_lookup(q, queue_num); if (!queue) return ERR_PTR(-ENODEV); @@ -704,7 +767,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -752,10 +819,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, enum ip_conntrack_info uninitialized_var(ctinfo); struct nf_conn *ct = NULL; - queue = instance_lookup(queue_num); - if (!queue) + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); + queue = instance_lookup(q, queue_num); + if (!queue) + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -819,6 +889,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, u_int16_t queue_num = 
ntohs(nfmsg->res_id); struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; + struct net *net = sock_net(ctnl); + struct nfnl_queue_net *q = nfnl_queue_pernet(net); int ret = 0; if (nfqa[NFQA_CFG_CMD]) { @@ -832,7 +904,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - queue = instance_lookup(queue_num); + queue = instance_lookup(q, queue_num); if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; @@ -845,7 +917,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).portid); + queue = instance_create(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -856,7 +929,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -ENODEV; goto err_out_unlock; } - instance_destroy(queue); + instance_destroy(q, queue); break; case NFQNL_CFG_CMD_PF_BIND: case NFQNL_CFG_CMD_PF_UNBIND: @@ -950,19 +1023,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = { #ifdef CONFIG_PROC_FS struct iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *get_first(struct seq_file *seq) { struct iter_state *st = seq->private; + struct net *net; + struct nfnl_queue_net *q; if (!st) return NULL; + net = seq_file_net(seq); + q = nfnl_queue_pernet(net); for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; + if (!hlist_empty(&q->instance_table[st->bucket])) + return q->instance_table[st->bucket].first; } return NULL; } @@ -970,13 +1048,17 @@ static struct hlist_node *get_first(struct seq_file *seq) static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) { struct iter_state *st = seq->private; + struct net *net = seq_file_net(seq); h = h->next; while (!h) { + struct 
nfnl_queue_net *q; + if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = instance_table[st->bucket].first; + q = nfnl_queue_pernet(net); + h = q->instance_table[st->bucket].first; } return h; } @@ -992,11 +1074,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : head; } -static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) +static void *seq_start(struct seq_file *s, loff_t *pos) + __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock) { - spin_lock(&instances_lock); - return get_idx(seq, *pos); + spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock); + return get_idx(s, *pos); } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) @@ -1006,9 +1088,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) + __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock) { - spin_unlock(&instances_lock); + spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock); } static int seq_show(struct seq_file *s, void *v) @@ -1032,7 +1114,7 @@ static const struct seq_operations nfqnl_seq_ops = { static int nfqnl_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &nfqnl_seq_ops, + return seq_open_net(inode, file, &nfqnl_seq_ops, sizeof(struct iter_state)); } @@ -1041,39 +1123,63 @@ static const struct file_operations nfqnl_file_ops = { .open = nfqnl_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* PROC_FS */ -static int __init nfnetlink_queue_init(void) +static int __net_init nfnl_queue_net_init(struct net *net) { - int i, status = -ENOMEM; + unsigned int i; + struct nfnl_queue_net *q = nfnl_queue_pernet(net); for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); + INIT_HLIST_HEAD(&q->instance_table[i]); + + spin_lock_init(&q->instances_lock); + 
+#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_queue", 0440, + net->nf.proc_netfilter, &nfqnl_file_ops)) + return -ENOMEM; +#endif + return 0; +} + +static void __net_exit nfnl_queue_net_exit(struct net *net) +{ + remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_queue_net_ops = { + .init = nfnl_queue_net_init, + .exit = nfnl_queue_net_exit, + .id = &nfnl_queue_net_id, + .size = sizeof(struct nfnl_queue_net), +}; + +static int __init nfnetlink_queue_init(void) +{ + int status = -ENOMEM; netlink_register_notifier(&nfqnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfqnl_subsys); if (status < 0) { - printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + pr_err("nf_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) + status = register_pernet_subsys(&nfnl_queue_net_ops); + if (status < 0) { + pr_err("nf_queue: failed to register pernet ops\n"); goto cleanup_subsys; -#endif - + } register_netdevice_notifier(&nfqnl_dev_notifier); nf_register_queue_handler(&nfqh); return status; -#ifdef CONFIG_PROC_FS cleanup_subsys: nfnetlink_subsys_unregister(&nfqnl_subsys); -#endif cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); return status; @@ -1083,9 +1189,7 @@ static void __exit nfnetlink_queue_fini(void) { nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -#endif + unregister_pernet_subsys(&nfnl_queue_net_ops); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 686c7715d777..1a73b18683b6 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -2,6 +2,7 @@ * x_tables core - Backend for 
{ip,ip6,arp}_tables * * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org> + * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * Based on existing ip_tables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index ba92824086f3..3228d7f24eb4 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -124,6 +124,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_audit_info *info = par->targinfo; struct audit_buffer *ab; + if (audit_enabled == 0) + goto errout; + ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); if (ab == NULL) goto errout; diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index fa40096940a1..fe573f6c9e91 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - struct sbuff *m = sb_open(); + struct sbuff *m; + struct net *net = dev_net(in ? in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = sb_open(); if (!loginfo) loginfo = &default_loginfo; @@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - struct sbuff *m = sb_open(); + struct sbuff *m; + struct net *net = dev_net(in ? 
in : out); + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = sb_open(); if (!loginfo) loginfo = &default_loginfo; @@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = { }; #endif +static int __net_init log_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger); +#endif + return 0; +} + +static void __net_exit log_net_exit(struct net *net) +{ + nf_log_unset(net, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + nf_log_unset(net, &ip6t_log_logger); +#endif +} + +static struct pernet_operations log_net_ops = { + .init = log_net_init, + .exit = log_net_exit, +}; + static int __init log_tg_init(void) { int ret; + ret = register_pernet_subsys(&log_net_ops); + if (ret < 0) + goto err_pernet; + ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); if (ret < 0) - return ret; + goto err_target; nf_log_register(NFPROTO_IPV4, &ipt_log_logger); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); #endif return 0; + +err_target: + unregister_pernet_subsys(&log_net_ops); +err_pernet: + return ret; } static void __exit log_tg_exit(void) { + unregister_pernet_subsys(&log_net_ops); nf_log_unregister(&ipt_log_logger); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_log_unregister(&ip6t_log_logger); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 817f9e9f2b16..1e2fae32f81b 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb) } #endif -static unsigned int -nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +static u32 +nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) 
{ - if (par->family == NFPROTO_IPV4) - queue = (((u64) hash_v4(skb) * info->queues_total) >> - 32) + queue; + if (par->family == NFPROTO_IPV4) + queue += ((u64) hash_v4(skb) * info->queues_total) >> 32; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - queue = (((u64) hash_v6(skb) * info->queues_total) >> - 32) + queue; + else if (par->family == NFPROTO_IPV6) + queue += ((u64) hash_v6(skb) * info->queues_total) >> 32; #endif - } + + return queue; +} + +static unsigned int +nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = nfqueue_hash(skb, par); + return NF_QUEUE_NR(queue); } @@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) static int nfqueue_tg_check(const struct xt_tgchk_param *par) { - const struct xt_NFQ_info_v2 *info = par->targinfo; + const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; if (unlikely(!rnd_inited)) { @@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) info->queues_total, maxid); return -ERANGE; } - if (par->target->revision == 2 && info->bypass > 1) + if (par->target->revision == 2 && info->flags > 1) return -EINVAL; + if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK) + return -EINVAL; + return 0; } +static unsigned int +nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_NFQ_info_v3 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) { + if (info->flags & NFQ_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = info->queuenum + cpu % info->queues_total; + } else + queue = nfqueue_hash(skb, par); + } + + return NF_QUEUE_NR(queue); +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", @@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { 
.targetsize = sizeof(struct xt_NFQ_info_v2), .me = THIS_MODULE, }, + { + .name = "NFQUEUE", + .revision = 3, + .family = NFPROTO_UNSPEC, + .checkentry = nfqueue_tg_check, + .target = nfqueue_tg_v3, + .targetsize = sizeof(struct xt_NFQ_info_v3), + .me = THIS_MODULE, + }, }; static int __init nfqueue_tg_init(void) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 71a266de5fb4..a75240f0d42b 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -2,6 +2,7 @@ * This is a module which is used for setting the MSS option in TCP packets. * * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> + * Copyright (C) 2007 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 61805d7b38aa..188404b9b002 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -3,6 +3,7 @@ * information. (Superset of Rusty's minimalistic state match.) * * (C) 2001 Marc Boucher (marc@mbsi.ca). 
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * This program is free software; you can redistribute it and/or modify diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f330e8beaf69..0199e7bb8f81 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -3,6 +3,7 @@ * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * Development of this code was funded by Astaro AG, http://www.astaro.com/ diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index a4c1e4528cac..bef850596558 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -1,5 +1,6 @@ /* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index a5e673d32bda..647d989a01e6 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; + struct net *net = dev_net(p->in ? 
p->in : p->out); if (!info) return false; @@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(p->family, p->hooknum, skb, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, p->out, NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, @@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, + nf_log_packet(net, p->family, p->hooknum, skb, p->in, + p->out, NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 847d495cd4de..8a6c6ea466d8 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1189,8 +1189,6 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; - u32 skip_addr4 = cb->args[2]; - u32 skip_addr6 = cb->args[3]; u32 iter_bkt; u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; @@ -1215,7 +1213,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < skip_addr4) + if (iter_addr4++ < cb->args[2]) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1231,7 +1229,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < skip_addr6) + if (iter_addr6++ < cb->args[3]) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1250,10 +1248,10 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, 
unlabel_staticlist_return: rcu_read_unlock(); - cb->args[0] = skip_bkt; - cb->args[1] = skip_chain; - cb->args[2] = skip_addr4; - cb->args[3] = skip_addr6; + cb->args[0] = iter_bkt; + cb->args[1] = iter_chain; + cb->args[2] = iter_addr4; + cb->args[3] = iter_addr6; return skb->len; } @@ -1273,12 +1271,9 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, { struct netlbl_unlhsh_walk_arg cb_arg; struct netlbl_unlhsh_iface *iface; - u32 skip_addr4 = cb->args[0]; - u32 skip_addr6 = cb->args[1]; - u32 iter_addr4 = 0; + u32 iter_addr4 = 0, iter_addr6 = 0; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) - u32 iter_addr6 = 0; struct netlbl_af6list *addr6; #endif @@ -1292,7 +1287,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, goto unlabel_staticlistdef_return; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < skip_addr4) + if (iter_addr4++ < cb->args[0]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, @@ -1305,7 +1300,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < skip_addr6) + if (iter_addr6++ < cb->args[1]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, @@ -1320,8 +1315,8 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, unlabel_staticlistdef_return: rcu_read_unlock(); - cb->args[0] = skip_addr4; - cb->args[1] = skip_addr6; + cb->args[0] = iter_addr4; + cb->args[1] = iter_addr6; return skb->len; } diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig new file mode 100644 index 000000000000..5d6e8c05b3d4 --- /dev/null +++ b/net/netlink/Kconfig @@ -0,0 +1,10 @@ +# +# Netlink Sockets +# + +config NETLINK_DIAG + tristate "NETLINK: socket monitoring interface" + default n + ---help--- + Support for NETLINK socket monitoring interface used by the ss tool. + If unsure, say Y. 
diff --git a/net/netlink/Makefile b/net/netlink/Makefile index bdd6ddf4e95b..e837917f6c03 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -3,3 +3,6 @@ # obj-y := af_netlink.o genetlink.o + +obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o +netlink_diag-y := diag.o diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1e3fd5bfcd86..da5601d13a7f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3,6 +3,7 @@ * * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -55,87 +56,45 @@ #include <linux/types.h> #include <linux/audit.h> #include <linux/mutex.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/scm.h> #include <net/netlink.h> -#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) -#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) - -struct netlink_sock { - /* struct sock has to be the first member of netlink_sock */ - struct sock sk; - u32 portid; - u32 dst_portid; - u32 dst_group; - u32 flags; - u32 subscriptions; - u32 ngroups; - unsigned long *groups; - unsigned long state; - wait_queue_head_t wait; - struct netlink_callback *cb; - struct mutex *cb_mutex; - struct mutex cb_def_mutex; - void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); - struct module *module; -}; +#include "af_netlink.h" struct listeners { struct rcu_head rcu; unsigned long masks[0]; }; +/* state bits */ +#define NETLINK_CONGESTED 0x0 + +/* flags */ #define NETLINK_KERNEL_SOCKET 0x1 #define NETLINK_RECV_PKTINFO 0x2 #define NETLINK_BROADCAST_SEND_ERROR 0x4 #define NETLINK_RECV_NO_ENOBUFS 0x8 -static inline struct netlink_sock *nlk_sk(struct sock *sk) -{ - return container_of(sk, struct netlink_sock, sk); -} - static inline int 
netlink_is_kernel(struct sock *sk) { return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_portid_hash { - struct hlist_head *table; - unsigned long rehash_time; - - unsigned int mask; - unsigned int shift; - - unsigned int entries; - unsigned int max_shift; - - u32 rnd; -}; - -struct netlink_table { - struct nl_portid_hash hash; - struct hlist_head mc_list; - struct listeners __rcu *listeners; - unsigned int flags; - unsigned int groups; - struct mutex *cb_mutex; - struct module *module; - void (*bind)(int group); - int registered; -}; - -static struct netlink_table *nl_table; +struct netlink_table *nl_table; +EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); +static void netlink_skb_destructor(struct sk_buff *skb); -static DEFINE_RWLOCK(nl_table_lock); +DEFINE_RWLOCK(nl_table_lock); +EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); @@ -152,6 +111,599 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } +static void netlink_overrun(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { + if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) { + sk->sk_err = ENOBUFS; + sk->sk_error_report(sk); + } + } + atomic_inc(&sk->sk_drops); +} + +static void netlink_rcv_wake(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (skb_queue_empty(&sk->sk_receive_queue)) + clear_bit(NETLINK_CONGESTED, &nlk->state); + if (!test_bit(NETLINK_CONGESTED, &nlk->state)) + wake_up_interruptible(&nlk->wait); +} + +#ifdef CONFIG_NETLINK_MMAP +static bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ + return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +} + +static bool netlink_rx_is_mmaped(struct sock *sk) 
+{ + return nlk_sk(sk)->rx_ring.pg_vec != NULL; +} + +static bool netlink_tx_is_mmaped(struct sock *sk) +{ + return nlk_sk(sk)->tx_ring.pg_vec != NULL; +} + +static __pure struct page *pgvec_to_page(const void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + else + return virt_to_page(addr); +} + +static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len; i++) { + if (pg_vec[i] != NULL) { + if (is_vmalloc_addr(pg_vec[i])) + vfree(pg_vec[i]); + else + free_pages((unsigned long)pg_vec[i], order); + } + } + kfree(pg_vec); +} + +static void *alloc_one_pg_vec_page(unsigned long order) +{ + void *buffer; + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | + __GFP_NOWARN | __GFP_NORETRY; + + buffer = (void *)__get_free_pages(gfp_flags, order); + if (buffer != NULL) + return buffer; + + buffer = vzalloc((1 << order) * PAGE_SIZE); + if (buffer != NULL) + return buffer; + + gfp_flags &= ~__GFP_NORETRY; + return (void *)__get_free_pages(gfp_flags, order); +} + +static void **alloc_pg_vec(struct netlink_sock *nlk, + struct nl_mmap_req *req, unsigned int order) +{ + unsigned int block_nr = req->nm_block_nr; + unsigned int i; + void **pg_vec, *ptr; + + pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); + if (pg_vec == NULL) + return NULL; + + for (i = 0; i < block_nr; i++) { + pg_vec[i] = ptr = alloc_one_pg_vec_page(order); + if (pg_vec[i] == NULL) + goto err1; + } + + return pg_vec; +err1: + free_pg_vec(pg_vec, order, block_nr); + return NULL; +} + +static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, + bool closing, bool tx_ring) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + struct sk_buff_head *queue; + void **pg_vec = NULL; + unsigned int order = 0; + int err; + + ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; + queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; + + if (!closing) { + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; + } + + if (req->nm_block_nr) { + if (ring->pg_vec != NULL) + return -EBUSY; + + if ((int)req->nm_block_size <= 0) + return -EINVAL; + if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE)) + return -EINVAL; + if (req->nm_frame_size < NL_MMAP_HDRLEN) + return -EINVAL; + if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) + return -EINVAL; + + ring->frames_per_block = req->nm_block_size / + req->nm_frame_size; + if (ring->frames_per_block == 0) + return -EINVAL; + if (ring->frames_per_block * req->nm_block_nr != + req->nm_frame_nr) + return -EINVAL; + + order = get_order(req->nm_block_size); + pg_vec = alloc_pg_vec(nlk, req, order); + if (pg_vec == NULL) + return -ENOMEM; + } else { + if (req->nm_frame_nr) + return -EINVAL; + } + + err = -EBUSY; + mutex_lock(&nlk->pg_vec_lock); + if (closing || atomic_read(&nlk->mapped) == 0) { + err = 0; + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + } + mutex_unlock(&nlk->pg_vec_lock); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); + return err; +} + +static void netlink_mm_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct socket *sock = file->private_data; + struct sock *sk = sock->sk; + + if (sk) + atomic_inc(&nlk_sk(sk)->mapped); +} + +static void netlink_mm_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct socket *sock = file->private_data; + struct sock *sk = sock->sk; + + if (sk) + atomic_dec(&nlk_sk(sk)->mapped); +} + +static const 
struct vm_operations_struct netlink_mmap_ops = { + .open = netlink_mm_open, + .close = netlink_mm_close, +}; + +static int netlink_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + unsigned long start, size, expected; + unsigned int i; + int err = -EINVAL; + + if (vma->vm_pgoff) + return -EINVAL; + + mutex_lock(&nlk->pg_vec_lock); + + expected = 0; + for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { + if (ring->pg_vec == NULL) + continue; + expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; + } + + if (expected == 0) + goto out; + + size = vma->vm_end - vma->vm_start; + if (size != expected) + goto out; + + start = vma->vm_start; + for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { + if (ring->pg_vec == NULL) + continue; + + for (i = 0; i < ring->pg_vec_len; i++) { + struct page *page; + void *kaddr = ring->pg_vec[i]; + unsigned int pg_num; + + for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { + page = pgvec_to_page(kaddr); + err = vm_insert_page(vma, start, page); + if (err < 0) + goto out; + start += PAGE_SIZE; + kaddr += PAGE_SIZE; + } + } + } + + atomic_inc(&nlk->mapped); + vma->vm_ops = &netlink_mmap_ops; + err = 0; +out: + mutex_unlock(&nlk->pg_vec_lock); + return 0; +} + +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +{ +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + struct page *p_start, *p_end; + + /* First page is flushed through netlink_{get,set}_status */ + p_start = pgvec_to_page(hdr + PAGE_SIZE); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1); + while (p_start <= p_end) { + flush_dcache_page(p_start); + p_start++; + } +#endif +} + +static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) +{ + smp_rmb(); + flush_dcache_page(pgvec_to_page(hdr)); + return hdr->nm_status; +} + +static void netlink_set_status(struct 
nl_mmap_hdr *hdr, + enum nl_mmap_status status) +{ + hdr->nm_status = status; + flush_dcache_page(pgvec_to_page(hdr)); + smp_wmb(); +} + +static struct nl_mmap_hdr * +__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) +{ + unsigned int pg_vec_pos, frame_off; + + pg_vec_pos = pos / ring->frames_per_block; + frame_off = pos % ring->frames_per_block; + + return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); +} + +static struct nl_mmap_hdr * +netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, + enum nl_mmap_status status) +{ + struct nl_mmap_hdr *hdr; + + hdr = __netlink_lookup_frame(ring, pos); + if (netlink_get_status(hdr) != status) + return NULL; + + return hdr; +} + +static struct nl_mmap_hdr * +netlink_current_frame(const struct netlink_ring *ring, + enum nl_mmap_status status) +{ + return netlink_lookup_frame(ring, ring->head, status); +} + +static struct nl_mmap_hdr * +netlink_previous_frame(const struct netlink_ring *ring, + enum nl_mmap_status status) +{ + unsigned int prev; + + prev = ring->head ? ring->head - 1 : ring->frame_max; + return netlink_lookup_frame(ring, prev, status); +} + +static void netlink_increment_head(struct netlink_ring *ring) +{ + ring->head = ring->head != ring->frame_max ? 
ring->head + 1 : 0; +} + +static void netlink_forward_ring(struct netlink_ring *ring) +{ + unsigned int head = ring->head, pos = head; + const struct nl_mmap_hdr *hdr; + + do { + hdr = __netlink_lookup_frame(ring, pos); + if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) + break; + if (hdr->nm_status != NL_MMAP_STATUS_SKIP) + break; + netlink_increment_head(ring); + } while (ring->head != head); +} + +static bool netlink_dump_space(struct netlink_sock *nlk) +{ + struct netlink_ring *ring = &nlk->rx_ring; + struct nl_mmap_hdr *hdr; + unsigned int n; + + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) + return false; + + n = ring->head + ring->frame_max / 2; + if (n > ring->frame_max) + n -= ring->frame_max; + + hdr = __netlink_lookup_frame(ring, n); + + return hdr->nm_status == NL_MMAP_STATUS_UNUSED; +} + +static unsigned int netlink_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + unsigned int mask; + int err; + + if (nlk->rx_ring.pg_vec != NULL) { + /* Memory mapped sockets don't call recvmsg(), so flow control + * for dumps is performed here. A dump is allowed to continue + * if at least half the ring is unused. 
+ */ + while (nlk->cb != NULL && netlink_dump_space(nlk)) { + err = netlink_dump(sk); + if (err < 0) { + sk->sk_err = err; + sk->sk_error_report(sk); + break; + } + } + netlink_rcv_wake(sk); + } + + mask = datagram_poll(file, sock, wait); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (nlk->rx_ring.pg_vec) { + netlink_forward_ring(&nlk->rx_ring); + if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) + mask |= POLLIN | POLLRDNORM; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + spin_lock_bh(&sk->sk_write_queue.lock); + if (nlk->tx_ring.pg_vec) { + if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) + mask |= POLLOUT | POLLWRNORM; + } + spin_unlock_bh(&sk->sk_write_queue.lock); + + return mask; +} + +static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) +{ + return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); +} + +static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, + struct netlink_ring *ring, + struct nl_mmap_hdr *hdr) +{ + unsigned int size; + void *data; + + size = ring->frame_size - NL_MMAP_HDRLEN; + data = (void *)hdr + NL_MMAP_HDRLEN; + + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->len = 0; + + skb->destructor = netlink_skb_destructor; + NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; + NETLINK_CB(skb).sk = sk; +} + +static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, + u32 dst_portid, u32 dst_group, + struct sock_iocb *siocb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring; + struct nl_mmap_hdr *hdr; + struct sk_buff *skb; + unsigned int maxlen; + bool excl = true; + int err = 0, len = 0; + + /* Netlink messages are validated by the receiver before processing. + * In order to avoid userspace changing the contents of the message + * after validation, the socket and the ring may only be used by a + * single process, otherwise we fall back to copying. 
+ */ + if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || + atomic_read(&nlk->mapped) > 1) + excl = false; + + mutex_lock(&nlk->pg_vec_lock); + + ring = &nlk->tx_ring; + maxlen = ring->frame_size - NL_MMAP_HDRLEN; + + do { + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); + if (hdr == NULL) { + if (!(msg->msg_flags & MSG_DONTWAIT) && + atomic_read(&nlk->tx_ring.pending)) + schedule(); + continue; + } + if (hdr->nm_len > maxlen) { + err = -EINVAL; + goto out; + } + + netlink_frame_flush_dcache(hdr); + + if (likely(dst_portid == 0 && dst_group == 0 && excl)) { + skb = alloc_skb_head(GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; + } + sock_hold(sk); + netlink_ring_setup_skb(skb, sk, ring, hdr); + NETLINK_CB(skb).flags |= NETLINK_SKB_TX; + __skb_put(skb, hdr->nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); + atomic_inc(&ring->pending); + } else { + skb = alloc_skb(hdr->nm_len, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; + } + __skb_put(skb, hdr->nm_len); + memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + } + + netlink_increment_head(ring); + + NETLINK_CB(skb).portid = nlk->portid; + NETLINK_CB(skb).dst_group = dst_group; + NETLINK_CB(skb).creds = siocb->scm->creds; + + err = security_netlink_send(sk, skb); + if (err) { + kfree_skb(skb); + goto out; + } + + if (unlikely(dst_group)) { + atomic_inc(&skb->users); + netlink_broadcast(sk, skb, dst_portid, dst_group, + GFP_KERNEL); + } + err = netlink_unicast(sk, skb, dst_portid, + msg->msg_flags & MSG_DONTWAIT); + if (err < 0) + goto out; + len += err; + + } while (hdr != NULL || + (!(msg->msg_flags & MSG_DONTWAIT) && + atomic_read(&nlk->tx_ring.pending))); + + if (len > 0) + err = len; +out: + mutex_unlock(&nlk->pg_vec_lock); + return err; +} + +static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) +{ + struct nl_mmap_hdr *hdr; + + hdr = netlink_mmap_hdr(skb); + 
hdr->nm_len = skb->len; + hdr->nm_group = NETLINK_CB(skb).dst_group; + hdr->nm_pid = NETLINK_CB(skb).creds.pid; + hdr->nm_uid = NETLINK_CB(skb).creds.uid; + hdr->nm_gid = NETLINK_CB(skb).creds.gid; + netlink_frame_flush_dcache(hdr); + netlink_set_status(hdr, NL_MMAP_STATUS_VALID); + + NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; + kfree_skb(skb); +} + +static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct netlink_ring *ring = &nlk->rx_ring; + struct nl_mmap_hdr *hdr; + + spin_lock_bh(&sk->sk_receive_queue.lock); + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) { + spin_unlock_bh(&sk->sk_receive_queue.lock); + kfree_skb(skb); + netlink_overrun(sk); + return; + } + netlink_increment_head(ring); + __skb_queue_tail(&sk->sk_receive_queue, skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); + + hdr->nm_len = skb->len; + hdr->nm_group = NETLINK_CB(skb).dst_group; + hdr->nm_pid = NETLINK_CB(skb).creds.pid; + hdr->nm_uid = NETLINK_CB(skb).creds.uid; + hdr->nm_gid = NETLINK_CB(skb).creds.gid; + netlink_set_status(hdr, NL_MMAP_STATUS_COPY); +} + +#else /* CONFIG_NETLINK_MMAP */ +#define netlink_skb_is_mmaped(skb) false +#define netlink_rx_is_mmaped(sk) false +#define netlink_tx_is_mmaped(sk) false +#define netlink_mmap sock_no_mmap +#define netlink_poll datagram_poll +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +#endif /* CONFIG_NETLINK_MMAP */ + static void netlink_destroy_callback(struct netlink_callback *cb) { kfree_skb(cb->skb); @@ -164,6 +716,53 @@ static void netlink_consume_callback(struct netlink_callback *cb) kfree(cb); } +static void netlink_skb_destructor(struct sk_buff *skb) +{ +#ifdef CONFIG_NETLINK_MMAP + struct nl_mmap_hdr *hdr; + struct netlink_ring *ring; + struct sock *sk; + + /* If a packet from the kernel to userspace was freed because of an + * error without being delivered to userspace, the kernel must reset + * the status. 
In the direction userspace to kernel, the status is + * always reset here after the packet was processed and freed. + */ + if (netlink_skb_is_mmaped(skb)) { + hdr = netlink_mmap_hdr(skb); + sk = NETLINK_CB(skb).sk; + + if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + ring = &nlk_sk(sk)->tx_ring; + } else { + if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { + hdr->nm_len = 0; + netlink_set_status(hdr, NL_MMAP_STATUS_VALID); + } + ring = &nlk_sk(sk)->rx_ring; + } + + WARN_ON(atomic_read(&ring->pending) == 0); + atomic_dec(&ring->pending); + sock_put(sk); + + skb->data = NULL; + } +#endif + if (skb->sk != NULL) + sock_rfree(skb); +} + +static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ + WARN_ON(skb->sk != NULL); + skb->sk = sk; + skb->destructor = netlink_skb_destructor; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_charge(sk, skb->truesize); +} + static void netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -177,6 +776,18 @@ static void netlink_sock_destruct(struct sock *sk) } skb_queue_purge(&sk->sk_receive_queue); +#ifdef CONFIG_NETLINK_MMAP + if (1) { + struct nl_mmap_req req; + + memset(&req, 0, sizeof(req)); + if (nlk->rx_ring.pg_vec) + netlink_set_ring(sk, &req, true, false); + memset(&req, 0, sizeof(req)); + if (nlk->tx_ring.pg_vec) + netlink_set_ring(sk, &req, true, true); + } +#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -440,6 +1051,9 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); +#ifdef CONFIG_NETLINK_MMAP + mutex_init(&nlk->pg_vec_lock); +#endif sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; @@ -771,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, return 0; } -static void netlink_overrun(struct sock *sk) -{ - 
struct netlink_sock *nlk = nlk_sk(sk); - - if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { - if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); - } - } - atomic_inc(&sk->sk_drops); -} - static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; @@ -836,8 +1437,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(0, &nlk->state)) { + if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || + test_bit(NETLINK_CONGESTED, &nlk->state)) && + !netlink_skb_is_mmaped(skb)) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -851,7 +1453,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, add_wait_queue(&nlk->wait, &wait); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(0, &nlk->state)) && + test_bit(NETLINK_CONGESTED, &nlk->state)) && !sock_flag(sk, SOCK_DEAD)) *timeo = schedule_timeout(*timeo); @@ -865,7 +1467,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, } return 1; } - skb_set_owner_r(skb, sk); + netlink_skb_set_owner_r(skb, sk); return 0; } @@ -873,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); +#ifdef CONFIG_NETLINK_MMAP + if (netlink_skb_is_mmaped(skb)) + netlink_queue_mmaped_skb(sk, skb); + else if (netlink_rx_is_mmaped(sk)) + netlink_ring_set_copied(sk, skb); + else +#endif /* CONFIG_NETLINK_MMAP */ + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); return len; } @@ -896,7 +1505,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) { int delta; - skb_orphan(skb); + WARN_ON(skb->sk != NULL); + if (netlink_skb_is_mmaped(skb)) + return skb; delta = skb->end - skb->tail; if (delta * 2 < skb->truesize) @@ -916,16 +1527,6 @@ static struct sk_buff 
*netlink_trim(struct sk_buff *skb, gfp_t allocation) return skb; } -static void netlink_rcv_wake(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(0, &nlk->state); - if (!test_bit(0, &nlk->state)) - wake_up_interruptible(&nlk->wait); -} - static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, struct sock *ssk) { @@ -935,8 +1536,8 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; - skb_set_owner_r(skb, sk); - NETLINK_CB(skb).ssk = ssk; + netlink_skb_set_owner_r(skb, sk); + NETLINK_CB(skb).sk = ssk; nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -982,6 +1583,69 @@ retry: } EXPORT_SYMBOL(netlink_unicast); +struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) +{ +#ifdef CONFIG_NETLINK_MMAP + struct sock *sk = NULL; + struct sk_buff *skb; + struct netlink_ring *ring; + struct nl_mmap_hdr *hdr; + unsigned int maxlen; + + sk = netlink_getsockbyportid(ssk, dst_portid); + if (IS_ERR(sk)) + goto out; + + ring = &nlk_sk(sk)->rx_ring; + /* fast-path without atomic ops for common case: non-mmaped receiver */ + if (ring->pg_vec == NULL) + goto out_put; + + skb = alloc_skb_head(gfp_mask); + if (skb == NULL) + goto err1; + + spin_lock_bh(&sk->sk_receive_queue.lock); + /* check again under lock */ + if (ring->pg_vec == NULL) + goto out_free; + + maxlen = ring->frame_size - NL_MMAP_HDRLEN; + if (maxlen < size) + goto out_free; + + netlink_forward_ring(ring); + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); + if (hdr == NULL) + goto err2; + netlink_ring_setup_skb(skb, sk, ring, hdr); + netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); + atomic_inc(&ring->pending); + netlink_increment_head(ring); + + spin_unlock_bh(&sk->sk_receive_queue.lock); + return skb; + +err2: + kfree_skb(skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); + 
netlink_overrun(sk); +err1: + sock_put(sk); + return NULL; + +out_free: + kfree_skb(skb); + spin_unlock_bh(&sk->sk_receive_queue.lock); +out_put: + sock_put(sk); +out: +#endif + return alloc_skb(size, gfp_mask); +} +EXPORT_SYMBOL_GPL(netlink_alloc_skb); + int netlink_has_listeners(struct sock *sk, unsigned int group) { int res = 0; @@ -1006,8 +1670,8 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_sock *nlk = nlk_sk(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !test_bit(0, &nlk->state)) { - skb_set_owner_r(skb, sk); + !test_bit(NETLINK_CONGESTED, &nlk->state)) { + netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); } @@ -1242,7 +1906,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optlen >= sizeof(int) && + if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && + optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -1284,13 +1949,32 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, case NETLINK_NO_ENOBUFS: if (val) { nlk->flags |= NETLINK_RECV_NO_ENOBUFS; - clear_bit(0, &nlk->state); + clear_bit(NETLINK_CONGESTED, &nlk->state); wake_up_interruptible(&nlk->wait); } else { nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; } err = 0; break; +#ifdef CONFIG_NETLINK_MMAP + case NETLINK_RX_RING: + case NETLINK_TX_RING: { + struct nl_mmap_req req; + + /* Rings might consume more memory than queue limits, require + * CAP_NET_ADMIN. 
+ */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optlen < sizeof(req)) + return -EINVAL; + if (copy_from_user(&req, optval, sizeof(req))) + return -EFAULT; + err = netlink_set_ring(sk, &req, false, + optname == NETLINK_TX_RING); + break; + } +#endif /* CONFIG_NETLINK_MMAP */ default: err = -ENOPROTOOPT; } @@ -1401,6 +2085,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } + if (netlink_tx_is_mmaped(sk) && + msg->msg_iov->iov_base == NULL) { + err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, + siocb); + goto out; + } + err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -1695,7 +2386,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; - int size = NLMSG_LENGTH(len); + int size = nlmsg_msg_size(len); nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; @@ -1704,7 +2395,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) - memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } EXPORT_SYMBOL(__nlmsg_put); @@ -1733,9 +2424,13 @@ static int netlink_dump(struct sock *sk) alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); + if (!netlink_rx_is_mmaped(sk) && + atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + goto errout_skb; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); if (!skb) goto errout_skb; + netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); @@ -1790,6 +2485,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb == NULL) return -ENOBUFS; + /* Memory mapped dump requests need to be copied to avoid looping + * on the pending state in netlink_mmap_sendmsg() while 
the CB hold + * a reference to the skb. + */ + if (netlink_skb_is_mmaped(skb)) { + skb = skb_copy(skb, GFP_KERNEL); + if (skb == NULL) { + kfree(cb); + return -ENOBUFS; + } + } else + atomic_inc(&skb->users); + cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -1850,7 +2558,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (err) payload += nlmsg_len(nlh); - skb = nlmsg_new(payload, GFP_KERNEL); + skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), + NETLINK_CB(in_skb).portid, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -2116,7 +2825,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = datagram_poll, + .poll = netlink_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -2124,7 +2833,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = sock_no_mmap, + .mmap = netlink_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h new file mode 100644 index 000000000000..ed8522265f4e --- /dev/null +++ b/net/netlink/af_netlink.h @@ -0,0 +1,82 @@ +#ifndef _AF_NETLINK_H +#define _AF_NETLINK_H + +#include <net/sock.h> + +#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) +#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) + +struct netlink_ring { + void **pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + atomic_t pending; +}; + +struct netlink_sock { + /* struct sock has to be the first member of netlink_sock */ + struct sock sk; + u32 portid; + u32 dst_portid; + u32 dst_group; + u32 flags; + u32 subscriptions; + u32 ngroups; + unsigned long *groups; + unsigned long state; + 
wait_queue_head_t wait; + struct netlink_callback *cb; + struct mutex *cb_mutex; + struct mutex cb_def_mutex; + void (*netlink_rcv)(struct sk_buff *skb); + void (*netlink_bind)(int group); + struct module *module; +#ifdef CONFIG_NETLINK_MMAP + struct mutex pg_vec_lock; + struct netlink_ring rx_ring; + struct netlink_ring tx_ring; + atomic_t mapped; +#endif /* CONFIG_NETLINK_MMAP */ +}; + +static inline struct netlink_sock *nlk_sk(struct sock *sk) +{ + return container_of(sk, struct netlink_sock, sk); +} + +struct nl_portid_hash { + struct hlist_head *table; + unsigned long rehash_time; + + unsigned int mask; + unsigned int shift; + + unsigned int entries; + unsigned int max_shift; + + u32 rnd; +}; + +struct netlink_table { + struct nl_portid_hash hash; + struct hlist_head mc_list; + struct listeners __rcu *listeners; + unsigned int flags; + unsigned int groups; + struct mutex *cb_mutex; + struct module *module; + void (*bind)(int group); + int registered; +}; + +extern struct netlink_table *nl_table; +extern rwlock_t nl_table_lock; + +#endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c new file mode 100644 index 000000000000..1af29624b92f --- /dev/null +++ b/net/netlink/diag.c @@ -0,0 +1,227 @@ +#include <linux/module.h> + +#include <net/sock.h> +#include <linux/netlink.h> +#include <linux/sock_diag.h> +#include <linux/netlink_diag.h> + +#include "af_netlink.h" + +#ifdef CONFIG_NETLINK_MMAP +static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, + struct sk_buff *nlskb) +{ + struct netlink_diag_ring ndr; + + ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; + ndr.ndr_block_nr = ring->pg_vec_len; + ndr.ndr_frame_size = ring->frame_size; + ndr.ndr_frame_nr = ring->frame_max + 1; + + return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); +} + +static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + int ret; + + mutex_lock(&nlk->pg_vec_lock); + ret = sk_diag_put_ring(&nlk->rx_ring, 
NETLINK_DIAG_RX_RING, nlskb); + if (!ret) + ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, + nlskb); + mutex_unlock(&nlk->pg_vec_lock); + + return ret; +} +#else +static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +{ + return 0; +} +#endif + +static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->groups == NULL) + return 0; + + return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups), + nlk->groups); +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_diag_req *req, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct netlink_diag_msg *rep; + struct netlink_sock *nlk = nlk_sk(sk); + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); + if (!nlh) + return -EMSGSIZE; + + rep = nlmsg_data(nlh); + rep->ndiag_family = AF_NETLINK; + rep->ndiag_type = sk->sk_type; + rep->ndiag_protocol = sk->sk_protocol; + rep->ndiag_state = sk->sk_state; + + rep->ndiag_ino = sk_ino; + rep->ndiag_portid = nlk->portid; + rep->ndiag_dst_portid = nlk->dst_portid; + rep->ndiag_dst_group = nlk->dst_group; + sock_diag_save_cookie(sk, rep->ndiag_cookie); + + if ((req->ndiag_show & NDIAG_SHOW_GROUPS) && + sk_diag_dump_groups(sk, skb)) + goto out_nlmsg_trim; + + if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && + sk_diag_put_rings_cfg(sk, skb)) + goto out_nlmsg_trim; + + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + int protocol, int s_num) +{ + struct netlink_table *tbl = &nl_table[protocol]; + struct nl_portid_hash *hash = &tbl->hash; + struct net *net = sock_net(skb->sk); + struct netlink_diag_req *req; + struct sock *sk; + int ret = 
0, num = 0, i; + + req = nlmsg_data(cb->nlh); + + for (i = 0; i <= hash->mask; i++) { + sk_for_each(sk, &hash->table[i]) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + + num++; + } + } + + sk_for_each_bound(sk, &tbl->mc_list) { + if (sk_hashed(sk)) + continue; + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + num++; + } +done: + cb->args[0] = num; + cb->args[1] = protocol; + + return ret; +} + +static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct netlink_diag_req *req; + int s_num = cb->args[0]; + + req = nlmsg_data(cb->nlh); + + read_lock(&nl_table_lock); + + if (req->sdiag_protocol == NDIAG_PROTO_ALL) { + int i; + + for (i = cb->args[1]; i < MAX_LINKS; i++) { + if (__netlink_diag_dump(skb, cb, i, s_num)) + break; + s_num = 0; + } + } else { + if (req->sdiag_protocol >= MAX_LINKS) { + read_unlock(&nl_table_lock); + return -ENOENT; + } + + __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); + } + + read_unlock(&nl_table_lock); + + return skb->len; +} + +static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct netlink_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = netlink_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler netlink_diag_handler = { + .family = AF_NETLINK, + .dump = netlink_diag_handler_dump, +}; + +static int __init 
netlink_diag_init(void) +{ + return sock_diag_register(&netlink_diag_handler); +} + +static void __exit netlink_diag_exit(void) +{ + sock_diag_unregister(&netlink_diag_handler); +} + +module_init(netlink_diag_init); +module_exit(netlink_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f2aabb6f4105..5a55be3f17a5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -142,6 +142,7 @@ int genl_register_mc_group(struct genl_family *family, int err = 0; BUG_ON(grp->name[0] == '\0'); + BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); genl_lock(); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index d1fa1d9ffd2e..103bd704b5fc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1173,6 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { + memset(sax, 0, sizeof(*sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index fd01ac6e0bf4..d6faa47c9bba 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -555,7 +555,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -796,6 +797,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && @@ -841,6 +844,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); + memset(sockaddr, 0, sizeof(*sockaddr)); sockaddr->sa_family = AF_NFC; sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; sockaddr->dsap = ui_cb->dsap; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ac2defeeba83..894b6cbdd929 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -58,7 +58,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *current_tci = vhdr->h_vlan_TCI; @@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb) if (unlikely(err)) return err; - __vlan_hwaccel_put_tag(skb, ntohs(tci)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci)); return 0; } @@ -110,15 +110,15 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla /* push down current VLAN tag */ current_tag = vlan_tx_tag_get(skb); - if (!__vlan_put_tag(skb, current_tag)) + if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); } - __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); + __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); return 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 
e87a26506dba..d2f9f2e57298 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -44,6 +44,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/inetdevice.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/openvswitch.h> #include <linux/rculist.h> #include <linux/dmi.h> @@ -56,38 +57,59 @@ #include "flow.h" #include "vport-internal_dev.h" -/** - * struct ovs_net - Per net-namespace data for ovs. - * @dps: List of datapaths to enable dumping them all out. - * Protected by genl_mutex. - */ -struct ovs_net { - struct list_head dps; -}; - -static int ovs_net_id __read_mostly; #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) static void rehash_flow_table(struct work_struct *work); static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); +int ovs_net_id __read_mostly; + +static void ovs_notify(struct sk_buff *skb, struct genl_info *info, + struct genl_multicast_group *grp) +{ + genl_notify(skb, genl_info_net(info), info->snd_portid, + grp->id, info->nlhdr, GFP_KERNEL); +} + /** * DOC: Locking: * - * Writes to device state (add/remove datapath, port, set operations on vports, - * etc.) are protected by RTNL. - * - * Writes to other state (flow table modifications, set miscellaneous datapath - * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside - * genl_mutex. + * All writes e.g. Writes to device state (add/remove datapath, port, set + * operations on vports, etc.), Writes to other state (flow table + * modifications, set miscellaneous datapath parameters, etc.) are protected + * by ovs_lock. * * Reads are protected by RCU. * * There are a few special cases (mostly stats) that have their own * synchronization but they nest under all of above and don't interact with * each other. + * + * The RTNL lock nests inside ovs_mutex. 
*/ +static DEFINE_MUTEX(ovs_mutex); + +void ovs_lock(void) +{ + mutex_lock(&ovs_mutex); +} + +void ovs_unlock(void) +{ + mutex_unlock(&ovs_mutex); +} + +#ifdef CONFIG_LOCKDEP +int lockdep_ovsl_is_held(void) +{ + if (debug_locks) + return lockdep_is_held(&ovs_mutex); + else + return 1; +} +#endif + static struct vport *new_vport(const struct vport_parms *); static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); @@ -95,7 +117,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); -/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ +/* Must be called with rcu_read_lock or ovs_mutex. */ static struct datapath *get_dp(struct net *net, int dp_ifindex) { struct datapath *dp = NULL; @@ -113,10 +135,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) return dp; } -/* Must be called with rcu_read_lock or RTNL lock. */ +/* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { - struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); + struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); } @@ -168,7 +190,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) return NULL; } -/* Called with RTNL lock and genl_lock. */ +/* Called with ovs_mutex. */ static struct vport *new_vport(const struct vport_parms *parms) { struct vport *vport; @@ -180,14 +202,12 @@ static struct vport *new_vport(const struct vport_parms *parms) hlist_add_head_rcu(&vport->dp_hash_node, head); } - return vport; } -/* Called with RTNL lock. */ void ovs_dp_detach_port(struct vport *p) { - ASSERT_RTNL(); + ASSERT_OVSL(); /* First drop references to device. 
*/ hlist_del_rcu(&p->dp_hash_node); @@ -337,6 +357,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, return err; } +static size_t key_attr_size(void) +{ + return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */ + + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ + + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ + + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ + + nla_total_size(28); /* OVS_KEY_ATTR_ND */ +} + +static size_t upcall_msg_size(const struct sk_buff *skb, + const struct nlattr *userdata) +{ + size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ + + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ + + /* OVS_PACKET_ATTR_USERDATA */ + if (userdata) + size += NLA_ALIGN(userdata->nla_len); + + return size; +} + static int queue_userspace_packet(struct net *net, int dp_ifindex, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) @@ -345,7 +394,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, struct sk_buff *nskb = NULL; struct sk_buff *user_skb; /* to be queued to userspace */ struct nlattr *nla; - unsigned int len; int err; if (vlan_tx_tag_present(skb)) { @@ -353,7 +401,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, if (!nskb) return -ENOMEM; - nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); + nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb)); if (!nskb) return -ENOMEM; @@ -366,13 +414,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, goto out; } - len = sizeof(struct ovs_header); - len += nla_total_size(skb->len); - len += nla_total_size(FLOW_BUFSIZE); - if (upcall_info->cmd == 
OVS_PACKET_CMD_ACTION) - len += nla_total_size(8); - - user_skb = genlmsg_new(len, GFP_ATOMIC); + user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; @@ -387,13 +429,15 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_nest_end(user_skb, nla); if (upcall_info->userdata) - nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, - nla_get_u64(upcall_info->userdata)); + __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, + nla_len(upcall_info->userdata), + nla_data(upcall_info->userdata)); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); skb_copy_and_csum_dev(skb, nla_data(nla)); + genlmsg_end(user_skb, upcall); err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: @@ -401,13 +445,13 @@ out: return err; } -/* Called with genl_mutex. */ +/* Called with ovs_mutex. */ static int flush_flows(struct datapath *dp) { struct flow_table *old_table; struct flow_table *new_table; - old_table = genl_dereference(dp->table); + old_table = ovsl_dereference(dp->table); new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); if (!new_table) return -ENOMEM; @@ -543,7 +587,7 @@ static int validate_userspace(const struct nlattr *attr) { static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; @@ -660,8 +704,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || - !a[OVS_PACKET_ATTR_ACTIONS] || - nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) + !a[OVS_PACKET_ATTR_ACTIONS]) goto err; len = nla_len(a[OVS_PACKET_ATTR_PACKET]); @@ -671,7 +714,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err; skb_reserve(packet, 
NET_IP_ALIGN); - memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); + nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); skb_reset_mac_header(packet); eth = eth_hdr(packet); @@ -679,7 +722,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. */ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); @@ -742,7 +785,7 @@ err: } static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { - [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, + [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, }; @@ -758,7 +801,7 @@ static struct genl_ops dp_packet_genl_ops[] = { static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) { int i; - struct flow_table *table = genl_dereference(dp->table); + struct flow_table *table = ovsl_dereference(dp->table); stats->n_flows = ovs_flow_tbl_count(table); @@ -800,7 +843,17 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; -/* Called with genl_lock. */ +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +{ + return NLMSG_ALIGN(sizeof(struct ovs_header)) + + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ + + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ + + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ +} + +/* Called with ovs_mutex. 
*/ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) @@ -814,8 +867,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, u8 tcp_flags; int err; - sf_acts = rcu_dereference_protected(flow->sf_acts, - lockdep_genl_is_held()); + sf_acts = ovsl_dereference(flow->sf_acts); ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) @@ -878,25 +930,10 @@ error: static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) { const struct sw_flow_actions *sf_acts; - int len; - - sf_acts = rcu_dereference_protected(flow->sf_acts, - lockdep_genl_is_held()); - /* OVS_FLOW_ATTR_KEY */ - len = nla_total_size(FLOW_BUFSIZE); - /* OVS_FLOW_ATTR_ACTIONS */ - len += nla_total_size(sf_acts->actions_len); - /* OVS_FLOW_ATTR_STATS */ - len += nla_total_size(sizeof(struct ovs_flow_stats)); - /* OVS_FLOW_ATTR_TCP_FLAGS */ - len += nla_total_size(1); - /* OVS_FLOW_ATTR_USED */ - len += nla_total_size(8); + sf_acts = ovsl_dereference(flow->sf_acts); - len += NLMSG_ALIGN(sizeof(struct ovs_header)); - - return genlmsg_new(len, GFP_KERNEL); + return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); } static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, @@ -945,12 +982,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); error = -ENODEV; if (!dp) - goto error; + goto err_unlock_ovs; - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); if (!flow) { struct sw_flow_actions *acts; @@ -958,7 +996,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Bail out if we're not allowed to create a new flow. 
*/ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) - goto error; + goto err_unlock_ovs; /* Expand table, if necessary, to make room. */ if (ovs_flow_tbl_need_to_expand(table)) { @@ -968,7 +1006,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!IS_ERR(new_table)) { rcu_assign_pointer(dp->table, new_table); ovs_flow_tbl_deferred_destroy(table); - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); } } @@ -976,7 +1014,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow = ovs_flow_alloc(); if (IS_ERR(flow)) { error = PTR_ERR(flow); - goto error; + goto err_unlock_ovs; } flow->key = key; clear_stats(flow); @@ -1009,11 +1047,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) - goto error; + goto err_unlock_ovs; /* Update actions. 
*/ - old_acts = rcu_dereference_protected(flow->sf_acts, - lockdep_genl_is_held()); + old_acts = ovsl_dereference(flow->sf_acts); acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; if (acts_attrs && (old_acts->actions_len != nla_len(acts_attrs) || @@ -1024,7 +1061,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) new_acts = ovs_flow_actions_alloc(acts_attrs); error = PTR_ERR(new_acts); if (IS_ERR(new_acts)) - goto error; + goto err_unlock_ovs; rcu_assign_pointer(flow->sf_acts, new_acts); ovs_flow_deferred_free_acts(old_acts); @@ -1040,11 +1077,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&flow->lock); } } + ovs_unlock(); if (!IS_ERR(reply)) - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_flow_multicast_group.id, info->nlhdr, - GFP_KERNEL); + ovs_notify(reply, info, &ovs_dp_flow_multicast_group); else netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); @@ -1052,6 +1088,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) error_free_flow: ovs_flow_free(flow); +err_unlock_ovs: + ovs_unlock(); error: return error; } @@ -1074,21 +1112,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (err) return err; + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) - return -ENODEV; + if (!dp) { + err = -ENODEV; + goto unlock; + } - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); - if (!flow) - return -ENOENT; + if (!flow) { + err = -ENOENT; + goto unlock; + } reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); - if (IS_ERR(reply)) - return PTR_ERR(reply); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + goto unlock; + } + ovs_unlock(); return genlmsg_reply(reply, info); +unlock: + ovs_unlock(); + return err; } static int 
ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -1103,25 +1152,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) int err; int key_len; + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) - return -ENODEV; - - if (!a[OVS_FLOW_ATTR_KEY]) - return flush_flows(dp); + if (!dp) { + err = -ENODEV; + goto unlock; + } + if (!a[OVS_FLOW_ATTR_KEY]) { + err = flush_flows(dp); + goto unlock; + } err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); if (err) - return err; + goto unlock; - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); - if (!flow) - return -ENOENT; + if (!flow) { + err = -ENOENT; + goto unlock; + } reply = ovs_flow_cmd_alloc_info(flow); - if (!reply) - return -ENOMEM; + if (!reply) { + err = -ENOMEM; + goto unlock; + } ovs_flow_tbl_remove(table, flow); @@ -1130,10 +1187,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) BUG_ON(err < 0); ovs_flow_deferred_free(flow); + ovs_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); + ovs_notify(reply, info, &ovs_dp_flow_multicast_group); return 0; +unlock: + ovs_unlock(); + return err; } static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -1142,11 +1202,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) struct datapath *dp; struct flow_table *table; + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) + if (!dp) { + ovs_unlock(); return -ENODEV; + } - table = genl_dereference(dp->table); + table = ovsl_dereference(dp->table); for (;;) { struct sw_flow *flow; @@ -1167,6 +1230,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = bucket; cb->args[1] = obj; } + ovs_unlock(); return skb->len; } @@ -1212,6 +1276,16 @@ static 
struct genl_multicast_group ovs_dp_datapath_multicast_group = { .name = OVS_DATAPATH_MCGROUP }; +static size_t ovs_dp_cmd_msg_size(void) +{ + size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); + + msgsize += nla_total_size(IFNAMSIZ); + msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); + + return msgsize; +} + static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1250,7 +1324,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, struct sk_buff *skb; int retval; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -1262,7 +1336,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, return skb; } -/* Called with genl_mutex and optionally with RTNL lock also. */ +/* Called with ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1296,12 +1370,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - rtnl_lock(); + ovs_lock(); err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_unlock_rtnl; + goto err_unlock_ovs; ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1352,37 +1426,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail(&dp->list_node, &ovs_net->dps); - rtnl_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_datapath_multicast_group.id, info->nlhdr, - GFP_KERNEL); + ovs_unlock(); + + ovs_notify(reply, info, &ovs_dp_datapath_multicast_group); return 0; err_destroy_local_port: - ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); + ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); err_destroy_ports_array: 
kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(genl_dereference(dp->table)); + ovs_flow_tbl_destroy(ovsl_dereference(dp->table)); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); -err_unlock_rtnl: - rtnl_unlock(); +err_unlock_ovs: + ovs_unlock(); err: return err; } -/* Called with genl_mutex. */ +/* Called with ovs_mutex. */ static void __dp_destroy(struct datapath *dp) { int i; - rtnl_lock(); - for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; struct hlist_node *n; @@ -1393,14 +1464,11 @@ static void __dp_destroy(struct datapath *dp) } list_del(&dp->list_node); - ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); - /* rtnl_unlock() will wait until all the references to devices that - * are pending unregistration have been dropped. We do it here to - * ensure that any internal devices (which contain DP pointers) are - * fully destroyed before freeing the datapath. + /* OVSP_LOCAL is datapath internal port. We need to make sure that + * all port in datapath are destroyed first before freeing datapath. 
*/ - rtnl_unlock(); + ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); call_rcu(&dp->rcu, destroy_dp_rcu); } @@ -1411,24 +1479,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - return err; + goto unlock; reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) - return err; + goto unlock; __dp_destroy(dp); + ovs_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_datapath_multicast_group.id, info->nlhdr, - GFP_KERNEL); + ovs_notify(reply, info, &ovs_dp_datapath_multicast_group); return 0; +unlock: + ovs_unlock(); + return err; } static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) @@ -1437,9 +1508,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + err = PTR_ERR(dp); if (IS_ERR(dp)) - return PTR_ERR(dp); + goto unlock; reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); @@ -1447,31 +1520,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) err = PTR_ERR(reply); netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_datapath_multicast_group.id, err); - return 0; + err = 0; + goto unlock; } - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_datapath_multicast_group.id, info->nlhdr, - GFP_KERNEL); + ovs_unlock(); + ovs_notify(reply, info, &ovs_dp_datapath_multicast_group); return 0; +unlock: + ovs_unlock(); + return err; } static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *reply; struct datapath *dp; + int err; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); - if (IS_ERR(dp)) - return PTR_ERR(dp); 
+ if (IS_ERR(dp)) { + err = PTR_ERR(dp); + goto unlock; + } reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) - return PTR_ERR(reply); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + goto unlock; + } + ovs_unlock(); return genlmsg_reply(reply, info); + +unlock: + ovs_unlock(); + return err; } static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -1481,6 +1568,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int skip = cb->args[0]; int i = 0; + ovs_lock(); list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, @@ -1489,6 +1577,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; i++; } + ovs_unlock(); cb->args[0] = i; @@ -1541,7 +1630,7 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = { .name = OVS_VPORT_MCGROUP }; -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1580,7 +1669,7 @@ error: return err; } -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { @@ -1592,14 +1681,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, return ERR_PTR(-ENOMEM); retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } + BUG_ON(retval < 0); + return skb; } -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. 
*/ static struct vport *lookup_vport(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) @@ -1625,9 +1712,9 @@ static struct vport *lookup_vport(struct net *net, if (!dp) return ERR_PTR(-ENODEV); - vport = ovs_vport_rtnl_rcu(dp, port_no); + vport = ovs_vport_ovsl_rcu(dp, port_no); if (!vport) - return ERR_PTR(-ENOENT); + return ERR_PTR(-ENODEV); return vport; } else return ERR_PTR(-EINVAL); @@ -1649,7 +1736,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) !a[OVS_VPORT_ATTR_UPCALL_PID]) goto exit; - rtnl_lock(); + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) @@ -1662,7 +1749,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (port_no >= DP_MAX_PORTS) goto exit_unlock; - vport = ovs_vport_rtnl_rcu(dp, port_no); + vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) goto exit_unlock; @@ -1672,7 +1759,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -EFBIG; goto exit_unlock; } - vport = ovs_vport_rtnl(dp, port_no); + vport = ovs_vport_ovsl(dp, port_no); if (!vport) break; } @@ -1690,6 +1777,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; + err = 0; reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { @@ -1697,11 +1785,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_detach_port(vport); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + + ovs_notify(reply, info, &ovs_dp_vport_multicast_group); exit_unlock: - rtnl_unlock(); + ovs_unlock(); exit: return err; } @@ -1713,7 +1801,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; - rtnl_lock(); + ovs_lock(); vport = 
lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) @@ -1724,26 +1812,35 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) err = -EINVAL; + reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!reply) { + err = -ENOMEM; + goto exit_unlock; + } + if (!err && a[OVS_VPORT_ATTR_OPTIONS]) err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_unlock; + goto exit_free; + if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + ovs_unlock(); + ovs_notify(reply, info, &ovs_dp_vport_multicast_group); + return 0; -exit_unlock: rtnl_unlock(); + return 0; + +exit_free: + kfree_skb(reply); +exit_unlock: + ovs_unlock(); return err; } @@ -1754,7 +1851,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; - rtnl_lock(); + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) @@ -1771,13 +1868,13 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) goto exit_unlock; + err = 0; ovs_dp_detach_port(vport); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + ovs_notify(reply, info, &ovs_dp_vport_multicast_group); exit_unlock: - rtnl_unlock(); + ovs_unlock(); return err; } @@ -1937,13 
+2034,13 @@ static void rehash_flow_table(struct work_struct *work) struct datapath *dp; struct net *net; - genl_lock(); + ovs_lock(); rtnl_lock(); for_each_net(net) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); list_for_each_entry(dp, &ovs_net->dps, list_node) { - struct flow_table *old_table = genl_dereference(dp->table); + struct flow_table *old_table = ovsl_dereference(dp->table); struct flow_table *new_table; new_table = ovs_flow_tbl_rehash(old_table); @@ -1954,8 +2051,7 @@ static void rehash_flow_table(struct work_struct *work) } } rtnl_unlock(); - genl_unlock(); - + ovs_unlock(); schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); } @@ -1964,18 +2060,21 @@ static int __net_init ovs_init_net(struct net *net) struct ovs_net *ovs_net = net_generic(net, ovs_net_id); INIT_LIST_HEAD(&ovs_net->dps); + INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); return 0; } static void __net_exit ovs_exit_net(struct net *net) { - struct ovs_net *ovs_net = net_generic(net, ovs_net_id); struct datapath *dp, *dp_next; + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); - genl_lock(); + ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); - genl_unlock(); + ovs_unlock(); + + cancel_work_sync(&ovs_net->dp_notify_work); } static struct pernet_operations ovs_net_ops = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 031dfbf37c93..16b840695216 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -57,10 +57,9 @@ struct dp_stats_percpu { * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. - * @n_flows: Number of flows currently in flow table. - * @table: Current flow table. Protected by genl_lock and RCU. + * @table: Current flow table. Protected by ovs_mutex and RCU. * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by - * RTNL and RCU. 
+ * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. * @@ -86,26 +85,6 @@ struct datapath { #endif }; -struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); - -static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return ovs_lookup_vport(dp, port_no); -} - -static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no) -{ - WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked()); - return ovs_lookup_vport(dp, port_no); -} - -static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no) -{ - ASSERT_RTNL(); - return ovs_lookup_vport(dp, port_no); -} - /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. @@ -119,7 +98,7 @@ struct ovs_skb_cb { * struct dp_upcall - metadata to include with a packet to send to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. - * @userdata: If nonnull, its u64 value is extracted and passed to userspace as + * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no * packet is sent and the packet is accounted in the datapath's @n_lost @@ -132,6 +111,30 @@ struct dp_upcall_info { u32 portid; }; +/** + * struct ovs_net - Per net-namespace data for ovs. + * @dps: List of datapaths to enable dumping them all out. + * Protected by genl_mutex. 
+ */ +struct ovs_net { + struct list_head dps; + struct work_struct dp_notify_work; +}; + +extern int ovs_net_id; +void ovs_lock(void); +void ovs_unlock(void); + +#ifdef CONFIG_LOCKDEP +int lockdep_ovsl_is_held(void); +#else +#define lockdep_ovsl_is_held() 1 +#endif + +#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held())) +#define ovsl_dereference(p) \ + rcu_dereference_protected(p, lockdep_ovsl_is_held()) + static inline struct net *ovs_dp_get_net(struct datapath *dp) { return read_pnet(&dp->net); @@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net) write_pnet(&dp->net, net); } +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); + +static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); + return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no) +{ + ASSERT_OVSL(); + return ovs_lookup_vport(dp, port_no); +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_multicast_group ovs_dp_vport_multicast_group; @@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); +void ovs_dp_notify_wq(struct work_struct *work); #endif /* datapath.h */ diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 5558350e0d33..ef4feec6cd84 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -18,46 +18,78 @@ #include <linux/netdevice.h> #include <net/genetlink.h> +#include <net/netns/generic.h> #include "datapath.h" #include "vport-internal_dev.h" #include "vport-netdev.h" +static void 
dp_detach_port_notify(struct vport *vport) +{ + struct sk_buff *notify; + struct datapath *dp; + + dp = vport->dp; + notify = ovs_vport_cmd_build_info(vport, 0, 0, + OVS_VPORT_CMD_DEL); + ovs_dp_detach_port(vport); + if (IS_ERR(notify)) { + netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, + ovs_dp_vport_multicast_group.id, + PTR_ERR(notify)); + return; + } + + genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, + ovs_dp_vport_multicast_group.id, + GFP_KERNEL); +} + +void ovs_dp_notify_wq(struct work_struct *work) +{ + struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work); + struct datapath *dp; + + ovs_lock(); + list_for_each_entry(dp, &ovs_net->dps, list_node) { + int i; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + struct hlist_node *n; + + hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { + struct netdev_vport *netdev_vport; + + if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) + continue; + + netdev_vport = netdev_vport_priv(vport); + if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED || + netdev_vport->dev->reg_state == NETREG_UNREGISTERING) + dp_detach_port_notify(vport); + } + } + } + ovs_unlock(); +} + static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct ovs_net *ovs_net; struct net_device *dev = ptr; - struct vport *vport; + struct vport *vport = NULL; - if (ovs_is_internal_dev(dev)) - vport = ovs_internal_dev_get_vport(dev); - else + if (!ovs_is_internal_dev(dev)) vport = ovs_netdev_get_vport(dev); if (!vport) return NOTIFY_DONE; - switch (event) { - case NETDEV_UNREGISTER: - if (!ovs_is_internal_dev(dev)) { - struct sk_buff *notify; - struct datapath *dp = vport->dp; - - notify = ovs_vport_cmd_build_info(vport, 0, 0, - OVS_VPORT_CMD_DEL); - ovs_dp_detach_port(vport); - if (IS_ERR(notify)) { - netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, - PTR_ERR(notify)); - break; - } - - 
genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, - ovs_dp_vport_multicast_group.id, - GFP_KERNEL); - } - break; + if (event == NETDEV_UNREGISTER) { + ovs_net = net_generic(dev_net(dev), ovs_net_id); + queue_work(system_wq, &ovs_net->dp_notify_work); } return NOTIFY_DONE; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 20605ecf100b..b15321a2228c 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) return ERR_PTR(-ENOMEM); sfa->actions_len = actions_len; - memcpy(sfa->actions, nla_data(actions), actions_len); + nla_memcpy(sfa->actions, actions, actions_len); return sfa; } @@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= 1536) + if (ntohs(proto) >= ETH_P_802_3_MIN) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -482,7 +482,11 @@ static __be16 parse_ethertype(struct sk_buff *skb) return htons(ETH_P_802_2); __skb_pull(skb, sizeof(struct llc_snap_hdr)); - return llc->ethertype; + + if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) + return llc->ethertype; + + return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, @@ -791,9 +795,9 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { + BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[table->node_ver]); table->count--; - BUG_ON(table->count < 0); } /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ @@ -1034,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (ntohs(swkey->eth.type) < 1536) + if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) return -EINVAL; attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); } else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a7bb60ff3b5b..0875fde65b9c 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, void ovs_flow_used(struct sw_flow *, struct sk_buff *); u64 ovs_flow_used_time(unsigned long flow_jiffies); -/* Upper bound on the length of a nlattr-formatted flow key. The longest - * nlattr-formatted flow key would be: - * - * struct pad nl hdr total - * ------ --- ------ ----- - * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 - * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 - * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 - * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 - * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) - * OVS_KEY_ATTR_8021Q 4 -- 4 8 - * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation) - * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype) - * OVS_KEY_ATTR_IPV6 40 -- 4 44 - * OVS_KEY_ATTR_ICMPV6 2 2 4 8 - * OVS_KEY_ATTR_ND 28 -- 4 32 - * ------------------------------------------------- - * total 152 - */ -#define FLOW_BUFSIZE 152 - int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 0531de6c7a4a..73682de8dc69 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -63,16 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde return stats; } -static int internal_dev_mac_addr(struct net_device *dev, void *p) -{ - struct 
sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - return 0; -} - /* Called with rcu_read_lock_bh. */ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -126,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, - .ndo_set_mac_address = internal_dev_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, .ndo_get_stats64 = internal_dev_get_stats, }; @@ -138,6 +128,7 @@ static void do_setup(struct net_device *netdev) netdev->netdev_ops = &internal_dev_netdev_ops; netdev->priv_flags &= ~IFF_TX_SKB_SHARING; + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); netdev->tx_queue_len = 0; @@ -146,7 +137,7 @@ static void do_setup(struct net_device *netdev) NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; netdev->vlan_features = netdev->features; - netdev->features |= NETIF_F_HW_VLAN_TX; + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features = netdev->features & ~NETIF_F_LLTX; eth_hw_addr_random(netdev); } @@ -182,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) if (vport->port_no == OVSP_LOCAL) netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; + rtnl_lock(); err = register_netdevice(netdev_vport->dev); if (err) goto error_free_netdev; dev_set_promiscuity(netdev_vport->dev, 1); + rtnl_unlock(); netif_start_queue(netdev_vport->dev); return vport; error_free_netdev: + rtnl_unlock(); free_netdev(netdev_vport->dev); error_free_vport: ovs_vport_free(vport); @@ -204,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport) struct netdev_vport *netdev_vport = netdev_vport_priv(vport); netif_stop_queue(netdev_vport->dev); + rtnl_lock(); 
dev_set_promiscuity(netdev_vport->dev, -1); /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(netdev_vport->dev); + + rtnl_unlock(); } static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 670cbc3518de..40a89ae8e19f 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -43,8 +43,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) /* Make our own copy of the packet. Otherwise we will mangle the * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). - * (No one comes after us, since we tell handle_bridge() that we took - * the packet.) */ + */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return; @@ -101,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms) goto error_put; } + rtnl_lock(); err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, vport); if (err) - goto error_put; + goto error_unlock; dev_set_promiscuity(netdev_vport->dev, 1); netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; + rtnl_unlock(); return vport; +error_unlock: + rtnl_unlock(); error_put: dev_put(netdev_vport->dev); error_free_vport: @@ -132,9 +135,11 @@ static void netdev_destroy(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + rtnl_lock(); netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(netdev_vport->dev); dev_set_promiscuity(netdev_vport->dev, -1); + rtnl_unlock(); call_rcu(&netdev_vport->rcu, free_port_rcu); } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index ba717cc038b3..720623190eaa 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_internal_vport_ops, }; -/* Protected by RCU read lock for reading, RTNL lock for writing. 
*/ +/* Protected by RCU read lock for reading, ovs_mutex for writing. */ static struct hlist_head *dev_table; #define VPORT_HASH_BUCKETS 1024 @@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name) * * @name: name of port to find * - * Must be called with RTNL or RCU read lock. + * Must be called with ovs or RCU read lock. */ struct vport *ovs_vport_locate(struct net *net, const char *name) { @@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); - vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); + vport->percpu_stats = alloc_percpu(struct pcpu_tstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); @@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport) * @parms: Information about new vport. * * Creates a new vport with the specified configuration (which is dependent on - * device type). RTNL lock must be held. + * device type). ovs_mutex must be held. */ struct vport *ovs_vport_add(const struct vport_parms *parms) { @@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms) int err = 0; int i; - ASSERT_RTNL(); - for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { if (vport_ops_list[i]->type == parms->type) { struct hlist_head *bucket; @@ -201,12 +199,10 @@ out: * @port: New configuration. * * Modifies an existing device with the specified configuration (which is - * dependent on device type). RTNL lock must be held. + * dependent on device type). ovs_mutex must be held. */ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) { - ASSERT_RTNL(); - if (!vport->ops->set_options) return -EOPNOTSUPP; return vport->ops->set_options(vport, options); @@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) * @vport: vport to delete. * * Detaches @vport from its datapath and destroys it. 
It is possible to fail - * for reasons such as lack of memory. RTNL lock must be held. + * for reasons such as lack of memory. ovs_mutex must be held. */ void ovs_vport_del(struct vport *vport) { - ASSERT_RTNL(); + ASSERT_OVSL(); hlist_del_rcu(&vport->hash_node); @@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport) * * Retrieves transmit, receive, and error stats for the given device. * - * Must be called with RTNL lock or rcu_read_lock. + * Must be called with ovs_mutex or rcu_read_lock. */ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { @@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) spin_unlock_bh(&vport->stats_lock); for_each_possible_cpu(i) { - const struct vport_percpu_stats *percpu_stats; - struct vport_percpu_stats local_stats; + const struct pcpu_tstats *percpu_stats; + struct pcpu_tstats local_stats; unsigned int start; percpu_stats = per_cpu_ptr(vport->percpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&percpu_stats->sync); + start = u64_stats_fetch_begin_bh(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); + } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start)); stats->rx_bytes += local_stats.rx_bytes; stats->rx_packets += local_stats.rx_packets; @@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) * negative error code if a real error occurred. If an error occurs, @skb is * left unmodified. * - * Must be called with RTNL lock or rcu_read_lock. + * Must be called with ovs_mutex or rcu_read_lock. 
*/ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) { struct nlattr *nla; + int err; + + if (!vport->ops->get_options) + return 0; nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); if (!nla) return -EMSGSIZE; - if (vport->ops->get_options) { - int err = vport->ops->get_options(vport, skb); - if (err) { - nla_nest_cancel(skb, nla); - return err; - } + err = vport->ops->get_options(vport, skb); + if (err) { + nla_nest_cancel(skb, nla); + return err; } nla_nest_end(skb, nla); @@ -325,18 +323,17 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * @skb: skb that was received * * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. + * skb->data should point to the Ethernet header. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { - struct vport_percpu_stats *stats; + struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); - u64_stats_update_begin(&stats->sync); + u64_stats_update_begin(&stats->syncp); stats->rx_packets++; stats->rx_bytes += skb->len; - u64_stats_update_end(&stats->sync); + u64_stats_update_end(&stats->syncp); ovs_dp_process_received_packet(vport, skb); } @@ -347,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) * @vport: vport on which to send the packet * @skb: skb to send * - * Sends the given packet and returns the length of data sent. Either RTNL + * Sends the given packet and returns the length of data sent. Either ovs * lock or rcu_read_lock must be held. 
*/ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) @@ -355,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) int sent = vport->ops->send(vport, skb); if (likely(sent)) { - struct vport_percpu_stats *stats; + struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); - u64_stats_update_begin(&stats->sync); + u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += sent; - u64_stats_update_end(&stats->sync); + u64_stats_update_end(&stats->syncp); } return sent; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3f7961ea3c56..7ba08c30b853 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -19,6 +19,7 @@ #ifndef VPORT_H #define VPORT_H 1 +#include <linux/if_tunnel.h> #include <linux/list.h> #include <linux/netlink.h> #include <linux/openvswitch.h> @@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *); /* The following definitions are for implementers of vport devices: */ -struct vport_percpu_stats { - u64 rx_bytes; - u64 rx_packets; - u64 tx_bytes; - u64 tx_packets; - struct u64_stats_sync sync; -}; - struct vport_err_stats { u64 rx_dropped; u64 rx_errors; @@ -68,10 +61,10 @@ struct vport_err_stats { /** * struct vport - one port within a datapath * @rcu: RCU callback head for deferred destruction. - * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. + * @port_no: Index into @dp's @ports array. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. 
@@ -81,15 +74,15 @@ struct vport_err_stats { */ struct vport { struct rcu_head rcu; - u16 port_no; struct datapath *dp; u32 upcall_portid; + u16 port_no; struct hlist_node hash_node; struct hlist_node dp_hash_node; const struct vport_ops *ops; - struct vport_percpu_stats __percpu *percpu_stats; + struct pcpu_tstats __percpu *percpu_stats; spinlock_t stats_lock; struct vport_err_stats err_stats; @@ -138,14 +131,14 @@ struct vport_parms { struct vport_ops { enum ovs_vport_type type; - /* Called with RTNL lock. */ + /* Called with ovs_mutex. */ struct vport *(*create)(const struct vport_parms *); void (*destroy)(struct vport *); int (*set_options)(struct vport *, struct nlattr *); int (*get_options)(const struct vport *, struct sk_buff *); - /* Called with rcu_read_lock or RTNL lock. */ + /* Called with rcu_read_lock or ovs_mutex. */ const char *(*get_name)(const struct vport *); void (*get_config)(const struct vport *, void *); int (*get_ifindex)(const struct vport *); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1d6793dbfbae..7e387ff64465 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -158,10 +158,16 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; +union tpacket_uhdr { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + struct tpacket3_hdr *h3; + void *raw; +}; + static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); - #define V3_ALIGNMENT (8) #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) @@ -181,6 +187,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, @@ -288,11 +296,7 @@ static inline __pure struct 
page *pgv_to_page(void *addr) static void __packet_set_status(struct packet_sock *po, void *frame, int status) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; h.raw = frame; switch (po->tp_version) { @@ -315,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) static int __packet_get_status(struct packet_sock *po, void *frame) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; smp_rmb(); @@ -345,11 +345,7 @@ static void *packet_lookup_frame(struct packet_sock *po, int status) { unsigned int pg_vec_pos, frame_offset; - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; @@ -973,11 +969,11 @@ static void *packet_current_rx_frame(struct packet_sock *po, static void *prb_lookup_block(struct packet_sock *po, struct packet_ring_buffer *rb, - unsigned int previous, + unsigned int idx, int status) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); - struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; @@ -1041,6 +1037,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? 
buff->head+1 : 0; } +static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ + struct sock *sk = &po->sk; + bool has_room; + + if (po->prot_hook.func != tpacket_rcv) + return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) + <= sk->sk_rcvbuf; + + spin_lock(&sk->sk_receive_queue.lock); + if (po->tp_version == TPACKET_V3) + has_room = prb_lookup_block(po, &po->rx_ring, + po->rx_ring.prb_bdqc.kactive_blk_num, + TP_STATUS_KERNEL); + else + has_room = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, + TP_STATUS_KERNEL); + spin_unlock(&sk->sk_receive_queue.lock); + + return has_room; +} + static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); @@ -1066,16 +1085,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num) return x; } -static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_hash(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { - u32 idx, hash = skb->rxhash; - - idx = ((u64)hash * num) >> 32; - - return f->arr[idx]; + return (((u64)skb->rxhash) * num) >> 32; } -static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_lb(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { int cur, old; @@ -1083,14 +1102,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb while ((old = atomic_cmpxchg(&f->rr_cur, cur, fanout_rr_next(f, num))) != cur) cur = old; - return f->arr[cur]; + return cur; +} + +static unsigned int fanout_demux_cpu(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return smp_processor_id() % num; } -static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_rollover(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int idx, unsigned int 
skip, + unsigned int num) { - unsigned int cpu = smp_processor_id(); + unsigned int i, j; - return f->arr[cpu % num]; + i = j = min_t(int, f->next[idx], num - 1); + do { + if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { + if (i != j) + f->next[idx] = i; + return i; + } + if (++i == num) + i = 0; + } while (i != j); + + return idx; +} + +static bool fanout_has_flag(struct packet_fanout *f, u16 flag) +{ + return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, @@ -1099,7 +1144,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_fanout *f = pt->af_packet_priv; unsigned int num = f->num_members; struct packet_sock *po; - struct sock *sk; + unsigned int idx; if (!net_eq(dev_net(dev), read_pnet(&f->net)) || !num) { @@ -1110,23 +1155,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, switch (f->type) { case PACKET_FANOUT_HASH: default: - if (f->defrag) { + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } skb_get_rxhash(skb); - sk = fanout_demux_hash(f, skb, num); + idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: - sk = fanout_demux_lb(f, skb, num); + idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: - sk = fanout_demux_cpu(f, skb, num); + idx = fanout_demux_cpu(f, skb, num); + break; + case PACKET_FANOUT_ROLLOVER: + idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; } - po = pkt_sk(sk); + po = pkt_sk(f->arr[idx]); + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && + unlikely(!packet_rcv_has_room(po, skb))) { + idx = fanout_demux_rollover(f, skb, idx, idx, num); + po = pkt_sk(f->arr[idx]); + } return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } @@ -1175,10 +1228,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, 
*match; u8 type = type_flags & 0xff; - u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; + u8 flags = type_flags >> 8; int err; switch (type) { + case PACKET_FANOUT_ROLLOVER: + if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) + return -EINVAL; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: @@ -1203,7 +1259,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } err = -EINVAL; - if (match && match->defrag != defrag) + if (match && match->flags != flags) goto out; if (!match) { err = -ENOMEM; @@ -1213,7 +1269,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; - match->defrag = defrag; + match->flags = flags; atomic_set(&match->rr_cur, 0); INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); @@ -1443,13 +1499,14 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_unlock; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; + skb_probe_transport_header(skb, 0); + dev_queue_xmit(skb); rcu_read_unlock(); return len; @@ -1600,27 +1657,40 @@ drop: return 0; } +static void tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, + unsigned int flags) +{ + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + + if (shhwtstamps) { + if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) + return; + if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) + return; + } + + if (ktime_to_timespec_cond(skb->tstamp, ts)) + return; + + getnstimeofday(ts); +} + static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; - union { - struct 
tpacket_hdr *h1; - struct tpacket2_hdr *h2; - struct tpacket3_hdr *h3; - void *raw; - } h; + union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_USER; unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; - struct timeval tv; struct timespec ts; - struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -1703,6 +1773,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); + tpacket_get_timestamp(skb, &ts, po->tp_tstamp); switch (po->tp_version) { case TPACKET_V1: @@ -1710,18 +1781,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - tv = ktime_to_timeval(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - tv = ktime_to_timeval(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - tv = ktime_to_timeval(skb->tstamp); - else - do_gettimeofday(&tv); - h.h1->tp_sec = tv.tv_sec; - h.h1->tp_usec = tv.tv_usec; + h.h1->tp_sec = ts.tv_sec; + h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; hdrlen = sizeof(*h.h1); break; case TPACKET_V2: @@ -1729,16 +1790,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - ts = ktime_to_timespec(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - ts = ktime_to_timespec(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - ts = ktime_to_timespec(skb->tstamp); - else - getnstimeofday(&ts); 
h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; if (vlan_tx_tag_present(skb)) { @@ -1759,16 +1810,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h3->tp_snaplen = snaplen; h.h3->tp_mac = macoff; h.h3->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - ts = ktime_to_timespec(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - ts = ktime_to_timespec(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - ts = ktime_to_timespec(skb->tstamp); - else - getnstimeofday(&ts); h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; hdrlen = sizeof(*h.h3); @@ -1846,11 +1887,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } ph; + union tpacket_uhdr ph; int to_write, offset, len, tp_len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; @@ -1880,6 +1917,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); + skb_probe_transport_header(skb, 0); if (po->tp_tx_has_off) { int off_min, off_max, off; @@ -2247,9 +2285,8 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_free; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, @@ -2289,6 +2326,8 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } + skb_probe_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -3240,7 +3279,8 @@ static int 
packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_FANOUT: val = (po->fanout ? ((u32)po->fanout->id | - ((u32)po->fanout->type << 16)) : + ((u32)po->fanout->type << 16) | + ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_TX_HAS_OFF: diff --git a/net/packet/internal.h b/net/packet/internal.h index e84cab8cb7a9..e891f025a1b9 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -77,10 +77,11 @@ struct packet_fanout { unsigned int num_members; u16 id; u8 type; - u8 defrag; + u8 flags; atomic_t rr_cur; struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; + int next[PACKET_FANOUT_MAX]; spinlock_t lock; atomic_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 0193630d3061..dc15f4300808 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U8 }, }; -static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { [RTA_OIF] = { .type = NLA_U32 }, }; -static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/rds/stats.c b/net/rds/stats.c index 7be790d60b90..73be187d389e 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -87,6 +87,7 @@ void rds_stats_info_copy(struct rds_info_iterator *iter, for (i = 0; i < nr; i++) { BUG_ON(strlen(names[i]) >= sizeof(ctr.name)); strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); + ctr.name[sizeof(ctr.name) - 1] = '\0'; ctr.value = values[i]; rds_info_copy(iter, &ctr, sizeof(ctr)); diff 
--git a/net/rose/af_rose.c b/net/rose/af_rose.c index cf68e6e4054a..9c8347451597 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (srose != NULL) { + memset(srose, 0, msg->msg_namelen); srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8579c4bb20c9..fd7072827a40 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -982,7 +982,7 @@ done: return ret; } -static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 08fa1e8a4ca4..3a4c0caa1f7d 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct icmp6hdr *icmp6h; + const struct ipv6hdr *ip6h; icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); if (icmp6h == NULL) return 0; + ip6h = ipv6_hdr(skb); icmp6h->icmp6_cksum = 0; skb->csum = csum_partial(icmp6h, ipl - ihl, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; + const struct iphdr *iph; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; + iph = ip_hdr(skb); 
tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = tcp_v4_check(ipl - ihl, @@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl) { struct tcphdr *tcph; + const struct ipv6hdr *ip6h; tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; + ip6h = ipv6_hdr(skb); tcph->check = 0; skb->csum = csum_partial(tcph, ipl - ihl, 0); tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, +static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; + const struct iphdr *iph; u16 ul; /* @@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, if (udph == NULL) return 0; + iph = ip_hdr(skb); ul = ntohs(udph->len); if (udplite || udph->check) { @@ -276,10 +284,11 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, unsigned int ipl, int udplite) { struct udphdr *udph; + const struct ipv6hdr *ip6h; u16 ul; /* @@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, if (udph == NULL) return 0; + ip6h = ipv6_hdr(skb); ul = ntohs(udph->len); udph->check = 0; @@ -328,7 +338,7 @@ ignore_obscure_skb: static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) { - struct iphdr *iph; + const struct iphdr *iph; int ntkoff; ntkoff = skb_network_offset(skb); @@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) break; case IPPROTO_TCP: if (update_flags & 
TCA_CSUM_UPDATE_FLAG_TCP) - if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4, ntohs(iph->tot_len))) goto fail; break; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) - if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 0)) goto fail; break; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) - if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, + if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4, ntohs(iph->tot_len), 1)) goto fail; break; @@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto fail; - ip_send_check(iph); + ip_send_check(ip_hdr(skb)); } return 1; @@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) ixhl = ipv6_optlen(ip6xh); if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) goto fail; + ip6xh = (void *)(skb_network_header(skb) + hl); if ((nexthdr == NEXTHDR_HOP) && !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) goto fail; @@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) break; case IPPROTO_ICMPV6: if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) - if (!tcf_csum_ipv6_icmp(skb, ip6h, + if (!tcf_csum_ipv6_icmp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_TCP: if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) - if (!tcf_csum_ipv6_tcp(skb, ip6h, + if (!tcf_csum_ipv6_tcp(skb, hl, pl + sizeof(*ip6h))) goto fail; goto done; case IPPROTO_UDP: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) - if (!tcf_csum_ipv6_udp(skb, ip6h, hl, + if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 0)) goto fail; goto done; case IPPROTO_UDPLITE: if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) - if (!tcf_csum_ipv6_udp(skb, ip6h, hl, + if (!tcf_csum_ipv6_udp(skb, hl, pl + sizeof(*ip6h), 1)) goto fail; goto done; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 964f5e4f4b8a..8e118af90973 100644 --- 
a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,7 +22,6 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> -#include <linux/netlink.h> #include <linux/err.h> #include <linux/slab.h> #include <net/net_namespace.h> @@ -118,7 +117,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) /* Add/change/delete/get a filter node */ -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; @@ -141,7 +140,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) return -EPERM; + replay: + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); + if (err < 0) + return err; + t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); @@ -164,10 +168,6 @@ replay: if (dev == NULL) return -ENODEV; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); - if (err < 0) - return err; - /* Find qdisc */ if (!parent) { q = dev->qdisc; @@ -427,7 +427,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index aa36a8c8b33b..7881e2fccbc2 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && - sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns) + sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns) return -EOPNOTSUPP; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 1135d8227f9b..9b97172db84a 100644 
--- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -204,7 +204,6 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (err < 0) return err; - err = -EINVAL; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); tcf_bind_filter(tp, &f->res, base); @@ -218,6 +217,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + err = -EINVAL; if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c297e2a8e2a1..2b935e7cfe7b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -971,13 +971,13 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; - u32 clid = tcm->tcm_parent; + u32 clid; struct Qdisc *q = NULL; struct Qdisc *p = NULL; int err; @@ -985,14 +985,15 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) return -EPERM; - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); if (err < 0) return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { @@ -1038,7 +1039,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) * Create/change qdisc. 
*/ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm; @@ -1053,6 +1054,10 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) replay: /* Reinit, just in case something touches this. */ + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + if (err < 0) + return err; + tcm = nlmsg_data(n); clid = tcm->tcm_parent; q = p = NULL; @@ -1061,9 +1066,6 @@ replay: if (!dev) return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); - if (err < 0) - return err; if (clid) { if (clid != TC_H_ROOT) { @@ -1372,7 +1374,7 @@ done: -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); @@ -1382,22 +1384,22 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 portid = tcm->tcm_parent; - u32 clid = tcm->tcm_handle; - u32 qid = TC_H_MAJ(clid); + u32 portid; + u32 clid; + u32 qid; int err; if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) return -EPERM; - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); if (err < 0) return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. @@ -1413,6 +1415,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. 
Determine qdisc handle X:0 */ + portid = tcm->tcm_parent; + clid = tcm->tcm_handle; + qid = TC_H_MAJ(clid); + if (portid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(portid); @@ -1636,7 +1642,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int t, s_t; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return 0; dev = dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 13aa47aa2ffb..1bc210ffcba2 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -962,8 +962,11 @@ cbq_dequeue(struct Qdisc *sch) cbq_update(q); if ((incr -= incr2) < 0) incr = 0; + q->now += incr; + } else { + if (now > q->now) + q->now = now; } - q->now += incr; q->now_rt = now; for (;;) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4e606fcb2534..55786283a3df 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ffad48109a22..eac7e0ee23c1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -904,7 +904,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) u64 mult; int shift; - r->rate_bps = rate << 3; + r->rate_bps = (u64)rate << 3; r->shift = 0; r->mult = 1; /* diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 571f1d211f4d..79b1876b6cd2 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) }, [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = 
TC_RTAB_SIZE }, + [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, }; static void htb_work_func(struct work_struct *work) @@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work) static int htb_init(struct Qdisc *sch, struct nlattr *opt) { struct htb_sched *q = qdisc_priv(sch); - struct nlattr *tb[TCA_HTB_INIT + 1]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; int err; int i; @@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); if (err < 0) return err; - if (tb[TCA_HTB_INIT] == NULL) { - pr_err("HTB: hey probably you have bad tc tool ?\n"); + if (!tb[TCA_HTB_INIT]) return -EINVAL; - } + gopt = nla_data(tb[TCA_HTB_INIT]); - if (gopt->version != HTB_VER >> 16) { - pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", - HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); + if (gopt->version != HTB_VER >> 16) return -EINVAL; - } err = qdisc_class_hash_init(&q->clhash); if (err < 0) @@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) INIT_WORK(&q->work, htb_work_func); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = qdisc_dev(sch)->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ - q->direct_qlen = 2; - + if (tb[TCA_HTB_DIRECT_QLEN]) + q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); + else { + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; + if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ + q->direct_qlen = 2; + } if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt)) + if (nla_put(skb, TCA_HTB_INIT, 
sizeof(gopt), &gopt) || + nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[__TCA_HTB_MAX]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; /* extract all subattrs from opt attr */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 43cd0dd9149d..423549a714e5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -104,8 +104,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); - asoc->base.dead = 0; - asoc->base.malloced = 0; + asoc->base.dead = false; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -371,7 +370,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, if (!sctp_association_init(asoc, ep, sk, scope, gfp)) goto fail_init; - asoc->base.malloced = 1; SCTP_DBG_OBJCNT_INC(assoc); SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); @@ -409,7 +407,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Mark as dead, so other users can know this structure is * going away. */ - asoc->base.dead = 1; + asoc->base.dead = true; /* Dispose of any data lying around in the outqueue. */ sctp_outq_free(&asoc->outqueue); @@ -484,10 +482,8 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - if (asoc->base.malloced) { - kfree(asoc); - SCTP_DBG_OBJCNT_DEC(assoc); - } + kfree(asoc); + SCTP_DBG_OBJCNT_DEC(assoc); } /* Change the primary destination address for the peer. 
*/ @@ -1079,7 +1075,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc, transports) { if (transport == active) - break; + continue; list_for_each_entry(chunk, &transport->transmitted, transmitted_list) { if (key == chunk->subh.data_hdr->tsn) { diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index d886b3bf84f5..41145fe31813 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, */ void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port) { - bp->malloced = 0; - INIT_LIST_HEAD(&bp->address_list); bp->port = port; } @@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) { /* Empty the bind address list. */ sctp_bind_addr_clean(bp); - - if (bp->malloced) { - kfree(bp); - SCTP_DBG_OBJCNT_DEC(bind_addr); - } } /* Add an address to the bind address list in the SCTP_bind_addr structure. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 12ed45dbe75d..5fbd7bc6bb11 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the basic object fields. */ atomic_set(&ep->base.refcnt, 1); - ep->base.dead = 0; - ep->base.malloced = 1; + ep->base.dead = false; /* Create an input queue. */ sctp_inq_init(&ep->base.inqueue); @@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) goto fail; if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; - ep->base.malloced = 1; + SCTP_DBG_OBJCNT_INC(ep); return ep; @@ -234,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, */ void sctp_endpoint_free(struct sctp_endpoint *ep) { - ep->base.dead = 1; + ep->base.dead = true; ep->base.sk->sk_state = SCTP_SS_CLOSED; @@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (ep->base.sk) sock_put(ep->base.sk); - /* Finally, free up our memory. 
*/ - if (ep->base.malloced) { - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); - } + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); } /* Hold a reference to an endpoint. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 2d5ad280de38..3221d073448c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue) /* Create a task for delivering data. */ INIT_WORK(&queue->immediate, NULL); - - queue->malloced = 0; } /* Release the memory associated with an SCTP inqueue. */ @@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue) sctp_chunk_free(queue->in_progress); queue->in_progress = NULL; } - - if (queue->malloced) { - /* Dump the master memory segment. */ - kfree(queue); - } } /* Put a new packet in an SCTP inqueue. diff --git a/net/sctp/output.c b/net/sctp/output.c index f5200a2ad852..bbef4a7a9b56 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->overhead = overhead; sctp_packet_reset(packet); packet->vtag = 0; - packet->malloced = 0; + return packet; } @@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet) list_del_init(&chunk->list); sctp_chunk_free(chunk); } - - if (packet->malloced) - kfree(packet); } /* This routine tries to append the chunk to the offered packet. If adding diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 01dca753db16..32a4625fef77 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -217,8 +217,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; - - q->malloced = 0; q->out_qlen = 0; } @@ -295,10 +293,6 @@ void sctp_outq_free(struct sctp_outq *q) { /* Throw away leftover chunks. */ __sctp_outq_teardown(q); - - /* If we were kmalloc()'d, free the memory. */ - if (q->malloced) - kfree(q); } /* Put a new chunk in an sctp_outq. 
*/ @@ -707,11 +701,10 @@ redo: /* Cork the outqueue so queued chunks are really queued. */ int sctp_outq_uncork(struct sctp_outq *q) { - int error = 0; if (q->cork) q->cork = 0; - error = sctp_outq_flush(q, 0); - return error; + + return sctp_outq_flush(q, 0); } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ab3bba8cb0a8..4e45ee35d0db 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -295,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " "RPORT LADDRS <-> RADDRS " - "HBINT INS OUTS MAXRT T1X T2X RTXC\n"); + "HBINT INS OUTS MAXRT T1X T2X RTXC " + "wmema wmemq sndbuf rcvbuf\n"); return (void *)pos; } @@ -349,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "<-> "); sctp_seq_dump_remote_addrs(seq, assoc); - seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ", + seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " + "%8d %8d %8d %8d", assoc->hbinterval, assoc->c.sinit_max_instreams, assoc->c.sinit_num_ostreams, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, - assoc->rtx_data_chunks); + assoc->rtx_data_chunks, + atomic_read(&sk->sk_wmem_alloc), + sk->sk_wmem_queued, + sk->sk_sndbuf, + sk->sk_rcvbuf); seq_printf(seq, "\n"); } read_unlock(&head->lock); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5131fcfedb03..de1a0138317f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2082,7 +2082,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } /* Delete the tempory new association. 
*/ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); /* Restore association pointer to provide SCTP command interpeter diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b9070736b8d9..f631c5ff4dbf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk, /* Make sure the destination port is correctly set * in all addresses. */ - if (asoc && asoc->peer.port && asoc->peer.port != port) + if (asoc && asoc->peer.port && asoc->peer.port != port) { + err = -EINVAL; goto out_free; - + } /* Check if there already is a matching association on the * endpoint (other than the one created here). @@ -6185,7 +6186,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 825ea94415b3..da8603523808 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -74,7 +74,6 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, if (!sctp_ssnmap_init(retval, in, out)) goto fail_map; - retval->malloced = 1; SCTP_DBG_OBJCNT_INC(ssnmap); return retval; @@ -118,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map) /* Dispose of a ssnmap. 
*/ void sctp_ssnmap_free(struct sctp_ssnmap *map) { - if (map && map->malloced) { - int size; - - size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= KMALLOC_MAX_SIZE) - kfree(map); - else - free_pages((unsigned long)map, get_order(size)); - SCTP_DBG_OBJCNT_DEC(ssnmap); - } + int size; + + if (unlikely(!map)) + return; + + size = sctp_ssnmap_size(map->in.len, map->out.len); + if (size <= KMALLOC_MAX_SIZE) + kfree(map); + else + free_pages((unsigned long)map, get_order(size)); + + SCTP_DBG_OBJCNT_DEC(ssnmap); } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index fafd2a461ba0..098f1d5f769e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net, if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; - transport->malloced = 1; SCTP_DBG_OBJCNT_INC(transport); return transport; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 0fd5b3d2df03..04e3d470f877 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq, skb_queue_head_init(&ulpq->reasm); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; - ulpq->malloced = 0; return ulpq; } @@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) void sctp_ulpq_free(struct sctp_ulpq *ulpq) { sctp_ulpq_flush(ulpq); - if (ulpq->malloced) - kfree(ulpq); } /* Process an incoming DATA chunk. 
*/ diff --git a/net/socket.c b/net/socket.c index 88f759adf3af..280283f03ccc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,7 +600,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) @@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; - return 0; } EXPORT_SYMBOL(sock_tx_timestamp); @@ -682,16 +681,6 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); -static int ktime2ts(ktime_t kt, struct timespec *ts) -{ - if (kt.tv64) { - *ts = ktime_to_timespec(kt); - return 1; - } else { - return 0; - } -} - /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -724,17 +713,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(ts, 0, sizeof(ts)); - if (skb->tstamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { - skb_get_timestampns(skb, ts + 0); + if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec_cond(skb->tstamp, ts + 0)) empty = 0; - } if (shhwtstamps) { if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime2ts(shhwtstamps->syststamp, ts + 1)) + ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) empty = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime2ts(shhwtstamps->hwtstamp, ts + 2)) + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) empty = 0; } if (!empty) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index f7d34e7b6f81..5ead60550895 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -447,17 +447,21 @@ static int rsc_parse(struct cache_detail *cd, else { int N, i; + /* + * NOTE: we skip uid_valid()/gid_valid() checks here: + 
* instead, * -1 id's are later mapped to the + * (export-specific) anonymous id by nfsd_setuser. + * + * (But supplementary gid's get no such special + * treatment so are checked for validity here.) + */ /* uid */ rsci.cred.cr_uid = make_kuid(&init_user_ns, id); - if (!uid_valid(rsci.cred.cr_uid)) - goto out; /* gid */ if (get_int(&mesg, &id)) goto out; rsci.cred.cr_gid = make_kgid(&init_user_ns, id); - if (!gid_valid(rsci.cred.cr_gid)) - goto out; /* number of additional gid's */ if (get_int(&mesg, &N)) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dcc446e7fbf6..d5f35f15af98 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -304,10 +304,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru err = rpciod_up(); if (err) goto out_no_rpciod; - err = -EINVAL; - if (!xprt) - goto out_no_xprt; + err = -EINVAL; if (args->version >= program->nrvers) goto out_err; version = program->version[args->version]; @@ -382,10 +380,9 @@ out_no_principal: out_no_stats: kfree(clnt); out_err: - xprt_put(xprt); -out_no_xprt: rpciod_down(); out_no_rpciod: + xprt_put(xprt); return ERR_PTR(err); } @@ -512,7 +509,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new = rpc_new_client(args, xprt); if (IS_ERR(new)) { err = PTR_ERR(new); - goto out_put; + goto out_err; } atomic_inc(&clnt->cl_count); @@ -525,8 +522,6 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_chatty = clnt->cl_chatty; return new; -out_put: - xprt_put(xprt); out_err: dprintk("RPC: %s: returned error %d\n", __func__, err); return ERR_PTR(err); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7b9b40224a27..a9129f8d7070 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1174,6 +1174,8 @@ static struct file_system_type rpc_pipe_fs_type = { .mount = rpc_mount, .kill_sb = rpc_kill_sb, }; +MODULE_ALIAS_FS("rpc_pipefs"); +MODULE_ALIAS("rpc_pipefs"); static void init_once(void *foo) @@ 
-1218,6 +1220,3 @@ void unregister_rpc_pipefs(void) kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } - -/* Make 'mount -t rpc_pipefs ...' autoload this module. */ -MODULE_ALIAS("rpc_pipefs"); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25ddec9..f8529fc8e542 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c1d8476b7692..3d02130828da 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -849,6 +849,14 @@ static void xs_tcp_close(struct rpc_xprt *xprt) xs_tcp_shutdown(xprt); } +static void xs_local_destroy(struct rpc_xprt *xprt) +{ + xs_close(xprt); + xs_free_peer_addresses(xprt); + xprt_free(xprt); + module_put(THIS_MODULE); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -862,10 +870,7 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&transport->connect_worker); - xs_close(xprt); - xs_free_peer_addresses(xprt); - xprt_free(xprt); - module_put(THIS_MODULE); + xs_local_destroy(xprt); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -2482,7 +2487,7 @@ static struct rpc_xprt_ops xs_local_ops = { .send_request = 
xs_local_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, - .destroy = xs_destroy, + .destroy = xs_local_destroy, .print_stats = xs_local_print_stats, }; diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 4f99600a5fed..c890848f9d56 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -31,3 +31,10 @@ config TIPC_PORTS Setting this to a smaller value saves some memory, setting it to higher allows for more ports. + +config TIPC_MEDIA_IB + bool "InfiniBand media type support" + depends on TIPC && INFINIBAND_IPOIB + help + Saying Y here will enable support for running TIPC on + IP-over-InfiniBand devices. diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 6cd55d671d3a..4df8e02d9008 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,3 +9,5 @@ tipc-y += addr.o bcast.o bearer.o config.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ socket.o log.o eth_media.o + +tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 2655c9f4ecad..25e159c2feb4 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -620,10 +620,10 @@ static int tipc_bcbearer_send(struct sk_buff *buf, continue; /* bearer pair doesn't add anything */ if (!tipc_bearer_blocked(p)) - tipc_bearer_send(p, buf, &p->media->bcast_addr); + tipc_bearer_send(p, buf, &p->bcast_addr); else if (s && !tipc_bearer_blocked(s)) /* unable to send on primary bearer */ - tipc_bearer_send(s, buf, &s->media->bcast_addr); + tipc_bearer_send(s, buf, &s->bcast_addr); else /* unable to send on either bearer */ continue; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index aa62f93a9127..cb29ef7ba2f0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -39,7 +39,7 @@ #include "bearer.h" #include "discover.h" -#define MAX_ADDR_STR 32 +#define MAX_ADDR_STR 60 static struct tipc_media *media_list[MAX_MEDIA]; static u32 media_count; @@ -89,9 +89,6 @@ int tipc_register_media(struct 
tipc_media *m_ptr) if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) goto exit; - if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) || - !m_ptr->bcast_addr.broadcast) - goto exit; if (m_ptr->priority > TIPC_MAX_LINK_PRI) goto exit; if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || @@ -407,7 +404,7 @@ restart: INIT_LIST_HEAD(&b_ptr->links); spin_lock_init(&b_ptr->lock); - res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain); + res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain); if (res) { bearer_disable(b_ptr); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 39f1192d04bf..09c869adcfcf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -56,6 +56,7 @@ * Identifiers of supported TIPC media types */ #define TIPC_MEDIA_TYPE_ETH 1 +#define TIPC_MEDIA_TYPE_IB 2 /** * struct tipc_media_addr - destination address used by TIPC bearers @@ -77,7 +78,6 @@ struct tipc_bearer; * @enable_bearer: routine which enables a bearer * @disable_bearer: routine which disables a bearer * @addr2str: routine which converts media address to string - * @str2addr: routine which converts media address from string * @addr2msg: routine which converts media address to protocol message area * @msg2addr: routine which converts media address from protocol message area * @bcast_addr: media address used in broadcasting @@ -94,10 +94,9 @@ struct tipc_media { int (*enable_bearer)(struct tipc_bearer *b_ptr); void (*disable_bearer)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); - int (*str2addr)(struct tipc_media_addr *a, char *str_buf); int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); - int (*msg2addr)(struct tipc_media_addr *a, char *msg_area); - struct tipc_media_addr bcast_addr; + int (*msg2addr)(const struct tipc_bearer *b_ptr, + struct tipc_media_addr *a, char *msg_area); u32 priority; u32 tolerance; u32 window; @@ -136,6 +135,7 @@ 
struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; spinlock_t lock; struct tipc_media *media; + struct tipc_media_addr bcast_addr; u32 priority; u32 window; u32 tolerance; @@ -175,6 +175,14 @@ int tipc_disable_bearer(const char *name); int tipc_eth_media_start(void); void tipc_eth_media_stop(void); +#ifdef CONFIG_TIPC_MEDIA_IB +int tipc_ib_media_start(void); +void tipc_ib_media_stop(void); +#else +static inline int tipc_ib_media_start(void) { return 0; } +static inline void tipc_ib_media_stop(void) { return; } +#endif + int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); diff --git a/net/tipc/core.c b/net/tipc/core.c index fc05cecd7481..7ec2c1eb94f1 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -82,6 +82,7 @@ static void tipc_core_stop_net(void) { tipc_net_stop(); tipc_eth_media_stop(); + tipc_ib_media_stop(); } /** @@ -93,8 +94,15 @@ int tipc_core_start_net(unsigned long addr) tipc_net_start(addr); res = tipc_eth_media_start(); - if (res) - tipc_core_stop_net(); + if (res < 0) + goto err; + res = tipc_ib_media_start(); + if (res < 0) + goto err; + return res; + +err: + tipc_core_stop_net(); return res; } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 1074b9587e81..eedff58d0387 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -129,7 +129,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) int link_fully_up; media_addr.broadcast = 1; - b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg)); + b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg)); kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 2132c1ef2951..120a676a3360 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -77,12 +77,13 @@ static struct notifier_block notifier = { * 
Media-dependent "value" field stores MAC address in first 6 bytes * and zeroes out the remaining bytes. */ -static void eth_media_addr_set(struct tipc_media_addr *a, char *mac) +static void eth_media_addr_set(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *mac) { memcpy(a->value, mac, ETH_ALEN); memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); a->media_id = TIPC_MEDIA_TYPE_ETH; - a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN); + a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN); } /** @@ -110,6 +111,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, skb_reset_network_header(clone); clone->dev = dev; + clone->protocol = htons(ETH_P_TIPC); dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, dev->dev_addr, clone->len); dev_queue_xmit(clone); @@ -201,9 +203,13 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Associate TIPC bearer with Ethernet bearer */ eb_ptr->bearer = tb_ptr; tb_ptr->usr_handle = (void *)eb_ptr; + memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); + memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN); + tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH; + tb_ptr->bcast_addr.broadcast = 1; tb_ptr->mtu = dev->mtu; tb_ptr->blocked = 0; - eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr); + eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); return 0; } @@ -302,25 +308,6 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) } /** - * eth_str2addr - convert string to Ethernet address - */ -static int eth_str2addr(struct tipc_media_addr *a, char *str_buf) -{ - char mac[ETH_ALEN]; - int r; - - r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x", - (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2], - (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]); - - if (r != ETH_ALEN) - return 1; - - eth_media_addr_set(a, mac); - return 0; -} - -/** * eth_str2addr - convert Ethernet address 
format to message header format */ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) @@ -334,12 +321,13 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) /** * eth_str2addr - convert message header address format to Ethernet format */ -static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area) +static int eth_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) { if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) return 1; - eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET); + eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); return 0; } @@ -351,11 +339,8 @@ static struct tipc_media eth_media_info = { .enable_bearer = enable_bearer, .disable_bearer = disable_bearer, .addr2str = eth_addr2str, - .str2addr = eth_str2addr, .addr2msg = eth_addr2msg, .msg2addr = eth_msg2addr, - .bcast_addr = { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - TIPC_MEDIA_TYPE_ETH, 1 }, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c new file mode 100644 index 000000000000..2a2864c25e15 --- /dev/null +++ b/net/tipc/ib_media.c @@ -0,0 +1,387 @@ +/* + * net/tipc/ib_media.c: Infiniband bearer support for TIPC + * + * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> + * + * Based on eth_media.c, which carries the following copyright notice: + * + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2008, 2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/if_infiniband.h> +#include "core.h" +#include "bearer.h" + +#define MAX_IB_BEARERS MAX_BEARERS + +/** + * struct ib_bearer - Infiniband bearer data structure + * @bearer: ptr to associated "generic" bearer structure + * @dev: ptr to associated Infiniband network device + * @tipc_packet_type: used in binding TIPC to Infiniband driver + * @cleanup: work item used when disabling bearer + */ + +struct ib_bearer { + struct tipc_bearer *bearer; + struct net_device *dev; + struct packet_type tipc_packet_type; + struct work_struct setup; + struct work_struct cleanup; +}; + +static struct tipc_media ib_media_info; +static struct ib_bearer ib_bearers[MAX_IB_BEARERS]; +static int ib_started; + +/** + * ib_media_addr_set - initialize Infiniband media address structure + * + * Media-dependent "value" field stores MAC address in first 6 bytes + * and zeroes out the remaining bytes. + */ +static void ib_media_addr_set(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *mac) +{ + BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN); + memcpy(a->value, mac, INFINIBAND_ALEN); + a->media_id = TIPC_MEDIA_TYPE_IB; + a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN); +} + +/** + * send_msg - send a TIPC message out over an InfiniBand interface + */ +static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, + struct tipc_media_addr *dest) +{ + struct sk_buff *clone; + struct net_device *dev; + int delta; + + clone = skb_clone(buf, GFP_ATOMIC); + if (!clone) + return 0; + + dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev; + delta = dev->hard_header_len - skb_headroom(buf); + + if ((delta > 0) && + pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(clone); + return 0; + } + + skb_reset_network_header(clone); + clone->dev = dev; + clone->protocol = htons(ETH_P_TIPC); + dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, clone->len); + dev_queue_xmit(clone); + return 0; +} + 
+/** + * recv_msg - handle incoming TIPC message from an InfiniBand interface + * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using InfiniBand multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). + */ +static int recv_msg(struct sk_buff *buf, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv; + + if (!net_eq(dev_net(dev), &init_net)) { + kfree_skb(buf); + return 0; + } + + if (likely(ib_ptr->bearer)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { + buf->next = NULL; + tipc_recv_msg(buf, ib_ptr->bearer); + return 0; + } + } + kfree_skb(buf); + return 0; +} + +/** + * setup_bearer - setup association between InfiniBand bearer and interface + */ +static void setup_bearer(struct work_struct *work) +{ + struct ib_bearer *ib_ptr = + container_of(work, struct ib_bearer, setup); + + dev_add_pack(&ib_ptr->tipc_packet_type); +} + +/** + * enable_bearer - attach TIPC bearer to an InfiniBand interface + */ +static int enable_bearer(struct tipc_bearer *tb_ptr) +{ + struct net_device *dev = NULL; + struct net_device *pdev = NULL; + struct ib_bearer *ib_ptr = &ib_bearers[0]; + struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; + char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; + int pending_dev = 0; + + /* Find unused InfiniBand bearer structure */ + while (ib_ptr->dev) { + if (!ib_ptr->bearer) + pending_dev++; + if (++ib_ptr == stop) + return pending_dev ? 
-EAGAIN : -EDQUOT; + } + + /* Find device with specified name */ + read_lock(&dev_base_lock); + for_each_netdev(&init_net, pdev) { + if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { + dev = pdev; + dev_hold(dev); + break; + } + } + read_unlock(&dev_base_lock); + if (!dev) + return -ENODEV; + + /* Create InfiniBand bearer for device */ + ib_ptr->dev = dev; + ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); + ib_ptr->tipc_packet_type.dev = dev; + ib_ptr->tipc_packet_type.func = recv_msg; + ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; + INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list)); + INIT_WORK(&ib_ptr->setup, setup_bearer); + schedule_work(&ib_ptr->setup); + + /* Associate TIPC bearer with InfiniBand bearer */ + ib_ptr->bearer = tb_ptr; + tb_ptr->usr_handle = (void *)ib_ptr; + memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); + memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN); + tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB; + tb_ptr->bcast_addr.broadcast = 1; + tb_ptr->mtu = dev->mtu; + tb_ptr->blocked = 0; + ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); + return 0; +} + +/** + * cleanup_bearer - break association between InfiniBand bearer and interface + * + * This routine must be invoked from a work queue because it can sleep. + */ +static void cleanup_bearer(struct work_struct *work) +{ + struct ib_bearer *ib_ptr = + container_of(work, struct ib_bearer, cleanup); + + dev_remove_pack(&ib_ptr->tipc_packet_type); + dev_put(ib_ptr->dev); + ib_ptr->dev = NULL; +} + +/** + * disable_bearer - detach TIPC bearer from an InfiniBand interface + * + * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away, + * then get worker thread to complete bearer cleanup. (Can't do cleanup + * here because cleanup code needs to sleep and caller holds spinlocks.) 
+ */ +static void disable_bearer(struct tipc_bearer *tb_ptr) +{ + struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle; + + ib_ptr->bearer = NULL; + INIT_WORK(&ib_ptr->cleanup, cleanup_bearer); + schedule_work(&ib_ptr->cleanup); +} + +/** + * recv_notification - handle device updates from OS + * + * Change the state of the InfiniBand bearer (if any) associated with the + * specified device. + */ +static int recv_notification(struct notifier_block *nb, unsigned long evt, + void *dv) +{ + struct net_device *dev = (struct net_device *)dv; + struct ib_bearer *ib_ptr = &ib_bearers[0]; + struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + while ((ib_ptr->dev != dev)) { + if (++ib_ptr == stop) + return NOTIFY_DONE; /* couldn't find device */ + } + if (!ib_ptr->bearer) + return NOTIFY_DONE; /* bearer had been disabled */ + + ib_ptr->bearer->mtu = dev->mtu; + + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev)) + tipc_continue(ib_ptr->bearer); + else + tipc_block_bearer(ib_ptr->bearer->name); + break; + case NETDEV_UP: + tipc_continue(ib_ptr->bearer); + break; + case NETDEV_DOWN: + tipc_block_bearer(ib_ptr->bearer->name); + break; + case NETDEV_CHANGEMTU: + case NETDEV_CHANGEADDR: + tipc_block_bearer(ib_ptr->bearer->name); + tipc_continue(ib_ptr->bearer); + break; + case NETDEV_UNREGISTER: + case NETDEV_CHANGENAME: + tipc_disable_bearer(ib_ptr->bearer->name); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block notifier = { + .notifier_call = recv_notification, + .priority = 0, +}; + +/** + * ib_addr2str - convert InfiniBand address to string + */ +static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +{ + if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ + return 1; + + sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" + "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + a->value[0], a->value[1], 
a->value[2], a->value[3], + a->value[4], a->value[5], a->value[6], a->value[7], + a->value[8], a->value[9], a->value[10], a->value[11], + a->value[12], a->value[13], a->value[14], a->value[15], + a->value[16], a->value[17], a->value[18], a->value[19]); + + return 0; +} + +/** + * ib_addr2msg - convert InfiniBand address format to message header format + */ +static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +{ + memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); + msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; + memcpy(msg_area, a->value, INFINIBAND_ALEN); + return 0; +} + +/** + * ib_msg2addr - convert message header address format to InfiniBand format + */ +static int ib_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) +{ + ib_media_addr_set(tb_ptr, a, msg_area); + return 0; +} + +/* + * InfiniBand media registration info + */ +static struct tipc_media ib_media_info = { + .send_msg = send_msg, + .enable_bearer = enable_bearer, + .disable_bearer = disable_bearer, + .addr2str = ib_addr2str, + .addr2msg = ib_addr2msg, + .msg2addr = ib_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_IB, + .name = "ib" +}; + +/** + * tipc_ib_media_start - activate InfiniBand bearer support + * + * Register InfiniBand media type with TIPC bearer code. Also register + * with OS for notifications about device state changes. 
+ */ +int tipc_ib_media_start(void) +{ + int res; + + if (ib_started) + return -EINVAL; + + res = tipc_register_media(&ib_media_info); + if (res) + return res; + + res = register_netdevice_notifier(&notifier); + if (!res) + ib_started = 1; + return res; +} + +/** + * tipc_ib_media_stop - deactivate InfiniBand bearer support + */ +void tipc_ib_media_stop(void) +{ + if (!ib_started) + return; + + flush_scheduled_work(); + unregister_netdevice_notifier(&notifier); + ib_started = 0; +} diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6675914dc592..8bcd4985d0fb 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -44,7 +44,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *rep_nlh; struct nlmsghdr *req_nlh = info->nlhdr; struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); + int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) @@ -53,8 +53,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) cmd = req_userhdr->cmd; rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), + nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, + nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), hdr_space); if (rep_buf) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a9622b6cd916..515ce38e4f4c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -790,6 +790,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) if (addr) { addr->family = AF_TIPC; addr->addrtype = TIPC_ADDR_ID; + memset(&addr->addr, 0, sizeof(addr->addr)); addr->addr.id.ref = msg_origport(msg); addr->addr.id.node = msg_orignode(msg); addr->addr.name.domain = 0; /* could leave uninitialized */ @@ -904,6 +905,9 @@ static int recv_msg(struct kiocb *iocb, struct socket
*sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1013,6 +1017,9 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51be64f163ec..9efe01113c5c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -699,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) @@ -1341,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb) struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; - scm.cred = UNIXCB(skb).cred; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); @@ -1392,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); - if (scm->cred) - UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).uid = scm->creds.uid; + UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1410,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen static void maybe_add_creds(struct 
sk_buff *skb, const struct socket *sock, const struct sock *other) { - if (UNIXCB(skb).cred) + if (UNIXCB(skb).pid) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); - UNIXCB(skb).cred = get_current_cred(); + current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -1820,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1992,11 +1990,12 @@ again: if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) + !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || + !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) break; - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2197,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ca511c4f388a..7f93e2a42d7a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -207,7 +207,7 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) struct vsock_sock *vsk; list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) - if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + if (addr->svm_port == vsk->local_addr.svm_port) return sk_vsock(vsk); return NULL; @@ -220,8 +220,8 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, list_for_each_entry(vsk, vsock_connected_sockets(src, dst), connected_table) { - if (vsock_addr_equals_addr(src, &vsk->remote_addr) - && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) && + dst->svm_port == vsk->local_addr.svm_port) { return sk_vsock(vsk); } } @@ -1670,6 +1670,8 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index a70ace83a153..daff75200e25 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -123,6 +123,14 @@ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) return err > 0 ? 
-err : err; } +static u32 vmci_transport_peer_rid(u32 peer_cid) +{ + if (VMADDR_CID_HYPERVISOR == peer_cid) + return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID; + + return VMCI_TRANSPORT_PACKET_RID; +} + static inline void vmci_transport_packet_init(struct vmci_transport_packet *pkt, struct sockaddr_vm *src, @@ -140,7 +148,7 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, VMCI_TRANSPORT_PACKET_RID); pkt->dg.dst = vmci_make_handle(dst->svm_cid, - VMCI_TRANSPORT_PACKET_RID); + vmci_transport_peer_rid(dst->svm_cid)); pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); pkt->version = VMCI_TRANSPORT_PACKET_VERSION; pkt->type = type; @@ -464,19 +472,16 @@ static struct sock *vmci_transport_get_pending( struct vsock_sock *vlistener; struct vsock_sock *vpending; struct sock *pending; + struct sockaddr_vm src; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); vlistener = vsock_sk(listener); list_for_each_entry(vpending, &vlistener->pending_links, pending_links) { - struct sockaddr_vm src; - struct sockaddr_vm dst; - - vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); - vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pkt->dst_port == vpending->local_addr.svm_port) { pending = sk_vsock(vpending); sock_hold(pending); goto found; @@ -511,6 +516,9 @@ static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) { + if (VMADDR_CID_HYPERVISOR == peer_cid) + return true; + if (vsock->cached_peer != peer_cid) { vsock->cached_peer = peer_cid; if (!vmci_transport_is_trusted(vsock, peer_cid) && @@ -631,7 +639,6 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) static bool vmci_transport_stream_allow(u32 cid, u32 port) { static const u32 
non_socket_contexts[] = { - VMADDR_CID_HYPERVISOR, VMADDR_CID_RESERVED, }; int i; @@ -670,7 +677,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) */ if (!vmci_transport_stream_allow(dg->src.context, -1) - || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + || vmci_transport_peer_rid(dg->src.context) != dg->src.resource) return VMCI_ERROR_NO_ACCESS; if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) @@ -739,10 +746,15 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) */ bh_lock_sock(sk); - if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) - vmci_trans(vsk)->notify_ops->handle_notify_pkt( - sk, pkt, true, &dst, &src, - &bh_process_pkt); + if (!sock_owned_by_user(sk)) { + /* The local context ID may be out of date, update it. */ + vsk->local_addr.svm_cid = dst.svm_cid; + + if (sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + } bh_unlock_sock(sk); @@ -902,6 +914,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) lock_sock(sk); + /* The local context ID may be out of date. */ + vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; + switch (sk->sk_state) { case SS_LISTEN: vmci_transport_recv_listen(sk, pkt); @@ -958,6 +973,10 @@ static int vmci_transport_recv_listen(struct sock *sk, pending = vmci_transport_get_pending(sk, pkt); if (pending) { lock_sock(pending); + + /* The local context ID may be out of date. */ + vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; + switch (pending->sk_state) { case SS_CONNECTING: err = vmci_transport_recv_connecting_server(sk, @@ -1727,6 +1746,8 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; + msg->msg_namelen = 0; + /* Retrieve the head sk_buff from the socket's receive queue. 
*/ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); @@ -1759,7 +1780,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (err) goto out; - msg->msg_namelen = 0; if (msg->msg_name) { struct sockaddr_vm *vm_addr; diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index 1bf991803ec0..fd88ea8924e4 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -28,6 +28,9 @@ /* The resource ID on which control packets are sent. */ #define VMCI_TRANSPORT_PACKET_RID 1 +/* The resource ID on which control packets are sent to the hypervisor. */ +#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID 15 + #define VSOCK_PROTO_INVALID 0 #define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) #define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c index b7df1aea7c59..ec2611b4ea0e 100644 --- a/net/vmw_vsock/vsock_addr.c +++ b/net/vmw_vsock/vsock_addr.c @@ -64,16 +64,6 @@ bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, } EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm *other) -{ - return (addr->svm_cid == VMADDR_CID_ANY || - other->svm_cid == VMADDR_CID_ANY || - addr->svm_cid == other->svm_cid) && - addr->svm_port == other->svm_port; -} -EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); - int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr) { diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h index cdfbcefdf843..9ccd5316eac0 100644 --- a/net/vmw_vsock/vsock_addr.h +++ b/net/vmw_vsock/vsock_addr.h @@ -24,8 +24,6 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr); void vsock_addr_unbind(struct sockaddr_vm *addr); bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, const struct sockaddr_vm *other); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm 
*other); int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr); diff --git a/net/wireless/util.c b/net/wireless/util.c index a7046a4333e9..f5ad4d94ba88 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype > 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 167c67d46c6a..23cea0f74336 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1037,6 +1037,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } +static int flow_to_policy_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + + switch (dir) { + default: + case FLOW_DIR_IN: + return XFRM_POLICY_IN; + case FLOW_DIR_OUT: + return XFRM_POLICY_OUT; + case FLOW_DIR_FWD: + return XFRM_POLICY_FWD; + } +} + static struct flow_cache_object * xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) @@ -1046,7 +1064,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, if (old_obj) xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - pol = __xfrm_policy_lookup(net, fl, family, dir); + pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); if (IS_ERR_OR_NULL(pol)) return ERR_CAST(pol); @@ -1932,7 +1950,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, * previous cache entry */ if (xdst == NULL) { num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, 
fl, family, dir); + pols[0] = __xfrm_policy_lookup(net, fl, family, + flow_to_policy_dir(dir)); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 35754cc8a9e5..8dafe6d3c6e4 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -334,6 +334,70 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! 
+ */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff) + break; + + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq - preplay_esn->oseq; + else + oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; + + if (seq_diff < x->replay_maxdiff && + oseq_diff < x->replay_maxdiff) { + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; @@ -510,7 +574,7 @@ static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_bmp, + .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2c341bdaf47c..78f66fa92449 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; x->props.flags = orig->props.flags; + x->props.extra_flags = orig->props.extra_flags; x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fbd9e6cd0fd7..aa778748c565 100644 --- a/net/xfrm/xfrm_user.c +++ 
b/net/xfrm/xfrm_user.c @@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, copy_from_user_state(x, p); + if (attrs[XFRMA_SA_EXTRA_FLAGS]) + x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + if ((err = attach_aead(&x->aead, &x->props.ealgo, attrs[XFRMA_ALG_AEAD]))) goto error; @@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, copy_to_user_state(x, p); + if (x->props.extra_flags) { + ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, + x->props.extra_flags); + if (ret) + goto out; + } + if (x->coaddr) { ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); if (ret) @@ -2302,9 +2312,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, + [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, }; -static struct xfrm_link { +static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); @@ -2338,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; - struct xfrm_link *link; + const struct xfrm_link *link; int type, err; type = nlh->nlmsg_type; @@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) x->security->ctx_len); if (x->coaddr) l += nla_total_size(sizeof(*x->coaddr)); + if (x->props.extra_flags) + l += nla_total_size(sizeof(x->props.extra_flags)); /* Must count x->lastused as it may become non-zero behind our back. 
*/ l += nla_total_size(sizeof(u64)); diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index 25f216a841d5..477d137c0557 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -14,7 +14,7 @@ kbuild-file := $(srctree)/$(obj)/Kbuild include $(kbuild-file) # called may set destination dir (when installing to asm/) -_dst := $(or $(destination-y),$(dst),$(obj)) +_dst := $(if $(destination-y),$(destination-y),$(if $(dst),$(dst),$(obj))) old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild ifneq ($(wildcard $(old-kbuild-file)),) @@ -48,13 +48,14 @@ all-files := $(header-y) $(genhdr-y) $(wrapper-files) output-files := $(addprefix $(installdir)/, $(all-files)) input-files := $(foreach hdr, $(header-y), \ - $(or \ + $(if $(wildcard $(srcdir)/$(hdr)), \ $(wildcard $(srcdir)/$(hdr)), \ - $(wildcard $(oldsrcdir)/$(hdr)), \ - $(error Missing UAPI file $(srcdir)/$(hdr)) \ + $(if $(wildcard $(oldsrcdir)/$(hdr)), \ + $(wildcard $(oldsrcdir)/$(hdr)), \ + $(error Missing UAPI file $(srcdir)/$(hdr))) \ )) \ $(foreach hdr, $(genhdr-y), \ - $(or \ + $(if $(wildcard $(gendir)/$(hdr)), \ $(wildcard $(gendir)/$(hdr)), \ $(error Missing generated UAPI file $(gendir)/$(hdr)) \ )) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b28cc384a5bc..4de4bc48493b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3016,6 +3016,7 @@ sub process { $dstat !~ /^'X'$/ && # character constants $dstat !~ /$exceptions/ && $dstat !~ /^\.$Ident\s*=/ && # .foo = + $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo $dstat !~ /^do\s*$Constant\s*while\s*$Constant;?$/ && # do {...} while (...); // do {...} while (...) $dstat !~ /^for\s*$Constant$/ && # for (...) $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) 
bar() diff --git a/security/capability.c b/security/capability.c index 579775088967..6783c3e6c88e 100644 --- a/security/capability.c +++ b/security/capability.c @@ -737,6 +737,11 @@ static int cap_tun_dev_open(void *security) { return 0; } + +static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1071,6 +1076,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, tun_dev_open); set_to_cap_if_null(ops, tun_dev_attach_queue); set_to_cap_if_null(ops, tun_dev_attach); + set_to_cap_if_null(ops, skb_owned_by); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_cap_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/keys/compat.c b/security/keys/compat.c index 1c261763f479..d65fa7fa29ba 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -40,12 +40,12 @@ static long compat_keyctl_instantiate_key_iov( ARRAY_SIZE(iovstack), iovstack, &iov); if (ret < 0) - return ret; + goto err; if (ret == 0) goto no_payload_free; ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid); - +err: if (iov != iovstack) kfree(iov); return ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 58dfe0890947..42defae1e161 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -57,7 +57,7 @@ int install_user_keyrings(void) kenter("%p{%u}", user, uid); - if (user->uid_keyring) { + if (user->uid_keyring && user->session_keyring) { kleave(" = 0 [exist]"); return 0; } @@ -839,7 +839,7 @@ void key_change_session_keyring(struct callback_head *twork) new-> sgid = old-> sgid; new->fsgid = old->fsgid; new->user = get_uid(old->user); - new->user_ns = get_user_ns(new->user_ns); + new->user_ns = get_user_ns(old->user_ns); new->group_info = get_group_info(old->group_info); new->securebits = old->securebits; diff --git a/security/security.c b/security/security.c 
index 7b88c6aeaed4..03f248b84e9f 100644 --- a/security/security.c +++ b/security/security.c @@ -1290,6 +1290,11 @@ int security_tun_dev_open(void *security) } EXPORT_SYMBOL(security_tun_dev_open); +void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ + security_ops->skb_owned_by(skb, sk); +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2fa28c88900c..bf889ee51509 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -51,6 +51,7 @@ #include <linux/tty.h> #include <net/icmp.h> #include <net/ip.h> /* for local_port_range[] */ +#include <net/sock.h> #include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */ #include <net/net_namespace.h> #include <net/netlabel.h> @@ -60,7 +61,7 @@ #include <linux/bitops.h> #include <linux/interrupt.h> #include <linux/netdevice.h> /* for network interface checks */ -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/dccp.h> @@ -4363,6 +4364,11 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); } +static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ + skb_set_owner_w(skb, sk); +} + static int selinux_secmark_relabel_packet(u32 sid) { const struct task_security_struct *__tsec; @@ -4475,7 +4481,7 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) struct nlmsghdr *nlh; struct sk_security_struct *sksec = sk->sk_security; - if (skb->len < NLMSG_SPACE(0)) { + if (skb->len < NLMSG_HDRLEN) { err = -EINVAL; goto out; } @@ -5664,6 +5670,7 @@ static struct security_operations selinux_ops = { .tun_dev_attach_queue = selinux_tun_dev_attach_queue, .tun_dev_attach = selinux_tun_dev_attach, .tun_dev_open = selinux_tun_dev_open, + .skb_owned_by = selinux_skb_owned_by, #ifdef CONFIG_SECURITY_NETWORK_XFRM 
.xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 14d810ead420..828fb6a4e941 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -16,7 +16,6 @@ #include <linux/kernel.h> #include <linux/export.h> #include <linux/skbuff.h> -#include <linux/netlink.h> #include <linux/selinux_netlink.h> #include <net/net_namespace.h> #include <net/netlink.h> @@ -77,7 +76,7 @@ static void selnl_notify(int msgtype, void *data) len = selnl_msglen(msgtype); - skb = alloc_skb(NLMSG_SPACE(len), GFP_USER); + skb = nlmsg_new(len, GFP_USER); if (!skb) goto oom; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 48665ecd1197..8ab295154517 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -310,7 +310,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, if (old_ctx) { new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len, - GFP_KERNEL); + GFP_ATOMIC); if (!new_ctx) return -ENOMEM; diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 23414b93771f..13c88fbcf037 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -347,10 +347,8 @@ int yama_ptrace_traceme(struct task_struct *parent) /* Only disallow PTRACE_TRACEME on more aggressive settings. 
*/ switch (ptrace_scope) { case YAMA_SCOPE_CAPABILITY: - rcu_read_lock(); - if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE)) + if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE)) rc = -EPERM; - rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: rc = -EPERM; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 71ae86ca64ac..eb560fa32321 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3222,18 +3222,10 @@ EXPORT_SYMBOL_GPL(snd_pcm_lib_default_mmap); int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_struct *area) { - long size; - unsigned long offset; + struct snd_pcm_runtime *runtime = substream->runtime;; area->vm_page_prot = pgprot_noncached(area->vm_page_prot); - area->vm_flags |= VM_IO; - size = area->vm_end - area->vm_start; - offset = area->vm_pgoff << PAGE_SHIFT; - if (io_remap_pfn_range(area, area->vm_start, - (substream->runtime->dma_addr + offset) >> PAGE_SHIFT, - size, area->vm_page_prot)) - return -EAGAIN; - return 0; + return vm_iomap_memory(area, runtime->dma_addr, runtime->dma_bytes); } EXPORT_SYMBOL(snd_pcm_lib_mmap_iomem); diff --git a/sound/core/seq/oss/seq_oss_event.c b/sound/core/seq/oss/seq_oss_event.c index 066f5f3e3f4c..c3908862bc8b 100644 --- a/sound/core/seq/oss/seq_oss_event.c +++ b/sound/core/seq/oss/seq_oss_event.c @@ -285,7 +285,12 @@ local_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev static int note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { - struct seq_oss_synthinfo *info = &dp->synths[dev]; + struct seq_oss_synthinfo *info; + + if (!snd_seq_oss_synth_is_valid(dp, dev)) + return -ENXIO; + + info = &dp->synths[dev]; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! 
info->ch || ch < 0 || ch >= info->nr_voices) { @@ -340,7 +345,12 @@ note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, st static int note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { - struct seq_oss_synthinfo *info = &dp->synths[dev]; + struct seq_oss_synthinfo *info; + + if (!snd_seq_oss_synth_is_valid(dp, dev)) + return -ENXIO; + + info = &dp->synths[dev]; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 160b1bd0cd62..24d44b2f61ac 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -290,10 +290,10 @@ int snd_seq_timer_open(struct snd_seq_queue *q) tid.device = SNDRV_TIMER_GLOBAL_SYSTEM; err = snd_timer_open(&t, str, &tid, q->queue); } - if (err < 0) { - snd_printk(KERN_ERR "seq fatal error: cannot create timer (%i)\n", err); - return err; - } + } + if (err < 0) { + snd_printk(KERN_ERR "seq fatal error: cannot create timer (%i)\n", err); + return err; } t->callback = snd_seq_timer_interrupt; t->callback_data = q; diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c index 857586135d18..0097f3619faa 100644 --- a/sound/core/vmaster.c +++ b/sound/core/vmaster.c @@ -213,7 +213,10 @@ static int slave_put(struct snd_kcontrol *kcontrol, } if (!changed) return 0; - return slave_put_val(slave, ucontrol); + err = slave_put_val(slave, ucontrol); + if (err < 0) + return err; + return 1; } static int slave_tlv_cmd(struct snd_kcontrol *kcontrol, diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c index 30bcfe470f83..4ff60a6427d9 100644 --- a/sound/oss/sequencer.c +++ b/sound/oss/sequencer.c @@ -545,6 +545,9 @@ static void seq_chn_common_event(unsigned char *event_rec) case MIDI_PGM_CHANGE: if (seq_mode == SEQ_2) { + if (chn > 15) + break; + synth_devs[dev]->chn_info[chn].pgm_num = p1; if ((int) dev >= num_synths) 
synth_devs[dev]->set_instr(dev, chn, p1); @@ -596,6 +599,9 @@ static void seq_chn_common_event(unsigned char *event_rec) case MIDI_PITCH_BEND: if (seq_mode == SEQ_2) { + if (chn > 15) + break; + synth_devs[dev]->chn_info[chn].bender_value = w14; if ((int) dev < num_synths) diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c index 3536b076b529..0aabfedeecba 100644 --- a/sound/pci/asihpi/asihpi.c +++ b/sound/pci/asihpi/asihpi.c @@ -2549,7 +2549,7 @@ static int snd_asihpi_sampleclock_add(struct snd_card_asihpi *asihpi, static int snd_card_asihpi_mixer_new(struct snd_card_asihpi *asihpi) { - struct snd_card *card = asihpi->card; + struct snd_card *card; unsigned int idx = 0; unsigned int subindex = 0; int err; @@ -2557,6 +2557,7 @@ static int snd_card_asihpi_mixer_new(struct snd_card_asihpi *asihpi) if (snd_BUG_ON(!asihpi)) return -EINVAL; + card = asihpi->card; strcpy(card->mixername, "Asihpi Mixer"); err = diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 04b57383e8cb..4aba7646dd9c 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -173,7 +173,7 @@ const char *snd_hda_get_jack_type(u32 cfg) "Line Out", "Speaker", "HP Out", "CD", "SPDIF Out", "Digital Out", "Modem Line", "Modem Hand", "Line In", "Aux", "Mic", "Telephony", - "SPDIF In", "Digitial In", "Reserved", "Other" + "SPDIF In", "Digital In", "Reserved", "Other" }; return jack_types[(cfg & AC_DEFCFG_DEVICE) @@ -494,7 +494,7 @@ static unsigned int get_num_conns(struct hda_codec *codec, hda_nid_t nid) int snd_hda_get_num_raw_conns(struct hda_codec *codec, hda_nid_t nid) { - return get_num_conns(codec, nid) & AC_CLIST_LENGTH; + return snd_hda_get_raw_connections(codec, nid, NULL, 0); } /** @@ -517,9 +517,6 @@ int snd_hda_get_raw_connections(struct hda_codec *codec, hda_nid_t nid, hda_nid_t prev_nid; int null_count = 0; - if (snd_BUG_ON(!conn_list || max_conns <= 0)) - return -EINVAL; - parm = get_num_conns(codec, nid); if (!parm) return 0; @@ -545,7 +542,8 
@@ int snd_hda_get_raw_connections(struct hda_codec *codec, hda_nid_t nid, AC_VERB_GET_CONNECT_LIST, 0); if (parm == -1 && codec->bus->rirb_error) return -EIO; - conn_list[0] = parm & mask; + if (conn_list) + conn_list[0] = parm & mask; return 1; } @@ -580,14 +578,20 @@ int snd_hda_get_raw_connections(struct hda_codec *codec, hda_nid_t nid, continue; } for (n = prev_nid + 1; n <= val; n++) { + if (conn_list) { + if (conns >= max_conns) + return -ENOSPC; + conn_list[conns] = n; + } + conns++; + } + } else { + if (conn_list) { if (conns >= max_conns) return -ENOSPC; - conn_list[conns++] = n; + conn_list[conns] = val; } - } else { - if (conns >= max_conns) - return -ENOSPC; - conn_list[conns++] = val; + conns++; } prev_nid = val; } @@ -3140,7 +3144,7 @@ static unsigned int convert_to_spdif_status(unsigned short val) if (val & AC_DIG1_PROFESSIONAL) sbits |= IEC958_AES0_PROFESSIONAL; if (sbits & IEC958_AES0_PROFESSIONAL) { - if (sbits & AC_DIG1_EMPHASIS) + if (val & AC_DIG1_EMPHASIS) sbits |= IEC958_AES0_PRO_EMPHASIS_5015; } else { if (val & AC_DIG1_EMPHASIS) @@ -3334,6 +3338,8 @@ int snd_hda_create_dig_out_ctls(struct hda_codec *codec, return -EBUSY; } spdif = snd_array_new(&codec->spdif_out); + if (!spdif) + return -ENOMEM; for (dig_mix = dig_mixes; dig_mix->name; dig_mix++) { kctl = snd_ctl_new1(dig_mix, codec); if (!kctl) @@ -3431,11 +3437,16 @@ static struct snd_kcontrol_new spdif_share_sw = { int snd_hda_create_spdif_share_sw(struct hda_codec *codec, struct hda_multi_out *mout) { + struct snd_kcontrol *kctl; + if (!mout->dig_out_nid) return 0; + + kctl = snd_ctl_new1(&spdif_share_sw, mout); + if (!kctl) + return -ENOMEM; /* ATTENTION: here mout is passed as private_data, instead of codec */ - return snd_hda_ctl_add(codec, mout->dig_out_nid, - snd_ctl_new1(&spdif_share_sw, mout)); + return snd_hda_ctl_add(codec, mout->dig_out_nid, kctl); } EXPORT_SYMBOL_HDA(snd_hda_create_spdif_share_sw); diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index 
7dd846380a50..d0d7ac1e99d2 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -320,7 +320,7 @@ int snd_hdmi_get_eld(struct hda_codec *codec, hda_nid_t nid, unsigned char *buf, int *eld_size) { int i; - int ret; + int ret = 0; int size; /* diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 78897d05d80f..2dbe767be16b 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -740,7 +740,7 @@ EXPORT_SYMBOL_HDA(snd_hda_activate_path); static void path_power_down_sync(struct hda_codec *codec, struct nid_path *path) { struct hda_gen_spec *spec = codec->spec; - bool changed; + bool changed = false; int i; if (!spec->power_down_unused || path->active) @@ -995,6 +995,8 @@ enum { BAD_NO_EXTRA_SURR_DAC = 0x101, /* Primary DAC shared with main surrounds */ BAD_SHARED_SURROUND = 0x100, + /* No independent HP possible */ + BAD_NO_INDEP_HP = 0x40, /* Primary DAC shared with main CLFE */ BAD_SHARED_CLFE = 0x10, /* Primary DAC shared with extra surrounds */ @@ -1392,6 +1394,43 @@ static int check_aamix_out_path(struct hda_codec *codec, int path_idx) return snd_hda_get_path_idx(codec, path); } +/* check whether the independent HP is available with the current config */ +static bool indep_hp_possible(struct hda_codec *codec) +{ + struct hda_gen_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + struct nid_path *path; + int i, idx; + + if (cfg->line_out_type == AUTO_PIN_HP_OUT) + idx = spec->out_paths[0]; + else + idx = spec->hp_paths[0]; + path = snd_hda_get_path_from_idx(codec, idx); + if (!path) + return false; + + /* assume no path conflicts unless aamix is involved */ + if (!spec->mixer_nid || !is_nid_contained(path, spec->mixer_nid)) + return true; + + /* check whether output paths contain aamix */ + for (i = 0; i < cfg->line_outs; i++) { + if (spec->out_paths[i] == idx) + break; + path = snd_hda_get_path_from_idx(codec, spec->out_paths[i]); + if (path && is_nid_contained(path, 
spec->mixer_nid)) + return false; + } + for (i = 0; i < cfg->speaker_outs; i++) { + path = snd_hda_get_path_from_idx(codec, spec->speaker_paths[i]); + if (path && is_nid_contained(path, spec->mixer_nid)) + return false; + } + + return true; +} + /* fill the empty entries in the dac array for speaker/hp with the * shared dac pointed by the paths */ @@ -1545,6 +1584,9 @@ static int fill_and_eval_dacs(struct hda_codec *codec, badness += BAD_MULTI_IO; } + if (spec->indep_hp && !indep_hp_possible(codec)) + badness += BAD_NO_INDEP_HP; + /* re-fill the shared DAC for speaker / headphone */ if (cfg->line_out_type != AUTO_PIN_HP_OUT) refill_shared_dacs(codec, cfg->hp_outs, @@ -1758,6 +1800,10 @@ static int parse_output_paths(struct hda_codec *codec) cfg->speaker_pins, val); } + /* clear indep_hp flag if not available */ + if (spec->indep_hp && !indep_hp_possible(codec)) + spec->indep_hp = 0; + kfree(best_cfg); return 0; } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4cea6bb6fade..bcd40ee488e3 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -134,8 +134,8 @@ MODULE_PARM_DESC(power_save, "Automatic power-saving timeout " * this may give more power-saving, but will take longer time to * wake up. 
*/ -static int power_save_controller = -1; -module_param(power_save_controller, bint, 0644); +static bool power_save_controller = 1; +module_param(power_save_controller, bool, 0644); MODULE_PARM_DESC(power_save_controller, "Reset controller in power save mode."); #endif /* CONFIG_PM */ @@ -415,6 +415,8 @@ struct azx_dev { unsigned int opened :1; unsigned int running :1; unsigned int irq_pending :1; + unsigned int prepared:1; + unsigned int locked:1; /* * For VIA: * A flag to ensure DMA position is 0 @@ -426,8 +428,25 @@ struct azx_dev { struct timecounter azx_tc; struct cyclecounter azx_cc; + +#ifdef CONFIG_SND_HDA_DSP_LOADER + struct mutex dsp_mutex; +#endif }; +/* DSP lock helpers */ +#ifdef CONFIG_SND_HDA_DSP_LOADER +#define dsp_lock_init(dev) mutex_init(&(dev)->dsp_mutex) +#define dsp_lock(dev) mutex_lock(&(dev)->dsp_mutex) +#define dsp_unlock(dev) mutex_unlock(&(dev)->dsp_mutex) +#define dsp_is_locked(dev) ((dev)->locked) +#else +#define dsp_lock_init(dev) do {} while (0) +#define dsp_lock(dev) do {} while (0) +#define dsp_unlock(dev) do {} while (0) +#define dsp_is_locked(dev) 0 +#endif + /* CORB/RIRB */ struct azx_rb { u32 *buf; /* CORB/RIRB buffer @@ -527,6 +546,10 @@ struct azx { /* card list (for power_save trigger) */ struct list_head list; + +#ifdef CONFIG_SND_HDA_DSP_LOADER + struct azx_dev saved_azx_dev; +#endif }; #define CREATE_TRACE_POINTS @@ -1793,15 +1816,25 @@ azx_assign_device(struct azx *chip, struct snd_pcm_substream *substream) dev = chip->capture_index_offset; nums = chip->capture_streams; } - for (i = 0; i < nums; i++, dev++) - if (!chip->azx_dev[dev].opened) { - res = &chip->azx_dev[dev]; - if (res->assigned_key == key) - break; + for (i = 0; i < nums; i++, dev++) { + struct azx_dev *azx_dev = &chip->azx_dev[dev]; + dsp_lock(azx_dev); + if (!azx_dev->opened && !dsp_is_locked(azx_dev)) { + res = azx_dev; + if (res->assigned_key == key) { + res->opened = 1; + res->assigned_key = key; + dsp_unlock(azx_dev); + return azx_dev; + } } + 
dsp_unlock(azx_dev); + } if (res) { + dsp_lock(res); res->opened = 1; res->assigned_key = key; + dsp_unlock(res); } return res; } @@ -2009,6 +2042,12 @@ static int azx_pcm_hw_params(struct snd_pcm_substream *substream, struct azx_dev *azx_dev = get_azx_dev(substream); int ret; + dsp_lock(azx_dev); + if (dsp_is_locked(azx_dev)) { + ret = -EBUSY; + goto unlock; + } + mark_runtime_wc(chip, azx_dev, substream, false); azx_dev->bufsize = 0; azx_dev->period_bytes = 0; @@ -2016,8 +2055,10 @@ static int azx_pcm_hw_params(struct snd_pcm_substream *substream, ret = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params)); if (ret < 0) - return ret; + goto unlock; mark_runtime_wc(chip, azx_dev, substream, true); + unlock: + dsp_unlock(azx_dev); return ret; } @@ -2029,16 +2070,21 @@ static int azx_pcm_hw_free(struct snd_pcm_substream *substream) struct hda_pcm_stream *hinfo = apcm->hinfo[substream->stream]; /* reset BDL address */ - azx_sd_writel(azx_dev, SD_BDLPL, 0); - azx_sd_writel(azx_dev, SD_BDLPU, 0); - azx_sd_writel(azx_dev, SD_CTL, 0); - azx_dev->bufsize = 0; - azx_dev->period_bytes = 0; - azx_dev->format_val = 0; + dsp_lock(azx_dev); + if (!dsp_is_locked(azx_dev)) { + azx_sd_writel(azx_dev, SD_BDLPL, 0); + azx_sd_writel(azx_dev, SD_BDLPU, 0); + azx_sd_writel(azx_dev, SD_CTL, 0); + azx_dev->bufsize = 0; + azx_dev->period_bytes = 0; + azx_dev->format_val = 0; + } snd_hda_codec_cleanup(apcm->codec, hinfo, substream); mark_runtime_wc(chip, azx_dev, substream, false); + azx_dev->prepared = 0; + dsp_unlock(azx_dev); return snd_pcm_lib_free_pages(substream); } @@ -2055,6 +2101,12 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) snd_hda_spdif_out_of_nid(apcm->codec, hinfo->nid); unsigned short ctls = spdif ? 
spdif->ctls : 0; + dsp_lock(azx_dev); + if (dsp_is_locked(azx_dev)) { + err = -EBUSY; + goto unlock; + } + azx_stream_reset(chip, azx_dev); format_val = snd_hda_calc_stream_format(runtime->rate, runtime->channels, @@ -2065,7 +2117,8 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) snd_printk(KERN_ERR SFX "%s: invalid format_val, rate=%d, ch=%d, format=%d\n", pci_name(chip->pci), runtime->rate, runtime->channels, runtime->format); - return -EINVAL; + err = -EINVAL; + goto unlock; } bufsize = snd_pcm_lib_buffer_bytes(substream); @@ -2084,7 +2137,7 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) azx_dev->no_period_wakeup = runtime->no_period_wakeup; err = azx_setup_periods(chip, substream, azx_dev); if (err < 0) - return err; + goto unlock; } /* wallclk has 24Mhz clock source */ @@ -2101,8 +2154,14 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) if ((chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND) && stream_tag > chip->capture_streams) stream_tag -= chip->capture_streams; - return snd_hda_codec_prepare(apcm->codec, hinfo, stream_tag, + err = snd_hda_codec_prepare(apcm->codec, hinfo, stream_tag, azx_dev->format_val, substream); + + unlock: + if (!err) + azx_dev->prepared = 1; + dsp_unlock(azx_dev); + return err; } static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) @@ -2117,6 +2176,9 @@ static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) azx_dev = get_azx_dev(substream); trace_azx_pcm_trigger(chip, azx_dev, cmd); + if (dsp_is_locked(azx_dev) || !azx_dev->prepared) + return -EPIPE; + switch (cmd) { case SNDRV_PCM_TRIGGER_START: rstart = 1; @@ -2621,17 +2683,27 @@ static int azx_load_dsp_prepare(struct hda_bus *bus, unsigned int format, struct azx_dev *azx_dev; int err; - if (snd_hda_lock_devices(bus)) - return -EBUSY; + azx_dev = azx_get_dsp_loader_dev(chip); + + dsp_lock(azx_dev); + spin_lock_irq(&chip->reg_lock); + if (azx_dev->running || azx_dev->locked) { + 
spin_unlock_irq(&chip->reg_lock); + err = -EBUSY; + goto unlock; + } + azx_dev->prepared = 0; + chip->saved_azx_dev = *azx_dev; + azx_dev->locked = 1; + spin_unlock_irq(&chip->reg_lock); err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, snd_dma_pci_data(chip->pci), byte_size, bufp); if (err < 0) - goto unlock; + goto err_alloc; mark_pages_wc(chip, bufp, true); - azx_dev = azx_get_dsp_loader_dev(chip); azx_dev->bufsize = byte_size; azx_dev->period_bytes = byte_size; azx_dev->format_val = format; @@ -2649,13 +2721,20 @@ static int azx_load_dsp_prepare(struct hda_bus *bus, unsigned int format, goto error; azx_setup_controller(chip, azx_dev); + dsp_unlock(azx_dev); return azx_dev->stream_tag; error: mark_pages_wc(chip, bufp, false); snd_dma_free_pages(bufp); -unlock: - snd_hda_unlock_devices(bus); + err_alloc: + spin_lock_irq(&chip->reg_lock); + if (azx_dev->opened) + *azx_dev = chip->saved_azx_dev; + azx_dev->locked = 0; + spin_unlock_irq(&chip->reg_lock); + unlock: + dsp_unlock(azx_dev); return err; } @@ -2677,9 +2756,10 @@ static void azx_load_dsp_cleanup(struct hda_bus *bus, struct azx *chip = bus->private_data; struct azx_dev *azx_dev = azx_get_dsp_loader_dev(chip); - if (!dmab->area) + if (!dmab->area || !azx_dev->locked) return; + dsp_lock(azx_dev); /* reset BDL address */ azx_sd_writel(azx_dev, SD_BDLPL, 0); azx_sd_writel(azx_dev, SD_BDLPU, 0); @@ -2692,7 +2772,12 @@ static void azx_load_dsp_cleanup(struct hda_bus *bus, snd_dma_free_pages(dmab); dmab->area = NULL; - snd_hda_unlock_devices(bus); + spin_lock_irq(&chip->reg_lock); + if (azx_dev->opened) + *azx_dev = chip->saved_azx_dev; + azx_dev->locked = 0; + spin_unlock_irq(&chip->reg_lock); + dsp_unlock(azx_dev); } #endif /* CONFIG_SND_HDA_DSP_LOADER */ @@ -2846,8 +2931,6 @@ static int azx_runtime_idle(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; - if (power_save_controller > 0) - return 0; if (!power_save_controller || !(chip->driver_caps & 
AZX_DCAPS_PM_RUNTIME)) return -EBUSY; @@ -3481,6 +3564,7 @@ static int azx_first_init(struct azx *chip) } for (i = 0; i < chip->num_streams; i++) { + dsp_lock_init(&chip->azx_dev[i]); /* allocate memory for the BDL for each stream */ err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index db02c1e96b08..0792b5725f9c 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -2298,6 +2298,11 @@ static int dspxfr_one_seg(struct hda_codec *codec, hda_frame_size_words = ((sample_rate_div == 0) ? 0 : (num_chans * sample_rate_mul / sample_rate_div)); + if (hda_frame_size_words == 0) { + snd_printdd(KERN_ERR "frmsz zero\n"); + return -EINVAL; + } + buffer_size_words = min(buffer_size_words, (unsigned int)(UC_RANGE(chip_addx, 1) ? 65536 : 32768)); @@ -2308,8 +2313,7 @@ static int dspxfr_one_seg(struct hda_codec *codec, chip_addx, hda_frame_size_words, num_chans, sample_rate_mul, sample_rate_div, buffer_size_words); - if ((buffer_addx == NULL) || (hda_frame_size_words == 0) || - (buffer_size_words < hda_frame_size_words)) { + if (buffer_size_words < hda_frame_size_words) { snd_printdd(KERN_ERR "dspxfr_one_seg:failed\n"); return -EINVAL; } @@ -3235,7 +3239,7 @@ static int ca0132_set_vipsource(struct hda_codec *codec, int val) struct ca0132_spec *spec = codec->spec; unsigned int tmp; - if (!dspload_is_loaded(codec)) + if (spec->dsp_state != DSP_DOWNLOADED) return 0; /* if CrystalVoice if off, vipsource should be 0 */ @@ -4263,11 +4267,12 @@ static void ca0132_refresh_widget_caps(struct hda_codec *codec) */ static void ca0132_setup_defaults(struct hda_codec *codec) { + struct ca0132_spec *spec = codec->spec; unsigned int tmp; int num_fx; int idx, i; - if (!dspload_is_loaded(codec)) + if (spec->dsp_state != DSP_DOWNLOADED) return; /* out, in effects + voicefx */ @@ -4347,12 +4352,16 @@ static bool ca0132_download_dsp_images(struct hda_codec *codec) return false; 
dsp_os_image = (struct dsp_image_seg *)(fw_entry->data); - dspload_image(codec, dsp_os_image, 0, 0, true, 0); + if (dspload_image(codec, dsp_os_image, 0, 0, true, 0)) { + pr_err("ca0132 dspload_image failed.\n"); + goto exit_download; + } + dsp_loaded = dspload_wait_loaded(codec); +exit_download: release_firmware(fw_entry); - return dsp_loaded; } @@ -4363,16 +4372,13 @@ static void ca0132_download_dsp(struct hda_codec *codec) #ifndef CONFIG_SND_HDA_CODEC_CA0132_DSP return; /* NOP */ #endif - spec->dsp_state = DSP_DOWNLOAD_INIT; - if (spec->dsp_state == DSP_DOWNLOAD_INIT) { - chipio_enable_clocks(codec); - spec->dsp_state = DSP_DOWNLOADING; - if (!ca0132_download_dsp_images(codec)) - spec->dsp_state = DSP_DOWNLOAD_FAILED; - else - spec->dsp_state = DSP_DOWNLOADED; - } + chipio_enable_clocks(codec); + spec->dsp_state = DSP_DOWNLOADING; + if (!ca0132_download_dsp_images(codec)) + spec->dsp_state = DSP_DOWNLOAD_FAILED; + else + spec->dsp_state = DSP_DOWNLOADED; if (spec->dsp_state == DSP_DOWNLOADED) ca0132_set_dsp_msr(codec, true); diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 72ebb8a36b13..0d9c58f13560 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -168,10 +168,10 @@ static void cs_automute(struct hda_codec *codec) snd_hda_gen_update_outputs(codec); if (spec->gpio_eapd_hp) { - unsigned int gpio = spec->gen.hp_jack_present ? + spec->gpio_data = spec->gen.hp_jack_present ? 
spec->gpio_eapd_hp : spec->gpio_eapd_speaker; snd_hda_codec_write(codec, 0x01, 0, - AC_VERB_SET_GPIO_DATA, gpio); + AC_VERB_SET_GPIO_DATA, spec->gpio_data); } } @@ -506,6 +506,8 @@ static int patch_cs420x(struct hda_codec *codec) if (!spec) return -ENOMEM; + spec->gen.automute_hook = cs_automute; + snd_hda_pick_fixup(codec, cs420x_models, cs420x_fixup_tbl, cs420x_fixups); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE); @@ -893,6 +895,8 @@ static int patch_cs4210(struct hda_codec *codec) if (!spec) return -ENOMEM; + spec->gen.automute_hook = cs_automute; + snd_hda_pick_fixup(codec, cs421x_models, cs421x_fixup_tbl, cs421x_fixups); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 941bf6c766ec..2a89d1eefeb6 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1142,7 +1142,7 @@ static int patch_cxt5045(struct hda_codec *codec) } if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); return 0; } @@ -1921,7 +1921,7 @@ static int patch_cxt5051(struct hda_codec *codec) } if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); return 0; } @@ -3099,7 +3099,7 @@ static int patch_cxt5066(struct hda_codec *codec) } if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); return 0; } @@ -3191,11 +3191,17 @@ static int cx_auto_build_controls(struct hda_codec *codec) return 0; } +static void cx_auto_free(struct hda_codec *codec) +{ + snd_hda_detach_beep_device(codec); + snd_hda_gen_free(codec); +} + static const struct hda_codec_ops cx_auto_patch_ops = { .build_controls = cx_auto_build_controls, .build_pcms = snd_hda_gen_build_pcms, .init = snd_hda_gen_init, - .free = snd_hda_gen_free, + .free = cx_auto_free, 
.unsol_event = snd_hda_jack_unsol_event, #ifdef CONFIG_PM .check_power_status = snd_hda_gen_check_power_status, @@ -3391,7 +3397,7 @@ static int patch_conexant_auto(struct hda_codec *codec) codec->patch_ops = cx_auto_patch_ops; if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); /* Some laptops with Conexant chips show stalls in S3 resume, * which falls into the single-cmd mode. diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 78e1827d0a95..de8ac5c07fd0 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1196,7 +1196,7 @@ static void hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) _snd_printd(SND_PR_VERBOSE, "HDMI status: Codec=%d Pin=%d Presence_Detect=%d ELD_Valid=%d\n", - codec->addr, pin_nid, eld->monitor_present, eld->eld_valid); + codec->addr, pin_nid, pin_eld->monitor_present, eld->eld_valid); if (eld->eld_valid) { if (snd_hdmi_get_eld(codec, pin_nid, eld->eld_buffer, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2d4237bc0d8e..f15c36bde540 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3163,6 +3163,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0290: spec->codec_variant = ALC269_TYPE_ALC280; break; + case 0x10ec0233: case 0x10ec0282: case 0x10ec0283: spec->codec_variant = ALC269_TYPE_ALC282; @@ -3439,7 +3440,8 @@ static int alc662_parse_auto_config(struct hda_codec *codec) const hda_nid_t *ssids; if (codec->vendor_id == 0x10ec0272 || codec->vendor_id == 0x10ec0663 || - codec->vendor_id == 0x10ec0665 || codec->vendor_id == 0x10ec0670) + codec->vendor_id == 0x10ec0665 || codec->vendor_id == 0x10ec0670 || + codec->vendor_id == 0x10ec0671) ssids = alc663_ssids; else ssids = alc662_ssids; @@ -3862,6 +3864,7 @@ static int patch_alc680(struct hda_codec *codec) */ static const struct hda_codec_preset snd_hda_preset_realtek[] 
= { { .id = 0x10ec0221, .name = "ALC221", .patch = patch_alc269 }, + { .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 }, { .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 }, { .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 }, { .id = 0x10ec0267, .name = "ALC267", .patch = patch_alc268 }, @@ -3892,6 +3895,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 }, { .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 }, { .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 }, + { .id = 0x10ec0671, .name = "ALC671", .patch = patch_alc662 }, { .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 }, { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 }, { .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 }, diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 83d5335ac348..dafe04ae8c72 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -815,6 +815,29 @@ static int find_mute_led_cfg(struct hda_codec *codec, int default_polarity) return 0; } +/* check whether a built-in speaker is included in parsed pins */ +static bool has_builtin_speaker(struct hda_codec *codec) +{ + struct sigmatel_spec *spec = codec->spec; + hda_nid_t *nid_pin; + int nids, i; + + if (spec->gen.autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) { + nid_pin = spec->gen.autocfg.line_out_pins; + nids = spec->gen.autocfg.line_outs; + } else { + nid_pin = spec->gen.autocfg.speaker_pins; + nids = spec->gen.autocfg.speaker_outs; + } + + for (i = 0; i < nids; i++) { + unsigned int def_conf = snd_hda_codec_get_pincfg(codec, nid_pin[i]); + if (snd_hda_get_input_pin_attr(def_conf) == INPUT_PIN_ATTR_INT) + return true; + } + return false; +} + /* * PC beep controls */ @@ -3890,6 +3913,12 @@ static int patch_stac92hd73xx(struct hda_codec *codec) return err; } + /* Don't GPIO-mute speakers if there are no internal 
speakers, because + * the GPIO might be necessary for Headphone + */ + if (spec->eapd_switch && !has_builtin_speaker(codec)) + spec->eapd_switch = 0; + codec->proc_widget_hook = stac92hd7x_proc_hook; snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PROBE); diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index 2ffdc35d5ffd..806407a3973e 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -2594,6 +2594,8 @@ static int snd_ice1712_create(struct snd_card *card, snd_ice1712_proc_init(ice); synchronize_irq(pci->irq); + card->private_data = ice; + err = pci_request_regions(pci, "ICE1712"); if (err < 0) { kfree(ice); diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index fc176044994d..fc176044994d 100755..100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c diff --git a/sound/soc/codecs/max98090.h b/sound/soc/codecs/max98090.h index 7e103f249053..7e103f249053 100755..100644 --- a/sound/soc/codecs/max98090.h +++ b/sound/soc/codecs/max98090.h diff --git a/sound/soc/codecs/si476x.c b/sound/soc/codecs/si476x.c index f2d61a187830..566ea3256e2d 100644 --- a/sound/soc/codecs/si476x.c +++ b/sound/soc/codecs/si476x.c @@ -159,6 +159,7 @@ static int si476x_codec_hw_params(struct snd_pcm_substream *substream, switch (params_format(params)) { case SNDRV_PCM_FORMAT_S8: width = SI476X_PCM_FORMAT_S8; + break; case SNDRV_PCM_FORMAT_S16_LE: width = SI476X_PCM_FORMAT_S16_LE; break; diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index b8d461db369f..34d0201d6a78 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -573,11 +573,18 @@ static const struct reg_default wm5102_sysclk_reva_patch[] = { { 0x025e, 0x0112 }, }; +static const struct reg_default wm5102_sysclk_revb_patch[] = { + { 0x3081, 0x08FE }, + { 0x3083, 0x00ED }, + { 0x30C1, 0x08FE }, + { 0x30C3, 0x00ED }, +}; + static int wm5102_sysclk_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int 
event) { struct snd_soc_codec *codec = w->codec; - struct arizona *arizona = dev_get_drvdata(codec->dev); + struct arizona *arizona = dev_get_drvdata(codec->dev->parent); struct regmap *regmap = codec->control_data; const struct reg_default *patch = NULL; int i, patch_size; @@ -587,6 +594,10 @@ static int wm5102_sysclk_ev(struct snd_soc_dapm_widget *w, patch = wm5102_sysclk_reva_patch; patch_size = ARRAY_SIZE(wm5102_sysclk_reva_patch); break; + default: + patch = wm5102_sysclk_revb_patch; + patch_size = ARRAY_SIZE(wm5102_sysclk_revb_patch); + break; } switch (event) { @@ -755,7 +766,7 @@ SOC_SINGLE("SPKDAT1 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_5L, SOC_DOUBLE_R("HPOUT1 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_MUTE_SHIFT, 1, 1), -SOC_DOUBLE_R("OUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R("HPOUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_MUTE_SHIFT, 1, 1), SOC_SINGLE("EPOUT Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_3L, ARIZONA_OUT3L_MUTE_SHIFT, 1, 1), @@ -767,7 +778,7 @@ SOC_DOUBLE_R("SPKDAT1 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_5L, SOC_DOUBLE_R_TLV("HPOUT1 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_VOL_SHIFT, 0xbf, 0, digital_tlv), -SOC_DOUBLE_R_TLV("OUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R_TLV("HPOUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_VOL_SHIFT, 0xbf, 0, digital_tlv), SOC_SINGLE_TLV("EPOUT Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_3L, diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index cd17b477781d..cdeb301da1f6 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -213,9 +213,9 @@ ARIZONA_MIXER_CONTROLS("SPKDAT2R", ARIZONA_OUT6RMIX_INPUT_1_SOURCE), SOC_SINGLE("HPOUT1 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_1L, 
ARIZONA_OUT1_OSR_SHIFT, 1, 0), -SOC_SINGLE("OUT2 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_2L, +SOC_SINGLE("HPOUT2 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_2L, ARIZONA_OUT2_OSR_SHIFT, 1, 0), -SOC_SINGLE("OUT3 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_3L, +SOC_SINGLE("HPOUT3 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_3L, ARIZONA_OUT3_OSR_SHIFT, 1, 0), SOC_SINGLE("Speaker High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_4L, ARIZONA_OUT4_OSR_SHIFT, 1, 0), @@ -226,9 +226,9 @@ SOC_SINGLE("SPKDAT2 High Performance Switch", ARIZONA_OUTPUT_PATH_CONFIG_6L, SOC_DOUBLE_R("HPOUT1 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_MUTE_SHIFT, 1, 1), -SOC_DOUBLE_R("OUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R("HPOUT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_MUTE_SHIFT, 1, 1), -SOC_DOUBLE_R("OUT3 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_3L, +SOC_DOUBLE_R("HPOUT3 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_3L, ARIZONA_DAC_DIGITAL_VOLUME_3R, ARIZONA_OUT3L_MUTE_SHIFT, 1, 1), SOC_DOUBLE_R("Speaker Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_4L, ARIZONA_DAC_DIGITAL_VOLUME_4R, ARIZONA_OUT4L_MUTE_SHIFT, 1, 1), @@ -240,10 +240,10 @@ SOC_DOUBLE_R("SPKDAT2 Digital Switch", ARIZONA_DAC_DIGITAL_VOLUME_6L, SOC_DOUBLE_R_TLV("HPOUT1 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_1L, ARIZONA_DAC_DIGITAL_VOLUME_1R, ARIZONA_OUT1L_VOL_SHIFT, 0xbf, 0, digital_tlv), -SOC_DOUBLE_R_TLV("OUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, +SOC_DOUBLE_R_TLV("HPOUT2 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_2L, ARIZONA_DAC_DIGITAL_VOLUME_2R, ARIZONA_OUT2L_VOL_SHIFT, 0xbf, 0, digital_tlv), -SOC_DOUBLE_R_TLV("OUT3 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_3L, +SOC_DOUBLE_R_TLV("HPOUT3 Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_3L, ARIZONA_DAC_DIGITAL_VOLUME_3R, ARIZONA_OUT3L_VOL_SHIFT, 0xbf, 0, digital_tlv), SOC_DOUBLE_R_TLV("Speaker 
Digital Volume", ARIZONA_DAC_DIGITAL_VOLUME_4L, @@ -260,11 +260,11 @@ SOC_DOUBLE_R_RANGE_TLV("HPOUT1 Volume", ARIZONA_OUTPUT_PATH_CONFIG_1L, ARIZONA_OUTPUT_PATH_CONFIG_1R, ARIZONA_OUT1L_PGA_VOL_SHIFT, 0x34, 0x40, 0, ana_tlv), -SOC_DOUBLE_R_RANGE_TLV("OUT2 Volume", ARIZONA_OUTPUT_PATH_CONFIG_2L, +SOC_DOUBLE_R_RANGE_TLV("HPOUT2 Volume", ARIZONA_OUTPUT_PATH_CONFIG_2L, ARIZONA_OUTPUT_PATH_CONFIG_2R, ARIZONA_OUT2L_PGA_VOL_SHIFT, 0x34, 0x40, 0, ana_tlv), -SOC_DOUBLE_R_RANGE_TLV("OUT3 Volume", ARIZONA_OUTPUT_PATH_CONFIG_3L, +SOC_DOUBLE_R_RANGE_TLV("HPOUT3 Volume", ARIZONA_OUTPUT_PATH_CONFIG_3L, ARIZONA_OUTPUT_PATH_CONFIG_3R, ARIZONA_OUT3L_PGA_VOL_SHIFT, 0x34, 0x40, 0, ana_tlv), diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index ec0efc1443ba..0e8b3aaf6c8d 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1301,7 +1301,7 @@ static irqreturn_t wm8350_hpl_jack_handler(int irq, void *data) if (device_may_wakeup(wm8350->dev)) pm_wakeup_event(wm8350->dev, 250); - schedule_delayed_work(&priv->hpl.work, 200); + schedule_delayed_work(&priv->hpl.work, msecs_to_jiffies(200)); return IRQ_HANDLED; } @@ -1318,7 +1318,7 @@ static irqreturn_t wm8350_hpr_jack_handler(int irq, void *data) if (device_may_wakeup(wm8350->dev)) pm_wakeup_event(wm8350->dev, 250); - schedule_delayed_work(&priv->hpr.work, 200); + schedule_delayed_work(&priv->hpr.work, msecs_to_jiffies(200)); return IRQ_HANDLED; } diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 134e41c870b9..f8a31ad0b203 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1083,6 +1083,8 @@ static const struct snd_soc_dapm_route wm8903_intercon[] = { { "ROP", NULL, "Right Speaker PGA" }, { "RON", NULL, "Right Speaker PGA" }, + { "Charge Pump", NULL, "CLK_DSP" }, + { "Left Headphone Output PGA", NULL, "Charge Pump" }, { "Right Headphone Output PGA", NULL, "Charge Pump" }, { "Left Line Output PGA", NULL, "Charge Pump" }, diff --git a/sound/soc/codecs/wm8960.c 
b/sound/soc/codecs/wm8960.c index 9bb927325993..a64b93425ae3 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -53,8 +53,8 @@ * using 2 wire for device control, so we cache them instead. */ static const struct reg_default wm8960_reg_defaults[] = { - { 0x0, 0x0097 }, - { 0x1, 0x0097 }, + { 0x0, 0x00a7 }, + { 0x1, 0x00a7 }, { 0x2, 0x0000 }, { 0x3, 0x0000 }, { 0x4, 0x0000 }, @@ -323,8 +323,8 @@ SND_SOC_DAPM_MIXER("Left Input Mixer", WM8960_POWER3, 5, 0, SND_SOC_DAPM_MIXER("Right Input Mixer", WM8960_POWER3, 4, 0, wm8960_rin, ARRAY_SIZE(wm8960_rin)), -SND_SOC_DAPM_ADC("Left ADC", "Capture", WM8960_POWER2, 3, 0), -SND_SOC_DAPM_ADC("Right ADC", "Capture", WM8960_POWER2, 2, 0), +SND_SOC_DAPM_ADC("Left ADC", "Capture", WM8960_POWER1, 3, 0), +SND_SOC_DAPM_ADC("Right ADC", "Capture", WM8960_POWER1, 2, 0), SND_SOC_DAPM_DAC("Left DAC", "Playback", WM8960_POWER2, 8, 0), SND_SOC_DAPM_DAC("Right DAC", "Playback", WM8960_POWER2, 7, 0), diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index f3f7e75f8628..9af1bddc4c62 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -828,7 +828,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) &buf_list); if (!buf) { adsp_err(dsp, "Out of memory\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out_fw; } adsp_dbg(dsp, "%s.%d: Writing %d bytes at %x\n", @@ -865,7 +866,7 @@ out_fw: wm_adsp_buf_free(&buf_list); out: kfree(file); - return 0; + return ret; } int wm_adsp1_init(struct wm_adsp *adsp) diff --git a/sound/soc/fsl/imx-ssi.c b/sound/soc/fsl/imx-ssi.c index 55464a5b0706..810c7eeb7b03 100644 --- a/sound/soc/fsl/imx-ssi.c +++ b/sound/soc/fsl/imx-ssi.c @@ -496,6 +496,8 @@ static void imx_ssi_ac97_reset(struct snd_ac97 *ac97) if (imx_ssi->ac97_reset) imx_ssi->ac97_reset(ac97); + /* First read sometimes fails, do a dummy read */ + imx_ssi_ac97_read(ac97, 0); } static void imx_ssi_ac97_warm_reset(struct snd_ac97 *ac97) @@ -504,6 +506,9 @@ static void 
imx_ssi_ac97_warm_reset(struct snd_ac97 *ac97) if (imx_ssi->ac97_warm_reset) imx_ssi->ac97_warm_reset(ac97); + + /* First read sometimes fails, do a dummy read */ + imx_ssi_ac97_read(ac97, 0); } struct snd_ac97_bus_ops soc_ac97_ops = { diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c index 8e52c1485df3..eb4373840bb6 100644 --- a/sound/soc/fsl/pcm030-audio-fabric.c +++ b/sound/soc/fsl/pcm030-audio-fabric.c @@ -51,7 +51,7 @@ static struct snd_soc_card pcm030_card = { .num_links = ARRAY_SIZE(pcm030_fabric_dai), }; -static int __init pcm030_fabric_probe(struct platform_device *op) +static int pcm030_fabric_probe(struct platform_device *op) { struct device_node *np = op->dev.of_node; struct device_node *platform_np; diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index d7231e336a7c..6bbeb0bf1a73 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -972,6 +972,7 @@ static const struct snd_soc_dai_ops samsung_i2s_dai_ops = { static struct i2s_dai *i2s_alloc_dai(struct platform_device *pdev, bool sec) { struct i2s_dai *i2s; + int ret; i2s = devm_kzalloc(&pdev->dev, sizeof(struct i2s_dai), GFP_KERNEL); if (i2s == NULL) @@ -996,15 +997,17 @@ static struct i2s_dai *i2s_alloc_dai(struct platform_device *pdev, bool sec) i2s->i2s_dai_drv.capture.channels_max = 2; i2s->i2s_dai_drv.capture.rates = SAMSUNG_I2S_RATES; i2s->i2s_dai_drv.capture.formats = SAMSUNG_I2S_FMTS; + dev_set_drvdata(&i2s->pdev->dev, i2s); } else { /* Create a new platform_device for Secondary */ - i2s->pdev = platform_device_register_resndata(NULL, - "samsung-i2s-sec", -1, NULL, 0, NULL, 0); + i2s->pdev = platform_device_alloc("samsung-i2s-sec", -1); if (IS_ERR(i2s->pdev)) return NULL; - } - /* Pre-assign snd_soc_dai_set_drvdata */ - dev_set_drvdata(&i2s->pdev->dev, i2s); + platform_set_drvdata(i2s->pdev, i2s); + ret = platform_device_add(i2s->pdev); + if (ret < 0) + return NULL; + } return i2s; } @@ -1107,6 +1110,10 @@ static int 
samsung_i2s_probe(struct platform_device *pdev) if (samsung_dai_type == TYPE_SEC) { sec_dai = dev_get_drvdata(&pdev->dev); + if (!sec_dai) { + dev_err(&pdev->dev, "Unable to get drvdata\n"); + return -EFAULT; + } snd_soc_register_dai(&sec_dai->pdev->dev, &sec_dai->i2s_dai_drv); asoc_dma_platform_register(&pdev->dev); diff --git a/sound/soc/sh/dma-sh7760.c b/sound/soc/sh/dma-sh7760.c index 19eff8fc4fdd..1a8b03e4b41b 100644 --- a/sound/soc/sh/dma-sh7760.c +++ b/sound/soc/sh/dma-sh7760.c @@ -342,8 +342,8 @@ static int camelot_pcm_new(struct snd_soc_pcm_runtime *rtd) return 0; } -static struct snd_soc_platform sh7760_soc_platform = { - .pcm_ops = &camelot_pcm_ops, +static struct snd_soc_platform_driver sh7760_soc_platform = { + .ops = &camelot_pcm_ops, .pcm_new = camelot_pcm_new, .pcm_free = camelot_pcm_free, }; diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index b5b3db71e253..ed0bfb0ddb96 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -211,19 +211,27 @@ static int soc_compr_set_params(struct snd_compr_stream *cstream, if (platform->driver->compr_ops && platform->driver->compr_ops->set_params) { ret = platform->driver->compr_ops->set_params(cstream, params); if (ret < 0) - goto out; + goto err; } if (rtd->dai_link->compr_ops && rtd->dai_link->compr_ops->set_params) { ret = rtd->dai_link->compr_ops->set_params(cstream); if (ret < 0) - goto out; + goto err; } snd_soc_dapm_stream_event(rtd, SNDRV_PCM_STREAM_PLAYBACK, SND_SOC_DAPM_STREAM_START); -out: + /* cancel any delayed stream shutdown that is pending */ + rtd->pop_wait = 0; + mutex_unlock(&rtd->pcm_mutex); + + cancel_delayed_work_sync(&rtd->delayed_work); + + return ret; + +err: mutex_unlock(&rtd->pcm_mutex); return ret; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index b7e84a7cd9ee..ff4b45a5d796 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2963,7 +2963,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val = val << shift; ret 
= snd_soc_update_bits_locked(codec, reg, val_mask, val); - if (ret != 0) + if (ret < 0) return ret; if (snd_soc_volsw_is_stereo(mc)) { @@ -3140,7 +3140,7 @@ int snd_soc_bytes_put(struct snd_kcontrol *kcontrol, if (params->mask) { ret = regmap_read(codec->control_data, params->base, &val); if (ret != 0) - return ret; + goto out; val &= params->mask; @@ -3158,13 +3158,15 @@ int snd_soc_bytes_put(struct snd_kcontrol *kcontrol, ((u32 *)data)[0] |= cpu_to_be32(val); break; default: - return -EINVAL; + ret = -EINVAL; + goto out; } } ret = regmap_raw_write(codec->control_data, params->base, data, len); +out: kfree(data); return ret; @@ -4197,7 +4199,6 @@ int snd_soc_of_parse_audio_routing(struct snd_soc_card *card, dev_err(card->dev, "ASoC: Property '%s' index %d could not be read: %d\n", propname, 2 * i, ret); - kfree(routes); return -EINVAL; } ret = of_property_read_string_index(np, propname, @@ -4206,7 +4207,6 @@ int snd_soc_of_parse_audio_routing(struct snd_soc_card *card, dev_err(card->dev, "ASoC: Property '%s' index %d could not be read: %d\n", propname, (2 * i) + 1, ret); - kfree(routes); return -EINVAL; } } diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 1d6a9b3ceb27..d6d9ba2e6916 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -831,6 +831,9 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget, if (path->weak) continue; + if (path->walking) + return 1; + if (path->walked) continue; @@ -838,6 +841,7 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget, if (path->sink && path->connect) { path->walked = 1; + path->walking = 1; /* do we need to add this widget to the list ? 
*/ if (list) { @@ -847,11 +851,14 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget, dev_err(widget->dapm->dev, "ASoC: could not add widget %s\n", widget->name); + path->walking = 0; return con; } } con += is_connected_output_ep(path->sink, list); + + path->walking = 0; } } @@ -931,6 +938,9 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget, if (path->weak) continue; + if (path->walking) + return 1; + if (path->walked) continue; @@ -938,6 +948,7 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget, if (path->source && path->connect) { path->walked = 1; + path->walking = 1; /* do we need to add this widget to the list ? */ if (list) { @@ -947,11 +958,14 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget, dev_err(widget->dapm->dev, "ASoC: could not add widget %s\n", widget->name); + path->walking = 0; return con; } } con += is_connected_input_ep(path->source, list); + + path->walking = 0; } } diff --git a/sound/soc/spear/spear_pcm.c b/sound/soc/spear/spear_pcm.c index 9b76cc5a1148..5e7aebe1e664 100644 --- a/sound/soc/spear/spear_pcm.c +++ b/sound/soc/spear/spear_pcm.c @@ -149,9 +149,9 @@ static void spear_pcm_free(struct snd_pcm *pcm) static u64 spear_pcm_dmamask = DMA_BIT_MASK(32); -static int spear_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, struct snd_pcm *pcm) +static int spear_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; int ret; if (!card->dev->dma_mask) @@ -159,16 +159,16 @@ static int spear_pcm_new(struct snd_card *card, if (!card->dev->coherent_dma_mask) card->dev->coherent_dma_mask = DMA_BIT_MASK(32); - if (dai->driver->playback.channels_min) { - ret = spear_pcm_preallocate_dma_buffer(pcm, + if (rtd->cpu_dai->driver->playback.channels_min) { + ret = spear_pcm_preallocate_dma_buffer(rtd->pcm, SNDRV_PCM_STREAM_PLAYBACK, spear_pcm_hardware.buffer_bytes_max); if (ret) return ret; } - if (dai->driver->capture.channels_min) { 
- ret = spear_pcm_preallocate_dma_buffer(pcm, + if (rtd->cpu_dai->driver->capture.channels_min) { + ret = spear_pcm_preallocate_dma_buffer(rtd->pcm, SNDRV_PCM_STREAM_CAPTURE, spear_pcm_hardware.buffer_bytes_max); if (ret) diff --git a/sound/soc/tegra/tegra20_i2s.h b/sound/soc/tegra/tegra20_i2s.h index c27069d24d77..729958713cd4 100644 --- a/sound/soc/tegra/tegra20_i2s.h +++ b/sound/soc/tegra/tegra20_i2s.h @@ -121,7 +121,7 @@ #define TEGRA20_I2S_TIMING_NON_SYM_ENABLE (1 << 12) #define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT 0 -#define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7fff +#define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7ff #define TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK (TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US << TEGRA20_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT) /* Fields in TEGRA20_I2S_FIFO_SCR */ diff --git a/sound/soc/tegra/tegra30_i2s.h b/sound/soc/tegra/tegra30_i2s.h index 34dc47b9581c..a294d942b9f7 100644 --- a/sound/soc/tegra/tegra30_i2s.h +++ b/sound/soc/tegra/tegra30_i2s.h @@ -110,7 +110,7 @@ #define TEGRA30_I2S_TIMING_NON_SYM_ENABLE (1 << 12) #define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT 0 -#define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7fff +#define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US 0x7ff #define TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK (TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_MASK_US << TEGRA30_I2S_TIMING_CHANNEL_BIT_COUNT_SHIFT) /* Fields in TEGRA30_I2S_OFFSET */ diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index c925ab0adeb6..5e2c55c5b255 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -43,8 +43,6 @@ static const struct snd_pcm_hardware tegra_pcm_hardware = { .info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE | - SNDRV_PCM_INFO_RESUME | SNDRV_PCM_INFO_INTERLEAVED, .formats = SNDRV_PCM_FMTBIT_S16_LE, .channels_min = 2, @@ -127,26 +125,6 @@ static int tegra_pcm_hw_free(struct snd_pcm_substream *substream) return 0; } 
-static int tegra_pcm_trigger(struct snd_pcm_substream *substream, int cmd) -{ - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - return snd_dmaengine_pcm_trigger(substream, - SNDRV_PCM_TRIGGER_START); - - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - return snd_dmaengine_pcm_trigger(substream, - SNDRV_PCM_TRIGGER_STOP); - default: - return -EINVAL; - } - return 0; -} - static int tegra_pcm_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *vma) { @@ -164,7 +142,7 @@ static struct snd_pcm_ops tegra_pcm_ops = { .ioctl = snd_pcm_lib_ioctl, .hw_params = tegra_pcm_hw_params, .hw_free = tegra_pcm_hw_free, - .trigger = tegra_pcm_trigger, + .trigger = snd_dmaengine_pcm_trigger, .pointer = snd_dmaengine_pcm_pointer, .mmap = tegra_pcm_mmap, }; diff --git a/sound/usb/card.c b/sound/usb/card.c index 803953a9bff3..2da8ad75fd96 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -244,6 +244,21 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) usb_ifnum_to_if(dev, ctrlif)->intf_assoc; if (!assoc) { + /* + * Firmware writers cannot count to three. So to find + * the IAD on the NuForce UDH-100, also check the next + * interface. 
+ */ + struct usb_interface *iface = + usb_ifnum_to_if(dev, ctrlif + 1); + if (iface && + iface->intf_assoc && + iface->intf_assoc->bFunctionClass == USB_CLASS_AUDIO && + iface->intf_assoc->bFunctionProtocol == UAC_VERSION_2) + assoc = iface->intf_assoc; + } + + if (!assoc) { snd_printk(KERN_ERR "Audio class v2 interfaces need an interface association\n"); return -EINVAL; } diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 5e634a2eb282..9e2703a25156 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -253,7 +253,7 @@ static int set_sample_rate_v2(struct snd_usb_audio *chip, int iface, { struct usb_device *dev = chip->dev; unsigned char data[4]; - int err, crate; + int err, cur_rate, prev_rate; int clock = snd_usb_clock_find_source(chip, fmt->clock); if (clock < 0) @@ -266,6 +266,19 @@ static int set_sample_rate_v2(struct snd_usb_audio *chip, int iface, return -ENXIO; } + err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, + USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, + UAC2_CS_CONTROL_SAM_FREQ << 8, + snd_usb_ctrl_intf(chip) | (clock << 8), + data, sizeof(data)); + if (err < 0) { + snd_printk(KERN_WARNING "%d:%d:%d: cannot get freq (v2)\n", + dev->devnum, iface, fmt->altsetting); + prev_rate = 0; + } else { + prev_rate = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); + } + data[0] = rate; data[1] = rate >> 8; data[2] = rate >> 16; @@ -280,19 +293,31 @@ static int set_sample_rate_v2(struct snd_usb_audio *chip, int iface, return err; } - if ((err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, - USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, - UAC2_CS_CONTROL_SAM_FREQ << 8, - snd_usb_ctrl_intf(chip) | (clock << 8), - data, sizeof(data))) < 0) { + err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, + USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, + UAC2_CS_CONTROL_SAM_FREQ << 8, + snd_usb_ctrl_intf(chip) | (clock << 8), + data, sizeof(data)); + if (err < 0) { snd_printk(KERN_WARNING 
"%d:%d:%d: cannot get freq (v2)\n", dev->devnum, iface, fmt->altsetting); - return err; + cur_rate = 0; + } else { + cur_rate = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); } - crate = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); - if (crate != rate) - snd_printd(KERN_WARNING "current rate %d is different from the runtime rate %d\n", crate, rate); + if (cur_rate != rate) { + snd_printd(KERN_WARNING + "current rate %d is different from the runtime rate %d\n", + cur_rate, rate); + } + + /* Some devices doesn't respond to sample rate changes while the + * interface is active. */ + if (rate != prev_rate) { + usb_set_interface(dev, iface, 0); + usb_set_interface(dev, iface, fmt->altsetting); + } return 0; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 638e7f738018..ca4739c3f650 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -715,8 +715,9 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_ case UAC2_CLOCK_SELECTOR: { struct uac_selector_unit_descriptor *d = p1; /* call recursively to retrieve the channel info */ - if (check_input_term(state, d->baSourceID[0], term) < 0) - return -ENODEV; + err = check_input_term(state, d->baSourceID[0], term); + if (err < 0) + return err; term->type = d->bDescriptorSubtype << 16; /* virtual type */ term->id = id; term->name = uac_selector_unit_iSelector(d); @@ -725,7 +726,8 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_ case UAC1_PROCESSING_UNIT: case UAC1_EXTENSION_UNIT: /* UAC2_PROCESSING_UNIT_V2 */ - /* UAC2_EFFECT_UNIT */ { + /* UAC2_EFFECT_UNIT */ + case UAC2_EXTENSION_UNIT_V2: { struct uac_processing_unit_descriptor *d = p1; if (state->mixer->protocol == UAC_VERSION_2 && @@ -1356,8 +1358,9 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void return err; /* determine the input source type and name */ - if (check_input_term(state, hdr->bSourceID, &iterm) < 0) - return -EINVAL; + 
err = check_input_term(state, hdr->bSourceID, &iterm); + if (err < 0) + return err; master_bits = snd_usb_combine_bytes(bmaControls, csize); /* master configuration quirks */ @@ -2052,6 +2055,8 @@ static int parse_audio_unit(struct mixer_build *state, int unitid) return parse_audio_extension_unit(state, unitid, p1); else /* UAC_VERSION_2 */ return parse_audio_processing_unit(state, unitid, p1); + case UAC2_EXTENSION_UNIT_V2: + return parse_audio_extension_unit(state, unitid, p1); default: snd_printk(KERN_ERR "usbaudio: unit %u: unexpected type 0x%02x\n", unitid, p1[2]); return -EINVAL; @@ -2118,7 +2123,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = desc->iTerminal; err = parse_audio_unit(&state, desc->bSourceID); - if (err < 0) + if (err < 0 && err != -EINVAL) return err; } else { /* UAC_VERSION_2 */ struct uac2_output_terminal_descriptor *desc = p; @@ -2130,12 +2135,12 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = desc->iTerminal; err = parse_audio_unit(&state, desc->bSourceID); - if (err < 0) + if (err < 0 && err != -EINVAL) return err; /* for UAC2, use the same approach to also add the clock selectors */ err = parse_audio_unit(&state, desc->bCSourceID); - if (err < 0) + if (err < 0 && err != -EINVAL) return err; } } diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 497d2741d119..ebe91440a068 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -509,7 +509,7 @@ static int snd_nativeinstruments_control_get(struct snd_kcontrol *kcontrol, else ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), bRequest, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0, cpu_to_le16(wIndex), + 0, wIndex, &tmp, sizeof(tmp), 1000); up_read(&mixer->chip->shutdown_rwsem); @@ -540,7 +540,7 @@ static int snd_nativeinstruments_control_put(struct 
snd_kcontrol *kcontrol, else ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), bRequest, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - cpu_to_le16(wValue), cpu_to_le16(wIndex), + wValue, wIndex, NULL, 0, 1000); up_read(&mixer->chip->shutdown_rwsem); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 5325a3869bb7..9c5ab22358b1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -486,7 +486,7 @@ static int snd_usb_nativeinstruments_boot_quirk(struct usb_device *dev) { int ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0xaf, USB_TYPE_VENDOR | USB_RECIP_DEVICE, - cpu_to_le16(1), 0, NULL, 0, 1000); + 1, 0, NULL, 0, 1000); if (ret < 0) return ret; diff --git a/tools/Makefile b/tools/Makefile index fa36565b209d..c73c6357481c 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -12,6 +12,7 @@ help: @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @echo ' virtio - vhost test module' + @echo ' net - misc networking tools' @echo ' vm - misc vm tools' @echo ' x86_energy_perf_policy - Intel energy policy tool' @echo '' @@ -34,7 +35,7 @@ help: cpupower: FORCE $(call descend,power/$@) -cgroup firewire lguest perf usb virtio vm: FORCE +cgroup firewire lguest perf usb virtio vm net: FORCE $(call descend,$@) selftests: FORCE @@ -46,7 +47,7 @@ turbostat x86_energy_perf_policy: FORCE cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install: +cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install net_install: $(call descend,$(@:_install=),install) selftests_install: @@ -57,12 +58,12 @@ turbostat_install x86_energy_perf_policy_install: install: cgroup_install cpupower_install firewire_install lguest_install \ perf_install selftests_install turbostat_install usb_install \ - virtio_install vm_install x86_energy_perf_policy_install + virtio_install vm_install 
net_install x86_energy_perf_policy_install cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean: +cgroup_clean firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean net_clean: $(call descend,$(@:_clean=),clean) selftests_clean: @@ -73,6 +74,6 @@ turbostat_clean x86_energy_perf_policy_clean: clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \ selftests_clean turbostat_clean usb_clean virtio_clean \ - vm_clean x86_energy_perf_policy_clean + vm_clean net_clean x86_energy_perf_policy_clean .PHONY: FORCE diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index a20e32033431..0b0a90787db6 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -122,7 +122,7 @@ export Q VERBOSE EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I/usr/local/include $(CONFIG_INCLUDES) +INCLUDES = -I. $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -Wall diff --git a/tools/net/Makefile b/tools/net/Makefile new file mode 100644 index 000000000000..b4444d53b73f --- /dev/null +++ b/tools/net/Makefile @@ -0,0 +1,15 @@ +prefix = /usr + +CC = gcc + +all : bpf_jit_disasm + +bpf_jit_disasm : CFLAGS = -Wall -O2 +bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl +bpf_jit_disasm : bpf_jit_disasm.o + +clean : + rm -rf *.o bpf_jit_disasm + +install : + install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c new file mode 100644 index 000000000000..cfe0cdcda3de --- /dev/null +++ b/tools/net/bpf_jit_disasm.c @@ -0,0 +1,199 @@ +/* + * Minimal BPF JIT image disassembler + * + * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for + * debugging or verification purposes. 
+ * + * To get the disassembly of the JIT code, do the following: + * + * 1) `echo 2 > /proc/sys/net/core/bpf_jit_enable` + * 2) Load a BPF filter (e.g. `tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24`) + * 3) Run e.g. `bpf_jit_disasm -o` to read out the last JIT code + * + * Copyright 2013 Daniel Borkmann <borkmann@redhat.com> + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <unistd.h> +#include <string.h> +#include <bfd.h> +#include <dis-asm.h> +#include <sys/klog.h> +#include <sys/types.h> +#include <regex.h> + +static void get_exec_path(char *tpath, size_t size) +{ + char *path; + ssize_t len; + + snprintf(tpath, size, "/proc/%d/exe", (int) getpid()); + tpath[size - 1] = 0; + + path = strdup(tpath); + assert(path); + + len = readlink(path, tpath, size); + tpath[len] = 0; + + free(path); +} + +static void get_asm_insns(uint8_t *image, size_t len, unsigned long base, + int opcodes) +{ + int count, i, pc = 0; + char tpath[256]; + struct disassemble_info info; + disassembler_ftype disassemble; + bfd *bfdf; + + memset(tpath, 0, sizeof(tpath)); + get_exec_path(tpath, sizeof(tpath)); + + bfdf = bfd_openr(tpath, NULL); + assert(bfdf); + assert(bfd_check_format(bfdf, bfd_object)); + + init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + info.arch = bfd_get_arch(bfdf); + info.mach = bfd_get_mach(bfdf); + info.buffer = image; + info.buffer_length = len; + + disassemble_init_for_target(&info); + + disassemble = disassembler(bfdf); + assert(disassemble); + + do { + printf("%4x:\t", pc); + + count = disassemble(pc, &info); + + if (opcodes) { + printf("\n\t"); + for (i = 0; i < count; ++i) + printf("%02x ", (uint8_t) image[pc + i]); + } + printf("\n"); + + pc += count; + } while(count > 0 && pc < len); + + bfd_close(bfdf); +} + +static char *get_klog_buff(int *klen) +{ + int ret, len = klogctl(10, NULL, 0); + char *buff = malloc(len); + + 
assert(buff && klen); + ret = klogctl(3, buff, len); + assert(ret >= 0); + *klen = ret; + + return buff; +} + +static void put_klog_buff(char *buff) +{ + free(buff); +} + +static int get_last_jit_image(char *haystack, size_t hlen, + uint8_t *image, size_t ilen, + unsigned long *base) +{ + char *ptr, *pptr, *tmp; + off_t off = 0; + int ret, flen, proglen, pass, ulen = 0; + regmatch_t pmatch[1]; + regex_t regex; + + if (hlen == 0) + return 0; + + ret = regcomp(&regex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ " + "pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED); + assert(ret == 0); + + ptr = haystack; + while (1) { + ret = regexec(&regex, ptr, 1, pmatch, 0); + if (ret == 0) { + ptr += pmatch[0].rm_eo; + off += pmatch[0].rm_eo; + assert(off < hlen); + } else + break; + } + + ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); + ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", + &flen, &proglen, &pass, base); + if (ret != 4) + return 0; + + tmp = ptr = haystack + off; + while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) { + tmp = NULL; + if (!strstr(ptr, "JIT code")) + continue; + pptr = ptr; + while ((ptr = strstr(pptr, ":"))) + pptr = ptr + 1; + ptr = pptr; + do { + image[ulen++] = (uint8_t) strtoul(pptr, &pptr, 16); + if (ptr == pptr || ulen >= ilen) { + ulen--; + break; + } + ptr = pptr; + } while (1); + } + + assert(ulen == proglen); + printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", + proglen, pass, flen); + printf("%lx + <x>:\n", *base); + + regfree(&regex); + return ulen; +} + +int main(int argc, char **argv) +{ + int len, klen, opcodes = 0; + char *kbuff; + unsigned long base; + uint8_t image[4096]; + + if (argc > 1) { + if (!strncmp("-o", argv[argc - 1], 2)) { + opcodes = 1; + } else { + printf("usage: bpf_jit_disasm [-o: show opcodes]\n"); + exit(0); + } + } + + bfd_init(); + memset(image, 0, sizeof(image)); + + kbuff = get_klog_buff(&klen); + + len = get_last_jit_image(kbuff, klen, image, sizeof(image), &base); + if (len > 
0 && base > 0) + get_asm_insns(image, len, base, opcodes); + + put_klog_buff(kbuff); + + return 0; +} diff --git a/tools/perf/Makefile b/tools/perf/Makefile index a2108ca1cc17..bb74c79cd16e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -95,7 +95,7 @@ ifeq ("$(origin DEBUG)", "command line") PERF_DEBUG = $(DEBUG) endif ifndef PERF_DEBUG - CFLAGS_OPTIMIZE = -O6 -D_FORTIFY_SOURCE=2 + CFLAGS_OPTIMIZE = -O6 endif ifdef PARSER_DEBUG @@ -180,6 +180,12 @@ ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-W CFLAGS := $(CFLAGS) -Wvolatile-register-var endif +ifndef PERF_DEBUG + ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y) + CFLAGS := $(CFLAGS) -D_FORTIFY_SOURCE=2 + endif +endif + ### --- END CONFIGURATION SECTION --- ifeq ($(srctree),) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a5223e6a7b43..0fdc85269c4d 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -1,6 +1,30 @@ #ifndef BENCH_H #define BENCH_H +/* + * The madvise transparent hugepage constants were added in glibc + * 2.13. For compatibility with older versions of glibc, define these + * tokens if they are not already defined. + * + * PA-RISC uses different madvise values from other architectures and + * needs to be special-cased. 
+ */ +#ifdef __hppa__ +# ifndef MADV_HUGEPAGE +# define MADV_HUGEPAGE 67 +# endif +# ifndef MADV_NOHUGEPAGE +# define MADV_NOHUGEPAGE 68 +# endif +#else +# ifndef MADV_HUGEPAGE +# define MADV_HUGEPAGE 14 +# endif +# ifndef MADV_NOHUGEPAGE +# define MADV_NOHUGEPAGE 15 +# endif +#endif + extern int bench_numa(int argc, const char **argv, const char *prefix); extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 774c90713a53..f1a939ebc19c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -573,13 +573,15 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) perf_event__synthesize_guest_os, tool); } - if (!opts->target.system_wide) + if (perf_target__has_task(&opts->target)) err = perf_event__synthesize_thread_map(tool, evsel_list->threads, process_synthesized_event, machine); - else + else if (perf_target__has_cpu(&opts->target)) err = perf_event__synthesize_threads(tool, process_synthesized_event, machine); + else /* command specified */ + err = 0; if (err != 0) goto out_delete_session; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 38624686ee9a..226a4ae2f936 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -208,8 +208,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) return 0; } -#define K_LEFT -1 -#define K_RIGHT -2 +#define K_LEFT -1000 +#define K_RIGHT -2000 +#define K_SWITCH_INPUT_DATA -3000 #endif #ifdef GTK2_SUPPORT diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 55433aa42c8f..eabdce0a2daa 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -143,7 +143,7 @@ struct strlist *strlist__new(bool dupstr, const char *list) slist->rblist.node_delete = strlist__node_delete; slist->dupstr = dupstr; - if (slist && 
strlist__parse_list(slist, list) != 0) + if (list && strlist__parse_list(slist, list) != 0) goto out_error; } diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6f3214ed4444..321e066a0753 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1421,6 +1421,7 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x3C: /* HSW */ case 0x3F: /* HSW */ case 0x45: /* HSW */ + case 0x46: /* HSW */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -1515,6 +1516,7 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x3C: /* HSW */ case 0x3F: /* HSW */ case 0x45: /* HSW */ + case 0x46: /* HSW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; break; case 0x2D: @@ -1754,6 +1756,7 @@ int is_snb(unsigned int family, unsigned int model) case 0x3C: /* HSW */ case 0x3F: /* HSW */ case 0x45: /* HSW */ + case 0x46: /* HSW */ return 1; } return 0; @@ -2276,7 +2279,7 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose) - fprintf(stderr, "turbostat v3.2 February 11, 2013" + fprintf(stderr, "turbostat v3.3 March 15, 2013" " - Len Brown <lenb@kernel.org>\n"); turbostat_init(); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3cc0ad7ae863..a4805932972b 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -5,6 +5,7 @@ TARGETS += vm TARGETS += cpu-hotplug TARGETS += memory-hotplug TARGETS += efivarfs +TARGETS += net all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index 880cdd5dc63f..77edcdcc016b 100644 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -125,6 +125,63 @@ test_open_unlink() ./open-unlink $file } +# test that we can create a range of filenames +test_valid_filenames() +{ 
+ local attrs='\x07\x00\x00\x00' + local ret=0 + + local file_list="abc dump-type0-11-1-1362436005 1234 -" + for f in $file_list; do + local file=$efivarfs_mount/$f-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file could not be created" >&2 + ret=1 + else + rm $file + fi + done + + exit $ret +} + +test_invalid_filenames() +{ + local attrs='\x07\x00\x00\x00' + local ret=0 + + local file_list=" + -1234-1234-1234-123456789abc + foo + foo-bar + -foo- + foo-barbazba-foob-foob-foob-foobarbazfoo + foo------------------------------------- + -12345678-1234-1234-1234-123456789abc + a-12345678=1234-1234-1234-123456789abc + a-12345678-1234=1234-1234-123456789abc + a-12345678-1234-1234=1234-123456789abc + a-12345678-1234-1234-1234=123456789abc + 1112345678-1234-1234-1234-123456789abc" + + for f in $file_list; do + local file=$efivarfs_mount/$f + + printf "$attrs\x00" 2>/dev/null > $file + + if [ -e $file ]; then + echo "Creating $file should have failed" >&2 + rm $file + ret=1 + fi + done + + exit $ret +} + check_prereqs rc=0 @@ -135,5 +192,7 @@ run_test test_create_read run_test test_delete run_test test_zero_size_delete run_test test_open_unlink +run_test test_valid_filenames +run_test test_invalid_filenames exit $rc diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore new file mode 100644 index 000000000000..00326629d4af --- /dev/null +++ b/tools/testing/selftests/net/.gitignore @@ -0,0 +1,3 @@ +socket +psock_fanout +psock_tpacket diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile new file mode 100644 index 000000000000..750512ba2c88 --- /dev/null +++ b/tools/testing/selftests/net/Makefile @@ -0,0 +1,19 @@ +# Makefile for net selftests + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall -O2 -g + +CFLAGS += -I../../../../usr/include/ + +NET_PROGS = socket psock_fanout psock_tpacket + +all: $(NET_PROGS) +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +run_tests: all + @/bin/sh 
./run_netsocktests || echo "sockettests: [FAIL]" + @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" + +clean: + $(RM) $(NET_PROGS) diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c new file mode 100644 index 000000000000..57b9c2b7c4ff --- /dev/null +++ b/tools/testing/selftests/net/psock_fanout.c @@ -0,0 +1,312 @@ +/* + * Copyright 2013 Google Inc. + * Author: Willem de Bruijn (willemb@google.com) + * + * A basic test of packet socket fanout behavior. + * + * Control: + * - create fanout fails as expected with illegal flag combinations + * - join fanout fails as expected with diverging types or flags + * + * Datapath: + * Open a pair of packet sockets and a pair of INET sockets, send a known + * number of packets across the two INET sockets and count the number of + * packets enqueued onto the two packet sockets. + * + * The test currently runs for + * - PACKET_FANOUT_HASH + * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER + * - PACKET_FANOUT_LB + * - PACKET_FANOUT_CPU + * - PACKET_FANOUT_ROLLOVER + * + * Todo: + * - functionality: PACKET_FANOUT_FLAG_DEFRAG + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define _GNU_SOURCE /* for sched_setaffinity */ + +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "psock_lib.h" + +#define RING_NUM_FRAMES 20 + +/* Open a socket in a given fanout mode. + * @return -1 if mode is bad, a valid socket otherwise */ +static int sock_fanout_open(uint16_t typeflags, int num_packets) +{ + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); + if (fd < 0) { + perror("socket packet"); + exit(1); + } + + /* fanout group ID is always 0: tests whether old groups are deleted */ + val = ((int) typeflags) << 16; + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (close(fd)) { + perror("close packet"); + exit(1); + } + return -1; + } + + pair_udp_setfilter(fd); + return fd; +} + +static char *sock_fanout_open_ring(int fd) +{ + struct tpacket_req req = { + .tp_block_size = getpagesize(), + .tp_frame_size = getpagesize(), + .tp_block_nr = RING_NUM_FRAMES, + .tp_frame_nr = RING_NUM_FRAMES, + }; + char *ring; + int val = TPACKET_V2; + + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val, + sizeof(val))) { + perror("packetsock ring setsockopt version"); + exit(1); + } + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, + sizeof(req))) { + perror("packetsock ring setsockopt"); + exit(1); + } + + ring = mmap(0, req.tp_block_size * req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (!ring) { + fprintf(stderr, "packetsock ring mmap\n"); + exit(1); + } + + return ring; +} + +static int sock_fanout_read_ring(int fd, void *ring) +{ + struct tpacket2_hdr *header = ring; + int count 
= 0; + + while (header->tp_status & TP_STATUS_USER && count < RING_NUM_FRAMES) { + count++; + header = ring + (count * getpagesize()); + } + + return count; +} + +static int sock_fanout_read(int fds[], char *rings[], const int expect[]) +{ + int ret[2]; + + ret[0] = sock_fanout_read_ring(fds[0], rings[0]); + ret[1] = sock_fanout_read_ring(fds[1], rings[1]); + + fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n", + ret[0], ret[1], expect[0], expect[1]); + + if ((!(ret[0] == expect[0] && ret[1] == expect[1])) && + (!(ret[0] == expect[1] && ret[1] == expect[0]))) { + fprintf(stderr, "ERROR: incorrect queue lengths\n"); + return 1; + } + + return 0; +} + +/* Test illegal mode + flag combination */ +static void test_control_single(void) +{ + fprintf(stderr, "test: control single socket\n"); + + if (sock_fanout_open(PACKET_FANOUT_ROLLOVER | + PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) { + fprintf(stderr, "ERROR: opened socket with dual rollover\n"); + exit(1); + } +} + +/* Test illegal group with different modes or flags */ +static void test_control_group(void) +{ + int fds[2]; + + fprintf(stderr, "test: control multiple sockets\n"); + + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed to open HASH socket\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong flag ro\n"); + exit(1); + } + if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) { + fprintf(stderr, "ERROR: joined group with wrong mode\n"); + exit(1); + } + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + if (fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + if (close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + 
} +} + +static int test_datapath(uint16_t typeflags, int port_off, + const int expect1[], const int expect2[]) +{ + const int expect0[] = { 0, 0 }; + char *rings[2]; + int fds[2], fds_udp[2][2], ret; + + fprintf(stderr, "test: datapath 0x%hx\n", typeflags); + + fds[0] = sock_fanout_open(typeflags, 20); + fds[1] = sock_fanout_open(typeflags, 20); + if (fds[0] == -1 || fds[1] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + rings[0] = sock_fanout_open_ring(fds[0]); + rings[1] = sock_fanout_open_ring(fds[1]); + pair_udp_open(fds_udp[0], PORT_BASE); + pair_udp_open(fds_udp[1], PORT_BASE + port_off); + sock_fanout_read(fds, rings, expect0); + + /* Send data, but not enough to overflow a queue */ + pair_udp_send(fds_udp[0], 15); + pair_udp_send(fds_udp[1], 5); + ret = sock_fanout_read(fds, rings, expect1); + + /* Send more data, overflow the queue */ + pair_udp_send(fds_udp[0], 15); + /* TODO: ensure consistent order between expect1 and expect2 */ + ret |= sock_fanout_read(fds, rings, expect2); + + if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) || + munmap(rings[0], RING_NUM_FRAMES * getpagesize())) { + fprintf(stderr, "close rings\n"); + exit(1); + } + if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || + close(fds_udp[0][1]) || close(fds_udp[0][0]) || + close(fds[1]) || close(fds[0])) { + fprintf(stderr, "close datapath\n"); + exit(1); + } + + return ret; +} + +static int set_cpuaffinity(int cpuid) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpuid, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) { + if (errno != EINVAL) { + fprintf(stderr, "setaffinity %d\n", cpuid); + exit(1); + } + return 1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; + const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; + const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; + const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; + const int expect_cpu0[2][2] = { { 20, 0 }, 
{ 20, 0 } }; + const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; + int port_off = 2, tries = 5, ret; + + test_control_single(); + test_control_group(); + + /* find a set of ports that do not collide onto the same socket */ + ret = test_datapath(PACKET_FANOUT_HASH, port_off, + expect_hash[0], expect_hash[1]); + while (ret && tries--) { + fprintf(stderr, "info: trying alternate ports (%d)\n", tries); + ret = test_datapath(PACKET_FANOUT_HASH, ++port_off, + expect_hash[0], expect_hash[1]); + } + + ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, + port_off, expect_hash_rb[0], expect_hash_rb[1]); + ret |= test_datapath(PACKET_FANOUT_LB, + port_off, expect_lb[0], expect_lb[1]); + ret |= test_datapath(PACKET_FANOUT_ROLLOVER, + port_off, expect_rb[0], expect_rb[1]); + + set_cpuaffinity(0); + ret |= test_datapath(PACKET_FANOUT_CPU, port_off, + expect_cpu0[0], expect_cpu0[1]); + if (!set_cpuaffinity(1)) + /* TODO: test that choice alternates with previous */ + ret |= test_datapath(PACKET_FANOUT_CPU, port_off, + expect_cpu1[0], expect_cpu1[1]); + + if (ret) + return 1; + + printf("OK. All tests passed\n"); + return 0; +} diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h new file mode 100644 index 000000000000..37da54ac85a9 --- /dev/null +++ b/tools/testing/selftests/net/psock_lib.h @@ -0,0 +1,127 @@ +/* + * Copyright 2013 Google Inc. + * Author: Willem de Bruijn <willemb@google.com> + * Daniel Borkmann <dborkman@redhat.com> + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef PSOCK_LIB_H +#define PSOCK_LIB_H + +#include <sys/types.h> +#include <sys/socket.h> +#include <string.h> +#include <arpa/inet.h> +#include <unistd.h> + +#define DATA_LEN 100 +#define DATA_CHAR 'a' + +#define PORT_BASE 8000 + +#ifndef __maybe_unused +# define __maybe_unused __attribute__ ((__unused__)) +#endif + +static __maybe_unused void pair_udp_setfilter(int fd) +{ + struct sock_filter bpf_filter[] = { + { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */ + { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ + { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ + { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */ + { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x06, 0, 0, 0x00000060 }, /* RET match */ + { 0x06, 0, 0, 0x00000000 }, /* RET no match */ + }; + struct sock_fprog bpf_prog; + + bpf_prog.filter = bpf_filter; + bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, + sizeof(bpf_prog))) { + perror("setsockopt SO_ATTACH_FILTER"); + exit(1); + } +} + +static __maybe_unused void pair_udp_open(int fds[], uint16_t port) +{ + struct sockaddr_in saddr, daddr; + + fds[0] = socket(PF_INET, SOCK_DGRAM, 0); + fds[1] = socket(PF_INET, SOCK_DGRAM, 0); + if (fds[0] == -1 || fds[1] == -1) { + fprintf(stderr, "ERROR: socket dgram\n"); + exit(1); + } + + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_port = htons(port); + saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sin_family = AF_INET; + daddr.sin_port = htons(port + 1); + daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + /* must bind both to get consistent hash result */ 
+ if (bind(fds[1], (void *) &daddr, sizeof(daddr))) { + perror("bind"); + exit(1); + } + if (bind(fds[0], (void *) &saddr, sizeof(saddr))) { + perror("bind"); + exit(1); + } + if (connect(fds[0], (void *) &daddr, sizeof(daddr))) { + perror("connect"); + exit(1); + } +} + +static __maybe_unused void pair_udp_send(int fds[], int num) +{ + char buf[DATA_LEN], rbuf[DATA_LEN]; + + memset(buf, DATA_CHAR, sizeof(buf)); + while (num--) { + /* Should really handle EINTR and EAGAIN */ + if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) { + fprintf(stderr, "ERROR: send failed left=%d\n", num); + exit(1); + } + if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) { + fprintf(stderr, "ERROR: recv failed left=%d\n", num); + exit(1); + } + if (memcmp(buf, rbuf, sizeof(buf))) { + fprintf(stderr, "ERROR: data failed left=%d\n", num); + exit(1); + } + } +} + +static __maybe_unused void pair_udp_close(int fds[]) +{ + close(fds[0]); + close(fds[1]); +} + +#endif /* PSOCK_LIB_H */ diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c new file mode 100644 index 000000000000..a8d7ffadd49b --- /dev/null +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -0,0 +1,824 @@ +/* + * Copyright 2013 Red Hat, Inc. + * Author: Daniel Borkmann <dborkman@redhat.com> + * + * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior. + * + * Control: + * Test the setup of the TPACKET socket with different patterns that are + * known to fail (TODO) resp. succeed (OK). + * + * Datapath: + * Open a pair of packet sockets and send resp. receive an a priori known + * packet pattern across the sockets and check if it was received resp. + * sent correctly. Fanout in combination with RX_RING is currently not + * tested here. 
+ * + * The test currently runs for + * - TPACKET_V1: RX_RING, TX_RING + * - TPACKET_V2: RX_RING, TX_RING + * - TPACKET_V3: RX_RING + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <linux/if_packet.h> +#include <linux/filter.h> +#include <ctype.h> +#include <fcntl.h> +#include <unistd.h> +#include <bits/wordsize.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <arpa/inet.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> +#include <net/if.h> +#include <inttypes.h> +#include <poll.h> + +#include "psock_lib.h" + +#ifndef bug_on +# define bug_on(cond) assert(!(cond)) +#endif + +#ifndef __aligned_tpacket +# define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT))) +#endif + +#ifndef __align_tpacket +# define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x)))) +#endif + +#define BLOCK_STATUS(x) ((x)->h1.block_status) +#define BLOCK_NUM_PKTS(x) ((x)->h1.num_pkts) +#define BLOCK_O2FP(x) ((x)->h1.offset_to_first_pkt) +#define BLOCK_LEN(x) ((x)->h1.blk_len) +#define BLOCK_SNUM(x) ((x)->h1.seq_num) +#define BLOCK_O2PRIV(x) ((x)->offset_to_priv) +#define BLOCK_PRIV(x) ((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x))) +#define 
BLOCK_HDR_LEN (ALIGN_8(sizeof(struct block_desc))) +#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1)) +#define BLOCK_PLUS_PRIV(sz_pri) (BLOCK_HDR_LEN + ALIGN_8((sz_pri))) + +#define NUM_PACKETS 100 + +struct ring { + struct iovec *rd; + uint8_t *mm_space; + size_t mm_len, rd_len; + struct sockaddr_ll ll; + void (*walk)(int sock, struct ring *ring); + int type, rd_num, flen, version; + union { + struct tpacket_req req; + struct tpacket_req3 req3; + }; +}; + +struct block_desc { + uint32_t version; + uint32_t offset_to_priv; + struct tpacket_hdr_v1 h1; +}; + +union frame_map { + struct { + struct tpacket_hdr tp_h __aligned_tpacket; + struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr)); + } *v1; + struct { + struct tpacket2_hdr tp_h __aligned_tpacket; + struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr)); + } *v2; + void *raw; +}; + +static unsigned int total_packets, total_bytes; + +static int pfsocket(int ver) +{ + int ret, sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + if (sock == -1) { + perror("socket"); + exit(1); + } + + ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver)); + if (ret == -1) { + perror("setsockopt"); + exit(1); + } + + return sock; +} + +static void status_bar_update(void) +{ + if (total_packets % 10 == 0) { + fprintf(stderr, "."); + fflush(stderr); + } +} + +static void test_payload(void *pay, size_t len) +{ + struct ethhdr *eth = pay; + + if (len < sizeof(struct ethhdr)) { + fprintf(stderr, "test_payload: packet too " + "small: %zu bytes!\n", len); + exit(1); + } + + if (eth->h_proto != htons(ETH_P_IP)) { + fprintf(stderr, "test_payload: wrong ethernet " + "type: 0x%x!\n", ntohs(eth->h_proto)); + exit(1); + } +} + +static void create_payload(void *pay, size_t *len) +{ + int i; + struct ethhdr *eth = pay; + struct iphdr *ip = pay + sizeof(*eth); + + /* Lets create some broken crap, that still passes + * our BPF filter. 
+ */ + + *len = DATA_LEN + 42; + + memset(pay, 0xff, ETH_ALEN * 2); + eth->h_proto = htons(ETH_P_IP); + + for (i = 0; i < sizeof(*ip); ++i) + ((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand(); + + ip->ihl = 5; + ip->version = 4; + ip->protocol = 0x11; + ip->frag_off = 0; + ip->ttl = 64; + ip->tot_len = htons((uint16_t) *len - sizeof(*eth)); + + ip->saddr = htonl(INADDR_LOOPBACK); + ip->daddr = htonl(INADDR_LOOPBACK); + + memset(pay + sizeof(*eth) + sizeof(*ip), + DATA_CHAR, DATA_LEN); +} + +static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr) +{ + return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER); +} + +static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr) +{ + return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER); +} + +static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static inline int __v1_v2_rx_kernel_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + return __v1_rx_kernel_ready(base); + case TPACKET_V2: + return __v2_rx_kernel_ready(base); + default: + bug_on(1); + return 0; + } +} + +static inline void __v1_v2_rx_user_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + __v1_rx_user_ready(base); + break; + case TPACKET_V2: + __v2_rx_user_ready(base); + break; + } +} + +static void walk_v1_v2_rx(int sock, struct ring *ring) +{ + struct pollfd pfd; + int udp_sock[2]; + union frame_map ppd; + unsigned int frame_num = 0; + + bug_on(ring->type != PACKET_RX_RING); + + pair_udp_open(udp_sock, PORT_BASE); + pair_udp_setfilter(sock); + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = sock; + pfd.events = POLLIN | POLLERR; + pfd.revents = 0; + + pair_udp_send(udp_sock, NUM_PACKETS); + + while (total_packets < NUM_PACKETS * 2) { + while 
(__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base, + ring->version)) { + ppd.raw = ring->rd[frame_num].iov_base; + + switch (ring->version) { + case TPACKET_V1: + test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac, + ppd.v1->tp_h.tp_snaplen); + total_bytes += ppd.v1->tp_h.tp_snaplen; + break; + + case TPACKET_V2: + test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac, + ppd.v2->tp_h.tp_snaplen); + total_bytes += ppd.v2->tp_h.tp_snaplen; + break; + } + + status_bar_update(); + total_packets++; + + __v1_v2_rx_user_ready(ppd.raw, ring->version); + + frame_num = (frame_num + 1) % ring->rd_num; + } + + poll(&pfd, 1, 1); + } + + pair_udp_close(udp_sock); + + if (total_packets != 2 * NUM_PACKETS) { + fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n", + ring->version, total_packets, NUM_PACKETS); + exit(1); + } + + fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1); +} + +static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr) +{ + return ((hdr->tp_status & TP_STATUS_AVAILABLE) == TP_STATUS_AVAILABLE); +} + +static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_SEND_REQUEST; + __sync_synchronize(); +} + +static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr) +{ + return ((hdr->tp_status & TP_STATUS_AVAILABLE) == TP_STATUS_AVAILABLE); +} + +static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr) +{ + hdr->tp_status = TP_STATUS_SEND_REQUEST; + __sync_synchronize(); +} + +static inline int __v1_v2_tx_kernel_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + return __v1_tx_kernel_ready(base); + case TPACKET_V2: + return __v2_tx_kernel_ready(base); + default: + bug_on(1); + return 0; + } +} + +static inline void __v1_v2_tx_user_ready(void *base, int version) +{ + switch (version) { + case TPACKET_V1: + __v1_tx_user_ready(base); + break; + case TPACKET_V2: + __v2_tx_user_ready(base); + break; + } +} + +static void __v1_v2_set_packet_loss_discard(int 
sock) +{ + int ret, discard = 1; + + ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard, + sizeof(discard)); + if (ret == -1) { + perror("setsockopt"); + exit(1); + } +} + +static void walk_v1_v2_tx(int sock, struct ring *ring) +{ + struct pollfd pfd; + int rcv_sock, ret; + size_t packet_len; + union frame_map ppd; + char packet[1024]; + unsigned int frame_num = 0, got = 0; + struct sockaddr_ll ll = { + .sll_family = PF_PACKET, + .sll_halen = ETH_ALEN, + }; + + bug_on(ring->type != PACKET_TX_RING); + bug_on(ring->rd_num < NUM_PACKETS); + + rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + if (rcv_sock == -1) { + perror("socket"); + exit(1); + } + + pair_udp_setfilter(rcv_sock); + + ll.sll_ifindex = if_nametoindex("lo"); + ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll)); + if (ret == -1) { + perror("bind"); + exit(1); + } + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = sock; + pfd.events = POLLOUT | POLLERR; + pfd.revents = 0; + + total_packets = NUM_PACKETS; + create_payload(packet, &packet_len); + + while (total_packets > 0) { + while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base, + ring->version) && + total_packets > 0) { + ppd.raw = ring->rd[frame_num].iov_base; + + switch (ring->version) { + case TPACKET_V1: + ppd.v1->tp_h.tp_snaplen = packet_len; + ppd.v1->tp_h.tp_len = packet_len; + + memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN - + sizeof(struct sockaddr_ll), packet, + packet_len); + total_bytes += ppd.v1->tp_h.tp_snaplen; + break; + + case TPACKET_V2: + ppd.v2->tp_h.tp_snaplen = packet_len; + ppd.v2->tp_h.tp_len = packet_len; + + memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN - + sizeof(struct sockaddr_ll), packet, + packet_len); + total_bytes += ppd.v2->tp_h.tp_snaplen; + break; + } + + status_bar_update(); + total_packets--; + + __v1_v2_tx_user_ready(ppd.raw, ring->version); + + frame_num = (frame_num + 1) % ring->rd_num; + } + + poll(&pfd, 1, 1); + } + + bug_on(total_packets != 0); + + ret = sendto(sock, NULL, 0, 0, 
NULL, 0); + if (ret == -1) { + perror("sendto"); + exit(1); + } + + while ((ret = recvfrom(rcv_sock, packet, sizeof(packet), + 0, NULL, NULL)) > 0 && + total_packets < NUM_PACKETS) { + got += ret; + test_payload(packet, ret); + + status_bar_update(); + total_packets++; + } + + close(rcv_sock); + + if (total_packets != NUM_PACKETS) { + fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n", + ring->version, total_packets, NUM_PACKETS); + exit(1); + } + + fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got); +} + +static void walk_v1_v2(int sock, struct ring *ring) +{ + if (ring->type == PACKET_RX_RING) + walk_v1_v2_rx(sock, ring); + else + walk_v1_v2_tx(sock, ring); +} + +static uint64_t __v3_prev_block_seq_num = 0; + +void __v3_test_block_seq_num(struct block_desc *pbd) +{ + if (__v3_prev_block_seq_num + 1 != BLOCK_SNUM(pbd)) { + fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected " + "seq:%"PRIu64" != actual seq:%"PRIu64"\n", + __v3_prev_block_seq_num, __v3_prev_block_seq_num + 1, + (uint64_t) BLOCK_SNUM(pbd)); + exit(1); + } + + __v3_prev_block_seq_num = BLOCK_SNUM(pbd); +} + +static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num) +{ + if (BLOCK_NUM_PKTS(pbd)) { + if (bytes != BLOCK_LEN(pbd)) { + fprintf(stderr, "\nblock:%u with %upackets, expected " + "len:%u != actual len:%u\n", block_num, + BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd)); + exit(1); + } + } else { + if (BLOCK_LEN(pbd) != BLOCK_PLUS_PRIV(13)) { + fprintf(stderr, "\nblock:%u, expected len:%lu != " + "actual len:%u\n", block_num, BLOCK_HDR_LEN, + BLOCK_LEN(pbd)); + exit(1); + } + } +} + +static void __v3_test_block_header(struct block_desc *pbd, const int block_num) +{ + uint32_t block_status = BLOCK_STATUS(pbd); + + if ((block_status & TP_STATUS_USER) == 0) { + fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num); + exit(1); + } + + __v3_test_block_seq_num(pbd); +} + +static void __v3_walk_block(struct block_desc *pbd, const int block_num) 
+{ + int num_pkts = BLOCK_NUM_PKTS(pbd), i; + unsigned long bytes = 0; + unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(13); + struct tpacket3_hdr *ppd; + + __v3_test_block_header(pbd, block_num); + + ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd)); + for (i = 0; i < num_pkts; ++i) { + bytes += ppd->tp_snaplen; + + if (ppd->tp_next_offset) + bytes_with_padding += ppd->tp_next_offset; + else + bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac); + + test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen); + + status_bar_update(); + total_packets++; + + ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset); + __sync_synchronize(); + } + + __v3_test_block_len(pbd, bytes_with_padding, block_num); + total_bytes += bytes; +} + +void __v3_flush_block(struct block_desc *pbd) +{ + BLOCK_STATUS(pbd) = TP_STATUS_KERNEL; + __sync_synchronize(); +} + +static void walk_v3_rx(int sock, struct ring *ring) +{ + unsigned int block_num = 0; + struct pollfd pfd; + struct block_desc *pbd; + int udp_sock[2]; + + bug_on(ring->type != PACKET_RX_RING); + + pair_udp_open(udp_sock, PORT_BASE); + pair_udp_setfilter(sock); + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = sock; + pfd.events = POLLIN | POLLERR; + pfd.revents = 0; + + pair_udp_send(udp_sock, NUM_PACKETS); + + while (total_packets < NUM_PACKETS * 2) { + pbd = (struct block_desc *) ring->rd[block_num].iov_base; + + while ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0) + poll(&pfd, 1, 1); + + __v3_walk_block(pbd, block_num); + __v3_flush_block(pbd); + + block_num = (block_num + 1) % ring->rd_num; + } + + pair_udp_close(udp_sock); + + if (total_packets != 2 * NUM_PACKETS) { + fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n", + total_packets, NUM_PACKETS); + exit(1); + } + + fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1); +} + +static void walk_v3(int sock, struct ring *ring) +{ + if (ring->type == PACKET_RX_RING) + walk_v3_rx(sock, ring); + else + bug_on(1); 
+} + +static void __v1_v2_fill(struct ring *ring, unsigned int blocks) +{ + ring->req.tp_block_size = getpagesize() << 2; + ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7; + ring->req.tp_block_nr = blocks; + + ring->req.tp_frame_nr = ring->req.tp_block_size / + ring->req.tp_frame_size * + ring->req.tp_block_nr; + + ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr; + ring->walk = walk_v1_v2; + ring->rd_num = ring->req.tp_frame_nr; + ring->flen = ring->req.tp_frame_size; +} + +static void __v3_fill(struct ring *ring, unsigned int blocks) +{ + ring->req3.tp_retire_blk_tov = 64; + ring->req3.tp_sizeof_priv = 13; + ring->req3.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH; + + ring->req3.tp_block_size = getpagesize() << 2; + ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7; + ring->req3.tp_block_nr = blocks; + + ring->req3.tp_frame_nr = ring->req3.tp_block_size / + ring->req3.tp_frame_size * + ring->req3.tp_block_nr; + + ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr; + ring->walk = walk_v3; + ring->rd_num = ring->req3.tp_block_nr; + ring->flen = ring->req3.tp_block_size; +} + +static void setup_ring(int sock, struct ring *ring, int version, int type) +{ + int ret = 0; + unsigned int blocks = 256; + + ring->type = type; + ring->version = version; + + switch (version) { + case TPACKET_V1: + case TPACKET_V2: + if (type == PACKET_TX_RING) + __v1_v2_set_packet_loss_discard(sock); + __v1_v2_fill(ring, blocks); + ret = setsockopt(sock, SOL_PACKET, type, &ring->req, + sizeof(ring->req)); + break; + + case TPACKET_V3: + __v3_fill(ring, blocks); + ret = setsockopt(sock, SOL_PACKET, type, &ring->req3, + sizeof(ring->req3)); + break; + } + + if (ret == -1) { + perror("setsockopt"); + exit(1); + } + + ring->rd_len = ring->rd_num * sizeof(*ring->rd); + ring->rd = malloc(ring->rd_len); + if (ring->rd == NULL) { + perror("malloc"); + exit(1); + } + + total_packets = 0; + total_bytes = 0; +} + +static void mmap_ring(int sock, struct ring *ring) +{ + int 
i; + + ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0); + if (ring->mm_space == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + memset(ring->rd, 0, ring->rd_len); + for (i = 0; i < ring->rd_num; ++i) { + ring->rd[i].iov_base = ring->mm_space + (i * ring->flen); + ring->rd[i].iov_len = ring->flen; + } +} + +static void bind_ring(int sock, struct ring *ring) +{ + int ret; + + ring->ll.sll_family = PF_PACKET; + ring->ll.sll_protocol = htons(ETH_P_ALL); + ring->ll.sll_ifindex = if_nametoindex("lo"); + ring->ll.sll_hatype = 0; + ring->ll.sll_pkttype = 0; + ring->ll.sll_halen = 0; + + ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll)); + if (ret == -1) { + perror("bind"); + exit(1); + } +} + +static void walk_ring(int sock, struct ring *ring) +{ + ring->walk(sock, ring); +} + +static void unmap_ring(int sock, struct ring *ring) +{ + munmap(ring->mm_space, ring->mm_len); + free(ring->rd); +} + +static int test_kernel_bit_width(void) +{ + char in[512], *ptr; + int num = 0, fd; + ssize_t ret; + + fd = open("/proc/kallsyms", O_RDONLY); + if (fd == -1) { + perror("open"); + exit(1); + } + + ret = read(fd, in, sizeof(in)); + if (ret <= 0) { + perror("read"); + exit(1); + } + + close(fd); + + ptr = in; + while(!isspace(*ptr)) { + num++; + ptr++; + } + + return num * 4; +} + +static int test_user_bit_width(void) +{ + return __WORDSIZE; +} + +static const char *tpacket_str[] = { + [TPACKET_V1] = "TPACKET_V1", + [TPACKET_V2] = "TPACKET_V2", + [TPACKET_V3] = "TPACKET_V3", +}; + +static const char *type_str[] = { + [PACKET_RX_RING] = "PACKET_RX_RING", + [PACKET_TX_RING] = "PACKET_TX_RING", +}; + +static int test_tpacket(int version, int type) +{ + int sock; + struct ring ring; + + fprintf(stderr, "test: %s with %s ", tpacket_str[version], + type_str[type]); + fflush(stderr); + + if (version == TPACKET_V1 && + test_kernel_bit_width() != test_user_bit_width()) { + fprintf(stderr, "test: skip %s %s 
since user and kernel " + "space have different bit width\n", + tpacket_str[version], type_str[type]); + return 0; + } + + sock = pfsocket(version); + memset(&ring, 0, sizeof(ring)); + setup_ring(sock, &ring, version, type); + mmap_ring(sock, &ring); + bind_ring(sock, &ring); + walk_ring(sock, &ring); + unmap_ring(sock, &ring); + close(sock); + + fprintf(stderr, "\n"); + return 0; +} + +int main(void) +{ + int ret = 0; + + ret |= test_tpacket(TPACKET_V1, PACKET_RX_RING); + ret |= test_tpacket(TPACKET_V1, PACKET_TX_RING); + + ret |= test_tpacket(TPACKET_V2, PACKET_RX_RING); + ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING); + + ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING); + + if (ret) + return 1; + + printf("OK. All tests passed\n"); + return 0; +} diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests new file mode 100644 index 000000000000..5246e782d6e8 --- /dev/null +++ b/tools/testing/selftests/net/run_afpackettests @@ -0,0 +1,26 @@ +#!/bin/sh + +if [ $(id -u) != 0 ]; then + echo $msg must be run as root >&2 + exit 0 +fi + +echo "--------------------" +echo "running psock_fanout test" +echo "--------------------" +./psock_fanout +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running psock_tpacket test" +echo "--------------------" +./psock_tpacket +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests new file mode 100644 index 000000000000..c09a682df56a --- /dev/null +++ b/tools/testing/selftests/net/run_netsocktests @@ -0,0 +1,12 @@ +#!/bin/bash + +echo "--------------------" +echo "running socket test" +echo "--------------------" +./socket +if [ $? 
-ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c new file mode 100644 index 000000000000..0f227f2f9be9 --- /dev/null +++ b/tools/testing/selftests/net/socket.c @@ -0,0 +1,92 @@ +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> + +struct socket_testcase { + int domain; + int type; + int protocol; + + /* 0 = valid file descriptor + * -foo = error foo + */ + int expect; + + /* If non-zero, accept EAFNOSUPPORT to handle the case + * of the protocol not being configured into the kernel. + */ + int nosupport_ok; +}; + +static struct socket_testcase tests[] = { + { AF_MAX, 0, 0, -EAFNOSUPPORT, 0 }, + { AF_INET, SOCK_STREAM, IPPROTO_TCP, 0, 1 }, + { AF_INET, SOCK_DGRAM, IPPROTO_TCP, -EPROTONOSUPPORT, 1 }, + { AF_INET, SOCK_DGRAM, IPPROTO_UDP, 0, 1 }, + { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 }, +}; + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define ERR_STRING_SZ 64 + +static int run_tests(void) +{ + char err_string1[ERR_STRING_SZ]; + char err_string2[ERR_STRING_SZ]; + int i, err; + + err = 0; + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct socket_testcase *s = &tests[i]; + int fd; + + fd = socket(s->domain, s->type, s->protocol); + if (fd < 0) { + if (s->nosupport_ok && + errno == EAFNOSUPPORT) + continue; + + if (s->expect < 0 && + errno == -s->expect) + continue; + + strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + strerror_r(errno, err_string2, ERR_STRING_SZ); + + fprintf(stderr, "socket(%d, %d, %d) expected " + "err (%s) got (%s)\n", + s->domain, s->type, s->protocol, + err_string1, err_string2); + + err = -1; + break; + } else { + close(fd); + + if (s->expect < 0) { + strerror_r(errno, err_string1, ERR_STRING_SZ); + + fprintf(stderr, "socket(%d, %d, %d) expected " + "success got err (%s)\n", + s->domain, s->type, 
s->protocol, + err_string1); + + err = -1; + break; + } + } + } + + return err; +} + +int main(void) +{ + int err = run_tests(); + + return err; +} diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c index 8674b9ec14f6..fe1e66b6ef40 100644 --- a/tools/usb/ffs-test.c +++ b/tools/usb/ffs-test.c @@ -38,7 +38,7 @@ #include <unistd.h> #include <tools/le_byteshift.h> -#include "../../include/linux/usb/functionfs.h" +#include "../../include/uapi/linux/usb/functionfs.h" /******************** Little Endian Handling ********************************/ diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index ce82b9401958..5ba005c00e2f 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -74,9 +74,12 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; u64 redir_content; - ASSERT(redir_index < IOAPIC_NUM_PINS); + if (redir_index < IOAPIC_NUM_PINS) + redir_content = + ioapic->redirtbl[redir_index].bits; + else + redir_content = ~0ULL; - redir_content = ioapic->redirtbl[redir_index].bits; result = (ioapic->ioregsel & 0x1) ? 
(redir_content >> 32) & 0xffffffff : redir_content & 0xffffffff; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index adc68feb5c5a..f18013f09e68 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1541,21 +1541,38 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, } int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa) + gpa_t gpa, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int offset = offset_in_page(gpa); - gfn_t gfn = gpa >> PAGE_SHIFT; + gfn_t start_gfn = gpa >> PAGE_SHIFT; + gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; + gfn_t nr_pages_needed = end_gfn - start_gfn + 1; + gfn_t nr_pages_avail; ghc->gpa = gpa; ghc->generation = slots->generation; - ghc->memslot = gfn_to_memslot(kvm, gfn); - ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); - if (!kvm_is_error_hva(ghc->hva)) + ghc->len = len; + ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); + if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { ghc->hva += offset; - else - return -EFAULT; - + } else { + /* + * If the requested region crosses two memslots, we still + * verify that the entire region is valid here. + */ + while (start_gfn <= end_gfn) { + ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, + &nr_pages_avail); + if (kvm_is_error_hva(ghc->hva)) + return -EFAULT; + start_gfn += nr_pages_avail; + } + /* Use the slow path for cross page reads and writes. 
*/ + ghc->memslot = NULL; + } return 0; } EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); @@ -1566,8 +1583,13 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, struct kvm_memslots *slots = kvm_memslots(kvm); int r; + BUG_ON(len > ghc->len); + if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa); + kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -1587,8 +1609,13 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, struct kvm_memslots *slots = kvm_memslots(kvm); int r; + BUG_ON(len > ghc->len); + if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa); + kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; |