20 files changed, 776 insertions, 185 deletions
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 83bf4986baea..334b49ef02d1 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -51,6 +51,7 @@ Table of Contents
 3.4	Configuring Bonding Manually via Sysfs
 3.5	Configuration with Interfaces Support
 3.6	Overriding Configuration for Special Cases
+3.7	Configuring LACP for 802.3ad mode in a more secure way
 
 4. Querying Bonding Configuration
 4.1	Bonding Configuration
@@ -178,6 +179,27 @@ active_slave
 	active slave, or the empty string if there is no active slave or
 	the current mode does not use an active slave.
 
+ad_actor_sys_prio
+
+	In an AD system, this specifies the system priority. The allowed range
+	is 1 - 65535. If the value is not specified, it takes 65535 as the
+	default value.
+
+	This parameter has effect only in 802.3ad mode and is available through
+	SysFs interface.
+
+ad_actor_system
+
+	In an AD system, this specifies the mac-address for the actor in
+	protocol packet exchanges (LACPDUs). The value cannot be NULL or
+	multicast. It is preferred to have the local-admin bit set for this
+	mac but driver does not enforce it. If the value is not given then
+	system defaults to using the master's mac address as the actor's system
+	address.
+
+	This parameter has effect only in 802.3ad mode and is available through
+	SysFs interface.
+
 ad_select
 
 	Specifies the 802.3ad aggregation selection logic to use.  The
@@ -220,6 +242,21 @@ ad_select
 
 	This option was added in bonding version 3.4.0.
 
+ad_user_port_key
+
+	In an AD system, the port-key has three parts as shown below -
+
+	   Bits   Use
+	   00     Duplex
+	   01-05  Speed
+	   06-15  User-defined
+
+	This defines the upper 10 bits of the port key. The values can be
+	from 0 - 1023. If not given, the system defaults to 0.
+
+	This parameter has effect only in 802.3ad mode and is available through
+	SysFs interface.
+
 all_slaves_active
 
 	Specifies that duplicate frames (received on inactive ports) should be
@@ -1622,6 +1659,53 @@ output port selection.
 This feature first appeared in bonding driver version 3.7.0 and support for
 output slave selection was limited to round-robin and active-backup modes.
 
+3.7 Configuring LACP for 802.3ad mode in a more secure way
+----------------------------------------------------------
+
+When using 802.3ad bonding mode, the Actor (host) and Partner (switch)
+exchange LACPDUs.  These LACPDUs cannot be sniffed, because they are
+destined to link local mac addresses (which switches/bridges are not
+supposed to forward).  However, most of the values are easily predictable
+or are simply the machine's MAC address (which is trivially known to all
+other hosts in the same L2).  This implies that other machines in the L2
+domain can spoof LACPDU packets from other hosts to the switch and potentially
+cause mayhem by joining (from the point of view of the switch) another
+machine's aggregate, thus receiving a portion of that host's incoming
+traffic and / or spoofing traffic from that machine themselves (potentially
+even successfully terminating some portion of flows). Though this is not
+a likely scenario, one could avoid this possibility by simply configuring
+a few bonding parameters:
+
+   (a) ad_actor_system : You can set a random mac-address that can be used for
+       these LACPDU exchanges. The value can not be either NULL or Multicast.
+       Also it's preferable to set the local-admin bit. Following shell code
+       generates a random mac-address as described above.
+
+       # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \
+                                $(( (RANDOM & 0xFE) | 0x02 )) \
+                                $(( RANDOM & 0xFF )) \
+                                $(( RANDOM & 0xFF )) \
+                                $(( RANDOM & 0xFF )) \
+                                $(( RANDOM & 0xFF )) \
+                                $(( RANDOM & 0xFF )))
+       # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system
+
+   (b) ad_actor_sys_prio : Randomize the system priority. The default value
+       is 65535, but system can take the value from 1 - 65535. Following shell
+       code generates random priority and sets it.
+
+       # sys_prio=$(( 1 + RANDOM + RANDOM ))
+       # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio
+
+   (c) ad_user_port_key : Use the user portion of the port-key. The default
+       keeps this empty. These are the upper 10 bits of the port-key and value
+       ranges from 0 - 1023. Following shell code generates these 10 bits and
+       sets it.
+
+       # usr_port_key=$(( RANDOM & 0x3FF ))
+       # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key
+
+
 4 Querying Bonding Configuration
 =================================
 
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 0a2859a8ee7e..b48d4a149411 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -22,7 +22,8 @@ This file contains
       4.1.3 RAW socket option CAN_RAW_LOOPBACK
       4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
       4.1.5 RAW socket option CAN_RAW_FD_FRAMES
-      4.1.6 RAW socket returned message flags
+      4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+      4.1.7 RAW socket returned message flags
     4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
       4.2.1 Broadcast Manager operations
       4.2.2 Broadcast Manager message flags
@@ -267,6 +268,9 @@ solution for a couple of reasons:
     struct can_frame {
             canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
             __u8    can_dlc; /* frame payload length in byte (0 .. 8) */
+            __u8    __pad;   /* padding */
+            __u8    __res0;  /* reserved / padding */
+            __u8    __res1;  /* reserved / padding */
             __u8    data[8] __attribute__((aligned(8)));
     };
 
@@ -601,7 +605,22 @@ solution for a couple of reasons:
   CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
   The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
 
-  4.1.6 RAW socket returned message flags
+  4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+
+  The CAN_RAW socket can set multiple CAN identifier specific filters that
+  lead to multiple filters in the af_can.c filter processing. These filters
+  are independent from each other which leads to logical OR'ed filters when
+  applied (see 4.1.1).
+
+  This socket option joins the given CAN filters in the way that only CAN
+  frames are passed to user space that matched *all* given CAN filters. The
+  semantic for the applied filters is therefore changed to a logical AND.
+
+  This is useful especially when the filterset is a combination of filters
+  where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
+  CAN ID ranges from the incoming traffic.
+
+  4.1.7 RAW socket returned message flags
 
   When using recvmsg() call, the msg->msg_flags may contain following flags:
 
diff --git a/Documentation/networking/dctcp.txt b/Documentation/networking/dctcp.txt
index 0d5dfbc89ec9..13a857753208 100644
--- a/Documentation/networking/dctcp.txt
+++ b/Documentation/networking/dctcp.txt
@@ -8,6 +8,7 @@ the data center network to provide multi-bit feedback to the end hosts.
 To enable it on end hosts:
 
   sysctl -w net.ipv4.tcp_congestion_control=dctcp
+  sysctl -w net.ipv4.tcp_ecn_fallback=0 (optional)
 
 All switches in the data center network running DCTCP must support ECN
 marking and be configured for marking when reaching defined switch buffer
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 9930ecfbb465..135581f015e1 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -280,7 +280,8 @@ Possible BPF extensions are shown in the following table:
   rxhash                                skb->hash
   cpu                                   raw_smp_processor_id()
   vlan_tci                              skb_vlan_tag_get(skb)
-  vlan_pr                               skb_vlan_tag_present(skb)
+  vlan_avail                            skb_vlan_tag_present(skb)
+  vlan_tpid                             skb->vlan_proto
   rand                                  prandom_u32()
 
 These extensions can also be prefixed with '#'.
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
index 22bbc7225f8e..1700756af057 100644
--- a/Documentation/networking/ieee802154.txt
+++ b/Documentation/networking/ieee802154.txt
@@ -30,8 +30,8 @@ int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0);
 
 The address family, socket addresses etc. are defined in the
 include/net/af_ieee802154.h header or in the special header
-in our userspace package (see either linux-zigbee sourceforge download page
-or git tree at git://linux-zigbee.git.sourceforge.net/gitroot/linux-zigbee).
+in the userspace package (see either http://wpan.cakelab.org/ or the
+git tree at https://github.com/linux-wpan/wpan-tools).
 
 One can use SOCK_RAW for passing raw data towards device xmit function. YMMV.
 
@@ -49,15 +49,6 @@ Like with WiFi, there are several types of devices implementing IEEE 802.15.4.
 Those types of devices require different approach to be hooked into Linux kernel.
 
 
-MLME - MAC Level Management
-============================
-
-Most of IEEE 802.15.4 MLME interfaces are directly mapped on netlink commands.
-See the include/net/nl802154.h header. Our userspace tools package
-(see above) provides CLI configuration utility for radio interfaces and simple
-coordinator for IEEE 802.15.4 networks as an example users of MLME protocol.
-
-
 HardMAC
 =======
 
@@ -75,8 +66,6 @@ net_device with a pointer to struct ieee802154_mlme_ops instance. The fields
 assoc_req, assoc_resp, disassoc_req, start_req, and scan_req are optional.
 All other fields are required.
 
-We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c
-
 
 SoftMAC
 =======
@@ -89,7 +78,8 @@ stack interface for network sniffers (e.g. WireShark).
 
 This layer is going to be extended soon.
 
-See header include/net/mac802154.h and several drivers in drivers/ieee802154/.
+See header include/net/mac802154.h and several drivers in
+drivers/net/ieee802154/.
 
 
 Device drivers API
@@ -114,18 +104,17 @@ Moreover IEEE 802.15.4 device operations structure should be filled.
 Fake drivers
 ============
 
-In addition there are two drivers available which simulate real devices with
-HardMAC (fakehard) and SoftMAC (fakelb - IEEE 802.15.4 loopback driver)
-interfaces. This option provides possibility to test and debug stack without
-usage of real hardware.
+In addition there is a driver available which simulates a real device with
+SoftMAC (fakelb - IEEE 802.15.4 loopback driver) interface. This option
+provides possibility to test and debug stack without usage of real hardware.
 
-See sources in drivers/ieee802154 folder for more details.
+See sources in drivers/net/ieee802154 folder for more details.
 
 
 6LoWPAN Linux implementation
 ============================
 
-The IEEE 802.15.4 standard specifies an MTU of 128 bytes, yielding about 80
+The IEEE 802.15.4 standard specifies an MTU of 127 bytes, yielding about 80
 octets of actual MAC payload once security is turned on, on a wireless link
 with a link throughput of 250 kbps or less.  The 6LoWPAN adaptation format
 [RFC4944] was specified to carry IPv6 datagrams over such constrained links,
@@ -140,7 +129,8 @@ In Semptember 2011 the standard update was published - [RFC6282].
 It deprecates HC1 and HC2 compression and defines IPHC encoding format which is
 used in this Linux implementation.
 
-All the code related to 6lowpan you may find in files: net/ieee802154/6lowpan.*
+All the code related to 6lowpan you may find in files: net/6lowpan/*
+and net/ieee802154/6lowpan/*
 
 To setup 6lowpan interface you need (busybox release > 1.17.0):
 1. Add IEEE802.15.4 interface and initialize PANid;
diff --git a/Documentation/networking/igb.txt b/Documentation/networking/igb.txt
index 43d3549366a0..15534fdd09a8 100644
--- a/Documentation/networking/igb.txt
+++ b/Documentation/networking/igb.txt
@@ -42,10 +42,10 @@ Additional Configurations
   Jumbo Frames
   ------------
   Jumbo Frames support is enabled by changing the MTU to a value larger than
-  the default of 1500.  Use the ifconfig command to increase the MTU size.
+  the default of 1500.  Use the ip command to increase the MTU size.
   For example:
 
-       ifconfig eth<x> mtu 9000 up
+       ip link set dev eth<x> mtu 9000
 
   This setting is not saved across reboots.
 
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 1b8c964b0d17..5fae7704daab 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -267,6 +267,15 @@ tcp_ecn - INTEGER
 		  but do not request ECN on outgoing connections.
 	Default: 2
 
+tcp_ecn_fallback - BOOLEAN
+	If the kernel detects that ECN connection misbehaves, enable fall
+	back to non-ECN. Currently, this knob implements the fallback
+	from RFC3168, section 6.1.1.1., but we reserve that in future,
+	additional detection mechanisms could be implemented under this
+	knob. The value is not used, if tcp_ecn or per route (or congestion
+	control) ECN settings are disabled.
+	Default: 1 (fallback enabled)
+
 tcp_fack - BOOLEAN
 	Enable FACK congestion avoidance and fast retransmission.
 	The value is not used, if tcp_sack is not enabled.
@@ -388,6 +397,16 @@ tcp_mtu_probing - INTEGER
 	  1 - Disabled by default, enabled when an ICMP black hole detected
 	  2 - Always enabled, use initial MSS of tcp_base_mss.
 
+tcp_probe_interval - INTEGER
+	Controls how often to start TCP Packetization-Layer Path MTU
+	Discovery reprobe. The default is reprobing every 10 minutes as
+	per RFC4821.
+
+tcp_probe_threshold - INTEGER
+	Controls when TCP Packetization-Layer Path MTU Discovery probing
+	will stop in respect to the width of search range in bytes. Default
+	is 8 bytes.
+
 tcp_no_metrics_save - BOOLEAN
 	By default, TCP saves various connection metrics in the route cache
 	when the connection closes, so that connections established in the
@@ -732,8 +751,10 @@ IP Variables:
 ip_local_port_range - 2 INTEGERS
 	Defines the local port range that is used by TCP and UDP to
 	choose the local port. The first number is the first, the
-	second the last local port number. The default values are
-	32768 and 61000 respectively.
+	second the last local port number.
+	If possible, it is better these numbers have different parity.
+	(one even and one odd values)
+	The default values are 32768 and 60999 respectively.
 
 ip_local_reserved_ports - list of comma separated ranges
 	Specify the ports which are reserved for known third-party
@@ -756,7 +777,7 @@ ip_local_reserved_ports - list of comma separated ranges
 	ip_local_port_range, e.g.:
 
 	$ cat /proc/sys/net/ipv4/ip_local_port_range
-	32000	61000
+	32000	60999
 	$ cat /proc/sys/net/ipv4/ip_local_reserved_ports
 	8080,9148
 
@@ -1116,11 +1137,23 @@ arp_accept - BOOLEAN
 	gratuitous arp frame, the arp table will be updated regardless
 	if this setting is on or off.
 
+mcast_solicit - INTEGER
+	The maximum number of multicast probes in INCOMPLETE state,
+	when the associated hardware address is unknown.  Defaults
+	to 3.
+
+ucast_solicit - INTEGER
+	The maximum number of unicast probes in PROBE state, when
+	the hardware address is being reconfirmed.  Defaults to 3.
 
 app_solicit - INTEGER
 	The maximum number of probes to send to the user space ARP daemon
 	via netlink before dropping back to multicast probes (see
-	mcast_solicit).  Defaults to 0.
+	mcast_resolicit).  Defaults to 0.
+
+mcast_resolicit - INTEGER
+	The maximum number of multicast probes after unicast and
+	app probes in PROBE state.  Defaults to 0.
 
 disable_policy - BOOLEAN
 	Disable IPSEC policy (SPD) for this interface
@@ -1191,6 +1224,14 @@ auto_flowlabels - BOOLEAN
 	FALSE: disabled
 	Default: false
 
+flowlabel_state_ranges - BOOLEAN
+	Split the flow label number space into two ranges. 0-0x7FFFF is
+	reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF
+	is reserved for stateless flow labels as described in RFC6437.
+	TRUE: enabled
+	FALSE: disabled
+	Default: true
+
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
 	echo reply
@@ -1198,6 +1239,17 @@ anycast_src_echo_reply - BOOLEAN
 	FALSE: disabled
 	Default: FALSE
 
+idgen_delay - INTEGER
+	Controls the delay in seconds after which time to retry
+	privacy stable address generation if a DAD conflict is
+	detected.
+	Default: 1 (as specified in RFC7217)
+
+idgen_retries - INTEGER
+	Controls the number of retries to generate a stable privacy
+	address if a DAD conflict is detected.
+	Default: 3 (as specified in RFC7217)
+
 mld_qrv - INTEGER
 	Controls the MLD query robustness variable (see RFC3810 9.1).
 	Default: 2 (as specified by RFC3810 9.1)
@@ -1518,6 +1570,20 @@ use_optimistic - BOOLEAN
 		0: disabled (default)
 		1: enabled
 
+stable_secret - IPv6 address
+	This IPv6 address will be used as a secret to generate IPv6
+	addresses for link-local addresses and autoconfigured
+	ones. All addresses generated after setting this secret will
+	be stable privacy ones by default. This can be changed via the
+	addrgenmode ip-link. conf/default/stable_secret is used as the
+	secret for the namespace, the interface specific ones can
+	overwrite that. Writes to conf/all/stable_secret are refused.
+
+	It is recommended to generate this secret during installation
+	of a system and keep it stable after that.
+
+	By default the stable secret is unset.
+
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
index 7a3c04729591..3ba709531adb 100644
--- a/Documentation/networking/ipvs-sysctl.txt
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -22,6 +22,27 @@ backup_only - BOOLEAN
 	If set, disable the director function while the server is
 	in backup mode to avoid packet loops for DR/TUN methods.
 
+conn_reuse_mode - INTEGER
+	1 - default
+
+	Controls how ipvs will deal with connections for which port
+	reuse is detected. It is a bitmap, with the values being:
+
+	0: disable any special handling on port reuse. The new
+	connection will be delivered to the same real server that was
+	servicing the previous connection. This will effectively
+	disable expire_nodest_conn.
+
+	bit 1: enable rescheduling of new connections when it is safe.
+	That is, whenever expire_nodest_conn and for TCP sockets, when
+	the connection is in TIME_WAIT state (which is only possible if
+	you use NAT mode).
+
+	bit 2: it is bit 1 plus, for TCP connections, when connections
+	are in FIN_WAIT state, as this is the last state seen by load
+	balancer in Direct Routing mode. This bit helps on adding new
+	real servers to a very busy cluster.
+
 conntrack - BOOLEAN
 	0 - disabled (default)
 	not 0 - enabled
diff --git a/Documentation/networking/ixgb.txt b/Documentation/networking/ixgb.txt
index 1e0c045e89f7..9b4a10a1cf50 100644
--- a/Documentation/networking/ixgb.txt
+++ b/Documentation/networking/ixgb.txt
@@ -39,7 +39,7 @@ Channel Bonding documentation can be found in the Linux kernel source:
 
 The driver information previously displayed in the /proc filesystem is not
 supported in this release.  Alternatively, you can use ethtool (version 1.6
-or later), lspci, and ifconfig to obtain the same information.
+or later), lspci, and iproute2 to obtain the same information.
 
 Instructions on updating ethtool can be found in the section "Additional
 Configurations" later in this document.
@@ -90,7 +90,7 @@ select m for "Intel(R) PRO/10GbE support" located at:
 3. Assign an IP address to the interface by entering the following, where
    x is the interface number:
 
-     ifconfig ethx <IP_address>
+     ip addr add <IP_address> dev ethx
 
 4. Verify that the interface works. Enter the following, where <IP_address>
    is the IP address for another machine on the same subnet as the interface
@@ -177,7 +177,7 @@ NOTE: These changes are only suggestions, and serve as a starting point for
       tuning your network performance.
 
 The changes are made in three major ways, listed in order of greatest effect:
-- Use ifconfig to modify the mtu (maximum transmission unit) and the txqueuelen
+- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen
   parameter.
 - Use sysctl to modify /proc parameters (essentially kernel tuning)
 - Use setpci to modify the MMRBC field in PCI-X configuration space to increase
@@ -202,7 +202,7 @@ setpci -d 8086:1a48 e6.b=2e
 # to change as well.
 # set the txqueuelen
 # your ixgb adapter should be loaded as eth1 for this to work, change if needed
-ifconfig eth1 mtu 9000 txqueuelen 1000 up
+ip li set dev eth1 mtu 9000 txqueuelen 1000 up
 # call the sysctl utility to modify /proc/sys entries
 sysctl -p ./sysctl_ixgb.conf
 - END ixgb_perf.sh
@@ -297,10 +297,10 @@ Additional Configurations
   ------------
   The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
   enabled by changing the MTU to a value larger than the default of 1500.
-  The maximum value for the MTU is 16114.  Use the ifconfig command to
+  The maximum value for the MTU is 16114.  Use the ip command to
   increase the MTU size.  For example:
 
-        ifconfig ethx mtu 9000 up
+        ip li set dev ethx mtu 9000
 
   The maximum MTU setting for Jumbo Frames is 16114.  This value coincides
   with the maximum Jumbo Frames size of 16128.
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt
index 0ace6e776ac8..6f0cb57b59c6 100644
--- a/Documentation/networking/ixgbe.txt
+++ b/Documentation/networking/ixgbe.txt
@@ -70,10 +70,10 @@ Avago      1000BASE-T SFP                                    ABCU-5710RZ
 82599-based adapters support all passive and active limiting direct attach
 cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications.
 
-Laser turns off for SFP+ when ifconfig down
+Laser turns off for SFP+ when device is down
 -------------------------------------------
-"ifconfig down" turns off the laser for 82599-based SFP+ fiber adapters.
-"ifconfig up" turns on the laser.
+"ip link set down" turns off the laser for 82599-based SFP+ fiber adapters.
+"ip link set up" turns on the laser.
 
 
 82598-BASED ADAPTERS
@@ -213,13 +213,13 @@ Additional Configurations
   ------------
   The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
   enabled by changing the MTU to a value larger than the default of 1500.
-  The maximum value for the MTU is 16110.  Use the ifconfig command to
+  The maximum value for the MTU is 9710.  Use the ip command to
   increase the MTU size.  For example:
 
-        ifconfig ethx mtu 9000 up
+        ip link set dev ethx mtu 9000
 
-  The maximum MTU setting for Jumbo Frames is 16110.  This value coincides
-  with the maximum Jumbo Frames size of 16128.
+  The maximum MTU setting for Jumbo Frames is 9710.  This value coincides
+  with the maximum Jumbo Frames size of 9728.
 
   Generic Receive Offload, aka GRO
   --------------------------------
diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
new file mode 100644
index 000000000000..9ed15f86c17c
--- /dev/null
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -0,0 +1,29 @@
+/proc/sys/net/mpls/* Variables:
+
+platform_labels - INTEGER
+	Number of entries in the platform label table.  It is not
+	possible to configure forwarding for label values equal to or
+	greater than the number of platform labels.
+
+	A dense utilization of the entries in the platform label table
+	is possible and expected as the platform labels are locally
+	allocated.
+
+	If the number of platform label table entries is set to 0 no
+	label will be recognized by the kernel and mpls forwarding
+	will be disabled.
+
+	Reducing this value will remove all label routing entries that
+	no longer fit in the table.
+
+	Possible values: 0 - 1048575
+	Default: 0
+
+conf/<interface>/input - BOOL
+	Control whether packets can be input on this interface.
+
+	If disabled, packets will be discarded without further
+	processing.
+
+	0 - disabled (default)
+	not 0 - enabled
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index a6d7cb91069e..daa015af16a0 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -440,9 +440,10 @@ and the following flags apply:
 +++ Capture process:
      from include/linux/if_packet.h
 
-     #define TP_STATUS_COPY          2 
-     #define TP_STATUS_LOSING        4 
-     #define TP_STATUS_CSUMNOTREADY  8 
+     #define TP_STATUS_COPY          (1 << 1)
+     #define TP_STATUS_LOSING        (1 << 2)
+     #define TP_STATUS_CSUMNOTREADY  (1 << 3)
+     #define TP_STATUS_CSUM_VALID    (1 << 7)
 
 TP_STATUS_COPY        : This flag indicates that the frame (and associated
                         meta information) has been truncated because it's 
@@ -466,6 +467,12 @@ TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which
                         reading the packet we should not try to check the 
                         checksum. 
 
+TP_STATUS_CSUM_VALID  : This flag indicates that at least the transport
+                        header checksum of the packet has been already
+                        validated on the kernel side. If the flag is not set
+                        then we are free to check the checksum by ourselves
+                        provided that TP_STATUS_CSUMNOTREADY is also not set.
+
 for convenience there are also the following defines:
 
      #define TP_STATUS_KERNEL        0
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index 6915c6b27869..f4be85e96005 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -1,15 +1,13 @@
 
 
-                  HOWTO for the linux packet generator 
+                  HOWTO for the linux packet generator
                   ------------------------------------
 
-Date: 041221
-
-Enable CONFIG_NET_PKTGEN to compile and build pktgen.o either in kernel
-or as module. Module is preferred. insmod pktgen if needed. Once running
-pktgen creates a thread on each CPU where each thread has affinity to its CPU.
-Monitoring and controlling is done via /proc. Easiest to select a suitable 
-a sample script and configure.
+Enable CONFIG_NET_PKTGEN to compile and build pktgen either in-kernel
+or as a module.  A module is preferred; modprobe pktgen if needed.  Once
+running, pktgen creates a thread for each CPU with affinity to that CPU.
+Monitoring and controlling is done via /proc.  It is easiest to select a
+suitable sample script and configure that.
 
 On a dual CPU:
 
@@ -27,7 +25,7 @@ For monitoring and control pktgen creates:
 Tuning NIC for max performance
 ==============================
 
-The default NIC setting are (likely) not tuned for pktgen's artificial
+The default NIC settings are (likely) not tuned for pktgen's artificial
 overload type of benchmarking, as this could hurt the normal use-case.
 
 Specifically increasing the TX ring buffer in the NIC:
@@ -35,65 +33,86 @@ Specifically increasing the TX ring buffer in the NIC:
 
 A larger TX ring can improve pktgen's performance, while it can hurt
 in the general case, 1) because the TX ring buffer might get larger
-than the CPUs L1/L2 cache, 2) because it allow more queueing in the
+than the CPU's L1/L2 cache, 2) because it allows more queueing in the
 NIC HW layer (which is bad for bufferbloat).
 
-One should be careful to conclude, that packets/descriptors in the HW
+One should hesitate to conclude that packets/descriptors in the HW
 TX ring cause delay.  Drivers usually delay cleaning up the
-ring-buffers (for various performance reasons), thus packets stalling
-the TX ring, might just be waiting for cleanup.
+ring-buffers for various performance reasons, and packets stalling
+the TX ring might just be waiting for cleanup.
 
-This cleanup issues is specifically the case, for the driver ixgbe
-(Intel 82599 chip).  This driver (ixgbe) combine TX+RX ring cleanups,
+This cleanup issue is specifically the case for the driver ixgbe
+(Intel 82599 chip).  This driver (ixgbe) combines TX+RX ring cleanups,
 and the cleanup interval is affected by the ethtool --coalesce setting
 of parameter "rx-usecs".
 
-For ixgbe use e.g "30" resulting in approx 33K interrupts/sec (1/30*10^6):
+For ixgbe use e.g. "30" resulting in approx 33K interrupts/sec (1/30*10^6):
  # ethtool -C ethX rx-usecs 30
 
 
-Viewing threads
-===============
-/proc/net/pktgen/kpktgend_0 
-Name: kpktgend_0  max_before_softirq: 10000
-Running: 
-Stopped: eth1 
-Result: OK: max_before_softirq=10000
+Kernel threads
+==============
+Pktgen creates a thread for each CPU with affinity to that CPU.
+Each thread is controlled through the procfile /proc/net/pktgen/kpktgend_X.
+
+Example: /proc/net/pktgen/kpktgend_0
+
+ Running:
+ Stopped: eth4@0
+ Result: OK: add_device=eth4@0
+
+Most important are the devices assigned to the thread.
+
+The two basic thread commands are:
+ * add_device DEVICE@NAME -- adds a single device
+ * rem_device_all         -- remove all associated devices
+
+When adding a device to a thread, a corresponding procfile is created
+which is used for configuring this device. Thus, device names need to
+be unique.
 
-Most important the devices assigned to thread. Note! A device can only belong 
-to one thread.
+To support adding the same device to multiple threads, which is useful
+with multi queue NICs, the device naming scheme is extended with "@":
+ device@something
 
+The part after "@" can be anything, but it is customary to use the thread
+number.
 
 Viewing devices
 ===============
 
-Parm section holds configured info. Current hold running stats. 
-Result is printed after run or after interruption. Example:
+The Params section holds configured information.  The Current section
+holds running statistics.  The Result is printed after a run or after
+interruption.  Example:
 
-/proc/net/pktgen/eth1       
+/proc/net/pktgen/eth4@0
 
-Params: count 10000000  min_pkt_size: 60  max_pkt_size: 60
-     frags: 0  delay: 0  clone_skb: 1000000  ifname: eth1
+ Params: count 100000  min_pkt_size: 60  max_pkt_size: 60
+     frags: 0  delay: 0  clone_skb: 64  ifname: eth4@0
      flows: 0 flowlen: 0
-     dst_min: 10.10.11.2  dst_max: 
-     src_min:   src_max: 
-     src_mac: 00:00:00:00:00:00  dst_mac: 00:04:23:AC:FD:82
-     udp_src_min: 9  udp_src_max: 9  udp_dst_min: 9  udp_dst_max: 9
-     src_mac_count: 0  dst_mac_count: 0 
-     Flags: 
-Current:
-     pkts-sofar: 10000000  errors: 39664
-     started: 1103053986245187us  stopped: 1103053999346329us idle: 880401us
-     seq_num: 10000011  cur_dst_mac_offset: 0  cur_src_mac_offset: 0
-     cur_saddr: 0x10a0a0a  cur_daddr: 0x20b0a0a
-     cur_udp_dst: 9  cur_udp_src: 9
+     queue_map_min: 0  queue_map_max: 0
+     dst_min: 192.168.81.2  dst_max:
+     src_min:   src_max:
+     src_mac: 90:e2:ba:0a:56:b4 dst_mac: 00:1b:21:3c:9d:f8
+     udp_src_min: 9  udp_src_max: 109  udp_dst_min: 9  udp_dst_max: 9
+     src_mac_count: 0  dst_mac_count: 0
+     Flags: UDPSRC_RND  NO_TIMESTAMP  QUEUE_MAP_CPU
+ Current:
+     pkts-sofar: 100000  errors: 0
+     started: 623913381008us  stopped: 623913396439us idle: 25us
+     seq_num: 100001  cur_dst_mac_offset: 0  cur_src_mac_offset: 0
+     cur_saddr: 192.168.8.3  cur_daddr: 192.168.81.2
+     cur_udp_dst: 9  cur_udp_src: 42
+     cur_queue_map: 0
      flows: 0
-Result: OK: 13101142(c12220741+d880401) usec, 10000000 (60byte,0frags)
-  763292pps 390Mb/sec (390805504bps) errors: 39664
+ Result: OK: 15430(c15405+d25) usec, 100000 (60byte,0frags)
+  6480562pps 3110Mb/sec (3110669760bps) errors: 0
 
-Configuring threads and devices
-================================
-This is done via the /proc interface easiest done via pgset in the scripts
+
+Configuring devices
+===================
+This is done via the /proc interface, and most easily done via pgset
+as defined in the sample scripts.
 
 Examples:
 
@@ -126,7 +145,7 @@ Examples:
                          To select queue 1 of a given device,
                          use queue_map_min=1 and queue_map_max=1
 
- pgset "src_mac_count 1" Sets the number of MACs we'll range through.  
+ pgset "src_mac_count 1" Sets the number of MACs we'll range through.
                          The 'minimum' MAC is what you set with srcmac.
 
  pgset "dst_mac_count 1" Sets the number of MACs we'll range through.
@@ -145,6 +164,7 @@ Examples:
                               UDPCSUM,
                               IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
                               NODE_ALLOC # node specific memory allocation
+                              NO_TIMESTAMP # disable timestamping
 
  pgset spi SPI_VALUE     Set specific SA used to transform packet.
 
@@ -192,39 +212,63 @@ Examples:
  pgset "rate 300M"        set rate to 300 Mb/s
  pgset "ratep 1000000"    set rate to 1Mpps
 
-Example scripts
-===============
+ pgset "xmit_mode netif_receive"  RX inject into stack netif_receive_skb()
+				  Works with "burst" but not with "clone_skb".
+				  Default xmit_mode is "start_xmit".
+
+Sample scripts
+==============
+
+A collection of tutorial scripts and helpers for pktgen is in the
+samples/pktgen directory. The helper parameters.sh file supports easy
+and consistent parameter parsing across the sample scripts.
+
+Usage example and help:
+ ./pktgen_sample01_simple.sh -i eth4 -m 00:1B:21:3C:9D:F8 -d 192.168.8.2
+
+Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX
+  -i : ($DEV)       output interface/device (required)
+  -s : ($PKT_SIZE)  packet size
+  -d : ($DEST_IP)   destination IP
+  -m : ($DST_MAC)   destination MAC-addr
+  -t : ($THREADS)   threads to start
+  -c : ($SKB_CLONE) SKB clones send before alloc new SKB
+  -b : ($BURST)     HW level bursting of SKBs
+  -v : ($VERBOSE)   verbose
+  -x : ($DEBUG)     debug
 
-A collection of small tutorial scripts for pktgen is in examples dir.
+The global variables being set are also listed.  E.g. the required
+interface/device parameter "-i" sets variable $DEV.  Copy the
+pktgen_sampleXX scripts and modify them to fit your own needs.
+
+The old scripts:
 
-pktgen.conf-1-1                  # 1 CPU 1 dev 
 pktgen.conf-1-2                  # 1 CPU 2 dev
-pktgen.conf-2-1                  # 2 CPU's 1 dev 
-pktgen.conf-2-2                  # 2 CPU's 2 dev
 pktgen.conf-1-1-rdos             # 1 CPU 1 dev w. route DoS 
 pktgen.conf-1-1-ip6              # 1 CPU 1 dev ipv6
 pktgen.conf-1-1-ip6-rdos         # 1 CPU 1 dev ipv6  w. route DoS
 pktgen.conf-1-1-flows            # 1 CPU 1 dev multiple flows.
 
-Run in shell: ./pktgen.conf-X-Y It does all the setup including sending. 
-
 
 Interrupt affinity
 ===================
-Note when adding devices to a specific CPU there good idea to also assign 
-/proc/irq/XX/smp_affinity so the TX-interrupts gets bound to the same CPU.
-as this reduces cache bouncing when freeing skb's.
+Note that when adding devices to a specific CPU it is a good idea to
+also assign /proc/irq/XX/smp_affinity so that the TX interrupts are bound
+to the same CPU.  This reduces cache bouncing when freeing skbs.
+
+In addition, use the device flag QUEUE_MAP_CPU, which maps the SKB's TX
+queue to the running thread's CPU (directly from smp_processor_id()).
 
 Enable IPsec
 ============
-Default IPsec transformation with ESP encapsulation plus Transport mode
-could be enabled by simply setting:
+Default IPsec transformation with ESP encapsulation plus transport mode
+can be enabled by simply setting:
 
 pgset "flag IPSEC"
 pgset "flows 1"
 
 To avoid breaking existing testbed scripts for using AH type and tunnel mode,
-user could use "pgset spi SPI_VALUE" to specify which formal of transformation
+you can use "pgset spi SPI_VALUE" to specify which transformation mode
 to employ.
 
 
@@ -235,18 +279,19 @@ Current commands and configuration options
 
 start
 stop
+reset
 
 ** Thread commands:
 
 add_device
 rem_device_all
-max_before_softirq
 
 
 ** Device commands:
 
 count
 clone_skb
+burst
 debug
 
 frags
@@ -255,10 +300,17 @@ delay
 src_mac_count
 dst_mac_count
 
-pkt_size 
+pkt_size
 min_pkt_size
 max_pkt_size
 
+queue_map_min
+queue_map_max
+skb_priority
+
+tos           (ipv4)
+traffic_class (ipv6)
+
 mpls
 
 udp_src_min
@@ -267,6 +319,8 @@ udp_src_max
 udp_dst_min
 udp_dst_max
 
+node
+
 flag
   IPSRC_RND
   IPDST_RND
@@ -285,6 +339,9 @@ flag
   UDPCSUM
   IPSEC
   NODE_ALLOC
+  NO_TIMESTAMP
+
+spi (ipsec)
 
 dst_min
 dst_max
@@ -297,8 +354,10 @@ src_mac
 
 clear_counters
 
-dst6
 src6
+dst6
+dst6_max
+dst6_min
 
 flows
 flowlen
@@ -306,6 +365,17 @@ flowlen
 rate
 ratep
 
+xmit_mode <start_xmit|netif_receive>
+
+vlan_cfi
+vlan_id
+vlan_p
+
+svlan_cfi
+svlan_id
+svlan_p
+
+
 References:
 ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
 ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
index c67077cbeb80..e1a3d59bbe0f 100644
--- a/Documentation/networking/rds.txt
+++ b/Documentation/networking/rds.txt
@@ -62,11 +62,10 @@ Socket Interface
 ================
 
   AF_RDS, PF_RDS, SOL_RDS
-        These constants haven't been assigned yet, because RDS isn't in
-        mainline yet. Currently, the kernel module assigns some constant
-        and publishes it to user space through two sysctl files
-                /proc/sys/net/rds/pf_rds
-                /proc/sys/net/rds/sol_rds
+	AF_RDS and PF_RDS are the domain type to be used with socket(2)
+	to create RDS sockets. SOL_RDS is the socket-level to be used
+	with setsockopt(2) and getsockopt(2) for RDS specific socket
+	options.
 
   fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
         This creates a new, unbound RDS socket.
diff --git a/Documentation/networking/s2io.txt b/Documentation/networking/s2io.txt
index d2a9f43b5546..0362a42f7cf4 100644
--- a/Documentation/networking/s2io.txt
+++ b/Documentation/networking/s2io.txt
@@ -38,7 +38,7 @@ The corresponding adapter's LED will blink multiple times.
 
 3.	Features supported:
 a. Jumbo frames. Xframe I/II supports MTU up to 9600 bytes,
-modifiable using ifconfig command.
+modifiable using ip command.
 
 b. Offloads. Supports checksum offload(TCP/UDP/IP) on transmit
 and receive, TSO.
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 99ca40e8e810..59f4db2a0c85 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -282,7 +282,7 @@ following is true:
 
 - The current CPU's queue head counter >= the recorded tail counter
   value in rps_dev_flow[i]
-- The current CPU is unset (equal to RPS_NO_CPU)
+- The current CPU is unset (>= nr_cpu_ids)
 - The current CPU is offline
 
 After this check, the packet is sent to the (possibly updated) current
@@ -421,6 +421,15 @@ best CPUs to share a given queue are probably those that share the cache
 with the CPU that processes transmit completions for that queue
 (transmit interrupts).
 
+Per TX Queue rate limitation:
+=============================
+
+These are rate-limitation mechanisms implemented by HW, where currently
+a max-rate attribute is supported, by setting a Mbps value to
+
+/sys/class/net/<dev>/queues/tx-<n>/tx_maxrate
+
+A value of zero means disabled, and this is the default.
 
 Further Information
 ===================
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index f981a9295a39..c5d7ade10ff2 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -1,59 +1,360 @@
-Switch (and switch-ish) device drivers HOWTO
-===========================
-
-Please note that the word "switch" is here used in very generic meaning.
-This include devices supporting L2/L3 but also various flow offloading chips,
-including switches embedded into SR-IOV NICs.
-
-Lets describe a topology a bit. Imagine the following example:
-
-       +----------------------------+    +---------------+
-       |     SOME switch chip       |    |      CPU      |
-       +----------------------------+    +---------------+
-       port1 port2 port3 port4 MNGMNT    |     PCI-E     |
-         |     |     |     |     |       +---------------+
-        PHY   PHY    |     |     |         |  NIC0 NIC1
-                     |     |     |         |   |    |
-                     |     |     +- PCI-E -+   |    |
-                     |     +------- MII -------+    |
-                     +------------- MII ------------+
-
-In this example, there are two independent lines between the switch silicon
-and CPU. NIC0 and NIC1 drivers are not aware of a switch presence. They are
-separate from the switch driver. SOME switch chip is by managed by a driver
-via PCI-E device MNGMNT. Note that MNGMNT device, NIC0 and NIC1 may be
-connected to some other type of bus.
-
-Now, for the previous example show the representation in kernel:
-
-       +----------------------------+    +---------------+
-       |     SOME switch chip       |    |      CPU      |
-       +----------------------------+    +---------------+
-       sw0p0 sw0p1 sw0p2 sw0p3 MNGMNT    |     PCI-E     |
-         |     |     |     |     |       +---------------+
-        PHY   PHY    |     |     |         |  eth0 eth1
-                     |     |     |         |   |    |
-                     |     |     +- PCI-E -+   |    |
-                     |     +------- MII -------+    |
-                     +------------- MII ------------+
-
-Lets call the example switch driver for SOME switch chip "SOMEswitch". This
-driver takes care of PCI-E device MNGMNT. There is a netdevice instance sw0pX
-created for each port of a switch. These netdevices are instances
-of "SOMEswitch" driver. sw0pX netdevices serve as a "representation"
-of the switch chip. eth0 and eth1 are instances of some other existing driver.
-
-The only difference of the switch-port netdevice from the ordinary netdevice
-is that is implements couple more NDOs:
-
-  ndo_switch_parent_id_get - This returns the same ID for two port netdevices
-			     of the same physical switch chip. This is
-			     mandatory to be implemented by all switch drivers
-			     and serves the caller for recognition of a port
-			     netdevice.
-  ndo_switch_parent_* - Functions that serve for a manipulation of the switch
-			chip itself (it can be though of as a "parent" of the
-			port, therefore the name). They are not port-specific.
-			Caller might use arbitrary port netdevice of the same
-			switch and it will make no difference.
-  ndo_switch_port_* - Functions that serve for a port-specific manipulation.
+Ethernet switch device driver model (switchdev)
+===============================================
+Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
+
+
+The Ethernet switch device driver model (switchdev) is an in-kernel driver
+model for switch devices which offload the forwarding (data) plane from the
+kernel.
+
+Figure 1 is a block diagram showing the components of the switchdev model for
+an example setup using a data-center-class switch ASIC chip.  Other setups
+with SR-IOV or soft switches, such as OVS, are possible.
+
+
+                             User-space tools                                 
+                                                                              
+       user space                   |                                         
+      +-------------------------------------------------------------------+   
+       kernel                       | Netlink                                 
+                                    |                                         
+                     +--------------+-------------------------------+         
+                     |         Network stack                        |         
+                     |           (Linux)                            |         
+                     |                                              |         
+                     +----------------------------------------------+         
+                                                                              
+                           sw1p2     sw1p4     sw1p6
+                      sw1p1  +  sw1p3  +  sw1p5  +          eth1             
+                        +    |    +    |    +    |            +               
+                        |    |    |    |    |    |            |               
+                     +--+----+----+----+-+--+----+---+  +-----+-----+         
+                     |         Switch driver         |  |    mgmt   |         
+                     |        (this document)        |  |   driver  |         
+                     |                               |  |           |         
+                     +--------------+----------------+  +-----------+         
+                                    |                                         
+       kernel                       | HW bus (eg PCI)                         
+      +-------------------------------------------------------------------+   
+       hardware                     |                                         
+                     +--------------+---+------------+                        
+                     |         Switch device (sw1)   |                        
+                     |  +----+                       +--------+               
+                     |  |    v offloaded data path   | mgmt port              
+                     |  |    |                       |                        
+                     +--|----|----+----+----+----+---+                        
+                        |    |    |    |    |    |                            
+                        +    +    +    +    +    +                            
+                       p1   p2   p3   p4   p5   p6
+                                       
+                             front-panel ports                                
+                                                                              
+
+                                    Fig 1.
+
+
+Include Files
+-------------
+
+#include <linux/netdevice.h>
+#include <net/switchdev.h>
+
+
+Configuration
+-------------
+
+Use "depends on NET_SWITCHDEV" in driver's Kconfig to ensure switchdev model
+support is built for driver.
+
+
+Switch Ports
+------------
+
+On switchdev driver initialization, the driver will allocate and register a
+struct net_device (using register_netdev()) for each enumerated physical switch
+port, called the port netdev.  A port netdev is the software representation of
+the physical port and provides a conduit for control traffic to/from the
+controller (the kernel) and the network, as well as an anchor point for higher
+level constructs such as bridges, bonds, VLANs, tunnels, and L3 routers.  Using
+standard netdev tools (iproute2, ethtool, etc), the port netdev can also
+provide to the user access to the physical properties of the switch port such
+as PHY link state and I/O statistics.
+
+There is (currently) no higher-level kernel object for the switch beyond the
+port netdevs.  All of the switchdev driver ops are netdev ops or switchdev ops.
+
+A switch management port is outside the scope of the switchdev driver model.
+Typically, the management port is not participating in offloaded data plane and
+is loaded with a different driver, such as a NIC driver, on the management port
+device.
+
+Port Netdev Naming
+^^^^^^^^^^^^^^^^^^
+
+Udev rules should be used for port netdev naming, using some unique attribute
+of the port as a key, for example the port MAC address or the port PHYS name.
+Hard-coding of kernel netdev names within the driver is discouraged; let the
+kernel pick the default netdev name, and let udev set the final name based on a
+port attribute.
+
+Using port PHYS name (ndo_get_phys_port_name) for the key is particularly
+useful for dynamically-named ports where the device names its ports based on
+external configuration.  For example, if a physical 40G port is split logically
+into 4 10G ports, resulting in 4 port netdevs, the device can give a unique
+name for each port using port PHYS name.  The udev rule would be:
+
+SUBSYSTEM=="net", ACTION=="add", DRIVER=="<driver>", ATTR{phys_port_name}!="", \
+	NAME="$attr{phys_port_name}"
+
+Suggested naming convention is "swXpYsZ", where X is the switch name or ID, Y
+is the port name or ID, and Z is the sub-port name or ID.  For example, sw1p1s0
+would be sub-port 0 on port 1 on switch 1.
+
+Switch ID
+^^^^^^^^^
+
+The switchdev driver must implement the switchdev op switchdev_port_attr_get
+for SWITCHDEV_ATTR_PORT_PARENT_ID for each port netdev, returning the same
+physical ID for each port of a switch.  The ID must be unique between switches
+on the same system.  The ID does not need to be unique between switches on
+different systems.
+
+The switch ID is used to locate ports on a switch and to know if aggregated
+ports belong to the same switch.
+
+Port Features
+^^^^^^^^^^^^^
+
+NETIF_F_NETNS_LOCAL
+
+If the switchdev driver (and device) only supports offloading of the default
+network namespace (netns), the driver should set this feature flag to prevent
+the port netdev from being moved out of the default netns.  A netns-aware
+driver/device would not set this flag and be responsible for partitioning
+hardware to preserve netns containment.  This means hardware cannot forward
+traffic from a port in one namespace to another port in another namespace.
+
+Port Topology
+^^^^^^^^^^^^^
+
+The port netdevs representing the physical switch ports can be organized into
+higher-level switching constructs.  The default construct is a standalone
+router port, used to offload L3 forwarding.  Two or more ports can be bonded
+together to form a LAG.  Two or more ports (or LAGs) can be bridged to bridge
+L2 networks.  VLANs can be applied to sub-divide L2 networks.  L2-over-L3
+tunnels can be built on ports.  These constructs are built using standard Linux
+tools such as the bridge driver, the bonding/team drivers, and netlink-based
+tools such as iproute2.
+
+The switchdev driver can know a particular port's position in the topology by
+monitoring NETDEV_CHANGEUPPER notifications.  For example, a port moved into a
+bond will see its upper master change.  If that bond is moved into a bridge,
+the bond's upper master will change.  And so on.  The driver will track such
+movements to know what position a port is in in the overall topology by
+registering for netdevice events and acting on NETDEV_CHANGEUPPER.
+
+L2 Forwarding Offload
+---------------------
+
+The idea is to offload the L2 data forwarding (switching) path from the kernel
+to the switchdev device by mirroring bridge FDB entries down to the device.  An
+FDB entry is the {port, MAC, VLAN} tuple forwarding destination.
+
+To offload L2 bridging, the switchdev driver/device should support:
+
+	- Static FDB entries installed on a bridge port
+	- Notification of learned/forgotten src mac/vlans from device
+	- STP state changes on the port
+	- VLAN flooding of multicast/broadcast and unknown unicast packets
+
+Static FDB Entries
+^^^^^^^^^^^^^^^^^^
+
+The switchdev driver should implement ndo_fdb_add, ndo_fdb_del and ndo_fdb_dump
+to support static FDB entries installed to the device.  Static bridge FDB
+entries are installed, for example, using iproute2 bridge cmd:
+
+	bridge fdb add ADDR dev DEV [vlan VID] [self]
+
+The driver should use the helper switchdev_port_fdb_xxx ops for ndo_fdb_xxx
+ops, and handle add/delete/dump of SWITCHDEV_OBJ_PORT_FDB object using
+switchdev_port_obj_xxx ops.
+
+XXX: what should be done if offloading this rule to hardware fails (for
+example, due to full capacity in hardware tables) ?
+
+Note: by default, the bridge does not filter on VLAN and only bridges untagged
+traffic.  To enable VLAN support, turn on VLAN filtering:
+
+	echo 1 >/sys/class/net/<bridge>/bridge/vlan_filtering
+
+Notification of Learned/Forgotten Source MAC/VLANs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The switch device will learn/forget source MAC address/VLAN on ingress packets
+and notify the switch driver of the mac/vlan/port tuples.  The switch driver,
+in turn, will notify the bridge driver using the switchdev notifier call:
+
+	err = call_switchdev_notifiers(val, dev, info);
+
+Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when
+forgetting, and info points to a struct switchdev_notifier_fdb_info.  On
+SWITCHDEV_FDB_ADD, the bridge driver will install the FDB entry into the
+bridge's FDB and mark the entry as NTF_EXT_LEARNED.  The iproute2 bridge
+command will label these entries "offload":
+
+	$ bridge fdb
+	52:54:00:12:35:01 dev sw1p1 master br0 permanent
+	00:02:00:00:02:00 dev sw1p1 master br0 offload
+	00:02:00:00:02:00 dev sw1p1 self
+	52:54:00:12:35:02 dev sw1p2 master br0 permanent
+	00:02:00:00:03:00 dev sw1p2 master br0 offload
+	00:02:00:00:03:00 dev sw1p2 self
+	33:33:00:00:00:01 dev eth0 self permanent
+	01:00:5e:00:00:01 dev eth0 self permanent
+	33:33:ff:00:00:00 dev eth0 self permanent
+	01:80:c2:00:00:0e dev eth0 self permanent
+	33:33:00:00:00:01 dev br0 self permanent
+	01:00:5e:00:00:01 dev br0 self permanent
+	33:33:ff:12:35:01 dev br0 self permanent
+
+Learning on the port should be disabled on the bridge using the bridge command:
+
+	bridge link set dev DEV learning off
+
+Learning on the device port should be enabled, as well as learning_sync:
+
+	bridge link set dev DEV learning on self
+	bridge link set dev DEV learning_sync on self
+
+Learning_sync attribute enables syncing of the learned/forgotten FDB entry to
+the bridge's FDB.  It's possible, but not optimal, to enable learning on the
+device port and on the bridge port, and disable learning_sync.
+
+To support learning and learning_sync port attributes, the driver implements
+switchdev op switchdev_port_attr_get/set for SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS.
+The driver should initialize the attributes to the hardware defaults.
+
+FDB Ageing
+^^^^^^^^^^
+
+There are two FDB ageing models supported: 1) ageing by the device, and 2)
+ageing by the kernel.  Ageing by the device is preferred if many FDB entries
+are supported.  The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
+...) to age out the FDB entry.  In this model, ageing by the kernel should be
+turned off.  XXX: how to turn off ageing in kernel on a per-port basis or
+otherwise prevent the kernel from ageing out the FDB entry?
+
+In the kernel ageing model, the standard bridge ageing mechanism is used to age
+out stale FDB entries.  To keep an FDB entry "alive", the driver should refresh
+the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
+notification will reset the FDB entry's last-used time to now.  The driver
+should rate limit refresh notifications, for example, no more than once a
+second.  If the FDB entry expires, fdb_delete is called to remove entry from
+the device.
+
+STP State Change on Port
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Internally or with a third-party STP protocol implementation (e.g. mstpd), the
+bridge driver maintains the STP state for ports, and will notify the switch
+driver of STP state change on a port using the switchdev op
+switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_STP_UPDATE.
+
+State is one of BR_STATE_*.  The switch driver can use STP state updates to
+update ingress packet filter list for the port.  For example, if port is
+DISABLED, no packets should pass, but if port moves to BLOCKED, then STP BPDUs
+and other IEEE 01:80:c2:xx:xx:xx link-local multicast packets can pass.
+
+Note that STP BPDUs are untagged and STP state applies to all VLANs on the port
+so packet filters should be applied consistently across untagged and tagged
+VLANs on the port.
+
+Flooding L2 domain
+^^^^^^^^^^^^^^^^^^
+
+For a given L2 VLAN domain, the switch device should flood multicast/broadcast
+and unknown unicast packets to all ports in domain, if allowed by port's
+current STP state.  The switch driver, knowing which ports are within which
+vlan L2 domain, can program the switch device for flooding.  The packet should
+also be sent to the port netdev for processing by the bridge driver.  The
+bridge should not reflood the packet to the same ports the device flooded.
+XXX: the mechanism to avoid duplicate flood packets is being discussed.
+
+It is possible for the switch device to not handle flooding and push the
+packets up to the bridge driver for flooding.  This is not ideal as the number
+of ports scales in the L2 domain, as the device is much more efficient at
+flooding packets than software.
+
+IGMP Snooping
+^^^^^^^^^^^^^
+
+XXX: complete this section
+
+
+L3 Routing Offload
+------------------
+
+Offloading L3 routing requires that device be programmed with FIB entries from
+the kernel, with the device doing the FIB lookup and forwarding.  The device
+does a longest prefix match (LPM) on FIB entries matching route prefix and
+forwards the packet to the matching FIB entry's nexthop(s) egress ports.
+
+To program the device, the driver implements support for
+SWITCHDEV_OBJ_IPV[4|6]_FIB object using switchdev_port_obj_xxx ops.
+switchdev_port_obj_add is used for both adding a new FIB entry to the device,
+and modifying an existing entry on the device.
+
+XXX: Currently, only SWITCHDEV_OBJ_IPV4_FIB objects are supported.
+
+SWITCHDEV_OBJ_IPV4_FIB object passes:
+
+	struct switchdev_obj_ipv4_fib {         /* IPV4_FIB */
+		u32 dst;
+		int dst_len;
+		struct fib_info *fi;
+		u8 tos;
+		u8 type;
+		u32 nlflags;
+		u32 tb_id;
+	} ipv4_fib;
+
+to add/modify/delete IPv4 dst/dst_len prefix on table tb_id.  The *fi
+structure holds details on the route and route's nexthops.  *dev is one of the
+port netdevs mentioned in the route's nexthop list.  If the output port netdevs
+referenced in the route's nexthop list don't all have the same switch ID, the
+driver is not called to add/modify/delete the FIB entry.
+
+Routes offloaded to the device are labeled with "offload" in the ip route
+listing:
+
+	$ ip route show
+	default via 192.168.0.2 dev eth0
+	11.0.0.0/30 dev sw1p1  proto kernel  scope link  src 11.0.0.2 offload
+	11.0.0.4/30 via 11.0.0.1 dev sw1p1  proto zebra  metric 20 offload
+	11.0.0.8/30 dev sw1p2  proto kernel  scope link  src 11.0.0.10 offload
+	11.0.0.12/30 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
+	12.0.0.2  proto zebra  metric 30 offload
+		nexthop via 11.0.0.1  dev sw1p1 weight 1
+		nexthop via 11.0.0.9  dev sw1p2 weight 1
+	12.0.0.3 via 11.0.0.1 dev sw1p1  proto zebra  metric 20 offload
+	12.0.0.4 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
+	192.168.0.0/24 dev eth0  proto kernel  scope link  src 192.168.0.15
+
+XXX: add/mod/del IPv6 FIB API
+
+Nexthop Resolution
+^^^^^^^^^^^^^^^^^^
+
+The FIB entry's nexthop list contains the nexthop tuple (gateway, dev), but for
+the switch device to forward the packet with the correct dst mac address, the
+nexthop gateways must be resolved to the neighbor's mac address.  Neighbor mac
+address discovery comes via the ARP (or ND) process and is available via the
+arp_tbl neighbor table.  To resolve the route's nexthop gateways, the driver
+should trigger the kernel's neighbor resolution process.  See the rocker
+driver's rocker_port_ipv4_resolve() for an example.
+
+The driver can monitor for updates to arp_tbl using the netevent notifier
+NETEVENT_NEIGH_UPDATE.  The device can be programmed with resolved nexthops
+for the routes as arp_tbl updates.
diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt
index 70d6cf608251..f37814693ad3 100644
--- a/Documentation/networking/tc-actions-env-rules.txt
+++ b/Documentation/networking/tc-actions-env-rules.txt
@@ -8,14 +8,8 @@ For example if your action queues a packet to be processed later,
 or intentionally branches by redirecting a packet, then you need to
 clone the packet.
 
-There are certain fields in the skb tc_verd that need to be reset so we
-avoid loops, etc.  A few are generic enough that skb_act_clone()
-resets them for you, so invoke skb_act_clone() rather than skb_clone().
-
 2) If you munge any packet thou shalt call pskb_expand_head in the case
 someone else is referencing the skb. After that you "own" the skb.
-You must also tell us if it is ok to munge the packet (TC_OK2MUNGE),
-this way any action downstream can stomp on the packet.
 
 3) Dropping packets you don't own is a no-no. You simply return
 TC_ACT_SHOT to the caller and they will drop it.
diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt
index d727a3829100..53a726855e49 100644
--- a/Documentation/networking/udplite.txt
+++ b/Documentation/networking/udplite.txt
@@ -20,7 +20,7 @@
 	files/UDP-Lite-HOWTO.txt
 
    o The Wireshark UDP-Lite WiKi (with capture files):
-       http://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
+       https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
 
    o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt
 
diff --git a/Documentation/networking/vxge.txt b/Documentation/networking/vxge.txt
index bb76c667a476..abfec245f97c 100644
--- a/Documentation/networking/vxge.txt
+++ b/Documentation/networking/vxge.txt
@@ -39,7 +39,7 @@ iii) PCI-SIG's I/O Virtualization
 
 iv)  Jumbo frames
        X3100 Series supports MTU up to 9600 bytes, modifiable using
-       ifconfig command.
+       ip command.
 
 v)   Offloads supported: (Enabled by default)
        Checksum offload (TCP/UDP/IP) on transmit and receive paths