summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-class-net8
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-queues8
-rw-r--r--Documentation/devicetree/bindings/net/apm-xgene-enet.txt2
-rw-r--r--Documentation/networking/scaling.txt9
-rw-r--r--arch/arm64/boot/dts/apm/apm-mustang.dts4
-rw-r--r--arch/arm64/boot/dts/apm/apm-storm.dtsi25
-rw-r--r--drivers/net/can/cc770/cc770_platform.c2
-rw-r--r--drivers/net/can/grcan.c2
-rw-r--r--drivers/net/can/mscan/mpc5xxx_can.c2
-rw-r--r--drivers/net/can/sja1000/sja1000_platform.c2
-rw-r--r--drivers/net/can/xilinx_can.c2
-rw-r--r--drivers/net/dsa/Kconfig13
-rw-r--r--drivers/net/ethernet/aeroflex/greth.c2
-rw-r--r--drivers/net/ethernet/altera/altera_tse_main.c4
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_hw.h2
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.c52
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.h17
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c10
-rw-r--r--drivers/net/ethernet/apple/bmac.c2
-rw-r--r--drivers/net/ethernet/apple/mace.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c175
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h1
-rw-r--r--drivers/net/ethernet/ethoc.c2
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx.c2
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx_phy.c2
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c4
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c2
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/mii-fec.c4
-rw-r--r--drivers/net/ethernet/freescale/fsl_pq_mdio.c2
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c204
-rw-r--r--drivers/net/ethernet/freescale/gianfar.h32
-rw-r--r--drivers/net/ethernet/freescale/gianfar_ptp.c2
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c2
-rw-r--r--drivers/net/ethernet/freescale/xgmac_mdio.c98
-rw-r--r--drivers/net/ethernet/ibm/ehea/ehea_main.c4
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c2
-rw-r--r--drivers/net/ethernet/ibm/emac/mal.c2
-rw-r--r--drivers/net/ethernet/ibm/emac/rgmii.c2
-rw-r--r--drivers/net/ethernet/ibm/emac/tah.c2
-rw-r--r--drivers/net/ethernet/ibm/emac/zmii.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c6
-rw-r--r--drivers/net/ethernet/octeon/octeon_mgmt.c2
-rw-r--r--drivers/net/ethernet/rocker/rocker.c141
-rw-r--r--drivers/net/ethernet/rocker/rocker.h1
-rw-r--r--drivers/net/ethernet/ti/netcp_core.c2
-rw-r--r--drivers/net/ethernet/via/via-rhine.c2
-rw-r--r--drivers/net/ethernet/via/via-velocity.c2
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c2
-rw-r--r--drivers/net/phy/mdio-bcm-unimac.c2
-rw-r--r--drivers/net/phy/mdio-gpio.c2
-rw-r--r--drivers/net/phy/mdio-mux-gpio.c2
-rw-r--r--drivers/net/phy/mdio-mux-mmioreg.c2
-rw-r--r--drivers/net/phy/mdio-octeon.c2
-rw-r--r--drivers/net/vxlan.c150
-rw-r--r--drivers/net/wireless/orinoco/airport.c2
-rw-r--r--include/linux/bpf.h8
-rw-r--r--include/linux/igmp.h2
-rw-r--r--include/linux/mlx4/device.h17
-rw-r--r--include/linux/mlx4/qp.h14
-rw-r--r--include/linux/netdevice.h54
-rw-r--r--include/linux/rhashtable.h29
-rw-r--r--include/linux/tcp.h2
-rw-r--r--include/linux/udp.h2
-rw-r--r--include/net/inet6_hashtables.h2
-rw-r--r--include/net/inet_hashtables.h32
-rw-r--r--include/net/inet_sock.h28
-rw-r--r--include/net/ipv6.h4
-rw-r--r--include/net/netns/hash.h4
-rw-r--r--include/net/request_sock.h35
-rw-r--r--include/net/sock.h9
-rw-r--r--include/net/switchdev.h38
-rw-r--r--include/net/tcp.h57
-rw-r--r--include/uapi/linux/bpf.h15
-rw-r--r--include/uapi/linux/if_link.h4
-rw-r--r--kernel/bpf/core.c3
-rw-r--r--kernel/bpf/helpers.c24
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/bpf/verifier.c152
-rw-r--r--lib/rhashtable.c164
-rw-r--r--lib/test_rhashtable.c4
-rw-r--r--net/8021q/vlan.c16
-rw-r--r--net/bridge/br_netlink.c32
-rw-r--r--net/core/dev.c28
-rw-r--r--net/core/filter.c176
-rw-r--r--net/core/net-sysfs.c90
-rw-r--r--net/core/request_sock.c22
-rw-r--r--net/core/rtnetlink.c21
-rw-r--r--net/core/sock.c17
-rw-r--r--net/dccp/ipv4.c16
-rw-r--r--net/dccp/ipv6.c30
-rw-r--r--net/dsa/Kconfig3
-rw-r--r--net/dsa/slave.c9
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/igmp.c41
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/inet_diag.c122
-rw-r--r--net/ipv4/inet_hashtables.c61
-rw-r--r--net/ipv4/inet_timewait_sock.c1
-rw-r--r--net/ipv4/ip_sockglue.c28
-rw-r--r--net/ipv4/ping.c6
-rw-r--r--net/ipv4/syncookies.c22
-rw-r--r--net/ipv4/tcp_fastopen.c10
-rw-r--r--net/ipv4/tcp_input.c80
-rw-r--r--net/ipv4/tcp_ipv4.c18
-rw-r--r--net/ipv4/tcp_metrics.c20
-rw-r--r--net/ipv4/udp.c10
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/inet6_hashtables.c58
-rw-r--r--net/ipv6/ipv6_sockglue.c30
-rw-r--r--net/ipv6/mcast.c30
-rw-r--r--net/ipv6/syncookies.c12
-rw-r--r--net/ipv6/tcp_ipv6.c30
-rw-r--r--net/ipv6/udp.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_log_common.c2
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue_core.c2
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/xt_TPROXY.c18
-rw-r--r--net/netfilter/xt_socket.c34
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/switchdev/switchdev.c42
-rw-r--r--net/tipc/link.c6
-rw-r--r--net/tipc/name_table.c4
-rw-r--r--net/tipc/server.c47
-rw-r--r--net/tipc/socket.c126
-rw-r--r--net/tipc/socket.h4
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--samples/bpf/sockex1_kern.c8
-rw-r--r--samples/bpf/sockex1_user.c2
-rw-r--r--samples/bpf/sockex2_kern.c26
-rw-r--r--samples/bpf/sockex2_user.c11
-rw-r--r--samples/bpf/test_verifier.c79
141 files changed, 2139 insertions, 1189 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index beb8ec4dabbc..5ecfd72ba684 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -188,6 +188,14 @@ Description:
Indicates the interface unique physical port identifier within
the NIC, as a string.
+What: /sys/class/net/<iface>/phys_port_name
+Date: March 2015
+KernelVersion: 4.0
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface physical port name within the NIC,
+ as a string.
+
What: /sys/class/net/<iface>/speed
Date: October 2009
KernelVersion: 2.6.33
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
index 5e9aeb91d355..0c0df91b1516 100644
--- a/Documentation/ABI/testing/sysfs-class-net-queues
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -24,6 +24,14 @@ Description:
Indicates the number of transmit timeout events seen by this
network interface transmit queue.
+What: /sys/class/<iface>/queues/tx-<queue>/tx_maxrate
+Date: March 2015
+KernelVersion: 4.1
+Contact: netdev@vger.kernel.org
+Description:
+ A Mbps max-rate set for the queue, a value of zero means disabled,
+ default is disabled.
+
What: /sys/class/<iface>/queues/tx-<queue>/xps_cpus
Date: November 2010
KernelVersion: 2.6.38
diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
index 6151999c5dca..dc7961b33076 100644
--- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
+++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
@@ -15,6 +15,7 @@ Required properties for all the ethernet interfaces:
- "ring_csr": Descriptor ring control and status register address space
- "ring_cmd": Descriptor ring command register address space
- interrupts: Ethernet main interrupt
+- port-id: Port number (0 or 1)
- clocks: Reference to the clock entry.
- local-mac-address: MAC address assigned to this device
- phy-connection-type: Interface type between ethernet device and PHY device
@@ -49,6 +50,7 @@ Example:
<0x0 0X10000000 0x0 0X200>;
reg-names = "enet_csr", "ring_csr", "ring_cmd";
interrupts = <0x0 0x3c 0x4>;
+ port-id = <0>;
clocks = <&menetclk 0>;
local-mac-address = [00 01 73 00 00 01];
phy-connection-type = "rgmii";
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 99ca40e8e810..cbfac0949635 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -421,6 +421,15 @@ best CPUs to share a given queue are probably those that share the cache
with the CPU that processes transmit completions for that queue
(transmit interrupts).
+Per TX Queue rate limitation:
+=============================
+
+These are rate-limitation mechanisms implemented by HW, where currently
+a max-rate attribute is supported, by setting a Mbps value to
+
+/sys/class/net/<dev>/queues/tx-<n>/tx_maxrate
+
+A value of zero means disabled, and this is the default.
Further Information
===================
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
index 2e25de0800b9..83578e766b94 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -45,6 +45,10 @@
status = "ok";
};
+&sgenet1 {
+ status = "ok";
+};
+
&xgenet {
status = "ok";
};
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index a857794432d6..c1eb6911e539 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -186,6 +186,16 @@
clock-output-names = "sge0clk";
};
+ sge1clk: sge1clk@1f21c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f21c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ csr-mask = <0xc>;
+ clock-output-names = "sge1clk";
+ };
+
xge0clk: xge0clk@1f61c000 {
compatible = "apm,xgene-device-clock";
#clock-cells = <1>;
@@ -635,6 +645,21 @@
phy-connection-type = "sgmii";
};
+ sgenet1: ethernet@1f210030 {
+ compatible = "apm,xgene1-sgenet";
+ status = "disabled";
+ reg = <0x0 0x1f210030 0x0 0xd100>,
+ <0x0 0x1f200000 0x0 0Xc300>,
+ <0x0 0x1B000000 0x0 0X8000>;
+ reg-names = "enet_csr", "ring_csr", "ring_cmd";
+ interrupts = <0x0 0xAC 0x4>;
+ port-id = <1>;
+ dma-coherent;
+ clocks = <&sge1clk 0>;
+ local-mac-address = [00 00 00 00 00 00];
+ phy-connection-type = "sgmii";
+ };
+
xgenet: ethernet@1f610000 {
compatible = "apm,xgene1-xgenet";
status = "disabled";
diff --git a/drivers/net/can/cc770/cc770_platform.c b/drivers/net/can/cc770/cc770_platform.c
index b1e8851d3cc4..866e5e12fdd2 100644
--- a/drivers/net/can/cc770/cc770_platform.c
+++ b/drivers/net/can/cc770/cc770_platform.c
@@ -254,7 +254,7 @@ static int cc770_platform_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id cc770_platform_table[] = {
+static const struct of_device_id cc770_platform_table[] = {
{.compatible = "bosch,cc770"}, /* CC770 from Bosch */
{.compatible = "intc,82527"}, /* AN82527 from Intel CP */
{},
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index fed1bbd0b0d2..e3d7e22a4fa0 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -1725,7 +1725,7 @@ static int grcan_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id grcan_match[] = {
+static const struct of_device_id grcan_match[] = {
{.name = "GAISLER_GRCAN"},
{.name = "01_03d"},
{.name = "GAISLER_GRHCAN"},
diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index ad024e60ba8c..c7427bdd3a4b 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -43,7 +43,7 @@ struct mpc5xxx_can_data {
};
#ifdef CONFIG_PPC_MPC52xx
-static struct of_device_id mpc52xx_cdm_ids[] = {
+static const struct of_device_id mpc52xx_cdm_ids[] = {
{ .compatible = "fsl,mpc5200-cdm", },
{}
};
diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c
index 93115250eaf5..0552ed46a206 100644
--- a/drivers/net/can/sja1000/sja1000_platform.c
+++ b/drivers/net/can/sja1000/sja1000_platform.c
@@ -242,7 +242,7 @@ static int sp_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id sp_of_table[] = {
+static const struct of_device_id sp_of_table[] = {
{.compatible = "nxp,sja1000"},
{},
};
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 6c6764312285..6bddfe062b51 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -1185,7 +1185,7 @@ static int xcan_remove(struct platform_device *pdev)
}
/* Match table for OF platform binding */
-static struct of_device_id xcan_of_match[] = {
+static const struct of_device_id xcan_of_match[] = {
{ .compatible = "xlnx,zynq-can-1.0", },
{ .compatible = "xlnx,axi-can-1.00.a", },
{ /* end of list */ },
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 48e62a34f7f2..18550c7ebe6f 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -7,7 +7,7 @@ config NET_DSA_MV88E6XXX
config NET_DSA_MV88E6060
tristate "Marvell 88E6060 ethernet switch chip support"
- select NET_DSA
+ depends on NET_DSA
select NET_DSA_TAG_TRAILER
---help---
This enables support for the Marvell 88E6060 ethernet switch
@@ -19,7 +19,7 @@ config NET_DSA_MV88E6XXX_NEED_PPU
config NET_DSA_MV88E6131
tristate "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
- select NET_DSA
+ depends on NET_DSA
select NET_DSA_MV88E6XXX
select NET_DSA_MV88E6XXX_NEED_PPU
select NET_DSA_TAG_DSA
@@ -29,7 +29,7 @@ config NET_DSA_MV88E6131
config NET_DSA_MV88E6123_61_65
tristate "Marvell 88E6123/6161/6165 ethernet switch chip support"
- select NET_DSA
+ depends on NET_DSA
select NET_DSA_MV88E6XXX
select NET_DSA_TAG_EDSA
---help---
@@ -38,7 +38,7 @@ config NET_DSA_MV88E6123_61_65
config NET_DSA_MV88E6171
tristate "Marvell 88E6171/6172 ethernet switch chip support"
- select NET_DSA
+ depends on NET_DSA
select NET_DSA_MV88E6XXX
select NET_DSA_TAG_EDSA
---help---
@@ -47,7 +47,7 @@ config NET_DSA_MV88E6171
config NET_DSA_MV88E6352
tristate "Marvell 88E6176/88E6352 ethernet switch chip support"
- select NET_DSA
+ depends on NET_DSA
select NET_DSA_MV88E6XXX
select NET_DSA_TAG_EDSA
---help---
@@ -56,8 +56,7 @@ config NET_DSA_MV88E6352
config NET_DSA_BCM_SF2
tristate "Broadcom Starfighter 2 Ethernet switch support"
- depends on HAS_IOMEM
- select NET_DSA
+ depends on HAS_IOMEM && NET_DSA
select NET_DSA_TAG_BRCM
select FIXED_PHY
select BCM7XXX_PHY
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 2b8bfeeee9cf..ae89de7deb13 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1588,7 +1588,7 @@ static int greth_of_remove(struct platform_device *of_dev)
return 0;
}
-static struct of_device_id greth_of_match[] = {
+static const struct of_device_id greth_of_match[] = {
{
.name = "GAISLER_ETHMAC",
},
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index fd9296a5014d..79ea35869e1e 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -89,7 +89,7 @@ MODULE_PARM_DESC(dma_tx_num, "Number of descriptors in the TX list");
#define TXQUEUESTOP_THRESHHOLD 2
-static struct of_device_id altera_tse_ids[];
+static const struct of_device_id altera_tse_ids[];
static inline u32 tse_tx_avail(struct altera_tse_private *priv)
{
@@ -1576,7 +1576,7 @@ static const struct altera_dmaops altera_dtype_msgdma = {
.start_rxdma = msgdma_start_rxdma,
};
-static struct of_device_id altera_tse_ids[] = {
+static const struct of_device_id altera_tse_ids[] = {
{ .compatible = "altr,tse-msgdma-1.0", .data = &altera_dtype_msgdma, },
{ .compatible = "altr,tse-1.0", .data = &altera_dtype_sgdma, },
{ .compatible = "ALTR,tse-1.0", .data = &altera_dtype_sgdma, },
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index ec45f3256f0e..d9bc89d69266 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -97,6 +97,8 @@ enum xgene_enet_rm {
#define QCOHERENT BIT(4)
#define RECOMBBUF BIT(27)
+#define MAC_OFFSET 0x30
+
#define BLOCK_ETH_CSR_OFFSET 0x2000
#define BLOCK_ETH_RING_IF_OFFSET 0x9000
#define BLOCK_ETH_DIAG_CSR_OFFSET 0xD000
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 635a83be7e5e..6146a993a136 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -645,9 +645,11 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
struct device *dev = ndev_to_dev(ndev);
struct xgene_enet_desc_ring *rx_ring, *tx_ring, *cp_ring;
struct xgene_enet_desc_ring *buf_pool = NULL;
- u8 cpu_bufnum = 0, eth_bufnum = START_ETH_BUFNUM;
- u8 bp_bufnum = START_BP_BUFNUM;
- u16 ring_id, ring_num = START_RING_NUM;
+ u8 cpu_bufnum = pdata->cpu_bufnum;
+ u8 eth_bufnum = pdata->eth_bufnum;
+ u8 bp_bufnum = pdata->bp_bufnum;
+ u16 ring_num = pdata->ring_num;
+ u16 ring_id;
int ret;
/* allocate rx descriptor ring */
@@ -752,6 +754,22 @@ static const struct net_device_ops xgene_ndev_ops = {
.ndo_set_mac_address = xgene_enet_set_mac_address,
};
+static int xgene_get_port_id(struct device *dev, struct xgene_enet_pdata *pdata)
+{
+ u32 id = 0;
+ int ret;
+
+ ret = device_property_read_u32(dev, "port-id", &id);
+ if (!ret && id > 1) {
+ dev_err(dev, "Incorrect port-id specified\n");
+ return -ENODEV;
+ }
+
+ pdata->port_id = id;
+
+ return 0;
+}
+
static int xgene_get_mac_address(struct device *dev,
unsigned char *addr)
{
@@ -843,6 +861,10 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
}
pdata->rx_irq = ret;
+ ret = xgene_get_port_id(dev, pdata);
+ if (ret)
+ return ret;
+
if (xgene_get_mac_address(dev, ndev->dev_addr) != ETH_ALEN)
eth_hw_addr_random(ndev);
@@ -866,13 +888,13 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
pdata->clk = NULL;
}
- base_addr = pdata->base_addr;
+ base_addr = pdata->base_addr - (pdata->port_id * MAC_OFFSET);
pdata->eth_csr_addr = base_addr + BLOCK_ETH_CSR_OFFSET;
pdata->eth_ring_if_addr = base_addr + BLOCK_ETH_RING_IF_OFFSET;
pdata->eth_diag_csr_addr = base_addr + BLOCK_ETH_DIAG_CSR_OFFSET;
if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII ||
pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
- pdata->mcx_mac_addr = base_addr + BLOCK_ETH_MAC_OFFSET;
+ pdata->mcx_mac_addr = pdata->base_addr + BLOCK_ETH_MAC_OFFSET;
pdata->mcx_mac_csr_addr = base_addr + BLOCK_ETH_MAC_CSR_OFFSET;
} else {
pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET;
@@ -935,6 +957,24 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata)
pdata->rm = RM0;
break;
}
+
+ switch (pdata->port_id) {
+ case 0:
+ pdata->cpu_bufnum = START_CPU_BUFNUM_0;
+ pdata->eth_bufnum = START_ETH_BUFNUM_0;
+ pdata->bp_bufnum = START_BP_BUFNUM_0;
+ pdata->ring_num = START_RING_NUM_0;
+ break;
+ case 1:
+ pdata->cpu_bufnum = START_CPU_BUFNUM_1;
+ pdata->eth_bufnum = START_ETH_BUFNUM_1;
+ pdata->bp_bufnum = START_BP_BUFNUM_1;
+ pdata->ring_num = START_RING_NUM_1;
+ break;
+ default:
+ break;
+ }
+
}
static int xgene_enet_probe(struct platform_device *pdev)
@@ -1033,7 +1073,7 @@ MODULE_DEVICE_TABLE(acpi, xgene_enet_acpi_match);
#endif
#ifdef CONFIG_OF
-static struct of_device_id xgene_enet_of_match[] = {
+static const struct of_device_id xgene_enet_of_match[] = {
{.compatible = "apm,xgene-enet",},
{.compatible = "apm,xgene1-sgenet",},
{.compatible = "apm,xgene1-xgenet",},
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index c2d465c3db66..b93ed21a157f 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -41,9 +41,15 @@
#define SKB_BUFFER_SIZE (XGENE_ENET_MAX_MTU - NET_IP_ALIGN)
#define NUM_PKT_BUF 64
#define NUM_BUFPOOL 32
-#define START_ETH_BUFNUM 2
-#define START_BP_BUFNUM 0x22
-#define START_RING_NUM 8
+
+#define START_CPU_BUFNUM_0 0
+#define START_ETH_BUFNUM_0 2
+#define START_BP_BUFNUM_0 0x22
+#define START_RING_NUM_0 8
+#define START_CPU_BUFNUM_1 12
+#define START_ETH_BUFNUM_1 10
+#define START_BP_BUFNUM_1 0x2A
+#define START_RING_NUM_1 264
#define PHY_POLL_LINK_ON (10 * HZ)
#define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5)
@@ -125,6 +131,11 @@ struct xgene_enet_pdata {
struct xgene_mac_ops *mac_ops;
struct xgene_port_ops *port_ops;
struct delayed_work link_work;
+ u32 port_id;
+ u8 cpu_bufnum;
+ u8 eth_bufnum;
+ u8 bp_bufnum;
+ u16 ring_num;
};
struct xgene_indirect_ctl {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index f5d4f68c288c..f27fb6f2a93b 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -226,6 +226,7 @@ static u32 xgene_enet_link_status(struct xgene_enet_pdata *p)
static void xgene_sgmac_init(struct xgene_enet_pdata *p)
{
u32 data, loop = 10;
+ u32 offset = p->port_id * 4;
xgene_sgmac_reset(p);
@@ -272,9 +273,9 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p)
xgene_enet_wr_csr(p, RSIF_RAM_DBG_REG0_ADDR, 0);
/* Bypass traffic gating */
- xgene_enet_wr_csr(p, CFG_LINK_AGGR_RESUME_0_ADDR, TX_PORT0);
+ xgene_enet_wr_csr(p, CFG_LINK_AGGR_RESUME_0_ADDR + offset, TX_PORT0);
xgene_enet_wr_csr(p, CFG_BYPASS_ADDR, RESUME_TX);
- xgene_enet_wr_csr(p, SG_RX_DV_GATE_REG_0_ADDR, RESUME_RX0);
+ xgene_enet_wr_csr(p, SG_RX_DV_GATE_REG_0_ADDR + offset, RESUME_RX0);
}
static void xgene_sgmac_rxtx(struct xgene_enet_pdata *p, u32 bits, bool set)
@@ -330,13 +331,14 @@ static void xgene_enet_cle_bypass(struct xgene_enet_pdata *p,
u32 dst_ring_num, u16 bufpool_id)
{
u32 data, fpsel;
+ u32 offset = p->port_id * MAC_OFFSET;
data = CFG_CLE_BYPASS_EN0;
- xgene_enet_wr_csr(p, CLE_BYPASS_REG0_0_ADDR, data);
+ xgene_enet_wr_csr(p, CLE_BYPASS_REG0_0_ADDR + offset, data);
fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20;
data = CFG_CLE_DSTQID0(dst_ring_num) | CFG_CLE_FPSEL0(fpsel);
- xgene_enet_wr_csr(p, CLE_BYPASS_REG1_0_ADDR, data);
+ xgene_enet_wr_csr(p, CLE_BYPASS_REG1_0_ADDR + offset, data);
}
static void xgene_enet_shutdown(struct xgene_enet_pdata *p)
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index daae0e016253..2f98846e2d89 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -1621,7 +1621,7 @@ static int bmac_remove(struct macio_dev *mdev)
return 0;
}
-static struct of_device_id bmac_match[] =
+static const struct of_device_id bmac_match[] =
{
{
.name = "bmac",
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index 7fcaf0da42a8..a18948286682 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -984,7 +984,7 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static struct of_device_id mace_match[] =
+static const struct of_device_id mace_match[] =
{
{
.name = "mace",
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index e74ae628bbb9..12956b143b11 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -197,6 +197,14 @@ enum dma_reg {
DMA_PRIORITY_0,
DMA_PRIORITY_1,
DMA_PRIORITY_2,
+ DMA_INDEX2RING_0,
+ DMA_INDEX2RING_1,
+ DMA_INDEX2RING_2,
+ DMA_INDEX2RING_3,
+ DMA_INDEX2RING_4,
+ DMA_INDEX2RING_5,
+ DMA_INDEX2RING_6,
+ DMA_INDEX2RING_7,
};
static const u8 bcmgenet_dma_regs_v3plus[] = {
@@ -208,6 +216,14 @@ static const u8 bcmgenet_dma_regs_v3plus[] = {
[DMA_PRIORITY_0] = 0x30,
[DMA_PRIORITY_1] = 0x34,
[DMA_PRIORITY_2] = 0x38,
+ [DMA_INDEX2RING_0] = 0x70,
+ [DMA_INDEX2RING_1] = 0x74,
+ [DMA_INDEX2RING_2] = 0x78,
+ [DMA_INDEX2RING_3] = 0x7C,
+ [DMA_INDEX2RING_4] = 0x80,
+ [DMA_INDEX2RING_5] = 0x84,
+ [DMA_INDEX2RING_6] = 0x88,
+ [DMA_INDEX2RING_7] = 0x8C,
};
static const u8 bcmgenet_dma_regs_v2[] = {
@@ -2283,6 +2299,160 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl)
bcmgenet_tdma_writel(priv, reg, DMA_CTRL);
}
+static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv,
+ u32 f_index)
+{
+ u32 offset;
+ u32 reg;
+
+ offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
+ reg = bcmgenet_hfb_reg_readl(priv, offset);
+ return !!(reg & (1 << (f_index % 32)));
+}
+
+static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index)
+{
+ u32 offset;
+ u32 reg;
+
+ offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
+ reg = bcmgenet_hfb_reg_readl(priv, offset);
+ reg |= (1 << (f_index % 32));
+ bcmgenet_hfb_reg_writel(priv, reg, offset);
+}
+
+static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv,
+ u32 f_index, u32 rx_queue)
+{
+ u32 offset;
+ u32 reg;
+
+ offset = f_index / 8;
+ reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset);
+ reg &= ~(0xF << (4 * (f_index % 8)));
+ reg |= ((rx_queue & 0xF) << (4 * (f_index % 8)));
+ bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset);
+}
+
+static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv,
+ u32 f_index, u32 f_length)
+{
+ u32 offset;
+ u32 reg;
+
+ offset = HFB_FLT_LEN_V3PLUS +
+ ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) *
+ sizeof(u32);
+ reg = bcmgenet_hfb_reg_readl(priv, offset);
+ reg &= ~(0xFF << (8 * (f_index % 4)));
+ reg |= ((f_length & 0xFF) << (8 * (f_index % 4)));
+ bcmgenet_hfb_reg_writel(priv, reg, offset);
+}
+
+static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv)
+{
+ u32 f_index;
+
+ for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++)
+ if (!bcmgenet_hfb_is_filter_enabled(priv, f_index))
+ return f_index;
+
+ return -ENOMEM;
+}
+
+/* bcmgenet_hfb_add_filter
+ *
+ * Add new filter to Hardware Filter Block to match and direct Rx traffic to
+ * desired Rx queue.
+ *
+ * f_data is an array of unsigned 32-bit integers where each 32-bit integer
+ * provides filter data for 2 bytes (4 nibbles) of Rx frame:
+ *
+ * bits 31:20 - unused
+ * bit 19 - nibble 0 match enable
+ * bit 18 - nibble 1 match enable
+ * bit 17 - nibble 2 match enable
+ * bit 16 - nibble 3 match enable
+ * bits 15:12 - nibble 0 data
+ * bits 11:8 - nibble 1 data
+ * bits 7:4 - nibble 2 data
+ * bits 3:0 - nibble 3 data
+ *
+ * Example:
+ * In order to match:
+ * - Ethernet frame type = 0x0800 (IP)
+ * - IP version field = 4
+ * - IP protocol field = 0x11 (UDP)
+ *
+ * The following filter is needed:
+ * u32 hfb_filter_ipv4_udp[] = {
+ * Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ * Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000,
+ * Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011,
+ * };
+ *
+ * To add the filter to HFB and direct the traffic to Rx queue 0, call:
+ * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp,
+ * ARRAY_SIZE(hfb_filter_ipv4_udp), 0);
+ */
+int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
+ u32 f_length, u32 rx_queue)
+{
+ int f_index;
+ u32 i;
+
+ f_index = bcmgenet_hfb_find_unused_filter(priv);
+ if (f_index < 0)
+ return -ENOMEM;
+
+ if (f_length > priv->hw_params->hfb_filter_size)
+ return -EINVAL;
+
+ for (i = 0; i < f_length; i++)
+ bcmgenet_hfb_writel(priv, f_data[i],
+ (f_index * priv->hw_params->hfb_filter_size + i) *
+ sizeof(u32));
+
+ bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length);
+ bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue);
+ bcmgenet_hfb_enable_filter(priv, f_index);
+ bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL);
+
+ return 0;
+}
+
+/* bcmgenet_hfb_clear
+ *
+ * Clear Hardware Filter Block and disable all filtering.
+ */
+static void bcmgenet_hfb_clear(struct bcmgenet_priv *priv)
+{
+ u32 i;
+
+ bcmgenet_hfb_reg_writel(priv, 0x0, HFB_CTRL);
+ bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS);
+ bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS + 4);
+
+ for (i = DMA_INDEX2RING_0; i <= DMA_INDEX2RING_7; i++)
+ bcmgenet_rdma_writel(priv, 0x0, i);
+
+ for (i = 0; i < (priv->hw_params->hfb_filter_cnt / 4); i++)
+ bcmgenet_hfb_reg_writel(priv, 0x0,
+ HFB_FLT_LEN_V3PLUS + i * sizeof(u32));
+
+ for (i = 0; i < priv->hw_params->hfb_filter_cnt *
+ priv->hw_params->hfb_filter_size; i++)
+ bcmgenet_hfb_writel(priv, 0x0, i * sizeof(u32));
+}
+
+static void bcmgenet_hfb_init(struct bcmgenet_priv *priv)
+{
+ if (GENET_IS_V1(priv) || GENET_IS_V2(priv))
+ return;
+
+ bcmgenet_hfb_clear(priv);
+}
+
static void bcmgenet_netif_start(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -2348,6 +2518,9 @@ static int bcmgenet_open(struct net_device *dev)
/* Always enable ring 16 - descriptor ring */
bcmgenet_enable_dma(priv, dma_ctrl);
+ /* HFB init */
+ bcmgenet_hfb_init(priv);
+
ret = request_irq(priv->irq0, bcmgenet_isr0, IRQF_SHARED,
dev->name, priv);
if (ret < 0) {
@@ -2592,6 +2765,7 @@ static struct bcmgenet_hw_params bcmgenet_hw_params[] = {
.bp_in_en_shift = 17,
.bp_in_mask = 0x1ffff,
.hfb_filter_cnt = 48,
+ .hfb_filter_size = 128,
.qtag_mask = 0x3F,
.tbuf_offset = 0x0600,
.hfb_offset = 0x8000,
@@ -2609,6 +2783,7 @@ static struct bcmgenet_hw_params bcmgenet_hw_params[] = {
.bp_in_en_shift = 17,
.bp_in_mask = 0x1ffff,
.hfb_filter_cnt = 48,
+ .hfb_filter_size = 128,
.qtag_mask = 0x3F,
.tbuf_offset = 0x0600,
.hfb_offset = 0x8000,
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 2a8113898aed..1ea838946318 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -509,6 +509,7 @@ struct bcmgenet_hw_params {
u8 bp_in_en_shift;
u32 bp_in_mask;
u8 hfb_filter_cnt;
+ u8 hfb_filter_size;
u8 qtag_mask;
u16 tbuf_offset;
u32 hfb_offset;
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index f88cfaa359e7..442410cd2ca4 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1299,7 +1299,7 @@ static int ethoc_resume(struct platform_device *pdev)
# define ethoc_resume NULL
#endif
-static struct of_device_id ethoc_match[] = {
+static const struct of_device_id ethoc_match[] = {
{ .compatible = "opencores,ethoc", },
{},
};
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index f495796248db..afe7f39cdd7c 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -1057,7 +1057,7 @@ static int mpc52xx_fec_of_resume(struct platform_device *op)
}
#endif
-static struct of_device_id mpc52xx_fec_match[] = {
+static const struct of_device_id mpc52xx_fec_match[] = {
{ .compatible = "fsl,mpc5200b-fec", },
{ .compatible = "fsl,mpc5200-fec", },
{ .compatible = "mpc5200-fec", },
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
index e0528900db02..1e647beaf989 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx_phy.c
@@ -134,7 +134,7 @@ static int mpc52xx_fec_mdio_remove(struct platform_device *of)
return 0;
}
-static struct of_device_id mpc52xx_fec_mdio_match[] = {
+static const struct of_device_id mpc52xx_fec_mdio_match[] = {
{ .compatible = "fsl,mpc5200b-mdio", },
{ .compatible = "fsl,mpc5200-mdio", },
{ .compatible = "mpc5200b-fec-phy", },
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index a17628769a1f..9b3639eae676 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -916,7 +916,7 @@ static const struct net_device_ops fs_enet_netdev_ops = {
#endif
};
-static struct of_device_id fs_enet_match[];
+static const struct of_device_id fs_enet_match[];
static int fs_enet_probe(struct platform_device *ofdev)
{
const struct of_device_id *match;
@@ -1082,7 +1082,7 @@ static int fs_enet_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id fs_enet_match[] = {
+static const struct of_device_id fs_enet_match[] = {
#ifdef CONFIG_FS_ENET_HAS_SCC
{
.compatible = "fsl,cpm1-scc-enet",
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
index 1d5617d2d8bd..68a428de0bc0 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
@@ -213,7 +213,7 @@ static int fs_enet_mdio_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id fs_enet_mdio_bb_match[] = {
+static const struct of_device_id fs_enet_mdio_bb_match[] = {
{
.compatible = "fsl,cpm2-mdio-bitbang",
},
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
index 1648e3582500..2be383e6d258 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
@@ -95,7 +95,7 @@ static int fs_enet_fec_mii_write(struct mii_bus *bus, int phy_id, int location,
}
-static struct of_device_id fs_enet_mdio_fec_match[];
+static const struct of_device_id fs_enet_mdio_fec_match[];
static int fs_enet_mdio_probe(struct platform_device *ofdev)
{
const struct of_device_id *match;
@@ -208,7 +208,7 @@ static int fs_enet_mdio_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id fs_enet_mdio_fec_match[] = {
+static const struct of_device_id fs_enet_mdio_fec_match[] = {
{
.compatible = "fsl,pq1-fec-mdio",
},
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index d1a91e344e6b..3c40f6b99224 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -294,7 +294,7 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
#endif
-static struct of_device_id fsl_pq_mdio_match[] = {
+static const struct of_device_id fsl_pq_mdio_match[] = {
#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
{
.compatible = "fsl,gianfar-tbi",
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 7bf3682cdf47..4ee080d49bc0 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -158,7 +158,7 @@ static void gfar_init_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp,
{
u32 lstatus;
- bdp->bufPtr = buf;
+ bdp->bufPtr = cpu_to_be32(buf);
lstatus = BD_LFLAG(RXBD_EMPTY | RXBD_INTERRUPT);
if (bdp == rx_queue->rx_bd_base + rx_queue->rx_ring_size - 1)
@@ -166,7 +166,7 @@ static void gfar_init_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp,
gfar_wmb();
- bdp->lstatus = lstatus;
+ bdp->lstatus = cpu_to_be32(lstatus);
}
static int gfar_init_bds(struct net_device *ndev)
@@ -200,7 +200,8 @@ static int gfar_init_bds(struct net_device *ndev)
/* Set the last descriptor in the ring to indicate wrap */
txbdp--;
- txbdp->status |= TXBD_WRAP;
+ txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) |
+ TXBD_WRAP);
}
rfbptr = &regs->rfbptr0;
@@ -214,7 +215,7 @@ static int gfar_init_bds(struct net_device *ndev)
struct sk_buff *skb = rx_queue->rx_skbuff[j];
if (skb) {
- bufaddr = rxbdp->bufPtr;
+ bufaddr = be32_to_cpu(rxbdp->bufPtr);
} else {
skb = gfar_new_skb(ndev, &bufaddr);
if (!skb) {
@@ -696,19 +697,28 @@ static int gfar_parse_group(struct device_node *np,
grp->priv = priv;
spin_lock_init(&grp->grplock);
if (priv->mode == MQ_MG_MODE) {
- u32 *rxq_mask, *txq_mask;
- rxq_mask = (u32 *)of_get_property(np, "fsl,rx-bit-map", NULL);
- txq_mask = (u32 *)of_get_property(np, "fsl,tx-bit-map", NULL);
+ u32 rxq_mask, txq_mask;
+ int ret;
+
+ grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
+ grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
+
+ ret = of_property_read_u32(np, "fsl,rx-bit-map", &rxq_mask);
+ if (!ret) {
+ grp->rx_bit_map = rxq_mask ?
+ rxq_mask : (DEFAULT_MAPPING >> priv->num_grps);
+ }
+
+ ret = of_property_read_u32(np, "fsl,tx-bit-map", &txq_mask);
+ if (!ret) {
+ grp->tx_bit_map = txq_mask ?
+ txq_mask : (DEFAULT_MAPPING >> priv->num_grps);
+ }
if (priv->poll_mode == GFAR_SQ_POLLING) {
/* One Q per interrupt group: Q0 to G0, Q1 to G1 */
grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
- } else { /* GFAR_MQ_POLLING */
- grp->rx_bit_map = rxq_mask ?
- *rxq_mask : (DEFAULT_MAPPING >> priv->num_grps);
- grp->tx_bit_map = txq_mask ?
- *txq_mask : (DEFAULT_MAPPING >> priv->num_grps);
}
} else {
grp->rx_bit_map = 0xFF;
@@ -769,11 +779,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
struct gfar_private *priv = NULL;
struct device_node *np = ofdev->dev.of_node;
struct device_node *child = NULL;
- const u32 *stash;
- const u32 *stash_len;
- const u32 *stash_idx;
+ struct property *stash;
+ u32 stash_len = 0;
+ u32 stash_idx = 0;
unsigned int num_tx_qs, num_rx_qs;
- u32 *tx_queues, *rx_queues;
unsigned short mode, poll_mode;
if (!np)
@@ -787,10 +796,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
poll_mode = GFAR_SQ_POLLING;
}
- /* parse the num of HW tx and rx queues */
- tx_queues = (u32 *)of_get_property(np, "fsl,num_tx_queues", NULL);
- rx_queues = (u32 *)of_get_property(np, "fsl,num_rx_queues", NULL);
-
if (mode == SQ_SG_MODE) {
num_tx_qs = 1;
num_rx_qs = 1;
@@ -809,8 +814,17 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
num_tx_qs = num_grps; /* one txq per int group */
num_rx_qs = num_grps; /* one rxq per int group */
} else { /* GFAR_MQ_POLLING */
- num_tx_qs = tx_queues ? *tx_queues : 1;
- num_rx_qs = rx_queues ? *rx_queues : 1;
+ u32 tx_queues, rx_queues;
+ int ret;
+
+ /* parse the num of HW tx and rx queues */
+ ret = of_property_read_u32(np, "fsl,num_tx_queues",
+ &tx_queues);
+ num_tx_qs = ret ? 1 : tx_queues;
+
+ ret = of_property_read_u32(np, "fsl,num_rx_queues",
+ &rx_queues);
+ num_rx_qs = ret ? 1 : rx_queues;
}
}
@@ -851,13 +865,17 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
if (err)
goto rx_alloc_failed;
+ err = of_property_read_string(np, "model", &model);
+ if (err) {
+ pr_err("Device model property missing, aborting\n");
+ goto rx_alloc_failed;
+ }
+
/* Init Rx queue filer rule set linked list */
INIT_LIST_HEAD(&priv->rx_list.list);
priv->rx_list.count = 0;
mutex_init(&priv->rx_queue_access);
- model = of_get_property(np, "model", NULL);
-
for (i = 0; i < MAXGROUPS; i++)
priv->gfargrp[i].regs = NULL;
@@ -877,22 +895,22 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
goto err_grp_init;
}
- stash = of_get_property(np, "bd-stash", NULL);
+ stash = of_find_property(np, "bd-stash", NULL);
if (stash) {
priv->device_flags |= FSL_GIANFAR_DEV_HAS_BD_STASHING;
priv->bd_stash_en = 1;
}
- stash_len = of_get_property(np, "rx-stash-len", NULL);
+ err = of_property_read_u32(np, "rx-stash-len", &stash_len);
- if (stash_len)
- priv->rx_stash_size = *stash_len;
+ if (err == 0)
+ priv->rx_stash_size = stash_len;
- stash_idx = of_get_property(np, "rx-stash-idx", NULL);
+ err = of_property_read_u32(np, "rx-stash-idx", &stash_idx);
- if (stash_idx)
- priv->rx_stash_index = *stash_idx;
+ if (err == 0)
+ priv->rx_stash_index = stash_idx;
if (stash_len || stash_idx)
priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING;
@@ -919,15 +937,15 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
FSL_GIANFAR_DEV_HAS_EXTENDED_HASH |
FSL_GIANFAR_DEV_HAS_TIMER;
- ctype = of_get_property(np, "phy-connection-type", NULL);
+ err = of_property_read_string(np, "phy-connection-type", &ctype);
/* We only care about rgmii-id. The rest are autodetected */
- if (ctype && !strcmp(ctype, "rgmii-id"))
+ if (err == 0 && !strcmp(ctype, "rgmii-id"))
priv->interface = PHY_INTERFACE_MODE_RGMII_ID;
else
priv->interface = PHY_INTERFACE_MODE_MII;
- if (of_get_property(np, "fsl,magic-packet", NULL))
+ if (of_find_property(np, "fsl,magic-packet", NULL))
priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
@@ -1884,14 +1902,15 @@ static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue)
if (!tx_queue->tx_skbuff[i])
continue;
- dma_unmap_single(priv->dev, txbdp->bufPtr,
- txbdp->length, DMA_TO_DEVICE);
+ dma_unmap_single(priv->dev, be32_to_cpu(txbdp->bufPtr),
+ be16_to_cpu(txbdp->length), DMA_TO_DEVICE);
txbdp->lstatus = 0;
for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags;
j++) {
txbdp++;
- dma_unmap_page(priv->dev, txbdp->bufPtr,
- txbdp->length, DMA_TO_DEVICE);
+ dma_unmap_page(priv->dev, be32_to_cpu(txbdp->bufPtr),
+ be16_to_cpu(txbdp->length),
+ DMA_TO_DEVICE);
}
txbdp++;
dev_kfree_skb_any(tx_queue->tx_skbuff[i]);
@@ -1911,7 +1930,7 @@ static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
for (i = 0; i < rx_queue->rx_ring_size; i++) {
if (rx_queue->rx_skbuff[i]) {
- dma_unmap_single(priv->dev, rxbdp->bufPtr,
+ dma_unmap_single(priv->dev, be32_to_cpu(rxbdp->bufPtr),
priv->rx_buffer_size,
DMA_FROM_DEVICE);
dev_kfree_skb_any(rx_queue->rx_skbuff[i]);
@@ -2167,16 +2186,16 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb,
*/
if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
flags |= TXFCB_UDP;
- fcb->phcs = udp_hdr(skb)->check;
+ fcb->phcs = (__force __be16)(udp_hdr(skb)->check);
} else
- fcb->phcs = tcp_hdr(skb)->check;
+ fcb->phcs = (__force __be16)(tcp_hdr(skb)->check);
/* l3os is the distance between the start of the
* frame (skb->data) and the start of the IP hdr.
* l4os is the distance between the start of the
* l3 hdr and the l4 hdr
*/
- fcb->l3os = (u16)(skb_network_offset(skb) - fcb_length);
+ fcb->l3os = (u8)(skb_network_offset(skb) - fcb_length);
fcb->l4os = skb_network_header_len(skb);
fcb->flags = flags;
@@ -2185,7 +2204,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb,
void inline gfar_tx_vlan(struct sk_buff *skb, struct txfcb *fcb)
{
fcb->flags |= TXFCB_VLN;
- fcb->vlctl = skb_vlan_tag_get(skb);
+ fcb->vlctl = cpu_to_be16(skb_vlan_tag_get(skb));
}
static inline struct txbd8 *skip_txbd(struct txbd8 *bdp, int stride,
@@ -2298,7 +2317,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx_queue->stats.tx_packets++;
txbdp = txbdp_start = tx_queue->cur_tx;
- lstatus = txbdp->lstatus;
+ lstatus = be32_to_cpu(txbdp->lstatus);
/* Time stamp insertion requires one additional TxBD */
if (unlikely(do_tstamp))
@@ -2306,11 +2325,14 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx_queue->tx_ring_size);
if (nr_frags == 0) {
- if (unlikely(do_tstamp))
- txbdp_tstamp->lstatus |= BD_LFLAG(TXBD_LAST |
- TXBD_INTERRUPT);
- else
+ if (unlikely(do_tstamp)) {
+ u32 lstatus_ts = be32_to_cpu(txbdp_tstamp->lstatus);
+
+ lstatus_ts |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+ txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
+ } else {
lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+ }
} else {
/* Place the fragment addresses and lengths into the TxBDs */
for (i = 0; i < nr_frags; i++) {
@@ -2320,7 +2342,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
frag_len = skb_shinfo(skb)->frags[i].size;
- lstatus = txbdp->lstatus | frag_len |
+ lstatus = be32_to_cpu(txbdp->lstatus) | frag_len |
BD_LFLAG(TXBD_READY);
/* Handle the last BD specially */
@@ -2336,11 +2358,11 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
goto dma_map_err;
/* set the TxBD length and buffer pointer */
- txbdp->bufPtr = bufaddr;
- txbdp->lstatus = lstatus;
+ txbdp->bufPtr = cpu_to_be32(bufaddr);
+ txbdp->lstatus = cpu_to_be32(lstatus);
}
- lstatus = txbdp_start->lstatus;
+ lstatus = be32_to_cpu(txbdp_start->lstatus);
}
/* Add TxPAL between FCB and frame if required */
@@ -2388,7 +2410,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
goto dma_map_err;
- txbdp_start->bufPtr = bufaddr;
+ txbdp_start->bufPtr = cpu_to_be32(bufaddr);
/* If time stamping is requested one additional TxBD must be set up. The
* first TxBD points to the FCB and must have a data length of
@@ -2396,9 +2418,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
* the full frame length.
*/
if (unlikely(do_tstamp)) {
- txbdp_tstamp->bufPtr = txbdp_start->bufPtr + fcb_len;
- txbdp_tstamp->lstatus |= BD_LFLAG(TXBD_READY) |
- (skb_headlen(skb) - fcb_len);
+ u32 lstatus_ts = be32_to_cpu(txbdp_tstamp->lstatus);
+
+ bufaddr = be32_to_cpu(txbdp_start->bufPtr);
+ bufaddr += fcb_len;
+ lstatus_ts |= BD_LFLAG(TXBD_READY) |
+ (skb_headlen(skb) - fcb_len);
+
+ txbdp_tstamp->bufPtr = cpu_to_be32(bufaddr);
+ txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | GMAC_FCB_LEN;
} else {
lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb);
@@ -2421,7 +2449,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
gfar_wmb();
- txbdp_start->lstatus = lstatus;
+ txbdp_start->lstatus = cpu_to_be32(lstatus);
gfar_wmb(); /* force lstatus write before tx_skbuff */
@@ -2460,13 +2488,14 @@ dma_map_err:
if (do_tstamp)
txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
for (i = 0; i < nr_frags; i++) {
- lstatus = txbdp->lstatus;
+ lstatus = be32_to_cpu(txbdp->lstatus);
if (!(lstatus & BD_LFLAG(TXBD_READY)))
break;
- txbdp->lstatus = lstatus & ~BD_LFLAG(TXBD_READY);
- bufaddr = txbdp->bufPtr;
- dma_unmap_page(priv->dev, bufaddr, txbdp->length,
+ lstatus &= ~BD_LFLAG(TXBD_READY);
+ txbdp->lstatus = cpu_to_be32(lstatus);
+ bufaddr = be32_to_cpu(txbdp->bufPtr);
+ dma_unmap_page(priv->dev, bufaddr, be16_to_cpu(txbdp->length),
DMA_TO_DEVICE);
txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
}
@@ -2607,7 +2636,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
lbdp = skip_txbd(bdp, nr_txbds - 1, base, tx_ring_size);
- lstatus = lbdp->lstatus;
+ lstatus = be32_to_cpu(lbdp->lstatus);
/* Only clean completed frames */
if ((lstatus & BD_LFLAG(TXBD_READY)) &&
@@ -2616,11 +2645,12 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
next = next_txbd(bdp, base, tx_ring_size);
- buflen = next->length + GMAC_FCB_LEN + GMAC_TXPAL_LEN;
+ buflen = be16_to_cpu(next->length) +
+ GMAC_FCB_LEN + GMAC_TXPAL_LEN;
} else
- buflen = bdp->length;
+ buflen = be16_to_cpu(bdp->length);
- dma_unmap_single(priv->dev, bdp->bufPtr,
+ dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
buflen, DMA_TO_DEVICE);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
@@ -2631,17 +2661,18 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
shhwtstamps.hwtstamp = ns_to_ktime(*ns);
skb_pull(skb, GMAC_FCB_LEN + GMAC_TXPAL_LEN);
skb_tstamp_tx(skb, &shhwtstamps);
- bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
+ gfar_clear_txbd_status(bdp);
bdp = next;
}
- bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
+ gfar_clear_txbd_status(bdp);
bdp = next_txbd(bdp, base, tx_ring_size);
for (i = 0; i < frags; i++) {
- dma_unmap_page(priv->dev, bdp->bufPtr,
- bdp->length, DMA_TO_DEVICE);
- bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
+ dma_unmap_page(priv->dev, be32_to_cpu(bdp->bufPtr),
+ be16_to_cpu(bdp->length),
+ DMA_TO_DEVICE);
+ gfar_clear_txbd_status(bdp);
bdp = next_txbd(bdp, base, tx_ring_size);
}
@@ -2798,13 +2829,13 @@ static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb)
* were verified, then we tell the kernel that no
* checksumming is necessary. Otherwise, it is [FIXME]
*/
- if ((fcb->flags & RXFCB_CSUM_MASK) == (RXFCB_CIP | RXFCB_CTU))
+ if ((be16_to_cpu(fcb->flags) & RXFCB_CSUM_MASK) ==
+ (RXFCB_CIP | RXFCB_CTU))
skb->ip_summed = CHECKSUM_UNNECESSARY;
else
skb_checksum_none_assert(skb);
}
-
/* gfar_process_frame() -- handle one incoming packet if skb isn't NULL. */
static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
int amount_pull, struct napi_struct *napi)
@@ -2846,8 +2877,9 @@ static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
* RXFCB_VLN is pseudo randomly set.
*/
if (dev->features & NETIF_F_HW_VLAN_CTAG_RX &&
- fcb->flags & RXFCB_VLN)
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), fcb->vlctl);
+ be16_to_cpu(fcb->flags) & RXFCB_VLN)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ be16_to_cpu(fcb->vlctl));
/* Send the packet up the stack */
napi_gro_receive(napi, skb);
@@ -2874,7 +2906,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
amount_pull = priv->uses_rxfcb ? GMAC_FCB_LEN : 0;
- while (!((bdp->status & RXBD_EMPTY) || (--rx_work_limit < 0))) {
+ while (!(be16_to_cpu(bdp->status) & RXBD_EMPTY) && rx_work_limit--) {
struct sk_buff *newskb;
dma_addr_t bufaddr;
@@ -2885,21 +2917,22 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
skb = rx_queue->rx_skbuff[rx_queue->skb_currx];
- dma_unmap_single(priv->dev, bdp->bufPtr,
+ dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
priv->rx_buffer_size, DMA_FROM_DEVICE);
- if (unlikely(!(bdp->status & RXBD_ERR) &&
- bdp->length > priv->rx_buffer_size))
- bdp->status = RXBD_LARGE;
+ if (unlikely(!(be16_to_cpu(bdp->status) & RXBD_ERR) &&
+ be16_to_cpu(bdp->length) > priv->rx_buffer_size))
+ bdp->status = cpu_to_be16(RXBD_LARGE);
/* We drop the frame if we failed to allocate a new buffer */
- if (unlikely(!newskb || !(bdp->status & RXBD_LAST) ||
- bdp->status & RXBD_ERR)) {
- count_errors(bdp->status, dev);
+ if (unlikely(!newskb ||
+ !(be16_to_cpu(bdp->status) & RXBD_LAST) ||
+ be16_to_cpu(bdp->status) & RXBD_ERR)) {
+ count_errors(be16_to_cpu(bdp->status), dev);
if (unlikely(!newskb)) {
newskb = skb;
- bufaddr = bdp->bufPtr;
+ bufaddr = be32_to_cpu(bdp->bufPtr);
} else if (skb)
dev_kfree_skb(skb);
} else {
@@ -2908,7 +2941,8 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
howmany++;
if (likely(skb)) {
- pkt_len = bdp->length - ETH_FCS_LEN;
+ pkt_len = be16_to_cpu(bdp->length) -
+ ETH_FCS_LEN;
/* Remove the FCS from the packet length */
skb_put(skb, pkt_len);
rx_queue->stats.rx_bytes += pkt_len;
@@ -3560,7 +3594,7 @@ static noinline void gfar_update_link_state(struct gfar_private *priv)
phy_print_status(phydev);
}
-static struct of_device_id gfar_match[] =
+static const struct of_device_id gfar_match[] =
{
{
.type = "network",
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 9e1802400c23..daa1d37de642 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -544,12 +544,12 @@ struct txbd8
{
union {
struct {
- u16 status; /* Status Fields */
- u16 length; /* Buffer length */
+ __be16 status; /* Status Fields */
+ __be16 length; /* Buffer length */
};
- u32 lstatus;
+ __be32 lstatus;
};
- u32 bufPtr; /* Buffer Pointer */
+ __be32 bufPtr; /* Buffer Pointer */
};
struct txfcb {
@@ -557,28 +557,28 @@ struct txfcb {
u8 ptp; /* Flag to enable tx timestamping */
u8 l4os; /* Level 4 Header Offset */
u8 l3os; /* Level 3 Header Offset */
- u16 phcs; /* Pseudo-header Checksum */
- u16 vlctl; /* VLAN control word */
+ __be16 phcs; /* Pseudo-header Checksum */
+ __be16 vlctl; /* VLAN control word */
};
struct rxbd8
{
union {
struct {
- u16 status; /* Status Fields */
- u16 length; /* Buffer Length */
+ __be16 status; /* Status Fields */
+ __be16 length; /* Buffer Length */
};
- u32 lstatus;
+ __be32 lstatus;
};
- u32 bufPtr; /* Buffer Pointer */
+ __be32 bufPtr; /* Buffer Pointer */
};
struct rxfcb {
- u16 flags;
+ __be16 flags;
u8 rq; /* Receive Queue index */
u8 pro; /* Layer 4 Protocol */
u16 reserved;
- u16 vlctl; /* VLAN control word */
+ __be16 vlctl; /* VLAN control word */
};
struct gianfar_skb_cb {
@@ -1287,6 +1287,14 @@ static inline void gfar_wmb(void)
#endif
}
+static inline void gfar_clear_txbd_status(struct txbd8 *bdp)
+{
+ u32 lstatus = be32_to_cpu(bdp->lstatus);
+
+ lstatus &= BD_LFLAG(TXBD_WRAP);
+ bdp->lstatus = cpu_to_be32(lstatus);
+}
+
irqreturn_t gfar_receive(int irq, void *dev_id);
int startup_gfar(struct net_device *dev);
void stop_gfar(struct net_device *dev);
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 16826341a4c9..77353366f33b 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -554,7 +554,7 @@ static int gianfar_ptp_remove(struct platform_device *dev)
return 0;
}
-static struct of_device_id match_table[] = {
+static const struct of_device_id match_table[] = {
{ .compatible = "fsl,etsec-ptp" },
{},
};
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 357e8b576905..bfdccbd58be0 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3930,7 +3930,7 @@ static int ucc_geth_remove(struct platform_device* ofdev)
return 0;
}
-static struct of_device_id ucc_geth_match[] = {
+static const struct of_device_id ucc_geth_match[] = {
{
.type = "network",
.compatible = "ucc_geth",
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 3a83bc2c613c..7b8fe866f603 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -46,17 +46,43 @@ struct tgec_mdio_controller {
#define MDIO_DATA(x) (x & 0xffff)
#define MDIO_DATA_BSY BIT(31)
+struct mdio_fsl_priv {
+ struct tgec_mdio_controller __iomem *mdio_base;
+ bool is_little_endian;
+};
+
+static u32 xgmac_read32(void __iomem *regs,
+ bool is_little_endian)
+{
+ if (is_little_endian)
+ return ioread32(regs);
+ else
+ return ioread32be(regs);
+}
+
+static void xgmac_write32(u32 value,
+ void __iomem *regs,
+ bool is_little_endian)
+{
+ if (is_little_endian)
+ iowrite32(value, regs);
+ else
+ iowrite32be(value, regs);
+}
+
/*
* Wait until the MDIO bus is free
*/
static int xgmac_wait_until_free(struct device *dev,
- struct tgec_mdio_controller __iomem *regs)
+ struct tgec_mdio_controller __iomem *regs,
+ bool is_little_endian)
{
unsigned int timeout;
/* Wait till the bus is free */
timeout = TIMEOUT;
- while ((ioread32be(&regs->mdio_stat) & MDIO_STAT_BSY) && timeout) {
+ while ((xgmac_read32(&regs->mdio_stat, is_little_endian) &
+ MDIO_STAT_BSY) && timeout) {
cpu_relax();
timeout--;
}
@@ -73,13 +99,15 @@ static int xgmac_wait_until_free(struct device *dev,
* Wait till the MDIO read or write operation is complete
*/
static int xgmac_wait_until_done(struct device *dev,
- struct tgec_mdio_controller __iomem *regs)
+ struct tgec_mdio_controller __iomem *regs,
+ bool is_little_endian)
{
unsigned int timeout;
/* Wait till the MDIO write is complete */
timeout = TIMEOUT;
- while ((ioread32be(&regs->mdio_data) & MDIO_DATA_BSY) && timeout) {
+ while ((xgmac_read32(&regs->mdio_stat, is_little_endian) &
+ MDIO_STAT_BSY) && timeout) {
cpu_relax();
timeout--;
}
@@ -99,12 +127,14 @@ static int xgmac_wait_until_done(struct device *dev,
*/
static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
{
- struct tgec_mdio_controller __iomem *regs = bus->priv;
+ struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+ struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
uint16_t dev_addr;
u32 mdio_ctl, mdio_stat;
int ret;
+ bool endian = priv->is_little_endian;
- mdio_stat = ioread32be(&regs->mdio_stat);
+ mdio_stat = xgmac_read32(&regs->mdio_stat, endian);
if (regnum & MII_ADDR_C45) {
/* Clause 45 (ie 10G) */
dev_addr = (regnum >> 16) & 0x1f;
@@ -115,29 +145,29 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val
mdio_stat &= ~MDIO_STAT_ENC;
}
- iowrite32be(mdio_stat, &regs->mdio_stat);
+ xgmac_write32(mdio_stat, &regs->mdio_stat, endian);
- ret = xgmac_wait_until_free(&bus->dev, regs);
+ ret = xgmac_wait_until_free(&bus->dev, regs, endian);
if (ret)
return ret;
/* Set the port and dev addr */
mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
- iowrite32be(mdio_ctl, &regs->mdio_ctl);
+ xgmac_write32(mdio_ctl, &regs->mdio_ctl, endian);
/* Set the register address */
if (regnum & MII_ADDR_C45) {
- iowrite32be(regnum & 0xffff, &regs->mdio_addr);
+ xgmac_write32(regnum & 0xffff, &regs->mdio_addr, endian);
- ret = xgmac_wait_until_free(&bus->dev, regs);
+ ret = xgmac_wait_until_free(&bus->dev, regs, endian);
if (ret)
return ret;
}
/* Write the value to the register */
- iowrite32be(MDIO_DATA(value), &regs->mdio_data);
+ xgmac_write32(MDIO_DATA(value), &regs->mdio_data, endian);
- ret = xgmac_wait_until_done(&bus->dev, regs);
+ ret = xgmac_wait_until_done(&bus->dev, regs, endian);
if (ret)
return ret;
@@ -151,14 +181,16 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val
*/
static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
{
- struct tgec_mdio_controller __iomem *regs = bus->priv;
+ struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
+ struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
uint16_t dev_addr;
uint32_t mdio_stat;
uint32_t mdio_ctl;
uint16_t value;
int ret;
+ bool endian = priv->is_little_endian;
- mdio_stat = ioread32be(&regs->mdio_stat);
+ mdio_stat = xgmac_read32(&regs->mdio_stat, endian);
if (regnum & MII_ADDR_C45) {
dev_addr = (regnum >> 16) & 0x1f;
mdio_stat |= MDIO_STAT_ENC;
@@ -167,41 +199,41 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
mdio_stat &= ~MDIO_STAT_ENC;
}
- iowrite32be(mdio_stat, &regs->mdio_stat);
+ xgmac_write32(mdio_stat, &regs->mdio_stat, endian);
- ret = xgmac_wait_until_free(&bus->dev, regs);
+ ret = xgmac_wait_until_free(&bus->dev, regs, endian);
if (ret)
return ret;
/* Set the Port and Device Addrs */
mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
- iowrite32be(mdio_ctl, &regs->mdio_ctl);
+ xgmac_write32(mdio_ctl, &regs->mdio_ctl, endian);
/* Set the register address */
if (regnum & MII_ADDR_C45) {
- iowrite32be(regnum & 0xffff, &regs->mdio_addr);
+ xgmac_write32(regnum & 0xffff, &regs->mdio_addr, endian);
- ret = xgmac_wait_until_free(&bus->dev, regs);
+ ret = xgmac_wait_until_free(&bus->dev, regs, endian);
if (ret)
return ret;
}
/* Initiate the read */
- iowrite32be(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl);
+ xgmac_write32(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl, endian);
- ret = xgmac_wait_until_done(&bus->dev, regs);
+ ret = xgmac_wait_until_done(&bus->dev, regs, endian);
if (ret)
return ret;
/* Return all Fs if nothing was there */
- if (ioread32be(&regs->mdio_stat) & MDIO_STAT_RD_ER) {
+ if (xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) {
dev_err(&bus->dev,
"Error while reading PHY%d reg at %d.%hhu\n",
phy_id, dev_addr, regnum);
return 0xffff;
}
- value = ioread32be(&regs->mdio_data) & 0xffff;
+ value = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
dev_dbg(&bus->dev, "read %04x\n", value);
return value;
@@ -212,6 +244,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
struct mii_bus *bus;
struct resource res;
+ struct mdio_fsl_priv *priv;
int ret;
ret = of_address_to_resource(np, 0, &res);
@@ -220,7 +253,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
return ret;
}
- bus = mdiobus_alloc();
+ bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv));
if (!bus)
return -ENOMEM;
@@ -231,12 +264,19 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res.start);
/* Set the PHY base address */
- bus->priv = of_iomap(np, 0);
- if (!bus->priv) {
+ priv = bus->priv;
+ priv->mdio_base = of_iomap(np, 0);
+ if (!priv->mdio_base) {
ret = -ENOMEM;
goto err_ioremap;
}
+ if (of_get_property(pdev->dev.of_node,
+ "little-endian", NULL))
+ priv->is_little_endian = true;
+ else
+ priv->is_little_endian = false;
+
ret = of_mdiobus_register(bus, np);
if (ret) {
dev_err(&pdev->dev, "cannot register MDIO bus\n");
@@ -248,7 +288,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
return 0;
err_registration:
- iounmap(bus->priv);
+ iounmap(priv->mdio_base);
err_ioremap:
mdiobus_free(bus);
@@ -267,7 +307,7 @@ static int xgmac_mdio_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id xgmac_mdio_match[] = {
+static const struct of_device_id xgmac_mdio_match[] = {
{
.compatible = "fsl,fman-xmdio",
},
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index c05e50759621..291c87036e17 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -103,7 +103,7 @@ static int ehea_probe_adapter(struct platform_device *dev);
static int ehea_remove(struct platform_device *dev);
-static struct of_device_id ehea_module_device_table[] = {
+static const struct of_device_id ehea_module_device_table[] = {
{
.name = "lhea",
.compatible = "IBM,lhea",
@@ -116,7 +116,7 @@ static struct of_device_id ehea_module_device_table[] = {
};
MODULE_DEVICE_TABLE(of, ehea_module_device_table);
-static struct of_device_id ehea_device_table[] = {
+static const struct of_device_id ehea_device_table[] = {
{
.name = "lhea",
.compatible = "IBM,lhea",
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 162762d1a12c..8a17b97baa20 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2981,7 +2981,7 @@ static int emac_remove(struct platform_device *ofdev)
}
/* XXX Features in here should be replaced by properties... */
-static struct of_device_id emac_match[] =
+static const struct of_device_id emac_match[] =
{
{
.type = "network",
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index dddaab11a4c7..fdb5cdb3cd15 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -753,7 +753,7 @@ static int mal_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id mal_platform_match[] =
+static const struct of_device_id mal_platform_match[] =
{
{
.compatible = "ibm,mcmal",
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c
index 457088fc5b06..206ccbbae7bb 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.c
+++ b/drivers/net/ethernet/ibm/emac/rgmii.c
@@ -305,7 +305,7 @@ static int rgmii_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id rgmii_match[] =
+static const struct of_device_id rgmii_match[] =
{
{
.compatible = "ibm,rgmii",
diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c
index cb18e7f917c6..32cb6c9007c5 100644
--- a/drivers/net/ethernet/ibm/emac/tah.c
+++ b/drivers/net/ethernet/ibm/emac/tah.c
@@ -148,7 +148,7 @@ static int tah_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id tah_match[] =
+static const struct of_device_id tah_match[] =
{
{
.compatible = "ibm,tah",
diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c
index 36409ccb75ea..8727b865ea02 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.c
+++ b/drivers/net/ethernet/ibm/emac/zmii.c
@@ -295,7 +295,7 @@ static int zmii_remove(struct platform_device *ofdev)
return 0;
}
-static struct of_device_id zmii_match[] =
+static const struct of_device_id zmii_match[] =
{
{
.compatible = "ibm,zmii",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index c59ed925adaf..763ba2641d5c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2379,6 +2379,33 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
}
#endif
+static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[queue_index];
+ struct mlx4_update_qp_params params;
+ int err;
+
+ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT))
+ return -EOPNOTSUPP;
+
+ /* rate provided to us in Mbs, check if it fits into 12 bits, if not use Gbs */
+ if (maxrate >> 12) {
+ params.rate_unit = MLX4_QP_RATE_LIMIT_GBS;
+ params.rate_val = maxrate / 1000;
+ } else if (maxrate) {
+ params.rate_unit = MLX4_QP_RATE_LIMIT_MBS;
+ params.rate_val = maxrate;
+ } else { /* zero serves to revoke the QP rate-limitation */
+ params.rate_unit = 0;
+ params.rate_val = 0;
+ }
+
+ err = mlx4_update_qp(priv->mdev->dev, tx_ring->qpn, MLX4_UPDATE_QP_RATE_LIMIT,
+ &params);
+ return err;
+}
+
static const struct net_device_ops mlx4_netdev_ops = {
.ndo_open = mlx4_en_open,
.ndo_stop = mlx4_en_close,
@@ -2410,6 +2437,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
.ndo_del_vxlan_port = mlx4_en_del_vxlan_port,
.ndo_features_check = mlx4_en_features_check,
#endif
+ .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
};
static const struct net_device_ops mlx4_netdev_ops_master = {
@@ -2444,6 +2472,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
.ndo_del_vxlan_port = mlx4_en_del_vxlan_port,
.ndo_features_check = mlx4_en_features_check,
#endif
+ .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
};
struct mlx4_en_bond {
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 242bcee5d774..4a471f5d1b56 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -144,7 +144,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[19] = "Performance optimized for limited rule configuration flow steering support",
[20] = "Recoverable error events support",
[21] = "Port Remap support",
- [22] = "QCN support"
+ [22] = "QCN support",
+ [23] = "QP rate limiting support"
};
int i;
@@ -697,6 +698,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8
#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2
+
dev_cap->flags2 = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -904,6 +909,18 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET);
dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET);
+ dev_cap->rl_caps.num_rates = size;
+ if (dev_cap->rl_caps.num_rates) {
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET);
+ dev_cap->rl_caps.max_val = size & 0xfff;
+ dev_cap->rl_caps.max_unit = size >> 14;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET);
+ dev_cap->rl_caps.min_val = size & 0xfff;
+ dev_cap->rl_caps.min_unit = size >> 14;
+ }
+
MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
if (field32 & (1 << 16))
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
@@ -979,6 +996,15 @@ void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->dmfs_high_rate_qpn_base);
mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n",
dev_cap->dmfs_high_rate_qpn_range);
+
+ if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT) {
+ struct mlx4_rate_limit_caps *rl_caps = &dev_cap->rl_caps;
+
+ mlx4_dbg(dev, "QP Rate-Limit: #rates %d, unit/val max %d/%d, min %d/%d\n",
+ rl_caps->num_rates, rl_caps->max_unit, rl_caps->max_val,
+ rl_caps->min_unit, rl_caps->min_val);
+ }
+
dump_dev_cap_flags(dev, dev_cap->flags);
dump_dev_cap_flags2(dev, dev_cap->flags2);
}
@@ -1075,6 +1101,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
u64 flags;
int err = 0;
u8 field;
+ u16 field16;
u32 bmme_flags, field32;
int real_port;
int slave_port;
@@ -1158,6 +1185,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
field &= 0xfe;
MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
+ /* turn off QP max-rate limiting for guests */
+ field16 = 0;
+ MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET);
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index f44f7f6017ed..863655bd3947 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -127,6 +127,7 @@ struct mlx4_dev_cap {
u32 max_counters;
u32 dmfs_high_rate_qpn_base;
u32 dmfs_high_rate_qpn_range;
+ struct mlx4_rate_limit_caps rl_caps;
struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
};
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 7e487223489a..43aa76775b5f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -489,6 +489,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
}
+ dev->caps.rl_caps = dev_cap->rl_caps;
+
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
dev->caps.dmfs_high_rate_qpn_range;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index eda29dbbfcd2..69e4462e4ee4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -442,6 +442,11 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN);
}
+ if (attr & MLX4_UPDATE_QP_RATE_LIMIT) {
+ qp_mask |= 1ULL << MLX4_UPD_QP_MASK_RATE_LIMIT;
+ cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val);
+ }
+
cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask);
cmd->qp_mask = cpu_to_be64(qp_mask);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index d43e25914d19..c258f8625aac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2947,8 +2947,12 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
optpar = be32_to_cpu(*(__be32 *) inbox->buf);
- if (slave != mlx4_master_func_num(dev))
+ if (slave != mlx4_master_func_num(dev)) {
qp_ctx->params2 &= ~MLX4_QP_BIT_FPP;
+ /* setting QP rate-limit is disallowed for VFs */
+ if (qp_ctx->rate_limit_params)
+ return -EPERM;
+ }
switch (qp_type) {
case MLX4_QP_ST_RC:
diff --git a/drivers/net/ethernet/octeon/octeon_mgmt.c b/drivers/net/ethernet/octeon/octeon_mgmt.c
index d36599f47af5..7bf9c028d8d7 100644
--- a/drivers/net/ethernet/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/octeon/octeon_mgmt.c
@@ -1557,7 +1557,7 @@ static int octeon_mgmt_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id octeon_mgmt_match[] = {
+static const struct of_device_id octeon_mgmt_match[] = {
{
.compatible = "cavium,octeon-5750-mix",
},
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 223348d8cc07..c9558e6d57ad 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -30,6 +30,7 @@
#include <linux/if_vlan.h>
#include <linux/if_bridge.h>
#include <linux/bitops.h>
+#include <linux/ctype.h>
#include <net/switchdev.h>
#include <net/rtnetlink.h>
#include <net/ip_fib.h>
@@ -1630,6 +1631,53 @@ rocker_cmd_get_port_settings_macaddr_proc(struct rocker *rocker,
return 0;
}
+struct port_name {
+ char *buf;
+ size_t len;
+};
+
+static int
+rocker_cmd_get_port_settings_phys_name_proc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+ struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1];
+ struct port_name *name = priv;
+ struct rocker_tlv *attr;
+ size_t i, j, len;
+ char *str;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_CMD_INFO])
+ return -EIO;
+
+ rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ attrs[ROCKER_TLV_CMD_INFO]);
+ attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME];
+ if (!attr)
+ return -EIO;
+
+ len = min_t(size_t, rocker_tlv_len(attr), name->len);
+ str = rocker_tlv_data(attr);
+
+ /* make sure name only contains alphanumeric characters */
+ for (i = j = 0; i < len; ++i) {
+ if (isalnum(str[i])) {
+ name->buf[j] = str[i];
+ j++;
+ }
+ }
+
+ if (j == 0)
+ return -EIO;
+
+ name->buf[j] = '\0';
+
+ return 0;
+}
+
static int
rocker_cmd_set_port_settings_ethtool_prep(struct rocker *rocker,
struct rocker_port *rocker_port,
@@ -2955,11 +3003,16 @@ static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port,
struct rocker_port *p;
struct rocker *rocker = rocker_port->rocker;
u32 group_id = ROCKER_GROUP_L2_FLOOD(vlan_id, 0);
- u32 group_ids[ROCKER_FP_PORTS_MAX];
+ u32 *group_ids;
u8 group_count = 0;
- int err;
+ int err = 0;
int i;
+ group_ids = kcalloc(rocker->port_count, sizeof(u32),
+ rocker_op_flags_gfp(flags));
+ if (!group_ids)
+ return -ENOMEM;
+
/* Adjust the flood group for this VLAN. The flood group
* references an L2 interface group for each port in this
* VLAN.
@@ -2977,7 +3030,7 @@ static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port,
/* If there are no bridged ports in this VLAN, we're done */
if (group_count == 0)
- return 0;
+ goto no_ports_in_vlan;
err = rocker_group_l2_flood(rocker_port, flags, vlan_id,
group_count, group_ids,
@@ -2986,6 +3039,8 @@ static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port,
netdev_err(rocker_port->dev,
"Error (%d) port VLAN l2 flood group\n", err);
+no_ports_in_vlan:
+ kfree(group_ids);
return err;
}
@@ -4131,8 +4186,42 @@ static int rocker_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
rocker_port->brport_flags, mask);
}
-static int rocker_port_switch_parent_id_get(struct net_device *dev,
- struct netdev_phys_item_id *psid)
+static int rocker_port_get_phys_port_name(struct net_device *dev,
+ char *buf, size_t len)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ struct port_name name = { .buf = buf, .len = len };
+ int err;
+
+ err = rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_get_port_settings_prep, NULL,
+ rocker_cmd_get_port_settings_phys_name_proc,
+ &name, false);
+
+ return err ? -EOPNOTSUPP : 0;
+}
+
+static const struct net_device_ops rocker_port_netdev_ops = {
+ .ndo_open = rocker_port_open,
+ .ndo_stop = rocker_port_stop,
+ .ndo_start_xmit = rocker_port_xmit,
+ .ndo_set_mac_address = rocker_port_set_mac_address,
+ .ndo_vlan_rx_add_vid = rocker_port_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = rocker_port_vlan_rx_kill_vid,
+ .ndo_fdb_add = rocker_port_fdb_add,
+ .ndo_fdb_del = rocker_port_fdb_del,
+ .ndo_fdb_dump = rocker_port_fdb_dump,
+ .ndo_bridge_setlink = rocker_port_bridge_setlink,
+ .ndo_bridge_getlink = rocker_port_bridge_getlink,
+ .ndo_get_phys_port_name = rocker_port_get_phys_port_name,
+};
+
+/********************
+ * swdev interface
+ ********************/
+
+static int rocker_port_swdev_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
{
struct rocker_port *rocker_port = netdev_priv(dev);
struct rocker *rocker = rocker_port->rocker;
@@ -4142,18 +4231,18 @@ static int rocker_port_switch_parent_id_get(struct net_device *dev,
return 0;
}
-static int rocker_port_switch_port_stp_update(struct net_device *dev, u8 state)
+static int rocker_port_swdev_port_stp_update(struct net_device *dev, u8 state)
{
struct rocker_port *rocker_port = netdev_priv(dev);
return rocker_port_stp_update(rocker_port, state);
}
-static int rocker_port_switch_fib_ipv4_add(struct net_device *dev,
- __be32 dst, int dst_len,
- struct fib_info *fi,
- u8 tos, u8 type,
- u32 nlflags, u32 tb_id)
+static int rocker_port_swdev_fib_ipv4_add(struct net_device *dev,
+ __be32 dst, int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type,
+ u32 nlflags, u32 tb_id)
{
struct rocker_port *rocker_port = netdev_priv(dev);
int flags = 0;
@@ -4162,10 +4251,10 @@ static int rocker_port_switch_fib_ipv4_add(struct net_device *dev,
fi, tb_id, flags);
}
-static int rocker_port_switch_fib_ipv4_del(struct net_device *dev,
- __be32 dst, int dst_len,
- struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+static int rocker_port_swdev_fib_ipv4_del(struct net_device *dev,
+ __be32 dst, int dst_len,
+ struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id)
{
struct rocker_port *rocker_port = netdev_priv(dev);
int flags = ROCKER_OP_FLAG_REMOVE;
@@ -4174,22 +4263,11 @@ static int rocker_port_switch_fib_ipv4_del(struct net_device *dev,
fi, tb_id, flags);
}
-static const struct net_device_ops rocker_port_netdev_ops = {
- .ndo_open = rocker_port_open,
- .ndo_stop = rocker_port_stop,
- .ndo_start_xmit = rocker_port_xmit,
- .ndo_set_mac_address = rocker_port_set_mac_address,
- .ndo_vlan_rx_add_vid = rocker_port_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = rocker_port_vlan_rx_kill_vid,
- .ndo_fdb_add = rocker_port_fdb_add,
- .ndo_fdb_del = rocker_port_fdb_del,
- .ndo_fdb_dump = rocker_port_fdb_dump,
- .ndo_bridge_setlink = rocker_port_bridge_setlink,
- .ndo_bridge_getlink = rocker_port_bridge_getlink,
- .ndo_switch_parent_id_get = rocker_port_switch_parent_id_get,
- .ndo_switch_port_stp_update = rocker_port_switch_port_stp_update,
- .ndo_switch_fib_ipv4_add = rocker_port_switch_fib_ipv4_add,
- .ndo_switch_fib_ipv4_del = rocker_port_switch_fib_ipv4_del,
+static const struct swdev_ops rocker_port_swdev_ops = {
+ .swdev_parent_id_get = rocker_port_swdev_parent_id_get,
+ .swdev_port_stp_update = rocker_port_swdev_port_stp_update,
+ .swdev_fib_ipv4_add = rocker_port_swdev_fib_ipv4_add,
+ .swdev_fib_ipv4_del = rocker_port_swdev_fib_ipv4_del,
};
/********************
@@ -4544,6 +4622,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
rocker_port_dev_addr_init(rocker, rocker_port);
dev->netdev_ops = &rocker_port_netdev_ops;
dev->ethtool_ops = &rocker_port_ethtool_ops;
+ dev->swdev_ops = &rocker_port_swdev_ops;
netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
NAPI_POLL_WEIGHT);
netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 51e430d25138..a4e9591d7457 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -158,6 +158,7 @@ enum {
ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */
ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */
ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, /* binary */
__ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
ROCKER_TLV_CMD_PORT_SETTINGS_MAX =
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 9f14d8b515c7..1c4dd8081a57 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2127,7 +2127,7 @@ static int netcp_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id of_match[] = {
+static const struct of_device_id of_match[] = {
{ .compatible = "ti,netcp-1.0", },
{},
};
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 8fb807ea1caa..de2850497c09 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -288,7 +288,7 @@ MODULE_DEVICE_TABLE(pci, rhine_pci_tbl);
* The .data field is currently only used to store quirks
*/
static u32 vt8500_quirks = rqWOL | rqForceReset | rq6patterns;
-static struct of_device_id rhine_of_tbl[] = {
+static const struct of_device_id rhine_of_tbl[] = {
{ .compatible = "via,vt8500-rhine", .data = &vt8500_quirks },
{ } /* terminate list */
};
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index c20206f83cc1..ae68afd50a15 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -392,7 +392,7 @@ MODULE_DEVICE_TABLE(pci, velocity_pci_id_table);
* Describe the OF device identifiers that we support in this
* device driver. Used for devicetree nodes.
*/
-static struct of_device_id velocity_of_ids[] = {
+static const struct of_device_id velocity_of_ids[] = {
{ .compatible = "via,velocity-vt6110", .data = &chip_info_table[0] },
{ /* Sentinel */ },
};
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index dbcbf0c5bcfa..690a4c36b316 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1157,7 +1157,7 @@ static int temac_of_remove(struct platform_device *op)
return 0;
}
-static struct of_device_id temac_of_match[] = {
+static const struct of_device_id temac_of_match[] = {
{ .compatible = "xlnx,xps-ll-temac-1.01.b", },
{ .compatible = "xlnx,xps-ll-temac-2.00.a", },
{ .compatible = "xlnx,xps-ll-temac-2.02.a", },
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index a6d2860b712c..28b7e7d9c272 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -48,7 +48,7 @@
#define AXIENET_REGS_N 32
/* Match table for of_platform binding */
-static struct of_device_id axienet_of_match[] = {
+static const struct of_device_id axienet_of_match[] = {
{ .compatible = "xlnx,axi-ethernet-1.00.a", },
{ .compatible = "xlnx,axi-ethernet-1.01.a", },
{ .compatible = "xlnx,axi-ethernet-2.01.a", },
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 9d4ce388510a..2111b91f5e49 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1231,7 +1231,7 @@ static struct net_device_ops xemaclite_netdev_ops = {
};
/* Match table for OF platform binding */
-static struct of_device_id xemaclite_of_match[] = {
+static const struct of_device_id xemaclite_of_match[] = {
{ .compatible = "xlnx,opb-ethernetlite-1.01.a", },
{ .compatible = "xlnx,opb-ethernetlite-1.01.b", },
{ .compatible = "xlnx,xps-ethernetlite-1.00.a", },
diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c
index 6deac6d32f57..414fdf1f343f 100644
--- a/drivers/net/phy/mdio-bcm-unimac.c
+++ b/drivers/net/phy/mdio-bcm-unimac.c
@@ -187,7 +187,7 @@ static int unimac_mdio_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id unimac_mdio_ids[] = {
+static const struct of_device_id unimac_mdio_ids[] = {
{ .compatible = "brcm,genet-mdio-v4", },
{ .compatible = "brcm,genet-mdio-v3", },
{ .compatible = "brcm,genet-mdio-v2", },
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 0a0578a592b8..49ce7ece5af3 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -249,7 +249,7 @@ static int mdio_gpio_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id mdio_gpio_of_match[] = {
+static const struct of_device_id mdio_gpio_of_match[] = {
{ .compatible = "virtual,mdio-gpio", },
{ /* sentinel */ }
};
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index 320eb15315c8..1a87a585e74d 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -99,7 +99,7 @@ static int mdio_mux_gpio_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id mdio_mux_gpio_match[] = {
+static const struct of_device_id mdio_mux_gpio_match[] = {
{
.compatible = "mdio-mux-gpio",
},
diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c
index 0aa985c74014..2377c1341172 100644
--- a/drivers/net/phy/mdio-mux-mmioreg.c
+++ b/drivers/net/phy/mdio-mux-mmioreg.c
@@ -145,7 +145,7 @@ static int mdio_mux_mmioreg_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id mdio_mux_mmioreg_match[] = {
+static const struct of_device_id mdio_mux_mmioreg_match[] = {
{
.compatible = "mdio-mux-mmioreg",
},
diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c
index c81052486edc..c838ad6155f7 100644
--- a/drivers/net/phy/mdio-octeon.c
+++ b/drivers/net/phy/mdio-octeon.c
@@ -252,7 +252,7 @@ static int octeon_mdiobus_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id octeon_mdiobus_match[] = {
+static const struct of_device_id octeon_mdiobus_match[] = {
{
.compatible = "cavium,octeon-3860-mdio",
},
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 25d92d4fc625..5f749a51f356 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -127,10 +127,6 @@ struct vxlan_dev {
__u8 ttl;
u32 flags; /* VXLAN_F_* in vxlan.h */
- struct work_struct sock_work;
- struct work_struct igmp_join;
- struct work_struct igmp_leave;
-
unsigned long age_interval;
struct timer_list age_timer;
spinlock_t hash_lock;
@@ -144,8 +140,6 @@ struct vxlan_dev {
static u32 vxlan_salt __read_mostly;
static struct workqueue_struct *vxlan_wq;
-static void vxlan_sock_work(struct work_struct *work);
-
#if IS_ENABLED(CONFIG_IPV6)
static inline
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
@@ -1072,11 +1066,6 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
return false;
}
-static void vxlan_sock_hold(struct vxlan_sock *vs)
-{
- atomic_inc(&vs->refcnt);
-}
-
void vxlan_sock_release(struct vxlan_sock *vs)
{
struct sock *sk = vs->sock->sk;
@@ -1095,17 +1084,16 @@ void vxlan_sock_release(struct vxlan_sock *vs)
}
EXPORT_SYMBOL_GPL(vxlan_sock_release);
-/* Callback to update multicast group membership when first VNI on
+/* Update multicast group membership when first VNI on
* multicast asddress is brought up
- * Done as workqueue because ip_mc_join_group acquires RTNL.
*/
-static void vxlan_igmp_join(struct work_struct *work)
+static int vxlan_igmp_join(struct vxlan_dev *vxlan)
{
- struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
+ int ret;
lock_sock(sk);
if (ip->sa.sa_family == AF_INET) {
@@ -1114,27 +1102,26 @@ static void vxlan_igmp_join(struct work_struct *work)
.imr_ifindex = ifindex,
};
- ip_mc_join_group(sk, &mreq);
+ ret = ip_mc_join_group(sk, &mreq);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
- &ip->sin6.sin6_addr);
+ ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
+ &ip->sin6.sin6_addr);
#endif
}
release_sock(sk);
- vxlan_sock_release(vs);
- dev_put(vxlan->dev);
+ return ret;
}
/* Inverse of vxlan_igmp_join when last VNI is brought down */
-static void vxlan_igmp_leave(struct work_struct *work)
+static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
{
- struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
+ int ret;
lock_sock(sk);
if (ip->sa.sa_family == AF_INET) {
@@ -1143,18 +1130,16 @@ static void vxlan_igmp_leave(struct work_struct *work)
.imr_ifindex = ifindex,
};
- ip_mc_leave_group(sk, &mreq);
+ ret = ip_mc_leave_group(sk, &mreq);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
- &ip->sin6.sin6_addr);
+ ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
+ &ip->sin6.sin6_addr);
#endif
}
-
release_sock(sk);
- vxlan_sock_release(vs);
- dev_put(vxlan->dev);
+ return ret;
}
static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
@@ -2175,37 +2160,22 @@ static void vxlan_cleanup(unsigned long arg)
static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
__u32 vni = vxlan->default_dst.remote_vni;
vxlan->vn_sock = vs;
+ spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+ spin_unlock(&vn->sock_lock);
}
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- struct vxlan_sock *vs;
- bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
-
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->dst_port, vxlan->flags);
- if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
- /* If we have a socket with same port already, reuse it */
- vxlan_vs_add_dev(vs, vxlan);
- } else {
- /* otherwise make new socket outside of RTNL */
- dev_hold(dev);
- queue_work(vxlan_wq, &vxlan->sock_work);
- }
- spin_unlock(&vn->sock_lock);
-
return 0;
}
@@ -2223,12 +2193,9 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
static void vxlan_uninit(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_sock *vs = vxlan->vn_sock;
vxlan_fdb_delete_default(vxlan);
- if (vs)
- vxlan_sock_release(vs);
free_percpu(dev->tstats);
}
@@ -2236,22 +2203,28 @@ static void vxlan_uninit(struct net_device *dev)
static int vxlan_open(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_sock *vs = vxlan->vn_sock;
+ struct vxlan_sock *vs;
+ int ret = 0;
- /* socket hasn't been created */
- if (!vs)
- return -ENOTCONN;
+ vs = vxlan_sock_add(vxlan->net, vxlan->dst_port, vxlan_rcv, NULL,
+ false, vxlan->flags);
+ if (IS_ERR(vs))
+ return PTR_ERR(vs);
+
+ vxlan_vs_add_dev(vs, vxlan);
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
- vxlan_sock_hold(vs);
- dev_hold(dev);
- queue_work(vxlan_wq, &vxlan->igmp_join);
+ ret = vxlan_igmp_join(vxlan);
+ if (ret) {
+ vxlan_sock_release(vs);
+ return ret;
+ }
}
if (vxlan->age_interval)
mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
- return 0;
+ return ret;
}
/* Purge the forwarding table */
@@ -2279,19 +2252,21 @@ static int vxlan_stop(struct net_device *dev)
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock;
+ int ret = 0;
if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
!vxlan_group_used(vn, vxlan)) {
- vxlan_sock_hold(vs);
- dev_hold(dev);
- queue_work(vxlan_wq, &vxlan->igmp_leave);
+ ret = vxlan_igmp_leave(vxlan);
+ if (ret)
+ return ret;
}
del_timer_sync(&vxlan->age_timer);
vxlan_flush(vxlan);
+ vxlan_sock_release(vs);
- return 0;
+ return ret;
}
/* Stub, nothing needs to be done. */
@@ -2402,9 +2377,6 @@ static void vxlan_setup(struct net_device *dev)
INIT_LIST_HEAD(&vxlan->next);
spin_lock_init(&vxlan->hash_lock);
- INIT_WORK(&vxlan->igmp_join, vxlan_igmp_join);
- INIT_WORK(&vxlan->igmp_leave, vxlan_igmp_leave);
- INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
init_timer_deferrable(&vxlan->age_timer);
vxlan->age_timer.function = vxlan_cleanup;
@@ -2551,6 +2523,8 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
sock = vxlan_create_sock(net, ipv6, port, flags);
if (IS_ERR(sock)) {
+ pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
+ PTR_ERR(sock));
kfree(vs);
return ERR_CAST(sock);
}
@@ -2590,45 +2564,23 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
struct vxlan_sock *vs;
bool ipv6 = flags & VXLAN_F_IPV6;
- vs = vxlan_socket_create(net, port, rcv, data, flags);
- if (!IS_ERR(vs))
- return vs;
-
- if (no_share) /* Return error if sharing is not allowed. */
- return vs;
-
- spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
- if (vs && ((vs->rcv != rcv) ||
- !atomic_add_unless(&vs->refcnt, 1, 0)))
- vs = ERR_PTR(-EBUSY);
- spin_unlock(&vn->sock_lock);
-
- if (!vs)
- vs = ERR_PTR(-EINVAL);
+ if (!no_share) {
+ spin_lock(&vn->sock_lock);
+ vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
+ flags);
+ if (vs && vs->rcv == rcv) {
+ if (!atomic_add_unless(&vs->refcnt, 1, 0))
+ vs = ERR_PTR(-EBUSY);
+ spin_unlock(&vn->sock_lock);
+ return vs;
+ }
+ spin_unlock(&vn->sock_lock);
+ }
- return vs;
+ return vxlan_socket_create(net, port, rcv, data, flags);
}
EXPORT_SYMBOL_GPL(vxlan_sock_add);
-/* Scheduled at device creation to bind to a socket */
-static void vxlan_sock_work(struct work_struct *work)
-{
- struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
- struct net *net = vxlan->net;
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- __be16 port = vxlan->dst_port;
- struct vxlan_sock *nvs;
-
- nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags);
- spin_lock(&vn->sock_lock);
- if (!IS_ERR(nvs))
- vxlan_vs_add_dev(nvs, vxlan);
- spin_unlock(&vn->sock_lock);
-
- dev_put(vxlan->dev);
-}
-
static int vxlan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
diff --git a/drivers/net/wireless/orinoco/airport.c b/drivers/net/wireless/orinoco/airport.c
index 0ca8b1455cd9..77e6c53040a3 100644
--- a/drivers/net/wireless/orinoco/airport.c
+++ b/drivers/net/wireless/orinoco/airport.c
@@ -228,7 +228,7 @@ MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
MODULE_DESCRIPTION("Driver for the Apple Airport wireless card.");
MODULE_LICENSE("Dual MPL/GPL");
-static struct of_device_id airport_match[] = {
+static const struct of_device_id airport_match[] = {
{
.name = "radio",
},
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 80f2e0fc3d02..280a315de8d6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -103,6 +103,9 @@ struct bpf_verifier_ops {
* with 'type' (read or write) is allowed
*/
bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
+
+ u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
+ struct bpf_insn *insn);
};
struct bpf_prog_type_list {
@@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f);
void bpf_map_put(struct bpf_map *map);
/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
#else
static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
@@ -154,4 +157,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
+extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index b5a6470e686c..2c677afeea47 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -111,9 +111,7 @@ struct ip_mc_list {
extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto);
extern int igmp_rcv(struct sk_buff *);
-extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
-extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
extern void ip_mc_drop_socket(struct sock *sk);
extern int ip_mc_source(int add, int omode, struct sock *sk,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1cc54822b931..4550c67b92e4 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -205,6 +205,7 @@ enum {
MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20,
MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21,
MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22,
+ MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23
};
enum {
@@ -450,6 +451,21 @@ enum mlx4_module_id {
MLX4_MODULE_ID_QSFP28 = 0x11,
};
+enum { /* rl */
+ MLX4_QP_RATE_LIMIT_NONE = 0,
+ MLX4_QP_RATE_LIMIT_KBS = 1,
+ MLX4_QP_RATE_LIMIT_MBS = 2,
+ MLX4_QP_RATE_LIMIT_GBS = 3
+};
+
+struct mlx4_rate_limit_caps {
+ u16 num_rates; /* Number of different rates */
+ u8 min_unit;
+ u16 min_val;
+ u8 max_unit;
+ u16 max_val;
+};
+
static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
{
return (major << 32) | (minor << 16) | subminor;
@@ -565,6 +581,7 @@ struct mlx4_caps {
u32 dmfs_high_rate_qpn_base;
u32 dmfs_high_rate_qpn_range;
u32 vf_caps;
+ struct mlx4_rate_limit_caps rl_caps;
};
struct mlx4_buf_list {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 551f85456c11..1023ebe035b7 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -207,14 +207,16 @@ struct mlx4_qp_context {
__be32 msn;
__be16 rq_wqe_counter;
__be16 sq_wqe_counter;
- u32 reserved3[2];
+ u32 reserved3;
+ __be16 rate_limit_params;
+ __be16 reserved4;
__be32 param3;
__be32 nummmcpeers_basemkey;
u8 log_page_size;
- u8 reserved4[2];
+ u8 reserved5[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
- u32 reserved5[10];
+ u32 reserved6[10];
};
struct mlx4_update_qp_context {
@@ -229,6 +231,7 @@ struct mlx4_update_qp_context {
enum {
MLX4_UPD_QP_MASK_PM_STATE = 32,
MLX4_UPD_QP_MASK_VSD = 33,
+ MLX4_UPD_QP_MASK_RATE_LIMIT = 35,
};
enum {
@@ -428,7 +431,8 @@ struct mlx4_wqe_inline_seg {
enum mlx4_update_qp_attr {
MLX4_UPDATE_QP_SMAC = 1 << 0,
MLX4_UPDATE_QP_VSD = 1 << 1,
- MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1
+ MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2,
+ MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1
};
enum mlx4_update_qp_params_flags {
@@ -438,6 +442,8 @@ enum mlx4_update_qp_params_flags {
struct mlx4_update_qp_params {
u8 smac_index;
u32 flags;
+ u16 rate_unit;
+ u16 rate_val;
};
int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ddab1a2a07a0..76951c5fbedf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -587,6 +587,7 @@ struct netdev_queue {
#ifdef CONFIG_BQL
struct dql dql;
#endif
+ unsigned long tx_maxrate;
} ____cacheline_aligned_in_smp;
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -768,8 +769,6 @@ struct netdev_phys_item_id {
typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
struct sk_buff *skb);
-struct fib_info;
-
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -1024,23 +1023,10 @@ struct fib_info;
* be otherwise expressed by feature flags. The check is called with
* the set of features that the stack has calculated and it returns
* those the driver believes to be appropriate.
- *
- * int (*ndo_switch_parent_id_get)(struct net_device *dev,
- * struct netdev_phys_item_id *psid);
- * Called to get an ID of the switch chip this port is part of.
- * If driver implements this, it indicates that it represents a port
- * of a switch chip.
- * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state);
- * Called to notify switch device port of bridge port STP
- * state change.
- * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst,
- * int dst_len, struct fib_info *fi,
- * u8 tos, u8 type, u32 nlflags, u32 tb_id);
- * Called to add/modify IPv4 route to switch device.
- * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst,
- * int dst_len, struct fib_info *fi,
- * u8 tos, u8 type, u32 tb_id);
- * Called to delete IPv4 route from switch device.
+ * int (*ndo_set_tx_maxrate)(struct net_device *dev,
+ * int queue_index, u32 maxrate);
+ * Called when a user wants to set a max-rate limitation of specific
+ * TX queue.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1178,6 +1164,8 @@ struct net_device_ops {
bool new_carrier);
int (*ndo_get_phys_port_id)(struct net_device *dev,
struct netdev_phys_item_id *ppid);
+ int (*ndo_get_phys_port_name)(struct net_device *dev,
+ char *name, size_t len);
void (*ndo_add_vxlan_port)(struct net_device *dev,
sa_family_t sa_family,
__be16 port);
@@ -1197,25 +1185,9 @@ struct net_device_ops {
netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features);
-#ifdef CONFIG_NET_SWITCHDEV
- int (*ndo_switch_parent_id_get)(struct net_device *dev,
- struct netdev_phys_item_id *psid);
- int (*ndo_switch_port_stp_update)(struct net_device *dev,
- u8 state);
- int (*ndo_switch_fib_ipv4_add)(struct net_device *dev,
- __be32 dst,
- int dst_len,
- struct fib_info *fi,
- u8 tos, u8 type,
- u32 nlflags,
- u32 tb_id);
- int (*ndo_switch_fib_ipv4_del)(struct net_device *dev,
- __be32 dst,
- int dst_len,
- struct fib_info *fi,
- u8 tos, u8 type,
- u32 tb_id);
-#endif
+ int (*ndo_set_tx_maxrate)(struct net_device *dev,
+ int queue_index,
+ u32 maxrate);
};
/**
@@ -1577,6 +1549,9 @@ struct net_device {
const struct net_device_ops *netdev_ops;
const struct ethtool_ops *ethtool_ops;
const struct forwarding_accel_ops *fwd_ops;
+#ifdef CONFIG_NET_SWITCHDEV
+ const struct swdev_ops *swdev_ops;
+#endif
const struct header_ops *header_ops;
@@ -2181,6 +2156,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name);
int dev_alloc_name(struct net_device *dev, const char *name);
int dev_open(struct net_device *dev);
int dev_close(struct net_device *dev);
+int dev_close_many(struct list_head *head, bool unlink);
void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct sk_buff *newskb);
int dev_queue_xmit(struct sk_buff *skb);
@@ -2974,6 +2950,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
int dev_change_carrier(struct net_device *, bool new_carrier);
int dev_get_phys_port_id(struct net_device *dev,
struct netdev_phys_item_id *ppid);
+int dev_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len);
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index c93ff8ac474a..99425f2be708 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -49,18 +49,25 @@ struct rhash_head {
/**
* struct bucket_table - Table of hash buckets
* @size: Number of hash buckets
+ * @rehash: Current bucket being rehashed
* @hash_rnd: Random seed to fold into hash
- * @shift: Current size (1 << shift)
* @locks_mask: Mask to apply before accessing locks[]
* @locks: Array of spinlocks protecting individual buckets
+ * @walkers: List of active walkers
+ * @rcu: RCU structure for freeing the table
+ * @future_tbl: Table under construction during rehashing
* @buckets: size * hash buckets
*/
struct bucket_table {
- size_t size;
+ unsigned int size;
+ unsigned int rehash;
u32 hash_rnd;
- u32 shift;
unsigned int locks_mask;
spinlock_t *locks;
+ struct list_head walkers;
+ struct rcu_head rcu;
+
+ struct bucket_table __rcu *future_tbl;
struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
@@ -76,8 +83,8 @@ struct rhashtable;
* @key_len: Length of key
* @key_offset: Offset of key in struct to be hashed
* @head_offset: Offset of rhash_head in struct to be hashed
- * @max_shift: Maximum number of shifts while expanding
- * @min_shift: Minimum number of shifts while shrinking
+ * @max_size: Maximum size while expanding
+ * @min_size: Minimum size while shrinking
* @nulls_base: Base value to generate nulls marker
* @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
* @hashfn: Function to hash key
@@ -88,8 +95,8 @@ struct rhashtable_params {
size_t key_len;
size_t key_offset;
size_t head_offset;
- size_t max_shift;
- size_t min_shift;
+ unsigned int max_size;
+ unsigned int min_size;
u32 nulls_base;
size_t locks_mul;
rht_hashfn_t hashfn;
@@ -99,33 +106,29 @@ struct rhashtable_params {
/**
* struct rhashtable - Hash table handle
* @tbl: Bucket table
- * @future_tbl: Table under construction during expansion/shrinking
* @nelems: Number of elements in table
* @p: Configuration parameters
* @run_work: Deferred worker to expand/shrink asynchronously
* @mutex: Mutex to protect current/future table swapping
- * @walkers: List of active walkers
* @being_destroyed: True if table is set up for destruction
*/
struct rhashtable {
struct bucket_table __rcu *tbl;
- struct bucket_table __rcu *future_tbl;
atomic_t nelems;
bool being_destroyed;
struct rhashtable_params p;
struct work_struct run_work;
struct mutex mutex;
- struct list_head walkers;
};
/**
* struct rhashtable_walker - Hash table walker
* @list: List entry on list of walkers
- * @resize: Resize event occured
+ * @tbl: The table that we were walking over
*/
struct rhashtable_walker {
struct list_head list;
- bool resize;
+ struct bucket_table *tbl;
};
/**
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 97dbf16f7d9d..f869ae8afbaf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -111,7 +111,7 @@ struct tcp_request_sock_ops;
struct tcp_request_sock {
struct inet_request_sock req;
const struct tcp_request_sock_ops *af_specific;
- struct sock *listener; /* needed for TFO */
+ bool tfo_listener;
u32 rcv_isn;
u32 snt_isn;
u32 snt_synack; /* synack sent time */
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 247cfdcc4b08..87c094961bd5 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -34,7 +34,7 @@ static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
-static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
+static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
{
return (num + net_hash_mix(net)) & mask;
}
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 9201afe083fa..7ff588ca6817 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -38,8 +38,6 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash,
return jhash_3words(lhash, fhash, ports, initval);
}
-int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp);
-
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
* we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index bcd64756e5fe..73fe0f9525d9 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -221,8 +221,8 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
-static inline int inet_bhashfn(struct net *net, const __u16 lport,
- const int bhash_size)
+static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
+ const u32 bhash_size)
{
return (lport + net_hash_mix(net)) & (bhash_size - 1);
}
@@ -231,7 +231,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum);
/* These can have wildcards, don't try too hard. */
-static inline int inet_lhashfn(struct net *net, const unsigned short num)
+static inline u32 inet_lhashfn(const struct net *net, const unsigned short num)
{
return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1);
}
@@ -249,6 +249,7 @@ void inet_put_port(struct sock *sk);
void inet_hashinfo_init(struct inet_hashinfo *h);
int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
+int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw);
void inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
@@ -383,13 +384,32 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
iph->daddr, dport, inet_iif(skb));
}
+u32 sk_ehashfn(const struct sock *sk);
+u32 inet6_ehashfn(const struct net *net,
+ const struct in6_addr *laddr, const u16 lport,
+ const struct in6_addr *faddr, const __be16 fport);
+
+static inline void sk_daddr_set(struct sock *sk, __be32 addr)
+{
+ sk->sk_daddr = addr; /* alias of inet_daddr */
+#if IS_ENABLED(CONFIG_IPV6)
+ ipv6_addr_set_v4mapped(addr, &sk->sk_v6_daddr);
+#endif
+}
+
+static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
+{
+ sk->sk_rcv_saddr = addr; /* alias of inet_rcv_saddr */
+#if IS_ENABLED(CONFIG_IPV6)
+ ipv6_addr_set_v4mapped(addr, &sk->sk_v6_rcv_saddr);
+#endif
+}
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
- struct inet_timewait_sock **),
- int (*hash)(struct sock *sk,
- struct inet_timewait_sock *twp));
+ struct inet_timewait_sock **));
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index b3053fdd871e..b6c3737da4e9 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -81,7 +81,6 @@ struct inet_request_sock {
#define ir_cookie req.__req_common.skc_cookie
#define ireq_net req.__req_common.skc_net
#define ireq_state req.__req_common.skc_state
-#define ireq_refcnt req.__req_common.skc_refcnt
#define ireq_family req.__req_common.skc_family
kmemcheck_bitfield_begin(flags);
@@ -94,11 +93,11 @@ struct inet_request_sock {
acked : 1,
no_srccheck: 1;
kmemcheck_bitfield_end(flags);
+ u32 ir_mark;
union {
struct ip_options_rcu *opt;
struct sk_buff *pktopts;
};
- u32 ir_mark;
};
static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -106,13 +105,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
return (struct inet_request_sock *)sk;
}
-static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
+ if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
return skb->mark;
- } else {
- return sk->sk_mark;
- }
+
+ return sk->sk_mark;
}
struct inet_cork {
@@ -245,20 +243,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
initval);
}
-static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
-{
- struct request_sock *req = reqsk_alloc(ops);
- struct inet_request_sock *ireq = inet_rsk(req);
-
- if (req != NULL) {
- kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
- atomic64_set(&ireq->ir_cookie, 0);
- ireq->ireq_state = TCP_NEW_SYN_RECV;
- }
-
- return req;
-}
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener);
static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b7673065c074..e7ba9758a345 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -942,10 +942,6 @@ void ipv6_sysctl_unregister(void);
int ipv6_sock_mc_join(struct sock *sk, int ifindex,
const struct in6_addr *addr);
-int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
- const struct in6_addr *addr);
int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
-int __ipv6_sock_mc_drop(struct sock *sk, int ifindex,
- const struct in6_addr *addr);
#endif /* _NET_IPV6_H */
diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
index c06ac58ca107..69a6715d9f3f 100644
--- a/include/net/netns/hash.h
+++ b/include/net/netns/hash.h
@@ -5,7 +5,7 @@
struct net;
-static inline unsigned int net_hash_mix(struct net *net)
+static inline u32 net_hash_mix(const struct net *net)
{
#ifdef CONFIG_NET_NS
/*
@@ -13,7 +13,7 @@ static inline unsigned int net_hash_mix(struct net *net)
* always zeroed
*/
- return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
+ return (u32)(((unsigned long)net) >> L1_CACHE_SHIFT);
#else
return 0;
#endif
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 3275cf31f731..e7ef86340514 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -52,6 +52,7 @@ struct request_sock {
#define rsk_refcnt __req_common.skc_refcnt
struct request_sock *dl_next;
+ struct sock *rsk_listener;
u16 mss;
u8 num_retrans; /* number of retransmits */
u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
@@ -67,13 +68,21 @@ struct request_sock {
u32 peer_secid;
};
-static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops)
+static inline struct request_sock *
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
{
struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC);
- if (req != NULL)
+ if (req) {
req->rsk_ops = ops;
-
+ sock_hold(sk_listener);
+ req->rsk_listener = sk_listener;
+
+ /* Following is temporary. It is coupled with debugging
+ * helpers in reqsk_put() & reqsk_free()
+ */
+ atomic_set(&req->rsk_refcnt, 0);
+ }
return req;
}
@@ -82,19 +91,27 @@ static inline struct request_sock *inet_reqsk(struct sock *sk)
return (struct request_sock *)sk;
}
-static inline void __reqsk_free(struct request_sock *req)
+static inline struct sock *req_to_sk(struct request_sock *req)
{
- kmem_cache_free(req->rsk_ops->slab, req);
+ return (struct sock *)req;
}
static inline void reqsk_free(struct request_sock *req)
{
+ /* temporary debugging */
+ WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
+
req->rsk_ops->destructor(req);
- __reqsk_free(req);
+ if (req->rsk_listener)
+ sock_put(req->rsk_listener);
+ kmem_cache_free(req->rsk_ops->slab, req);
}
static inline void reqsk_put(struct request_sock *req)
{
+ /* temporary debugging, until req sock are put into ehash table */
+ WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1);
+
if (atomic_dec_and_test(&req->rsk_refcnt))
reqsk_free(req);
}
@@ -285,6 +302,12 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
req->sk = NULL;
req->dl_next = lopt->syn_table[hash];
+ /* before letting lookups find us, make sure all req fields
+ * are committed to memory and refcnt initialized.
+ */
+ smp_wmb();
+ atomic_set(&req->rsk_refcnt, 1);
+
write_lock(&queue->syn_wait_lock);
lopt->syn_table[hash] = req;
write_unlock(&queue->syn_wait_lock);
diff --git a/include/net/sock.h b/include/net/sock.h
index f10832ca2e90..e0360f5a53e9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -67,6 +67,7 @@
#include <linux/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>
+#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
struct cgroup;
@@ -2218,6 +2219,14 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb)
return NULL;
}
+/* This helper checks if a socket is a full socket,
+ * ie _not_ a timewait or request socket.
+ */
+static inline bool sk_fullsock(const struct sock *sk)
+{
+ return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
+}
+
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 1a9382febcc3..e5de53f92482 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -14,6 +14,44 @@
#include <linux/netdevice.h>
#include <linux/notifier.h>
+struct fib_info;
+
+/**
+ * struct switchdev_ops - switchdev operations
+ *
+ * int (*swdev_parent_id_get)(struct net_device *dev,
+ * struct netdev_phys_item_id *psid);
+ * Called to get an ID of the switch chip this port is part of.
+ * If driver implements this, it indicates that it represents a port
+ * of a switch chip.
+ *
+ * int (*swdev_port_stp_update)(struct net_device *dev, u8 state);
+ * Called to notify switch device port of bridge port STP
+ * state change.
+ *
+ * int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst,
+ * int dst_len, struct fib_info *fi,
+ * u8 tos, u8 type, u32 nlflags, u32 tb_id);
+ * Called to add/modify IPv4 route to switch device.
+ *
+ * int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst,
+ * int dst_len, struct fib_info *fi,
+ * u8 tos, u8 type, u32 tb_id);
+ * Called to delete IPv4 route from switch device.
+ */
+struct swdev_ops {
+ int (*swdev_parent_id_get)(struct net_device *dev,
+ struct netdev_phys_item_id *psid);
+ int (*swdev_port_stp_update)(struct net_device *dev, u8 state);
+ int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst,
+ int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 nlflags,
+ u32 tb_id);
+ int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst,
+ int dst_len, struct fib_info *fi,
+ u8 tos, u8 type, u32 tb_id);
+};
+
enum netdev_switch_notifier_type {
NETDEV_SWITCH_FDB_ADD = 1,
NETDEV_SWITCH_FDB_DEL,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2e11e38205c2..5b29835b81d8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1137,31 +1137,6 @@ static inline int tcp_full_space(const struct sock *sk)
return tcp_win_from_space(sk->sk_rcvbuf);
}
-static inline void tcp_openreq_init(struct request_sock *req,
- struct tcp_options_received *rx_opt,
- struct sk_buff *skb, struct sock *sk)
-{
- struct inet_request_sock *ireq = inet_rsk(req);
-
- req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
- req->cookie_ts = 0;
- tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_rsk(req)->last_oow_ack_time = 0;
- req->mss = rx_opt->mss_clamp;
- req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
- ireq->tstamp_ok = rx_opt->tstamp_ok;
- ireq->sack_ok = rx_opt->sack_ok;
- ireq->snd_wscale = rx_opt->snd_wscale;
- ireq->wscale_ok = rx_opt->wscale_ok;
- ireq->acked = 0;
- ireq->ecn_ok = 0;
- ireq->ir_rmt_port = tcp_hdr(skb)->source;
- ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
- ireq->ir_mark = inet_request_mark(sk, skb);
-}
-
extern void tcp_openreq_init_rwin(struct request_sock *req,
struct sock *sk, struct dst_entry *dst);
@@ -1241,36 +1216,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
return true;
}
-/* Return true if we're currently rate-limiting out-of-window ACKs and
- * thus shouldn't send a dupack right now. We rate-limit dupacks in
- * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
- * attacks that send repeated SYNs or ACKs for the same connection. To
- * do this, we do not send a duplicate SYNACK or ACK if the remote
- * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
- */
-static inline bool tcp_oow_rate_limited(struct net *net,
- const struct sk_buff *skb,
- int mib_idx, u32 *last_oow_ack_time)
-{
- /* Data packets without SYNs are not likely part of an ACK loop. */
- if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
- !tcp_hdr(skb)->syn)
- goto not_rate_limited;
-
- if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
-
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
- NET_INC_STATS_BH(net, mib_idx);
- return true; /* rate-limited: don't send yet! */
- }
- }
-
- *last_oow_ack_time = tcp_time_stamp;
-
-not_rate_limited:
- return false; /* not rate-limited: go ahead, send dupack now! */
-}
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+ int mib_idx, u32 *last_oow_ack_time);
static inline void tcp_mib_init(struct net *net)
{
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3fa1af8a58d7..1623047af463 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -165,7 +165,22 @@ enum bpf_func_id {
BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
+ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
__BPF_FUNC_MAX_ID,
};
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 756436e1ce89..f5f5edd5ae5f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -147,6 +147,7 @@ enum {
IFLA_CARRIER_CHANGES,
IFLA_PHYS_SWITCH_ID,
IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
__IFLA_MAX
};
@@ -224,6 +225,9 @@ enum {
IFLA_BR_FORWARD_DELAY,
IFLA_BR_HELLO_TIME,
IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
__IFLA_BR_MAX,
};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 50603aec766a..4139a0f8b558 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -661,6 +661,9 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
+const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
+const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
+
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a3c7701a8b5e..bd7f5988ed9c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -11,6 +11,8 @@
*/
#include <linux/bpf.h>
#include <linux/rcupdate.h>
+#include <linux/random.h>
+#include <linux/smp.h>
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -87,3 +89,25 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
+
+static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return prandom_u32();
+}
+
+const struct bpf_func_proto bpf_get_prandom_u32_proto = {
+ .func = bpf_get_prandom_u32,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
+static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return raw_smp_processor_id();
+}
+
+const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
+ .func = bpf_get_smp_processor_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 669719ccc9ee..ea75c654af1b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr)
goto free_prog;
/* run eBPF verifier */
- err = bpf_check(prog, attr);
+ err = bpf_check(&prog, attr);
if (err < 0)
goto free_used_maps;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e6b522496250..c22ebd36fa4b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env)
return err;
} else if (class == BPF_LDX) {
- if (BPF_MODE(insn->code) != BPF_MEM ||
- insn->imm != 0) {
- verbose("BPF_LDX uses reserved fields\n");
- return -EINVAL;
- }
+ enum bpf_reg_type src_reg_type;
+
+ /* check for reserved fields is already done */
+
/* check src operand */
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err)
@@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
+ src_reg_type = regs[insn->src_reg].type;
+
+ if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) {
+ /* saw a valid insn
+ * dst_reg = *(u32 *)(src_reg + off)
+ * use reserved 'imm' field to mark this insn
+ */
+ insn->imm = src_reg_type;
+
+ } else if (src_reg_type != insn->imm &&
+ (src_reg_type == PTR_TO_CTX ||
+ insn->imm == PTR_TO_CTX)) {
+ /* ABuser program is trying to use the same insn
+ * dst_reg = *(u32*) (src_reg + off)
+ * with different pointer types:
+ * src_reg == ctx in one branch and
+ * src_reg == stack|map in some other branch.
+ * Reject it.
+ */
+ verbose("same insn cannot be used with different pointers\n");
+ return -EINVAL;
+ }
+
} else if (class == BPF_STX) {
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
@@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
int i, j;
for (i = 0; i < insn_cnt; i++, insn++) {
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ (BPF_MODE(insn->code) != BPF_MEM ||
+ insn->imm != 0)) {
+ verbose("BPF_LDX uses reserved fields\n");
+ return -EINVAL;
+ }
+
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_map *map;
struct fd f;
@@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
insn->src_reg = 0;
}
+static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ int insn_cnt = prog->len;
+ int i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (BPF_CLASS(insn->code) != BPF_JMP ||
+ BPF_OP(insn->code) == BPF_CALL ||
+ BPF_OP(insn->code) == BPF_EXIT)
+ continue;
+
+ /* adjust offset of jmps if necessary */
+ if (i < pos && i + insn->off + 1 > pos)
+ insn->off += delta;
+ else if (i > pos && i + insn->off + 1 < pos)
+ insn->off -= delta;
+ }
+}
+
+/* convert load instructions that access fields of 'struct __sk_buff'
+ * into sequence of instructions that access fields of 'struct sk_buff'
+ */
+static int convert_ctx_accesses(struct verifier_env *env)
+{
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ struct bpf_insn insn_buf[16];
+ struct bpf_prog *new_prog;
+ u32 cnt;
+ int i;
+
+ if (!env->prog->aux->ops->convert_ctx_access)
+ return 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+ continue;
+
+ if (insn->imm != PTR_TO_CTX) {
+ /* clear internal mark */
+ insn->imm = 0;
+ continue;
+ }
+
+ cnt = env->prog->aux->ops->
+ convert_ctx_access(insn->dst_reg, insn->src_reg,
+ insn->off, insn_buf);
+ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ verbose("bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
+
+ if (cnt == 1) {
+ memcpy(insn, insn_buf, sizeof(*insn));
+ continue;
+ }
+
+ /* several new insns need to be inserted. Make room for them */
+ insn_cnt += cnt - 1;
+ new_prog = bpf_prog_realloc(env->prog,
+ bpf_prog_size(insn_cnt),
+ GFP_USER);
+ if (!new_prog)
+ return -ENOMEM;
+
+ new_prog->len = insn_cnt;
+
+ memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1,
+ sizeof(*insn) * (insn_cnt - i - cnt));
+
+ /* copy substitute insns in place of load instruction */
+ memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
+
+ /* adjust branches in the whole program */
+ adjust_branches(new_prog, i, cnt - 1);
+
+ /* keep walking new program and skip insns we just inserted */
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + cnt - 1;
+ i += cnt - 1;
+ }
+
+ return 0;
+}
+
static void free_states(struct verifier_env *env)
{
struct verifier_state_list *sl, *sln;
@@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env)
kfree(env->explored_states);
}
-int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
char __user *log_ubuf = NULL;
struct verifier_env *env;
int ret = -EINVAL;
- if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
+ if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
return -E2BIG;
/* 'struct verifier_env' can be global, but since it's not small,
@@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (!env)
return -ENOMEM;
- env->prog = prog;
+ env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */
mutex_lock(&bpf_verifier_lock);
@@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (ret < 0)
goto skip_full_check;
- env->explored_states = kcalloc(prog->len,
+ env->explored_states = kcalloc(env->prog->len,
sizeof(struct verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
@@ -1968,6 +2083,10 @@ skip_full_check:
while (pop_stack(env, NULL) >= 0);
free_states(env);
+ if (ret == 0)
+ /* program is valid, convert *(u32*)(ctx + off) accesses */
+ ret = convert_ctx_accesses(env);
+
if (log_level && log_len >= log_size - 1) {
BUG_ON(log_len >= log_size);
/* verifier log exceeded user supplied buffer */
@@ -1983,18 +2102,18 @@ skip_full_check:
if (ret == 0 && env->used_map_cnt) {
/* if program passed verifier, update used_maps in bpf_prog_info */
- prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
- sizeof(env->used_maps[0]),
- GFP_KERNEL);
+ env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
+ sizeof(env->used_maps[0]),
+ GFP_KERNEL);
- if (!prog->aux->used_maps) {
+ if (!env->prog->aux->used_maps) {
ret = -ENOMEM;
goto free_log_buf;
}
- memcpy(prog->aux->used_maps, env->used_maps,
+ memcpy(env->prog->aux->used_maps, env->used_maps,
sizeof(env->used_maps[0]) * env->used_map_cnt);
- prog->aux->used_map_cnt = env->used_map_cnt;
+ env->prog->aux->used_map_cnt = env->used_map_cnt;
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
* bpf_ld_imm64 instructions
@@ -2006,11 +2125,12 @@ free_log_buf:
if (log_level)
vfree(log_buf);
free_env:
- if (!prog->aux->used_maps)
+ if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
*/
release_maps(env);
+ *prog = env->prog;
kfree(env);
mutex_unlock(&bpf_verifier_lock);
return ret;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index fc0d451279f0..5f8fe3e88219 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -27,17 +27,12 @@
#include <linux/err.h>
#define HASH_DEFAULT_SIZE 64UL
-#define HASH_MIN_SIZE 4UL
+#define HASH_MIN_SIZE 4U
#define BUCKET_LOCKS_PER_CPU 128UL
/* Base bits plus 1 bit for nulls marker */
#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
-enum {
- RHT_LOCK_NORMAL,
- RHT_LOCK_NESTED,
-};
-
/* The bucket lock is selected based on the hash and protects mutations
* on a group of hash buckets.
*
@@ -146,8 +141,13 @@ static void bucket_table_free(const struct bucket_table *tbl)
kvfree(tbl);
}
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+ bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
- size_t nbuckets, u32 hash_rnd)
+ size_t nbuckets)
{
struct bucket_table *tbl = NULL;
size_t size;
@@ -162,14 +162,16 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
return NULL;
tbl->size = nbuckets;
- tbl->shift = ilog2(nbuckets);
- tbl->hash_rnd = hash_rnd;
if (alloc_bucket_locks(ht, tbl) < 0) {
bucket_table_free(tbl);
return NULL;
}
+ INIT_LIST_HEAD(&tbl->walkers);
+
+ get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
for (i = 0; i < nbuckets; i++)
INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
@@ -186,7 +188,7 @@ static bool rht_grow_above_75(const struct rhashtable *ht,
{
/* Expand table when exceeding 75% load */
return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
- (!ht->p.max_shift || tbl->shift < ht->p.max_shift);
+ (!ht->p.max_size || tbl->size < ht->p.max_size);
}
/**
@@ -199,13 +201,14 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
{
/* Shrink table beneath 30% load */
return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
- tbl->shift > ht->p.min_shift;
+ tbl->size > ht->p.min_size;
}
static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
{
- struct bucket_table *new_tbl = rht_dereference(ht->future_tbl, ht);
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct bucket_table *new_tbl =
+ rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
int err = -ENOENT;
struct rhash_head *head, *next, *entry;
@@ -229,7 +232,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
new_bucket_lock = bucket_lock(new_tbl, new_hash);
- spin_lock_nested(new_bucket_lock, RHT_LOCK_NESTED);
+ spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
head = rht_dereference_bucket(new_tbl->buckets[new_hash],
new_tbl, new_hash);
@@ -257,6 +260,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
spin_lock_bh(old_bucket_lock);
while (!rhashtable_rehash_one(ht, old_hash))
;
+ old_tbl->rehash++;
spin_unlock_bh(old_bucket_lock);
}
@@ -264,16 +268,13 @@ static void rhashtable_rehash(struct rhashtable *ht,
struct bucket_table *new_tbl)
{
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct rhashtable_walker *walker;
unsigned old_hash;
- get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd));
-
/* Make insertions go into the new, empty table right away. Deletions
* and lookups will be attempted in both tables until we synchronize.
- * The synchronize_rcu() guarantees for the new table to be picked up
- * so no new additions go into the old table while we relink.
*/
- rcu_assign_pointer(ht->future_tbl, new_tbl);
+ rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
/* Ensure the new table is visible to readers. */
smp_wmb();
@@ -284,13 +285,14 @@ static void rhashtable_rehash(struct rhashtable *ht,
/* Publish the new table pointer. */
rcu_assign_pointer(ht->tbl, new_tbl);
+ list_for_each_entry(walker, &old_tbl->walkers, list)
+ walker->tbl = NULL;
+
/* Wait for readers. All new readers will see the new
* table, and thus no references to the old table will
* remain.
*/
- synchronize_rcu();
-
- bucket_table_free(old_tbl);
+ call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
}
/**
@@ -314,7 +316,7 @@ int rhashtable_expand(struct rhashtable *ht)
ASSERT_RHT_MUTEX(ht);
- new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, old_tbl->hash_rnd);
+ new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
if (new_tbl == NULL)
return -ENOMEM;
@@ -345,7 +347,7 @@ int rhashtable_shrink(struct rhashtable *ht)
ASSERT_RHT_MUTEX(ht);
- new_tbl = bucket_table_alloc(ht, old_tbl->size / 2, old_tbl->hash_rnd);
+ new_tbl = bucket_table_alloc(ht, old_tbl->size / 2);
if (new_tbl == NULL)
return -ENOMEM;
@@ -358,7 +360,6 @@ static void rht_deferred_worker(struct work_struct *work)
{
struct rhashtable *ht;
struct bucket_table *tbl;
- struct rhashtable_walker *walker;
ht = container_of(work, struct rhashtable, run_work);
mutex_lock(&ht->mutex);
@@ -367,9 +368,6 @@ static void rht_deferred_worker(struct work_struct *work)
tbl = rht_dereference(ht->tbl, ht);
- list_for_each_entry(walker, &ht->walkers, list)
- walker->resize = true;
-
if (rht_grow_above_75(ht, tbl))
rhashtable_expand(ht);
else if (rht_shrink_below_30(ht, tbl))
@@ -385,14 +383,16 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
struct rhash_head *head;
bool no_resize_running;
unsigned hash;
+ spinlock_t *old_lock;
bool success = true;
rcu_read_lock();
old_tbl = rht_dereference_rcu(ht->tbl, ht);
hash = head_hashfn(ht, old_tbl, obj);
+ old_lock = bucket_lock(old_tbl, hash);
- spin_lock_bh(bucket_lock(old_tbl, hash));
+ spin_lock_bh(old_lock);
/* Because we have already taken the bucket lock in old_tbl,
* if we find that future_tbl is not yet visible then that
@@ -400,10 +400,10 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
* also grab the bucket lock in old_tbl because until the
* rehash completes ht->tbl won't be changed.
*/
- tbl = rht_dereference_rcu(ht->future_tbl, ht);
+ tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
if (tbl != old_tbl) {
hash = head_hashfn(ht, tbl, obj);
- spin_lock_nested(bucket_lock(tbl, hash), RHT_LOCK_NESTED);
+ spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
}
if (compare &&
@@ -429,13 +429,10 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
schedule_work(&ht->run_work);
exit:
- if (tbl != old_tbl) {
- hash = head_hashfn(ht, tbl, obj);
+ if (tbl != old_tbl)
spin_unlock(bucket_lock(tbl, hash));
- }
- hash = head_hashfn(ht, old_tbl, obj);
- spin_unlock_bh(bucket_lock(old_tbl, hash));
+ spin_unlock_bh(old_lock);
rcu_read_unlock();
@@ -512,28 +509,25 @@ static bool __rhashtable_remove(struct rhashtable *ht,
*/
bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
{
- struct bucket_table *tbl, *old_tbl;
+ struct bucket_table *tbl;
bool ret;
rcu_read_lock();
- old_tbl = rht_dereference_rcu(ht->tbl, ht);
- ret = __rhashtable_remove(ht, old_tbl, obj);
+ tbl = rht_dereference_rcu(ht->tbl, ht);
/* Because we have already taken (and released) the bucket
* lock in old_tbl, if we find that future_tbl is not yet
* visible then that guarantees the entry to still be in
- * old_tbl if it exists.
+ * the old tbl if it exists.
*/
- tbl = rht_dereference_rcu(ht->future_tbl, ht);
- if (!ret && old_tbl != tbl)
- ret = __rhashtable_remove(ht, tbl, obj);
+ while (!(ret = __rhashtable_remove(ht, tbl, obj)) &&
+ (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+ ;
if (ret) {
- bool no_resize_running = tbl == old_tbl;
-
atomic_dec(&ht->nelems);
- if (no_resize_running && rht_shrink_below_30(ht, tbl))
+ if (rht_shrink_below_30(ht, tbl))
schedule_work(&ht->run_work);
}
@@ -599,7 +593,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup);
void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
bool (*compare)(void *, void *), void *arg)
{
- const struct bucket_table *tbl, *old_tbl;
+ const struct bucket_table *tbl;
struct rhash_head *he;
u32 hash;
@@ -618,9 +612,8 @@ restart:
/* Ensure we see any new tables. */
smp_rmb();
- old_tbl = tbl;
- tbl = rht_dereference_rcu(ht->future_tbl, ht);
- if (unlikely(tbl != old_tbl))
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(tbl))
goto restart;
rcu_read_unlock();
@@ -725,11 +718,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
if (!iter->walker)
return -ENOMEM;
- INIT_LIST_HEAD(&iter->walker->list);
- iter->walker->resize = false;
-
mutex_lock(&ht->mutex);
- list_add(&iter->walker->list, &ht->walkers);
+ iter->walker->tbl = rht_dereference(ht->tbl, ht);
+ list_add(&iter->walker->list, &iter->walker->tbl->walkers);
mutex_unlock(&ht->mutex);
return 0;
@@ -745,7 +736,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
mutex_lock(&iter->ht->mutex);
- list_del(&iter->walker->list);
+ if (iter->walker->tbl)
+ list_del(&iter->walker->list);
mutex_unlock(&iter->ht->mutex);
kfree(iter->walker);
}
@@ -766,13 +758,21 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
* by calling rhashtable_walk_next.
*/
int rhashtable_walk_start(struct rhashtable_iter *iter)
+ __acquires(RCU)
{
+ struct rhashtable *ht = iter->ht;
+
+ mutex_lock(&ht->mutex);
+
+ if (iter->walker->tbl)
+ list_del(&iter->walker->list);
+
rcu_read_lock();
- if (iter->walker->resize) {
- iter->slot = 0;
- iter->skip = 0;
- iter->walker->resize = false;
+ mutex_unlock(&ht->mutex);
+
+ if (!iter->walker->tbl) {
+ iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
return -EAGAIN;
}
@@ -794,13 +794,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
*/
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
- const struct bucket_table *tbl;
+ struct bucket_table *tbl = iter->walker->tbl;
struct rhashtable *ht = iter->ht;
struct rhash_head *p = iter->p;
void *obj = NULL;
- tbl = rht_dereference_rcu(ht->tbl, ht);
-
if (p) {
p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
goto next;
@@ -826,17 +824,17 @@ next:
iter->skip = 0;
}
- iter->p = NULL;
-
-out:
- if (iter->walker->resize) {
- iter->p = NULL;
+ iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (iter->walker->tbl) {
iter->slot = 0;
iter->skip = 0;
- iter->walker->resize = false;
return ERR_PTR(-EAGAIN);
}
+ iter->p = NULL;
+
+out:
+
return obj;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -848,16 +846,34 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
* Finish a hash table walk.
*/
void rhashtable_walk_stop(struct rhashtable_iter *iter)
+ __releases(RCU)
{
- rcu_read_unlock();
+ struct rhashtable *ht;
+ struct bucket_table *tbl = iter->walker->tbl;
+
+ if (!tbl)
+ goto out;
+
+ ht = iter->ht;
+
+ mutex_lock(&ht->mutex);
+ if (tbl->rehash < tbl->size)
+ list_add(&iter->walker->list, &tbl->walkers);
+ else
+ iter->walker->tbl = NULL;
+ mutex_unlock(&ht->mutex);
+
iter->p = NULL;
+
+out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
static size_t rounded_hashtable_size(struct rhashtable_params *params)
{
return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
- 1UL << params->min_shift);
+ (unsigned long)params->min_size);
}
/**
@@ -907,7 +923,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
{
struct bucket_table *tbl;
size_t size;
- u32 hash_rnd;
size = HASH_DEFAULT_SIZE;
@@ -918,8 +933,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
return -EINVAL;
- params->min_shift = max_t(size_t, params->min_shift,
- ilog2(HASH_MIN_SIZE));
+ params->min_size = max(params->min_size, HASH_MIN_SIZE);
if (params->nelem_hint)
size = rounded_hashtable_size(params);
@@ -927,23 +941,19 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
memset(ht, 0, sizeof(*ht));
mutex_init(&ht->mutex);
memcpy(&ht->p, params, sizeof(*params));
- INIT_LIST_HEAD(&ht->walkers);
if (params->locks_mul)
ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
else
ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
- get_random_bytes(&hash_rnd, sizeof(hash_rnd));
-
- tbl = bucket_table_alloc(ht, size, hash_rnd);
+ tbl = bucket_table_alloc(ht, size);
if (tbl == NULL)
return -ENOMEM;
atomic_set(&ht->nelems, 0);
RCU_INIT_POINTER(ht->tbl, tbl);
- RCU_INIT_POINTER(ht->future_tbl, tbl);
INIT_WORK(&ht->run_work, rht_deferred_worker);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 67c7593d1dd6..2bc403d8f9ee 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -80,7 +80,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
rcu_cnt = cnt = 0;
if (!quiet)
- pr_info(" [%#4x/%zu]", i, tbl->size);
+ pr_info(" [%#4x/%u]", i, tbl->size);
rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
cnt++;
@@ -201,7 +201,7 @@ static int __init test_rht_init(void)
.key_offset = offsetof(struct test_obj, value),
.key_len = sizeof(int),
.hashfn = jhash,
- .max_shift = 1, /* we expand/shrink manually here */
+ .max_size = 2, /* we expand/shrink manually here */
.nulls_base = (3U << RHT_BASE_SHIFT),
};
int err;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 64c6bed4a3d3..98a30a5b8664 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -413,7 +413,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
vlan_transfer_features(dev, vlandev);
break;
- case NETDEV_DOWN:
+ case NETDEV_DOWN: {
+ struct net_device *tmp;
+ LIST_HEAD(close_list);
+
if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
@@ -425,11 +428,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
vlan = vlan_dev_priv(vlandev);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
- dev_change_flags(vlandev, flgs & ~IFF_UP);
+ list_add(&vlandev->close_list, &close_list);
+ }
+
+ dev_close_many(&close_list, false);
+
+ list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) {
netif_stacked_transfer_operstate(dev, vlandev);
+ list_del_init(&vlandev->close_list);
}
+ list_del(&close_list);
break;
-
+ }
case NETDEV_UP:
/* Put all VLANs for this dev in the up state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8bc6b67457dc..e1115a224a95 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -733,6 +733,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 },
[IFLA_BR_HELLO_TIME] = { .type = NLA_U32 },
[IFLA_BR_MAX_AGE] = { .type = NLA_U32 },
+ [IFLA_BR_AGEING_TIME] = { .type = NLA_U32 },
+ [IFLA_BR_STP_STATE] = { .type = NLA_U32 },
+ [IFLA_BR_PRIORITY] = { .type = NLA_U16 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -762,6 +765,24 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return err;
}
+ if (data[IFLA_BR_AGEING_TIME]) {
+ u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]);
+
+ br->ageing_time = clock_t_to_jiffies(ageing_time);
+ }
+
+ if (data[IFLA_BR_STP_STATE]) {
+ u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]);
+
+ br_stp_set_enabled(br, stp_enabled);
+ }
+
+ if (data[IFLA_BR_PRIORITY]) {
+ u32 priority = nla_get_u16(data[IFLA_BR_PRIORITY]);
+
+ br_stp_set_bridge_priority(br, priority);
+ }
+
return 0;
}
@@ -770,6 +791,9 @@ static size_t br_get_size(const struct net_device *brdev)
return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */
nla_total_size(sizeof(u32)) + /* IFLA_BR_HELLO_TIME */
nla_total_size(sizeof(u32)) + /* IFLA_BR_MAX_AGE */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_AGEING_TIME */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */
+ nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */
0;
}
@@ -779,10 +803,16 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
u32 forward_delay = jiffies_to_clock_t(br->forward_delay);
u32 hello_time = jiffies_to_clock_t(br->hello_time);
u32 age_time = jiffies_to_clock_t(br->max_age);
+ u32 ageing_time = jiffies_to_clock_t(br->ageing_time);
+ u32 stp_enabled = br->stp_enabled;
+ u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
- nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time))
+ nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) ||
+ nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) ||
+ nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) ||
+ nla_put_u16(skb, IFLA_BR_PRIORITY, priority))
return -EMSGSIZE;
return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 39fe369b46ad..5d43e010ef87 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1385,7 +1385,7 @@ static int __dev_close(struct net_device *dev)
return retval;
}
-static int dev_close_many(struct list_head *head)
+int dev_close_many(struct list_head *head, bool unlink)
{
struct net_device *dev, *tmp;
@@ -1399,11 +1399,13 @@ static int dev_close_many(struct list_head *head)
list_for_each_entry_safe(dev, tmp, head, close_list) {
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
call_netdevice_notifiers(NETDEV_DOWN, dev);
- list_del_init(&dev->close_list);
+ if (unlink)
+ list_del_init(&dev->close_list);
}
return 0;
}
+EXPORT_SYMBOL(dev_close_many);
/**
* dev_close - shutdown an interface.
@@ -1420,7 +1422,7 @@ int dev_close(struct net_device *dev)
LIST_HEAD(single);
list_add(&dev->close_list, &single);
- dev_close_many(&single);
+ dev_close_many(&single, true);
list_del(&single);
}
return 0;
@@ -5912,6 +5914,24 @@ int dev_get_phys_port_id(struct net_device *dev,
EXPORT_SYMBOL(dev_get_phys_port_id);
/**
+ * dev_get_phys_port_name - Get device physical port name
+ * @dev: device
+ * @name: port name
+ *
+ * Get device physical port name
+ */
+int dev_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_get_phys_port_name)
+ return -EOPNOTSUPP;
+ return ops->ndo_get_phys_port_name(dev, name, len);
+}
+EXPORT_SYMBOL(dev_get_phys_port_name);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -5968,7 +5988,7 @@ static void rollback_registered_many(struct list_head *head)
/* If device is running, close it first. */
list_for_each_entry(dev, head, unreg_list)
list_add_tail(&dev->close_list, &close_head);
- dev_close_many(&close_head);
+ dev_close_many(&close_head, true);
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a4eb7030dba..b95ae7fe7e4f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -150,30 +150,82 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return prandom_u32();
}
+static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
+ struct bpf_insn *insn_buf)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (skb_field) {
+ case SKF_AD_MARK:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, mark));
+ break;
+
+ case SKF_AD_PKTTYPE:
+ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
+#endif
+ break;
+
+ case SKF_AD_QUEUE:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, queue_mapping));
+ break;
+
+ case SKF_AD_PROTOCOL:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+
+ /* dst_reg = *(u16 *) (src_reg + offsetof(protocol)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, protocol));
+ /* dst_reg = ntohs(dst_reg) [emitting a nop or swap16] */
+ *insn++ = BPF_ENDIAN(BPF_FROM_BE, dst_reg, 16);
+ break;
+
+ case SKF_AD_VLAN_TAG:
+ case SKF_AD_VLAN_TAG_PRESENT:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+ /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, vlan_tci));
+ if (skb_field == SKF_AD_VLAN_TAG) {
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
+ ~VLAN_TAG_PRESENT);
+ } else {
+ /* dst_reg >>= 12 */
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
+ /* dst_reg &= 1 */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
+ }
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static bool convert_bpf_extensions(struct sock_filter *fp,
struct bpf_insn **insnp)
{
struct bpf_insn *insn = *insnp;
+ u32 cnt;
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PROTOCOL:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-
- /* A = *(u16 *) (CTX + offsetof(protocol)) */
- *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, protocol));
- /* A = ntohs(A) [emitting a nop or swap16] */
- *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
+ cnt = convert_skb_access(SKF_AD_PROTOCOL, BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_PKTTYPE:
- *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
- PKT_TYPE_OFFSET());
- *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
-#ifdef __BIG_ENDIAN_BITFIELD
- insn++;
- *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
-#endif
+ cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_IFINDEX:
@@ -197,10 +249,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
- *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, mark));
+ cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_RXHASH:
@@ -211,29 +261,20 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-
- *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, queue_mapping));
+ cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_VLAN_TAG:
- case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ cnt = convert_skb_access(SKF_AD_VLAN_TAG,
+ BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
+ break;
- /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
- *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, vlan_tci));
- if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
- *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
- ~VLAN_TAG_PRESENT);
- } else {
- /* A >>= 12 */
- *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
- /* A &= 1 */
- *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
- }
+ case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
+ cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+ BPF_REG_A, BPF_REG_CTX, insn);
+ insn += cnt - 1;
break;
case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
@@ -1139,6 +1180,10 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_get_prandom_u32:
+ return &bpf_get_prandom_u32_proto;
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
default:
return NULL;
}
@@ -1147,13 +1192,66 @@ sk_filter_func_proto(enum bpf_func_id func_id)
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type)
{
- /* skb fields cannot be accessed yet */
- return false;
+ /* only read is allowed */
+ if (type != BPF_READ)
+ return false;
+
+ /* check bounds */
+ if (off < 0 || off >= sizeof(struct __sk_buff))
+ return false;
+
+ /* disallow misaligned access */
+ if (off % size != 0)
+ return false;
+
+ /* all __sk_buff fields are __u32 */
+ if (size != 4)
+ return false;
+
+ return true;
+}
+
+static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct __sk_buff, len):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, len));
+ break;
+
+ case offsetof(struct __sk_buff, mark):
+ return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, pkt_type):
+ return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, queue_mapping):
+ return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, protocol):
+ return convert_skb_access(SKF_AD_PROTOCOL, dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, vlan_present):
+ return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+ dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, vlan_tci):
+ return convert_skb_access(SKF_AD_VLAN_TAG,
+ dst_reg, src_reg, insn);
+ }
+
+ return insn - insn_buf;
}
static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
};
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index cf30620a88e1..cc5cf689809c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -418,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_port_id);
+static ssize_t phys_port_name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev)) {
+ char name[IFNAMSIZ];
+
+ ret = dev_get_phys_port_name(netdev, name, sizeof(name));
+ if (!ret)
+ ret = sprintf(buf, "%s\n", name);
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+static DEVICE_ATTR_RO(phys_port_name);
+
static ssize_t phys_switch_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -465,6 +487,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
&dev_attr_phys_port_id.attr,
+ &dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
NULL,
};
@@ -951,6 +974,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
return sprintf(buf, "%lu", trans_timeout);
}
+#ifdef CONFIG_XPS
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++)
+ if (queue == &dev->_tx[i])
+ break;
+
+ BUG_ON(i >= dev->num_tx_queues);
+
+ return i;
+}
+
+static ssize_t show_tx_maxrate(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", queue->tx_maxrate);
+}
+
+static ssize_t set_tx_maxrate(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct net_device *dev = queue->dev;
+ int err, index = get_netdev_queue_index(queue);
+ u32 rate = 0;
+
+ err = kstrtou32(buf, 10, &rate);
+ if (err < 0)
+ return err;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ err = -EOPNOTSUPP;
+ if (dev->netdev_ops->ndo_set_tx_maxrate)
+ err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
+
+ rtnl_unlock();
+ if (!err) {
+ queue->tx_maxrate = rate;
+ return len;
+ }
+ return err;
+}
+
+static struct netdev_queue_attribute queue_tx_maxrate =
+ __ATTR(tx_maxrate, S_IRUGO | S_IWUSR,
+ show_tx_maxrate, set_tx_maxrate);
+#endif
+
static struct netdev_queue_attribute queue_trans_timeout =
__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
@@ -1065,18 +1142,6 @@ static struct attribute_group dql_group = {
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
-static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
-{
- struct net_device *dev = queue->dev;
- unsigned int i;
-
- i = queue - dev->_tx;
- BUG_ON(i >= dev->num_tx_queues);
-
- return i;
-}
-
-
static ssize_t show_xps_map(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute, char *buf)
{
@@ -1153,6 +1218,7 @@ static struct attribute *netdev_queue_default_attrs[] = {
&queue_trans_timeout.attr,
#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
+ &queue_tx_maxrate.attr,
#endif
NULL
};
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 04db318e6218..cc39a2aa663a 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -103,7 +103,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
lopt->qlen--;
- reqsk_free(req);
+ reqsk_put(req);
}
}
}
@@ -153,24 +153,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
* case might also exist in tcp_v4_hnd_req() that will trigger this locking
* order.
*
- * When a TFO req is created, it needs to sock_hold its listener to prevent
- * the latter data structure from going away.
- *
- * This function also sets "treq->listener" to NULL and unreference listener
- * socket. treq->listener is used by the listener so it is protected by the
+ * This function also sets "treq->tfo_listener" to false.
+ * treq->tfo_listener is used by the listener so it is protected by the
* fastopenq->lock in this function.
*/
void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
bool reset)
{
- struct sock *lsk = tcp_rsk(req)->listener;
- struct fastopen_queue *fastopenq =
- inet_csk(lsk)->icsk_accept_queue.fastopenq;
+ struct sock *lsk = req->rsk_listener;
+ struct fastopen_queue *fastopenq;
+
+ fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
tcp_sk(sk)->fastopen_rsk = NULL;
spin_lock_bh(&fastopenq->lock);
fastopenq->qlen--;
- tcp_rsk(req)->listener = NULL;
+ tcp_rsk(req)->tfo_listener = false;
if (req->sk) /* the child socket hasn't been accepted yet */
goto out;
@@ -179,8 +177,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
* special RST handling below.
*/
spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
- reqsk_free(req);
+ reqsk_put(req);
return;
}
/* Wait for 60secs before removing a req that has triggered RST.
@@ -201,5 +198,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
fastopenq->qlen++;
out:
spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 25b4b5d23485..6abe634c666c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
+{
+ char name[IFNAMSIZ];
+ int err;
+
+ err = dev_get_phys_port_name(dev, name, sizeof(name));
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
@@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_phys_port_id_fill(skb, dev))
goto nla_put_failure;
+ if (rtnl_phys_port_name_fill(skb, dev))
+ goto nla_put_failure;
+
if (rtnl_phys_switch_id_fill(skb, dev))
goto nla_put_failure;
diff --git a/net/core/sock.c b/net/core/sock.c
index 4bc42efb3e40..d9f9e4825362 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1454,8 +1454,8 @@ void sk_release_kernel(struct sock *sk)
return;
sock_hold(sk);
- sock_release(sk->sk_socket);
sock_net_set(sk, get_net(&init_net));
+ sock_release(sk->sk_socket);
sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);
@@ -1661,21 +1661,6 @@ void sock_efree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_efree);
-#ifdef CONFIG_INET
-void sock_edemux(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- if (sk->sk_state == TCP_TIME_WAIT)
- inet_twsk_put(inet_twsk(sk));
- else if (sk->sk_state == TCP_NEW_SYN_RECV)
- reqsk_put(inet_reqsk(sk));
- else
- sock_put(sk);
-}
-EXPORT_SYMBOL(sock_edemux);
-#endif
-
kuid_t sock_i_uid(struct sock *sk)
{
kuid_t uid;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7f6456afbaec..e7ad291cd96b 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -89,10 +89,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (inet->inet_saddr == 0)
inet->inet_saddr = fl4->saddr;
- inet->inet_rcv_saddr = inet->inet_saddr;
-
+ sk_rcv_saddr_set(sk, inet->inet_saddr);
inet->inet_dport = usin->sin_port;
- inet->inet_daddr = daddr;
+ sk_daddr_set(sk, daddr);
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
@@ -408,8 +407,8 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- newinet->inet_daddr = ireq->ir_rmt_addr;
- newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+ sk_daddr_set(newsk, ireq->ir_rmt_addr);
+ sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newinet->inet_saddr = ireq->ir_loc_addr;
newinet->inet_opt = ireq->opt;
ireq->opt = NULL;
@@ -624,7 +623,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp_request_sock_ops);
+ req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
if (req == NULL)
goto drop;
@@ -639,9 +638,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq = inet_rsk(req);
- ireq->ir_loc_addr = ip_hdr(skb)->daddr;
- ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
- write_pnet(&ireq->ireq_net, sock_net(sk));
+ sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+ sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
ireq->ireq_family = AF_INET;
ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5166b0043f95..c655de5f67c9 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -40,19 +40,6 @@
static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
-static void dccp_v6_hash(struct sock *sk)
-{
- if (sk->sk_state != DCCP_CLOSED) {
- if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) {
- inet_hash(sk);
- return;
- }
- local_bh_disable();
- __inet6_hash(sk, NULL);
- local_bh_enable();
- }
-}
-
/* add pseudo-header to DCCP checksum stored in skb->csum */
static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
const struct in6_addr *saddr,
@@ -386,7 +373,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp6_request_sock_ops);
+ req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
if (req == NULL)
goto drop;
@@ -403,7 +390,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- write_pnet(&ireq->ireq_net, sock_net(sk));
ireq->ireq_family = AF_INET6;
if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
@@ -471,11 +457,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
-
- ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
-
- newsk->sk_v6_rcv_saddr = newnp->saddr;
+ newnp->saddr = newsk->sk_v6_rcv_saddr;
inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -593,7 +575,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
dccp_done(newsk);
goto out;
}
- __inet6_hash(newsk, NULL);
+ __inet_hash(newsk, NULL);
return newsk;
@@ -918,9 +900,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_backlog_rcv = dccp_v6_do_rcv;
goto failure;
}
- ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr);
-
+ np->saddr = sk->sk_v6_rcv_saddr;
return err;
}
@@ -1063,7 +1043,7 @@ static struct proto dccp_v6_prot = {
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v6_do_rcv,
- .hash = dccp_v6_hash,
+ .hash = inet_hash,
.unhash = inet_unhash,
.accept = inet_csk_accept,
.get_port = inet_csk_get_port,
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index b45206e8dd3e..9379a9cf7f5d 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -6,9 +6,8 @@ config HAVE_NET_DSA
config NET_DSA
tristate
- depends on HAVE_NET_DSA
+ depends on HAVE_NET_DSA && NET_SWITCHDEV
select PHYLIB
- select NET_SWITCHDEV
if NET_DSA
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6511552039d6..f0af7aa331c1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,6 +16,7 @@
#include <linux/of_net.h>
#include <linux/of_mdio.h>
#include <net/rtnetlink.h>
+#include <net/switchdev.h>
#include <linux/if_bridge.h>
#include "dsa_priv.h"
@@ -572,8 +573,11 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
.ndo_do_ioctl = dsa_slave_ioctl,
- .ndo_switch_parent_id_get = dsa_slave_parent_id_get,
- .ndo_switch_port_stp_update = dsa_slave_stp_update,
+};
+
+static const struct swdev_ops dsa_slave_swdev_ops = {
+ .swdev_parent_id_get = dsa_slave_parent_id_get,
+ .swdev_port_stp_update = dsa_slave_stp_update,
};
static void dsa_slave_adjust_link(struct net_device *dev)
@@ -755,6 +759,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
eth_hw_addr_inherit(slave_dev, master);
slave_dev->tx_queue_len = 0;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
+ slave_dev->swdev_ops = &dsa_slave_swdev_ops;
SET_NETDEV_DEV(slave_dev, parent);
slave_dev->dev.of_node = ds->pd->port_dn[port];
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5105759e4e00..975ee5e30c64 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -107,7 +107,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-static u32 inet_addr_hash(struct net *net, __be32 addr)
+static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
u32 val = (__force u32) addr ^ net_hash_mix(net);
@@ -560,9 +560,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
lock_sock(sk);
if (join)
- ret = __ip_mc_join_group(sk, &mreq);
+ ret = ip_mc_join_group(sk, &mreq);
else
- ret = __ip_mc_leave_group(sk, &mreq);
+ ret = ip_mc_leave_group(sk, &mreq);
release_sock(sk);
return ret;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5cb1ef4ce292..ad3f866085de 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1850,7 +1850,10 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
pmc->sfcount[MCAST_EXCLUDE] = 1;
}
-int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+/* Join a multicast group
+ */
+
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
{
__be32 addr = imr->imr_multiaddr.s_addr;
struct ip_mc_socklist *iml, *i;
@@ -1898,20 +1901,6 @@ int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
done:
return err;
}
-EXPORT_SYMBOL(__ip_mc_join_group);
-
-/* Join a multicast group
- */
-int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
-{
- int ret;
-
- rtnl_lock();
- ret = __ip_mc_join_group(sk, imr);
- rtnl_unlock();
-
- return ret;
-}
EXPORT_SYMBOL(ip_mc_join_group);
static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
@@ -1934,7 +1923,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
return err;
}
-int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
+int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *iml;
@@ -1979,18 +1968,6 @@ int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
out:
return ret;
}
-EXPORT_SYMBOL(__ip_mc_leave_group);
-
-int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
-{
- int ret;
-
- rtnl_lock();
- ret = __ip_mc_leave_group(sk, imr);
- rtnl_unlock();
-
- return ret;
-}
EXPORT_SYMBOL(ip_mc_leave_group);
int ip_mc_source(int add, int omode, struct sock *sk, struct
@@ -2010,7 +1987,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
if (!ipv4_is_multicast(addr))
return -EINVAL;
- rtnl_lock();
+ ASSERT_RTNL();
imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
imr.imr_address.s_addr = mreqs->imr_interface;
@@ -2124,9 +2101,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1,
&mreqs->imr_sourceaddr, 1);
done:
- rtnl_unlock();
if (leavegroup)
- return ip_mc_leave_group(sk, &imr);
+ err = ip_mc_leave_group(sk, &imr);
return err;
}
@@ -2148,7 +2124,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
msf->imsf_fmode != MCAST_EXCLUDE)
return -EINVAL;
- rtnl_lock();
+ ASSERT_RTNL();
imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
imr.imr_address.s_addr = msf->imsf_interface;
@@ -2210,7 +2186,6 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
- rtnl_unlock();
if (leavegroup)
err = ip_mc_leave_group(sk, &imr);
return err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 34581f928afa..f0f91858aecf 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -293,8 +293,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
- struct sock *newsk;
struct request_sock *req;
+ struct sock *newsk;
int error;
lock_sock(sk);
@@ -323,9 +323,11 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
newsk = req->sk;
sk_acceptq_removed(sk);
- if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_rsk(req)->tfo_listener &&
+ queue->fastopenq) {
spin_lock_bh(&queue->fastopenq->lock);
- if (tcp_rsk(req)->listener) {
+ if (tcp_rsk(req)->tfo_listener) {
/* We are still waiting for the final ACK from 3WHS
* so can't free req now. Instead, we set req->sk to
* NULL to signify that the child socket is taken
@@ -340,7 +342,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
out:
release_sock(sk);
if (req)
- __reqsk_free(req);
+ reqsk_put(req);
return newsk;
out_err:
newsk = NULL;
@@ -635,7 +637,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
/* Drop this request */
inet_csk_reqsk_queue_unlink(parent, req, reqp);
reqsk_queue_removed(queue, req);
- reqsk_free(req);
+ reqsk_put(req);
continue;
}
reqp = &req->dl_next;
@@ -817,9 +819,9 @@ void inet_csk_listen_stop(struct sock *sk)
percpu_counter_inc(sk->sk_prot->orphan_count);
- if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
+ if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
BUG_ON(tcp_sk(child)->fastopen_rsk != req);
- BUG_ON(sk != tcp_rsk(req)->listener);
+ BUG_ON(sk != req->rsk_listener);
/* Paranoid, to prevent race condition if
* an inbound pkt destined for child is
@@ -828,7 +830,6 @@ void inet_csk_listen_stop(struct sock *sk)
* tcp_v4_destroy_sock().
*/
tcp_sk(child)->fastopen_rsk = NULL;
- sock_put(sk);
}
inet_csk_destroy_sock(child);
@@ -837,7 +838,7 @@ void inet_csk_listen_stop(struct sock *sk)
sock_put(child);
sk_acceptq_removed(sk);
- __reqsk_free(req);
+ reqsk_put(req);
}
if (queue->fastopenq != NULL) {
/* Free all the reqs queued in rskq_rst_head. */
@@ -847,7 +848,7 @@ void inet_csk_listen_stop(struct sock *sk)
spin_unlock_bh(&queue->fastopenq->lock);
while ((req = acc_req) != NULL) {
acc_req = req->dl_next;
- __reqsk_free(req);
+ reqsk_put(req);
}
}
WARN_ON(sk->sk_ack_backlog);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ac7b5c909fe7..74c39c9f3e11 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -113,14 +113,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
return -EMSGSIZE;
r = nlmsg_data(nlh);
- BUG_ON((1 << sk->sk_state) & (TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV));
+ BUG_ON(!sk_fullsock(sk));
inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
r->idiag_retrans = 0;
-
if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
goto errout;
@@ -229,7 +228,6 @@ static int inet_csk_diag_fill(struct sock *sk,
static int inet_twsk_diag_fill(struct sock *sk,
struct sk_buff *skb,
- const struct inet_diag_req_v2 *req,
u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
@@ -265,6 +263,39 @@ static int inet_twsk_diag_fill(struct sock *sk,
return 0;
}
+static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
+ u32 portid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ struct inet_diag_msg *r;
+ struct nlmsghdr *nlh;
+ long tmo;
+
+ nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
+ nlmsg_flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ r = nlmsg_data(nlh);
+ inet_diag_msg_common_fill(r, sk);
+ r->idiag_state = TCP_SYN_RECV;
+ r->idiag_timer = 1;
+ r->idiag_retrans = inet_reqsk(sk)->num_retrans;
+
+ BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
+ offsetof(struct sock, sk_cookie));
+
+ tmo = inet_reqsk(sk)->expires - jiffies;
+ r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
+ r->idiag_rqueue = 0;
+ r->idiag_wqueue = 0;
+ r->idiag_uid = 0;
+ r->idiag_inode = 0;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct inet_diag_req_v2 *r,
struct user_namespace *user_ns,
@@ -272,9 +303,13 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
- return inet_twsk_diag_fill(sk, skb, r, portid, seq,
+ return inet_twsk_diag_fill(sk, skb, portid, seq,
nlmsg_flags, unlh);
+ if (sk->sk_state == TCP_NEW_SYN_RECV)
+ return inet_req_diag_fill(sk, skb, portid, seq,
+ nlmsg_flags, unlh);
+
return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
nlmsg_flags, unlh);
}
@@ -502,7 +537,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry_fill_addrs(&entry, sk);
entry.sport = inet->inet_num;
entry.dport = ntohs(inet->inet_dport);
- entry.userlocks = (sk->sk_state != TCP_TIME_WAIT) ? sk->sk_userlocks : 0;
+ entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
return inet_diag_bc_run(bc, &entry);
}
@@ -661,61 +696,6 @@ static void twsk_build_assert(void)
#endif
}
-static int inet_twsk_diag_dump(struct sock *sk,
- struct sk_buff *skb,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r,
- const struct nlattr *bc)
-{
- twsk_build_assert();
-
- if (!inet_diag_bc_sk(bc, sk))
- return 0;
-
- return inet_twsk_diag_fill(sk, skb, r,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
-}
-
-static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
- struct request_sock *req,
- struct user_namespace *user_ns,
- u32 portid, u32 seq,
- const struct nlmsghdr *unlh)
-{
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct inet_diag_msg *r;
- struct nlmsghdr *nlh;
- long tmo;
-
- nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
- NLM_F_MULTI);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- inet_diag_msg_common_fill(r, (struct sock *)ireq);
- r->idiag_state = TCP_SYN_RECV;
- r->idiag_timer = 1;
- r->idiag_retrans = req->num_retrans;
-
- BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
- offsetof(struct sock, sk_cookie));
-
- tmo = req->expires - jiffies;
- if (tmo < 0)
- tmo = 0;
-
- r->idiag_expires = jiffies_to_msecs(tmo);
- r->idiag_rqueue = 0;
- r->idiag_wqueue = 0;
- r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
- r->idiag_inode = 0;
-
- nlmsg_end(skb, nlh);
- return 0;
-}
-
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
@@ -762,17 +742,17 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (bc) {
/* Note: entry.sport and entry.userlocks are already set */
- entry_fill_addrs(&entry, (struct sock *)req);
+ entry_fill_addrs(&entry, req_to_sk(req));
entry.dport = ntohs(ireq->ir_rmt_port);
if (!inet_diag_bc_run(bc, &entry))
continue;
}
- err = inet_diag_fill_req(skb, sk, req,
- sk_user_ns(NETLINK_CB(cb->skb).sk),
+ err = inet_req_diag_fill(req_to_sk(req), skb,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, cb->nlh);
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, cb->nlh);
if (err < 0) {
cb->args[3] = j + 1;
cb->args[4] = reqnum;
@@ -903,10 +883,16 @@ skip_listen_ht:
if (r->id.idiag_dport != sk->sk_dport &&
r->id.idiag_dport)
goto next_normal;
- if (sk->sk_state == TCP_TIME_WAIT)
- res = inet_twsk_diag_dump(sk, skb, cb, r, bc);
- else
- res = inet_csk_diag_dump(sk, skb, cb, r, bc);
+ twsk_build_assert();
+
+ if (!inet_diag_bc_sk(bc, sk))
+ goto next_normal;
+
+ res = sk_diag_fill(sk, skb, r,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh);
if (res < 0) {
spin_unlock_bh(lock);
goto done;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 64401a2fdd33..0fb841b9d834 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,9 +24,9 @@
#include <net/secure_seq.h>
#include <net/ip.h>
-static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
{
static u32 inet_ehash_secret __read_mostly;
@@ -36,17 +36,21 @@ static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
inet_ehash_secret + net_hash_mix(net));
}
-
-static unsigned int inet_sk_ehashfn(const struct sock *sk)
+/* This function handles inet_sock, but also timewait and request sockets
+ * for IPv4/IPv6.
+ */
+u32 sk_ehashfn(const struct sock *sk)
{
- const struct inet_sock *inet = inet_sk(sk);
- const __be32 laddr = inet->inet_rcv_saddr;
- const __u16 lport = inet->inet_num;
- const __be32 faddr = inet->inet_daddr;
- const __be16 fport = inet->inet_dport;
- struct net *net = sock_net(sk);
-
- return inet_ehashfn(net, laddr, lport, faddr, fport);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+ return inet6_ehashfn(sock_net(sk),
+ &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+#endif
+ return inet_ehashfn(sock_net(sk),
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
}
/*
@@ -269,6 +273,12 @@ void sock_gen_put(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sock_gen_put);
+void sock_edemux(struct sk_buff *skb)
+{
+ sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_edemux);
+
struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
@@ -401,13 +411,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
- spinlock_t *lock;
struct inet_ehash_bucket *head;
+ spinlock_t *lock;
int twrefcnt = 0;
WARN_ON(!sk_unhashed(sk));
- sk->sk_hash = inet_sk_ehashfn(sk);
+ sk->sk_hash = sk_ehashfn(sk);
head = inet_ehash_bucket(hashinfo, sk->sk_hash);
list = &head->chain;
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
@@ -424,15 +434,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
-static void __inet_hash(struct sock *sk)
+int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
- if (sk->sk_state != TCP_LISTEN) {
- __inet_hash_nolisten(sk, NULL);
- return;
- }
+ if (sk->sk_state != TCP_LISTEN)
+ return __inet_hash_nolisten(sk, tw);
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -441,13 +449,15 @@ static void __inet_hash(struct sock *sk)
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
spin_unlock(&ilb->lock);
+ return 0;
}
+EXPORT_SYMBOL(__inet_hash);
void inet_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(sk);
+ __inet_hash(sk, NULL);
local_bh_enable();
}
}
@@ -478,8 +488,7 @@ EXPORT_SYMBOL_GPL(inet_unhash);
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
- struct sock *, __u16, struct inet_timewait_sock **),
- int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))
+ struct sock *, __u16, struct inet_timewait_sock **))
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
const unsigned short snum = inet_sk(sk)->inet_num;
@@ -549,7 +558,7 @@ ok:
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- twrefcnt += hash(sk, tw);
+ twrefcnt += __inet_hash_nolisten(sk, tw);
}
if (tw)
twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
@@ -571,7 +580,7 @@ ok:
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- hash(sk, NULL);
+ __inet_hash_nolisten(sk, NULL);
spin_unlock_bh(&head->lock);
return 0;
} else {
@@ -591,7 +600,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
- __inet_check_established, __inet_hash_nolisten);
+ __inet_check_established);
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 86ebf020925b..f38e387448fb 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -487,6 +487,7 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo,
for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
restart_rcu:
+ cond_resched();
rcu_read_lock();
restart:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5cd99271d3a6..f6a0d54b308a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -536,12 +536,34 @@ out:
* Socket option code for IP. This is the end of the line after any
* TCP,UDP etc options on an IP socket.
*/
+static bool setsockopt_needs_rtnl(int optname)
+{
+ switch (optname) {
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_BLOCK_SOURCE:
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case IP_MSFILTER:
+ case IP_UNBLOCK_SOURCE:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_MSFILTER:
+ case MCAST_JOIN_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_UNBLOCK_SOURCE:
+ return true;
+ }
+ return false;
+}
static int do_ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
int val = 0, err;
+ bool needs_rtnl = setsockopt_needs_rtnl(optname);
switch (optname) {
case IP_PKTINFO:
@@ -584,6 +606,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
return ip_mroute_setsockopt(sk, optname, optval, optlen);
err = 0;
+ if (needs_rtnl)
+ rtnl_lock();
lock_sock(sk);
switch (optname) {
@@ -1118,10 +1142,14 @@ mc_msf_out:
break;
}
release_sock(sk);
+ if (needs_rtnl)
+ rtnl_unlock();
return err;
e_inval:
release_sock(sk);
+ if (needs_rtnl)
+ rtnl_unlock();
return -EINVAL;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index fd88f868776f..344e7cdfb8d4 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -64,11 +64,11 @@ EXPORT_SYMBOL_GPL(pingv6_ops);
static u16 ping_port_rover;
-static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask)
+static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
{
- int res = (num + net_hash_mix(net)) & mask;
+ u32 res = (num + net_hash_mix(net)) & mask;
- pr_debug("hash(%d) = %d\n", num, res);
+ pr_debug("hash(%u) = %u\n", num, res);
return res;
}
EXPORT_SYMBOL_GPL(ping_hash);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f17db898ed26..ef01d8570358 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -219,19 +219,20 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
}
EXPORT_SYMBOL_GPL(__cookie_v4_check);
-static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst)
+static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
- if (child)
+ if (child) {
+ atomic_set(&req->rsk_refcnt, 1);
inet_csk_reqsk_queue_add(sk, req, child);
- else
+ } else {
reqsk_free(req);
-
+ }
return child;
}
@@ -325,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+ req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
if (!req)
goto out;
@@ -336,8 +337,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
req->mss = mss;
ireq->ir_num = ntohs(th->dest);
ireq->ir_rmt_port = th->source;
- ireq->ir_loc_addr = ip_hdr(skb)->daddr;
- ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+ sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+ sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
ireq->ir_mark = inet_request_mark(sk, skb);
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
@@ -345,8 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
- treq->listener = NULL;
- write_pnet(&ireq->ireq_net, sock_net(sk));
+ treq->tfo_listener = false;
ireq->ireq_family = AF_INET;
ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fe77417fc137..82e375a0cbcf 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
tp = tcp_sk(child);
tp->fastopen_rsk = req;
- /* Do a hold on the listner sk so that if the listener is being
- * closed, the child that has been accepted can live on and still
- * access listen_lock.
- */
- sock_hold(sk);
- tcp_rsk(req)->listener = sk;
+ tcp_rsk(req)->tfo_listener = true;
/* RFC1323: The window in SYN & SYN/ACK segments is never
* scaled. So correct it appropriately.
@@ -174,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+ atomic_set(&req->rsk_refcnt, 1);
/* Add the child socket directly into the accept queue */
inet_csk_reqsk_queue_add(sk, req, child);
@@ -253,7 +249,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
fastopenq->rskq_rst_head = req1->dl_next;
fastopenq->qlen--;
spin_unlock(&fastopenq->lock);
- reqsk_free(req1);
+ reqsk_put(req1);
}
return true;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 717d437b6ce1..1dfbaee3554e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3321,6 +3321,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
return flag;
}
+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+ int mib_idx, u32 *last_oow_ack_time)
+{
+ /* Data packets without SYNs are not likely part of an ACK loop. */
+ if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+ !tcp_hdr(skb)->syn)
+ goto not_rate_limited;
+
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ NET_INC_STATS_BH(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+ }
+
+ *last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+ return false; /* not rate-limited: go ahead, send dupack now! */
+}
+
/* RFC 5961 7 [ACK Throttling] */
static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
{
@@ -5912,6 +5942,51 @@ static void tcp_ecn_create_request(struct request_sock *req,
inet_rsk(req)->ecn_ok = 1;
}
+static void tcp_openreq_init(struct request_sock *req,
+ const struct tcp_options_received *rx_opt,
+ struct sk_buff *skb, const struct sock *sk)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
+ req->cookie_ts = 0;
+ tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+ tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ tcp_rsk(req)->last_oow_ack_time = 0;
+ req->mss = rx_opt->mss_clamp;
+ req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+ ireq->tstamp_ok = rx_opt->tstamp_ok;
+ ireq->sack_ok = rx_opt->sack_ok;
+ ireq->snd_wscale = rx_opt->snd_wscale;
+ ireq->wscale_ok = rx_opt->wscale_ok;
+ ireq->acked = 0;
+ ireq->ecn_ok = 0;
+ ireq->ir_rmt_port = tcp_hdr(skb)->source;
+ ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+}
+
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener)
+{
+ struct request_sock *req = reqsk_alloc(ops, sk_listener);
+
+ if (req) {
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ kmemcheck_annotate_bitfield(ireq, flags);
+ ireq->opt = NULL;
+ atomic64_set(&ireq->ir_cookie, 0);
+ ireq->ireq_state = TCP_NEW_SYN_RECV;
+ write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+
+ }
+
+ return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
@@ -5949,7 +6024,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop;
}
- req = inet_reqsk_alloc(rsk_ops);
+ req = inet_reqsk_alloc(rsk_ops, sk);
if (!req)
goto drop;
@@ -5965,7 +6040,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
- write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk));
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
@@ -6042,7 +6116,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (err || want_cookie)
goto drop_and_free;
- tcp_rsk(req)->listener = NULL;
+ tcp_rsk(req)->tfo_listener = false;
af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1f514a0c5e60..ddd0b1f25b96 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -189,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!inet->inet_saddr)
inet->inet_saddr = fl4->saddr;
- inet->inet_rcv_saddr = inet->inet_saddr;
+ sk_rcv_saddr_set(sk, inet->inet_saddr);
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
@@ -204,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tcp_fetch_timewait_stamp(sk, &rt->dst);
inet->inet_dport = usin->sin_port;
- inet->inet_daddr = daddr;
+ sk_daddr_set(sk, daddr);
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
@@ -1219,14 +1219,14 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
#endif
-static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
+static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
- ireq->ir_loc_addr = ip_hdr(skb)->daddr;
- ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
- ireq->no_srccheck = inet_sk(sk)->transparent;
+ sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+ sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+ ireq->no_srccheck = inet_sk(sk_listener)->transparent;
ireq->opt = tcp_v4_save_options(skb);
ireq->ireq_family = AF_INET;
}
@@ -1319,8 +1319,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- newinet->inet_daddr = ireq->ir_rmt_addr;
- newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+ sk_daddr_set(newsk, ireq->ir_rmt_addr);
+ sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newinet->inet_saddr = ireq->ir_loc_addr;
inet_opt = ireq->opt;
rcu_assign_pointer(newinet->inet_opt, inet_opt);
@@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
- if (sk->sk_state != TCP_TIME_WAIT) {
+ if (sk_fullsock(sk)) {
struct dst_entry *dst = sk->sk_rx_dst;
if (dst)
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 366728cbee4a..5bef3513af77 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -152,6 +152,9 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst
#define TCP_METRICS_RECLAIM_DEPTH 5
#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL
+#define deref_locked(p) \
+ rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock))
+
static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
struct inetpeer_addr *saddr,
struct inetpeer_addr *daddr,
@@ -180,9 +183,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
if (unlikely(reclaim)) {
struct tcp_metrics_block *oldest;
- oldest = rcu_dereference(tcp_metrics_hash[hash].chain);
- for (tm = rcu_dereference(oldest->tcpm_next); tm;
- tm = rcu_dereference(tm->tcpm_next)) {
+ oldest = deref_locked(tcp_metrics_hash[hash].chain);
+ for (tm = deref_locked(oldest->tcpm_next); tm;
+ tm = deref_locked(tm->tcpm_next)) {
if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
oldest = tm;
}
@@ -1040,12 +1043,6 @@ out_free:
return ret;
}
-#define deref_locked_genl(p) \
- rcu_dereference_protected(p, lockdep_genl_is_held() && \
- lockdep_is_held(&tcp_metrics_lock))
-
-#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held())
-
static void tcp_metrics_flush_all(struct net *net)
{
unsigned int max_rows = 1U << tcp_metrics_hash_log;
@@ -1057,8 +1054,7 @@ static void tcp_metrics_flush_all(struct net *net)
struct tcp_metrics_block __rcu **pp;
spin_lock_bh(&tcp_metrics_lock);
pp = &hb->chain;
- for (tm = deref_locked_genl(*pp); tm;
- tm = deref_locked_genl(*pp)) {
+ for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
if (net_eq(tm_net(tm), net)) {
*pp = tm->tcpm_next;
kfree_rcu(tm, rcu_head);
@@ -1097,7 +1093,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
hb = tcp_metrics_hash + hash;
pp = &hb->chain;
spin_lock_bh(&tcp_metrics_lock);
- for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) {
+ for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
if (addr_same(&tm->tcpm_daddr, &daddr) &&
(!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
net_eq(tm_net(tm), net)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f27556e2158b..294af16633af 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -318,8 +318,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
}
-static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
- unsigned int port)
+static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
+ unsigned int port)
{
return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
}
@@ -421,9 +421,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
return score;
}
-static unsigned int udp_ehashfn(struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
{
static u32 udp_ehash_secret __read_mostly;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 88d2cf0cae52..158378e73f0a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2473,9 +2473,9 @@ static int ipv6_mc_config(struct sock *sk, bool join,
lock_sock(sk);
if (join)
- ret = __ipv6_sock_mc_join(sk, ifindex, addr);
+ ret = ipv6_sock_mc_join(sk, ifindex, addr);
else
- ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
+ ret = ipv6_sock_mc_drop(sk, ifindex, addr);
release_sock(sk);
return ret;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 051dffb49c90..033f17816ef4 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -23,11 +23,9 @@
#include <net/secure_seq.h>
#include <net/ip.h>
-static unsigned int inet6_ehashfn(struct net *net,
- const struct in6_addr *laddr,
- const u16 lport,
- const struct in6_addr *faddr,
- const __be16 fport)
+u32 inet6_ehashfn(const struct net *net,
+ const struct in6_addr *laddr, const u16 lport,
+ const struct in6_addr *faddr, const __be16 fport)
{
static u32 inet6_ehash_secret __read_mostly;
static u32 ipv6_hash_secret __read_mostly;
@@ -44,54 +42,6 @@ static unsigned int inet6_ehashfn(struct net *net,
inet6_ehash_secret + net_hash_mix(net));
}
-static int inet6_sk_ehashfn(const struct sock *sk)
-{
- const struct inet_sock *inet = inet_sk(sk);
- const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
- const struct in6_addr *faddr = &sk->sk_v6_daddr;
- const __u16 lport = inet->inet_num;
- const __be16 fport = inet->inet_dport;
- struct net *net = sock_net(sk);
-
- return inet6_ehashfn(net, laddr, lport, faddr, fport);
-}
-
-int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
-{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- int twrefcnt = 0;
-
- WARN_ON(!sk_unhashed(sk));
-
- if (sk->sk_state == TCP_LISTEN) {
- struct inet_listen_hashbucket *ilb;
-
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- spin_lock(&ilb->lock);
- __sk_nulls_add_node_rcu(sk, &ilb->head);
- spin_unlock(&ilb->lock);
- } else {
- unsigned int hash;
- struct hlist_nulls_head *list;
- spinlock_t *lock;
-
- sk->sk_hash = hash = inet6_sk_ehashfn(sk);
- list = &inet_ehash_bucket(hashinfo, hash)->chain;
- lock = inet_ehash_lockp(hashinfo, hash);
- spin_lock(lock);
- __sk_nulls_add_node_rcu(sk, list);
- if (tw) {
- WARN_ON(sk->sk_hash != tw->tw_hash);
- twrefcnt = inet_twsk_unhash(tw);
- }
- spin_unlock(lock);
- }
-
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- return twrefcnt;
-}
-EXPORT_SYMBOL(__inet6_hash);
-
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
* we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
@@ -320,6 +270,6 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
- __inet6_check_established, __inet6_hash);
+ __inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 8d766d9100cb..cc5883791bac 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -117,6 +117,23 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
return opt;
}
+static bool setsockopt_needs_rtnl(int optname)
+{
+ switch (optname) {
+ case IPV6_ADD_MEMBERSHIP:
+ case IPV6_DROP_MEMBERSHIP:
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ case MCAST_MSFILTER:
+ return true;
+ }
+ return false;
+}
+
static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -124,6 +141,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
struct net *net = sock_net(sk);
int val, valbool;
int retv = -ENOPROTOOPT;
+ bool needs_rtnl = setsockopt_needs_rtnl(optname);
if (optval == NULL)
val = 0;
@@ -140,6 +158,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (ip6_mroute_opt(optname))
return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+ if (needs_rtnl)
+ rtnl_lock();
lock_sock(sk);
switch (optname) {
@@ -624,10 +644,10 @@ done:
psin6 = (struct sockaddr_in6 *)&greq.gr_group;
if (optname == MCAST_JOIN_GROUP)
retv = ipv6_sock_mc_join(sk, greq.gr_interface,
- &psin6->sin6_addr);
+ &psin6->sin6_addr);
else
retv = ipv6_sock_mc_drop(sk, greq.gr_interface,
- &psin6->sin6_addr);
+ &psin6->sin6_addr);
break;
}
case MCAST_JOIN_SOURCE_GROUP:
@@ -660,7 +680,7 @@ done:
psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
- &psin6->sin6_addr);
+ &psin6->sin6_addr);
/* prior join w/ different source is ok */
if (retv && retv != -EADDRINUSE)
break;
@@ -837,11 +857,15 @@ pref_skip_coa:
}
release_sock(sk);
+ if (needs_rtnl)
+ rtnl_unlock();
return retv;
e_inval:
release_sock(sk);
+ if (needs_rtnl)
+ rtnl_unlock();
return -EINVAL;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1dd1fedff9f4..cbb66fd3da6d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -132,7 +132,7 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
return iv > 0 ? iv : 1;
}
-int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct net_device *dev = NULL;
struct ipv6_mc_socklist *mc_lst;
@@ -199,24 +199,12 @@ int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *add
return 0;
}
-EXPORT_SYMBOL(__ipv6_sock_mc_join);
-
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
-{
- int ret;
-
- rtnl_lock();
- ret = __ipv6_sock_mc_join(sk, ifindex, addr);
- rtnl_unlock();
-
- return ret;
-}
EXPORT_SYMBOL(ipv6_sock_mc_join);
/*
* socket leave on multicast group
*/
-int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc_lst;
@@ -255,18 +243,6 @@ int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *add
return -EADDRNOTAVAIL;
}
-EXPORT_SYMBOL(__ipv6_sock_mc_drop);
-
-int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
-{
- int ret;
-
- rtnl_lock();
- ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
- rtnl_unlock();
-
- return ret;
-}
EXPORT_SYMBOL(ipv6_sock_mc_drop);
/* called with rcu_read_lock() */
@@ -460,7 +436,7 @@ done:
read_unlock_bh(&idev->lock);
rcu_read_unlock();
if (leavegroup)
- return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
+ err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
return err;
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 58875ce8e178..da5823e5e5a7 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct sock *child;
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
- if (child)
+ if (child) {
+ atomic_set(&req->rsk_refcnt, 1);
inet_csk_reqsk_queue_add(sk, req, child);
- else
+ } else {
reqsk_free(req);
-
+ }
return child;
}
@@ -189,14 +190,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp6_request_sock_ops);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
if (!req)
goto out;
ireq = inet_rsk(req);
treq = tcp_rsk(req);
- treq->listener = NULL;
- write_pnet(&ireq->ireq_net, sock_net(sk));
+ treq->tfo_listener = false;
ireq->ireq_family = AF_INET6;
if (security_inet_conn_request(sk, skb, req))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d89f028dc8c4..720676d073d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -104,19 +104,6 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
}
}
-static void tcp_v6_hash(struct sock *sk)
-{
- if (sk->sk_state != TCP_CLOSE) {
- if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
- tcp_prot.hash(sk);
- return;
- }
- local_bh_disable();
- __inet6_hash(sk, NULL);
- local_bh_enable();
- }
-}
-
static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
{
return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
@@ -233,11 +220,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tp->af_specific = &tcp_sock_ipv6_specific;
#endif
goto failure;
- } else {
- ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
}
+ np->saddr = sk->sk_v6_rcv_saddr;
return err;
}
@@ -1078,11 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
-
- ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
-
- newsk->sk_v6_rcv_saddr = newnp->saddr;
+ newnp->saddr = newsk->sk_v6_rcv_saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1231,7 +1211,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_done(newsk);
goto out;
}
- __inet6_hash(newsk, NULL);
+ __inet_hash(newsk, NULL);
return newsk;
@@ -1583,7 +1563,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
- if (sk->sk_state != TCP_TIME_WAIT) {
+ if (sk_fullsock(sk)) {
struct dst_entry *dst = sk->sk_rx_dst;
if (dst)
@@ -1890,7 +1870,7 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .hash = tcp_v6_hash,
+ .hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 70568a4548e4..7fe0329c0d37 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,11 +53,11 @@
#include <trace/events/skb.h>
#include "udp_impl.h"
-static unsigned int udp6_ehashfn(struct net *net,
- const struct in6_addr *laddr,
- const u16 lport,
- const struct in6_addr *faddr,
- const __be16 fport)
+static u32 udp6_ehashfn(const struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
{
static u32 udp6_ehash_secret __read_mostly;
static u32 udp_ipv6_hash_secret __read_mostly;
@@ -104,9 +104,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 0;
}
-static unsigned int udp6_portaddr_hash(struct net *net,
- const struct in6_addr *addr6,
- unsigned int port)
+static u32 udp6_portaddr_hash(const struct net *net,
+ const struct in6_addr *addr6,
+ unsigned int port)
{
unsigned int hash, mix = net_hash_mix(net);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 08d95559b6f7..19b9cce6c210 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1405,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
mreq.imr_ifindex = dev->ifindex;
+ rtnl_lock();
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
+ rtnl_unlock();
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3aedbda7658a..f35c15b0de6b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
struct sock *sk = skb->sk;
struct rtable *ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ if (!skb->dev && sk && sk_fullsock(sk))
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a2233e77cf39..2631876ac55b 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
{
- if (!sk || sk->sk_state == TCP_TIME_WAIT)
+ if (!sk || !sk_fullsock(sk))
return;
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11d85b3813f2..61d04bf9be2b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -539,7 +539,7 @@ __build_packet_message(struct nfnl_log_net *log,
/* UID */
sk = skb->sk;
- if (sk && sk->sk_state != TCP_TIME_WAIT) {
+ if (sk && sk_fullsock(sk)) {
read_lock_bh(&sk->sk_callback_lock);
if (sk->sk_socket && sk->sk_socket->file) {
struct file *file = sk->sk_socket->file;
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 0db8515e76da..86ee8b05adae 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
{
const struct cred *cred;
- if (sk->sk_state == TCP_TIME_WAIT)
+ if (!sk_fullsock(sk))
return 0;
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e99911eda915..abe68119a76c 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -83,7 +83,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*(u16 *)dest->data = out->type;
break;
case NFT_META_SKUID:
- if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
goto err;
read_lock_bh(&skb->sk->sk_callback_lock);
@@ -99,7 +99,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
case NFT_META_SKGID:
- if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
goto err;
read_lock_bh(&skb->sk->sk_callback_lock);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index ef8a926752a9..165b77ce9aa9 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t {
static bool tproxy_sk_is_transparent(struct sock *sk)
{
- if (sk->sk_state != TCP_TIME_WAIT) {
- if (inet_sk(sk)->transparent)
- return true;
- sock_put(sk);
- } else {
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
if (inet_twsk(sk)->tw_transparent)
return true;
- inet_twsk_put(inet_twsk(sk));
+ break;
+ case TCP_NEW_SYN_RECV:
+ if (inet_rsk(inet_reqsk(sk))->no_srccheck)
+ return true;
+ break;
+ default:
+ if (inet_sk(sk)->transparent)
+ return true;
}
+
+ sock_gen_put(sk);
return false;
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 13332dbf291d..895534e87a47 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -129,6 +129,20 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol,
return NULL;
}
+static bool xt_socket_sk_is_transparent(struct sock *sk)
+{
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
+ return inet_twsk(sk)->tw_transparent;
+
+ case TCP_NEW_SYN_RECV:
+ return inet_rsk(inet_reqsk(sk))->no_srccheck;
+
+ default:
+ return inet_sk(sk)->transparent;
+ }
+}
+
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
@@ -195,16 +209,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* unless XT_SOCKET_NOWILDCARD is set
*/
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
- sk->sk_state != TCP_TIME_WAIT &&
+ sk_fullsock(sk) &&
inet_sk(sk)->inet_rcv_saddr == 0);
/* Ignore non-transparent sockets,
- if XT_SOCKET_TRANSPARENT is used */
+ * if XT_SOCKET_TRANSPARENT is used
+ */
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = ((sk->sk_state != TCP_TIME_WAIT &&
- inet_sk(sk)->transparent) ||
- (sk->sk_state == TCP_TIME_WAIT &&
- inet_twsk(sk)->tw_transparent));
+ transparent = xt_socket_sk_is_transparent(sk);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -363,16 +375,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
* unless XT_SOCKET_NOWILDCARD is set
*/
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
- sk->sk_state != TCP_TIME_WAIT &&
+ sk_fullsock(sk) &&
ipv6_addr_any(&sk->sk_v6_rcv_saddr));
/* Ignore non-transparent sockets,
- if XT_SOCKET_TRANSPARENT is used */
+ * if XT_SOCKET_TRANSPARENT is used
+ */
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = ((sk->sk_state != TCP_TIME_WAIT &&
- inet_sk(sk)->transparent) ||
- (sk->sk_state == TCP_TIME_WAIT &&
- inet_twsk(sk)->tw_transparent));
+ transparent = xt_socket_sk_is_transparent(sk);
if (sk != skb->sk)
sock_gen_put(sk);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6b0f21950e09..d97aed628bda 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3123,7 +3123,7 @@ static int __init netlink_proto_init(void)
.key_offset = offsetof(struct netlink_sock, portid),
.key_len = sizeof(u32), /* portid */
.hashfn = jhash,
- .max_shift = 16, /* 64K */
+ .max_size = 65536,
};
if (err != 0)
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index b7a23132c610..c9bfa004abed 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -28,11 +28,11 @@
int netdev_switch_parent_id_get(struct net_device *dev,
struct netdev_phys_item_id *psid)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ const struct swdev_ops *ops = dev->swdev_ops;
- if (!ops->ndo_switch_parent_id_get)
+ if (!ops || !ops->swdev_parent_id_get)
return -EOPNOTSUPP;
- return ops->ndo_switch_parent_id_get(dev, psid);
+ return ops->swdev_parent_id_get(dev, psid);
}
EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get);
@@ -46,12 +46,12 @@ EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get);
*/
int netdev_switch_port_stp_update(struct net_device *dev, u8 state)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ const struct swdev_ops *ops = dev->swdev_ops;
- if (!ops->ndo_switch_port_stp_update)
+ if (!ops || !ops->swdev_port_stp_update)
return -EOPNOTSUPP;
- WARN_ON(!ops->ndo_switch_parent_id_get);
- return ops->ndo_switch_port_stp_update(dev, state);
+ WARN_ON(!ops->swdev_parent_id_get);
+ return ops->swdev_port_stp_update(dev, state);
}
EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update);
@@ -230,17 +230,17 @@ EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink);
static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ const struct swdev_ops *ops = dev->swdev_ops;
struct net_device *lower_dev;
struct net_device *port_dev;
struct list_head *iter;
/* Recusively search down until we find a sw port dev.
- * (A sw port dev supports ndo_switch_parent_id_get).
+ * (A sw port dev supports swdev_parent_id_get).
*/
if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD &&
- ops->ndo_switch_parent_id_get)
+ ops && ops->swdev_parent_id_get)
return dev;
netdev_for_each_lower_dev(dev, lower_dev, iter) {
@@ -304,7 +304,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
struct net_device *dev;
- const struct net_device_ops *ops;
+ const struct swdev_ops *ops;
int err = 0;
/* Don't offload route if using custom ip rules or if
@@ -322,12 +322,12 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
dev = netdev_switch_get_dev_by_nhs(fi);
if (!dev)
return 0;
- ops = dev->netdev_ops;
+ ops = dev->swdev_ops;
- if (ops->ndo_switch_fib_ipv4_add) {
- err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len,
- fi, tos, type, nlflags,
- tb_id);
+ if (ops->swdev_fib_ipv4_add) {
+ err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len,
+ fi, tos, type, nlflags,
+ tb_id);
if (!err)
fi->fib_flags |= RTNH_F_EXTERNAL;
}
@@ -352,7 +352,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
u8 tos, u8 type, u32 tb_id)
{
struct net_device *dev;
- const struct net_device_ops *ops;
+ const struct swdev_ops *ops;
int err = 0;
if (!(fi->fib_flags & RTNH_F_EXTERNAL))
@@ -361,11 +361,11 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
dev = netdev_switch_get_dev_by_nhs(fi);
if (!dev)
return 0;
- ops = dev->netdev_ops;
+ ops = dev->swdev_ops;
- if (ops->ndo_switch_fib_ipv4_del) {
- err = ops->ndo_switch_fib_ipv4_del(dev, htonl(dst), dst_len,
- fi, tos, type, tb_id);
+ if (ops->swdev_fib_ipv4_del) {
+ err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len,
+ fi, tos, type, tb_id);
if (!err)
fi->fib_flags &= ~RTNH_F_EXTERNAL;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index bc49120bfb44..8c98c4d00ad6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -845,8 +845,10 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
if (link)
return rc;
- if (likely(in_own_node(net, dnode)))
- return tipc_sk_rcv(net, list);
+ if (likely(in_own_node(net, dnode))) {
+ tipc_sk_rcv(net, list);
+ return 0;
+ }
__skb_queue_purge(list);
return rc;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 105ba7adf06f..ab0ac62a1287 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -811,8 +811,8 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq)
sseq = seq->sseqs;
info = sseq->info;
list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
- tipc_nametbl_remove_publ(net, publ->type, publ->lower,
- publ->node, publ->ref, publ->key);
+ tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
+ publ->ref, publ->key);
kfree_rcu(publ, rcu);
}
hlist_del_init_rcu(&seq->ns_list);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index eadd4ed45905..ab6183cdb121 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -37,11 +37,13 @@
#include "core.h"
#include "socket.h"
#include <net/sock.h>
+#include <linux/module.h>
/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
#define MAX_RECV_MSG_COUNT 25
#define CF_CONNECTED 1
+#define CF_SERVER 2
#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
@@ -88,9 +90,19 @@ static void tipc_clean_outqueues(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+ struct sockaddr_tipc *saddr = con->server->saddr;
+ struct socket *sock = con->sock;
+ struct sock *sk;
- if (con->sock) {
- tipc_sock_release_local(con->sock);
+ if (sock) {
+ sk = sock->sk;
+ if (test_bit(CF_SERVER, &con->flags)) {
+ __module_get(sock->ops->owner);
+ __module_get(sk->sk_prot_creator->owner);
+ }
+ saddr->scope = -TIPC_NODE_SCOPE;
+ kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
+ sk_release_kernel(sk);
con->sock = NULL;
}
@@ -281,7 +293,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
struct tipc_conn *newcon;
int ret;
- ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK);
+ ret = kernel_accept(sock, &newsock, O_NONBLOCK);
if (ret < 0)
return ret;
@@ -309,9 +321,12 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
struct socket *sock = NULL;
int ret;
- ret = tipc_sock_create_local(s->net, s->type, &sock);
+ ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock);
if (ret < 0)
return NULL;
+
+ sk_change_net(sock->sk, s->net);
+
ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
(char *)&s->imp, sizeof(s->imp));
if (ret < 0)
@@ -337,11 +352,31 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
pr_err("Unknown socket type %d\n", s->type);
goto create_err;
}
+
+ /* As server's listening socket owner and creator is the same module,
+ * we have to decrease TIPC module reference count to guarantee that
+ * it remains zero after the server socket is created, otherwise,
+ * executing "rmmod" command is unable to make TIPC module deleted
+ * after TIPC module is inserted successfully.
+ *
+ * However, the reference count is ever increased twice in
+ * sock_create_kern(): one is to increase the reference count of owner
+ * of TIPC socket's proto_ops struct; another is to increment the
+ * reference count of owner of TIPC proto struct. Therefore, we must
+ * decrement the module reference count twice to ensure that it keeps
+ * zero after server's listening socket is created. Of course, we
+ * must bump the module reference count twice as well before the socket
+ * is closed.
+ */
+ module_put(sock->ops->owner);
+ module_put(sock->sk->sk_prot_creator->owner);
+ set_bit(CF_SERVER, &con->flags);
+
return sock;
create_err:
- sock_release(sock);
- con->sock = NULL;
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
return NULL;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 934947f038b6..c03a3d33806f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -74,6 +74,7 @@
* @link_cong: non-zero if owner must sleep because of link congestion
* @sent_unacked: # messages sent by socket, and not yet acked by peer
* @rcv_unacked: # messages read by user, but not yet acked back to peer
+ * @remote: 'connected' peer for dgram/rdm
* @node: hash table node
* @rcu: rcu struct for tipc_sock
*/
@@ -96,6 +97,7 @@ struct tipc_sock {
bool link_cong;
uint sent_unacked;
uint rcv_unacked;
+ struct sockaddr_tipc remote;
struct rhash_head node;
struct rcu_head rcu;
};
@@ -121,9 +123,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
-
static struct proto tipc_proto;
-static struct proto tipc_proto_kern;
static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
[TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
@@ -341,11 +341,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
}
/* Allocate socket's protocol area */
- if (!kern)
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
- else
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern);
-
+ sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
if (sk == NULL)
return -ENOMEM;
@@ -383,75 +379,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
return 0;
}
-/**
- * tipc_sock_create_local - create TIPC socket from inside TIPC module
- * @type: socket type - SOCK_RDM or SOCK_SEQPACKET
- *
- * We cannot use sock_creat_kern here because it bumps module user count.
- * Since socket owner and creator is the same module we must make sure
- * that module count remains zero for module local sockets, otherwise
- * we cannot do rmmod.
- *
- * Returns 0 on success, errno otherwise
- */
-int tipc_sock_create_local(struct net *net, int type, struct socket **res)
-{
- int rc;
-
- rc = sock_create_lite(AF_TIPC, type, 0, res);
- if (rc < 0) {
- pr_err("Failed to create kernel socket\n");
- return rc;
- }
- tipc_sk_create(net, *res, 0, 1);
-
- return 0;
-}
-
-/**
- * tipc_sock_release_local - release socket created by tipc_sock_create_local
- * @sock: the socket to be released.
- *
- * Module reference count is not incremented when such sockets are created,
- * so we must keep it from being decremented when they are released.
- */
-void tipc_sock_release_local(struct socket *sock)
-{
- tipc_release(sock);
- sock->ops = NULL;
- sock_release(sock);
-}
-
-/**
- * tipc_sock_accept_local - accept a connection on a socket created
- * with tipc_sock_create_local. Use this function to avoid that
- * module reference count is inadvertently incremented.
- *
- * @sock: the accepting socket
- * @newsock: reference to the new socket to be created
- * @flags: socket flags
- */
-
-int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
- int flags)
-{
- struct sock *sk = sock->sk;
- int ret;
-
- ret = sock_create_lite(sk->sk_family, sk->sk_type,
- sk->sk_protocol, newsock);
- if (ret < 0)
- return ret;
-
- ret = tipc_accept(sock, *newsock, flags);
- if (ret < 0) {
- sock_release(*newsock);
- return ret;
- }
- (*newsock)->ops = sock->ops;
- return ret;
-}
-
static void tipc_sk_callback(struct rcu_head *head)
{
struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);
@@ -929,22 +856,23 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
u32 dnode, dport;
struct sk_buff_head *pktchain = &sk->sk_write_queue;
struct sk_buff *skb;
- struct tipc_name_seq *seq = &dest->addr.nameseq;
+ struct tipc_name_seq *seq;
struct iov_iter save;
u32 mtu;
long timeo;
int rc;
- if (unlikely(!dest))
- return -EDESTADDRREQ;
-
- if (unlikely((m->msg_namelen < sizeof(*dest)) ||
- (dest->family != AF_TIPC)))
- return -EINVAL;
-
if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
-
+ if (unlikely(!dest)) {
+ if (tsk->connected && sock->state == SS_READY)
+ dest = &tsk->remote;
+ else
+ return -EDESTADDRREQ;
+ } else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
+ dest->family != AF_TIPC) {
+ return -EINVAL;
+ }
if (unlikely(sock->state != SS_READY)) {
if (sock->state == SS_LISTENING)
return -EPIPE;
@@ -957,7 +885,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
tsk->conn_instance = dest->addr.name.name.instance;
}
}
-
+ seq = &dest->addr.nameseq;
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
if (dest->addrtype == TIPC_ADDR_MCAST) {
@@ -1908,17 +1836,24 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
int destlen, int flags)
{
struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
struct msghdr m = {NULL,};
- long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout;
+ long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
socket_state previous;
- int res;
+ int res = 0;
lock_sock(sk);
- /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
+ /* DGRAM/RDM connect(), just save the destaddr */
if (sock->state == SS_READY) {
- res = -EOPNOTSUPP;
+ if (dst->family == AF_UNSPEC) {
+ memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc));
+ tsk->connected = 0;
+ } else {
+ memcpy(&tsk->remote, dest, destlen);
+ tsk->connected = 1;
+ }
goto exit;
}
@@ -2153,7 +2088,6 @@ restart:
TIPC_CONN_SHUTDOWN))
tipc_link_xmit_skb(net, skb, dnode,
tsk->portid);
- tipc_node_remove_conn(net, dnode, tsk->portid);
} else {
dnode = tsk_peer_node(tsk);
@@ -2361,8 +2295,8 @@ int tipc_sk_rht_init(struct net *net)
.key_offset = offsetof(struct tipc_sock, portid),
.key_len = sizeof(u32), /* portid */
.hashfn = jhash,
- .max_shift = 20, /* 1M */
- .min_shift = 8, /* 256 */
+ .max_size = 1048576,
+ .min_size = 256,
};
return rhashtable_init(&tn->sk_rht, &rht_params);
@@ -2608,12 +2542,6 @@ static struct proto tipc_proto = {
.sysctl_rmem = sysctl_tipc_rmem
};
-static struct proto tipc_proto_kern = {
- .name = "TIPC",
- .obj_size = sizeof(struct tipc_sock),
- .sysctl_rmem = sysctl_tipc_rmem
-};
-
/**
* tipc_socket_init - initialize TIPC socket interface
*
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 238f1b7bd9bd..bf6551389522 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -44,10 +44,6 @@
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
int tipc_socket_init(void);
void tipc_socket_stop(void);
-int tipc_sock_create_local(struct net *net, int type, struct socket **res);
-void tipc_sock_release_local(struct socket *sock);
-int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
- int flags);
int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index fc2fb11a354d..04836dd70c2b 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -246,11 +246,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
return 0;
mreqn.imr_multiaddr = remote->ipv4;
mreqn.imr_ifindex = ub->ifindex;
- err = __ip_mc_join_group(sk, &mreqn);
+ err = ip_mc_join_group(sk, &mreqn);
} else {
if (!ipv6_addr_is_multicast(&remote->ipv6))
return 0;
- err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6);
+ err = ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6);
}
return err;
}
diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c
index 066892662915..ed18e9a4909c 100644
--- a/samples/bpf/sockex1_kern.c
+++ b/samples/bpf/sockex1_kern.c
@@ -1,5 +1,6 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include "bpf_helpers.h"
@@ -11,14 +12,17 @@ struct bpf_map_def SEC("maps") my_map = {
};
SEC("socket1")
-int bpf_prog1(struct sk_buff *skb)
+int bpf_prog1(struct __sk_buff *skb)
{
int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
long *value;
+ if (skb->pkt_type != PACKET_OUTGOING)
+ return 0;
+
value = bpf_map_lookup_elem(&my_map, &index);
if (value)
- __sync_fetch_and_add(value, 1);
+ __sync_fetch_and_add(value, skb->len);
return 0;
}
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
index 34a443ff3831..678ce4693551 100644
--- a/samples/bpf/sockex1_user.c
+++ b/samples/bpf/sockex1_user.c
@@ -40,7 +40,7 @@ int main(int ac, char **argv)
key = IPPROTO_ICMP;
assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
- printf("TCP %lld UDP %lld ICMP %lld packets\n",
+ printf("TCP %lld UDP %lld ICMP %lld bytes\n",
tcp_cnt, udp_cnt, icmp_cnt);
sleep(1);
}
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index 6f0135f0f217..ba0e177ff561 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -42,13 +42,13 @@ static inline int proto_ports_offset(__u64 proto)
}
}
-static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff)
+static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
& (IP_MF | IP_OFFSET);
}
-static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off)
+static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
__u64 w0 = load_word(ctx, off);
__u64 w1 = load_word(ctx, off + 4);
@@ -58,7 +58,7 @@ static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off)
return (__u32)(w0 ^ w1 ^ w2 ^ w3);
}
-static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
struct flow_keys *flow)
{
__u64 verlen;
@@ -82,7 +82,7 @@ static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
return nhoff;
}
-static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
struct flow_keys *flow)
{
*ip_proto = load_byte(skb,
@@ -96,7 +96,7 @@ static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto
return nhoff;
}
-static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow)
+static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow)
{
__u64 nhoff = ETH_HLEN;
__u64 ip_proto;
@@ -183,18 +183,23 @@ static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow)
return true;
}
+struct pair {
+ long packets;
+ long bytes;
+};
+
struct bpf_map_def SEC("maps") hash_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(__be32),
- .value_size = sizeof(long),
+ .value_size = sizeof(struct pair),
.max_entries = 1024,
};
SEC("socket2")
-int bpf_prog2(struct sk_buff *skb)
+int bpf_prog2(struct __sk_buff *skb)
{
struct flow_keys flow;
- long *value;
+ struct pair *value;
u32 key;
if (!flow_dissector(skb, &flow))
@@ -203,9 +208,10 @@ int bpf_prog2(struct sk_buff *skb)
key = flow.dst;
value = bpf_map_lookup_elem(&hash_map, &key);
if (value) {
- __sync_fetch_and_add(value, 1);
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, skb->len);
} else {
- long val = 1;
+ struct pair val = {1, skb->len};
bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
}
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index d2d5f5a790d3..29a276d766fc 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -6,6 +6,11 @@
#include <unistd.h>
#include <arpa/inet.h>
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
int main(int ac, char **argv)
{
char filename[256];
@@ -29,13 +34,13 @@ int main(int ac, char **argv)
for (i = 0; i < 5; i++) {
int key = 0, next_key;
- long long value;
+ struct pair value;
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
bpf_lookup_elem(map_fd[0], &next_key, &value);
- printf("ip %s count %lld\n",
+ printf("ip %s bytes %lld packets %lld\n",
inet_ntoa((struct in_addr){htonl(next_key)}),
- value);
+ value.bytes, value.packets);
key = next_key;
}
sleep(1);
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index 7b56b59fad8e..75d561f9fd6a 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -14,6 +14,7 @@
#include <linux/unistd.h>
#include <string.h>
#include <linux/filter.h>
+#include <stddef.h>
#include "libbpf.h"
#define MAX_INSNS 512
@@ -642,6 +643,84 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
},
+ {
+ "access skb fields ok",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, queue_mapping)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, protocol)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_present)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_tci)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields bad1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {4},
+ .errstr = "different pointers",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad3",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -12),
+ },
+ .fixup = {6},
+ .errstr = "different pointers",
+ .result = REJECT,
+ },
};
static int probe_filter_length(struct bpf_insn *fp)