diff options
99 files changed, 2417 insertions, 2479 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index 04e6bef3ac3f..5fdbbcdf8c4b 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -31,6 +31,8 @@ A switch child node has the following optional property: switch. Must be set if the switch can not detect the presence and/or size of a connected EEPROM, otherwise optional. +- reset-gpios : phandle and specifier to a gpio line connected to + reset pin of the switch chip. A switch may have multiple "port" children nodes @@ -114,6 +116,7 @@ Example: #size-cells = <0>; reg = <17 1>; /* MDIO address 17, switch 1 in tree */ mii-bus = <&mii_bus1>; + reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; switch1port0: port@0 { reg = <0>; diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index fc55e8e0351d..ad3806542d33 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -32,6 +32,7 @@ #include <linux/can/dev.h> #include <linux/can/error.h> #include <linux/can/led.h> +#include <linux/pm_runtime.h> #define DRIVER_NAME "xilinx_can" @@ -138,7 +139,7 @@ struct xcan_priv { u32 (*read_reg)(const struct xcan_priv *priv, enum xcan_reg reg); void (*write_reg)(const struct xcan_priv *priv, enum xcan_reg reg, u32 val); - struct net_device *dev; + struct device *dev; void __iomem *reg_base; unsigned long irq_flags; struct clk *bus_clk; @@ -843,6 +844,13 @@ static int xcan_open(struct net_device *ndev) struct xcan_priv *priv = netdev_priv(ndev); int ret; + ret = pm_runtime_get_sync(priv->dev); + if (ret < 0) { + netdev_err(ndev, "%s: pm_runtime_get failed(%d)\n", + __func__, ret); + return ret; + } + ret = request_irq(ndev->irq, xcan_interrupt, priv->irq_flags, ndev->name, ndev); if (ret < 0) { @@ -850,29 +858,17 @@ static int xcan_open(struct net_device *ndev) goto err; } - ret = clk_prepare_enable(priv->can_clk); - if (ret) { - netdev_err(ndev, "unable to enable device clock\n"); - goto err_irq; - } - - ret = clk_prepare_enable(priv->bus_clk); - if (ret) { - netdev_err(ndev, "unable to enable bus clock\n"); - goto err_can_clk; - } - /* Set chip into reset mode */ ret = set_reset_mode(ndev); if (ret < 0) { netdev_err(ndev, "mode resetting failed!\n"); - goto err_bus_clk; + goto err_irq; } /* Common open */ ret = open_candev(ndev); if (ret) - goto err_bus_clk; + goto err_irq; ret = xcan_chip_start(ndev); if (ret < 0) { @@ -888,13 +884,11 @@ static int xcan_open(struct net_device *ndev) err_candev: close_candev(ndev); -err_bus_clk: - clk_disable_unprepare(priv->bus_clk); -err_can_clk: - clk_disable_unprepare(priv->can_clk); err_irq: free_irq(ndev->irq, ndev); err: + pm_runtime_put(priv->dev); + return ret; } @@ -911,12 +905,11 @@ static int xcan_close(struct net_device *ndev) netif_stop_queue(ndev); napi_disable(&priv->napi); xcan_chip_stop(ndev); - clk_disable_unprepare(priv->bus_clk); - clk_disable_unprepare(priv->can_clk); free_irq(ndev->irq, ndev); close_candev(ndev); can_led_event(ndev, CAN_LED_EVENT_STOP); + pm_runtime_put(priv->dev); return 0; } @@ -935,27 +928,20 @@ static int xcan_get_berr_counter(const struct net_device *ndev, struct xcan_priv *priv = netdev_priv(ndev); int ret; - ret = clk_prepare_enable(priv->can_clk); - if (ret) - goto err; - - ret = clk_prepare_enable(priv->bus_clk); - if (ret) - goto err_clk; + ret = pm_runtime_get_sync(priv->dev); + if (ret < 0) { + netdev_err(ndev, "%s: pm_runtime_get failed(%d)\n", + __func__, ret); + return ret; + } bec->txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK; bec->rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT); - clk_disable_unprepare(priv->bus_clk); - clk_disable_unprepare(priv->can_clk); + pm_runtime_put(priv->dev); return 0; - -err_clk: - clk_disable_unprepare(priv->can_clk); -err: - return ret; } @@ -968,15 +954,45 @@ static const struct net_device_ops xcan_netdev_ops = { /** * xcan_suspend - Suspend method for the driver - * @dev: Address of the platform_device structure + * @dev: Address of the device structure * * Put the driver into low power mode. - * Return: 0 always + * Return: 0 on success and failure value on error */ static int __maybe_unused xcan_suspend(struct device *dev) { - struct platform_device *pdev = dev_get_drvdata(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + if (!device_may_wakeup(dev)) + return pm_runtime_force_suspend(dev); + + return 0; +} + +/** + * xcan_resume - Resume from suspend + * @dev: Address of the device structure + * + * Resume operation after suspend. + * Return: 0 on success and failure value on error + */ +static int __maybe_unused xcan_resume(struct device *dev) +{ + if (!device_may_wakeup(dev)) + return pm_runtime_force_resume(dev); + + return 0; + +} + +/** + * xcan_runtime_suspend - Runtime suspend method for the driver + * @dev: Address of the device structure + * + * Put the driver into low power mode. + * Return: 0 always + */ +static int __maybe_unused xcan_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); if (netif_running(ndev)) { @@ -987,43 +1003,55 @@ static int __maybe_unused xcan_suspend(struct device *dev) priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK); priv->can.state = CAN_STATE_SLEEPING; - clk_disable(priv->bus_clk); - clk_disable(priv->can_clk); + clk_disable_unprepare(priv->bus_clk); + clk_disable_unprepare(priv->can_clk); return 0; } /** - * xcan_resume - Resume from suspend - * @dev: Address of the platformdevice structure + * xcan_runtime_resume - Runtime resume from suspend + * @dev: Address of the device structure * * Resume operation after suspend. * Return: 0 on success and failure value on error */ -static int __maybe_unused xcan_resume(struct device *dev) +static int __maybe_unused xcan_runtime_resume(struct device *dev) { - struct platform_device *pdev = dev_get_drvdata(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); int ret; + u32 isr, status; - ret = clk_enable(priv->bus_clk); + ret = clk_prepare_enable(priv->bus_clk); if (ret) { dev_err(dev, "Cannot enable clock.\n"); return ret; } - ret = clk_enable(priv->can_clk); + ret = clk_prepare_enable(priv->can_clk); if (ret) { dev_err(dev, "Cannot enable clock.\n"); clk_disable_unprepare(priv->bus_clk); return ret; } - priv->write_reg(priv, XCAN_MSR_OFFSET, 0); - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK); - priv->can.state = CAN_STATE_ERROR_ACTIVE; + priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); + isr = priv->read_reg(priv, XCAN_ISR_OFFSET); + status = priv->read_reg(priv, XCAN_SR_OFFSET); if (netif_running(ndev)) { + if (isr & XCAN_IXR_BSOFF_MASK) { + priv->can.state = CAN_STATE_BUS_OFF; + priv->write_reg(priv, XCAN_SRR_OFFSET, + XCAN_SRR_RESET_MASK); + } else if ((status & XCAN_SR_ESTAT_MASK) == + XCAN_SR_ESTAT_MASK) { + priv->can.state = CAN_STATE_ERROR_PASSIVE; + } else if (status & XCAN_SR_ERRWRN_MASK) { + priv->can.state = CAN_STATE_ERROR_WARNING; + } else { + priv->can.state = CAN_STATE_ERROR_ACTIVE; + } netif_device_attach(ndev); netif_start_queue(ndev); } @@ -1031,7 +1059,10 @@ static int __maybe_unused xcan_resume(struct device *dev) return 0; } -static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume); +static const struct dev_pm_ops xcan_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(xcan_suspend, xcan_resume) + SET_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL) +}; /** * xcan_probe - Platform registration call @@ -1072,7 +1103,7 @@ static int xcan_probe(struct platform_device *pdev) return -ENOMEM; priv = netdev_priv(ndev); - priv->dev = ndev; + priv->dev = &pdev->dev; priv->can.bittiming_const = &xcan_bittiming_const; priv->can.do_set_mode = xcan_do_set_mode; priv->can.do_get_berr_counter = xcan_get_berr_counter; @@ -1114,21 +1145,17 @@ static int xcan_probe(struct platform_device *pdev) } } - ret = clk_prepare_enable(priv->can_clk); - if (ret) { - dev_err(&pdev->dev, "unable to enable device clock\n"); - goto err_free; - } - - ret = clk_prepare_enable(priv->bus_clk); - if (ret) { - dev_err(&pdev->dev, "unable to enable bus clock\n"); - goto err_unprepare_disable_dev; - } - priv->write_reg = xcan_write_reg_le; priv->read_reg = xcan_read_reg_le; + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + netdev_err(ndev, "%s: pm_runtime_get failed(%d)\n", + __func__, ret); + goto err_pmdisable; + } + if (priv->read_reg(priv, XCAN_SR_OFFSET) != XCAN_SR_CONFIG_MASK) { priv->write_reg = xcan_write_reg_be; priv->read_reg = xcan_read_reg_be; @@ -1141,22 +1168,23 @@ static int xcan_probe(struct platform_device *pdev) ret = register_candev(ndev); if (ret) { dev_err(&pdev->dev, "fail to register failed (err=%d)\n", ret); - goto err_unprepare_disable_busclk; + goto err_disableclks; } devm_can_led_init(ndev); - clk_disable_unprepare(priv->bus_clk); - clk_disable_unprepare(priv->can_clk); + + pm_runtime_put(&pdev->dev); + netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n", priv->reg_base, ndev->irq, priv->can.clock.freq, priv->tx_max); return 0; -err_unprepare_disable_busclk: - clk_disable_unprepare(priv->bus_clk); -err_unprepare_disable_dev: - clk_disable_unprepare(priv->can_clk); +err_disableclks: + pm_runtime_put(priv->dev); +err_pmdisable: + pm_runtime_disable(&pdev->dev); err_free: free_candev(ndev); err: @@ -1175,10 +1203,8 @@ static int xcan_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct xcan_priv *priv = netdev_priv(ndev); - if (set_reset_mode(ndev) < 0) - netdev_err(ndev, "mode resetting failed!\n"); - unregister_candev(ndev); + pm_runtime_disable(&pdev->dev); netif_napi_del(&priv->napi); free_candev(ndev); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index b06dba05594a..75e245c4235e 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -19,6 +19,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/netdevice.h> +#include <linux/gpio/consumer.h> #include <linux/phy.h> #include <net/dsa.h> #include <net/switchdev.h> @@ -2323,6 +2324,7 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); u16 is_reset = (ppu_active ? 0x8800 : 0xc800); + struct gpio_desc *gpiod = ds->pd->reset; unsigned long timeout; int ret; int i; @@ -2336,6 +2338,14 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active) /* Wait for transmit queues to drain. */ usleep_range(2000, 4000); + /* If there is a gpio connected to the reset pin, toggle it */ + if (gpiod) { + gpiod_set_value_cansleep(gpiod, 1); + usleep_range(10000, 20000); + gpiod_set_value_cansleep(gpiod, 0); + usleep_range(10000, 20000); + } + /* Reset the switch. Keep the PPU active if requested. The PPU * needs to be active to support indirect phy register access * through global registers 0x18 and 0x19. diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 53ce1222b11d..8a9b493566c9 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2024,7 +2024,6 @@ read_again: skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); skb_record_rx_queue(skb, channel->queue_index); - skb_mark_napi_id(skb, napi); napi_gro_receive(napi, skb); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 991412ce6f48..6096d0242841 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1084,7 +1084,7 @@ static const struct net_device_ops xgene_ndev_ops = { }; #ifdef CONFIG_ACPI -static int xgene_get_port_id_acpi(struct device *dev, +static void xgene_get_port_id_acpi(struct device *dev, struct xgene_enet_pdata *pdata) { acpi_status status; @@ -1097,24 +1097,19 @@ static int xgene_get_port_id_acpi(struct device *dev, pdata->port_id = temp; } - return 0; + return; } #endif -static int xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata) +static void xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata) { u32 id = 0; - int ret; - ret = of_property_read_u32(dev->of_node, "port-id", &id); - if (ret) { - pdata->port_id = 0; - ret = 0; - } else { - pdata->port_id = id & BIT(0); - } + of_property_read_u32(dev->of_node, "port-id", &id); - return ret; + pdata->port_id = id & BIT(0); + + return; } static int xgene_get_tx_delay(struct xgene_enet_pdata *pdata) @@ -1209,13 +1204,11 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) } if (dev->of_node) - ret = xgene_get_port_id_dt(dev, pdata); + xgene_get_port_id_dt(dev, pdata); #ifdef CONFIG_ACPI else - ret = xgene_get_port_id_acpi(dev, pdata); + xgene_get_port_id_acpi(dev, pdata); #endif - if (ret) - return ret; if (!device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN)) eth_hw_addr_random(ndev); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 858106352ce9..993c780bdfab 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1216,7 +1216,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, /* Initialize SW view of the ring */ spin_lock_init(&ring->lock); ring->priv = priv; - netif_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); + netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); ring->index = index; ring->size = size; ring->alloc_size = ring->size; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index b5e64b02200c..0b214b5d944a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -540,10 +540,6 @@ struct bnx2x_fastpath { struct napi_struct napi; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned long busy_poll_state; -#endif - union host_hc_status_block status_blk; /* chip independent shortcuts into sb structure */ __le16 *sb_index_values; @@ -617,115 +613,6 @@ struct bnx2x_fastpath { #define bnx2x_fp_stats(bp, fp) (&((bp)->fp_stats[(fp)->index])) #define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) -#ifdef CONFIG_NET_RX_BUSY_POLL - -enum bnx2x_fp_state { - BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */ - - BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */ - BNX2X_STATE_FP_NAPI_REQ = BIT(1), - - BNX2X_STATE_FP_POLL_BIT = 2, - BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */ - - BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */ -}; - -static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) -{ - WRITE_ONCE(fp->busy_poll_state, 0); -} - -/* called from the device poll routine to get ownership of a FP */ -static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) -{ - unsigned long prev, old = READ_ONCE(fp->busy_poll_state); - - while (1) { - switch (old) { - case BNX2X_STATE_FP_POLL: - /* make sure bnx2x_fp_lock_poll() wont starve us */ - set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT, - &fp->busy_poll_state); - /* fallthrough */ - case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ: - return false; - default: - break; - } - prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI); - if (unlikely(prev != old)) { - old = prev; - continue; - } - return true; - } -} - -static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) -{ - smp_wmb(); - fp->busy_poll_state = 0; -} - -/* called from bnx2x_low_latency_poll() */ -static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) -{ - return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0; -} - -static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) -{ - smp_mb__before_atomic(); - clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state); -} - -/* true if a socket is polling */ -static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) -{ - return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL; -} - -/* false if fp is currently owned */ -static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp) -{ - set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state); - return !bnx2x_fp_ll_polling(fp); - -} -#else -static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) -{ -} - -static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) -{ - return true; -} - -static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) -{ -} - -static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) -{ - return false; -} - -static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) -{ -} - -static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) -{ - return false; -} -static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp) -{ - return true; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* Use 2500 as a mini-jumbo MTU for FCoE */ #define BNX2X_FCOE_MINI_JUMBO_MTU 2500 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index f8d7a2f06950..d9add7c02e42 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -46,7 +46,6 @@ static void bnx2x_add_all_napi_cnic(struct bnx2x *bp) for_each_rx_queue_cnic(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } @@ -58,7 +57,6 @@ static void bnx2x_add_all_napi(struct bnx2x *bp) for_each_eth_queue(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } @@ -1094,12 +1092,7 @@ reuse_rx: __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); - skb_mark_napi_id(skb, &fp->napi); - - if (bnx2x_fp_ll_polling(fp)) - netif_receive_skb(skb); - else - napi_gro_receive(&fp->napi, skb); + napi_gro_receive(&fp->napi, skb); next_rx: rx_buf->data = NULL; @@ -1869,7 +1862,6 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp) int i; for_each_rx_queue_cnic(bp, i) { - bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -1879,7 +1871,6 @@ static void bnx2x_napi_enable(struct bnx2x *bp) int i; for_each_eth_queue(bp, i) { - bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -1890,8 +1881,6 @@ static void bnx2x_napi_disable_cnic(struct bnx2x *bp) for_each_rx_queue_cnic(bp, i) { napi_disable(&bnx2x_fp(bp, i, napi)); - while (!bnx2x_fp_ll_disable(&bp->fp[i])) - usleep_range(1000, 2000); } } @@ -1901,8 +1890,6 @@ static void bnx2x_napi_disable(struct bnx2x *bp) for_each_eth_queue(bp, i) { napi_disable(&bnx2x_fp(bp, i, napi)); - while (!bnx2x_fp_ll_disable(&bp->fp[i])) - usleep_range(1000, 2000); } } @@ -3232,9 +3219,6 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) return 0; } #endif - if (!bnx2x_fp_lock_napi(fp)) - return budget; - for_each_cos_in_tx_queue(fp, cos) if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) bnx2x_tx_int(bp, fp->txdata_ptr[cos]); @@ -3243,14 +3227,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) work_done += bnx2x_rx_int(fp, budget - work_done); /* must not complete if we consumed full budget */ - if (work_done >= budget) { - bnx2x_fp_unlock_napi(fp); + if (work_done >= budget) break; - } } - bnx2x_fp_unlock_napi(fp); - /* Fall out from the NAPI loop if needed */ if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { @@ -3294,32 +3274,6 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) return work_done; } -#ifdef CONFIG_NET_RX_BUSY_POLL -/* must be called with local_bh_disable()d */ -int bnx2x_low_latency_recv(struct napi_struct *napi) -{ - struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath, - napi); - struct bnx2x *bp = fp->bp; - int found = 0; - - if ((bp->state == BNX2X_STATE_CLOSED) || - (bp->state == BNX2X_STATE_ERROR) || - (bp->dev->features & (NETIF_F_LRO | NETIF_F_GRO))) - return LL_FLUSH_FAILED; - - if (!bnx2x_fp_lock_poll(fp)) - return LL_FLUSH_BUSY; - - if (bnx2x_has_rx_work(fp)) - found = bnx2x_rx_int(fp, 4); - - bnx2x_fp_unlock_poll(fp); - - return found; -} -#endif - /* we split the first BD into headers and data BDs * to ease the pain of our fellow microcode engineers * we use one mapping for both BDs diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index b7d32e8412f1..4cbb03f87b5a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -570,13 +570,6 @@ int bnx2x_enable_msix(struct bnx2x *bp); int bnx2x_enable_msi(struct bnx2x *bp); /** - * bnx2x_low_latency_recv - LL callback - * - * @napi: napi structure - */ -int bnx2x_low_latency_recv(struct napi_struct *napi); - -/** * bnx2x_alloc_mem_bp - allocate memories outsize main driver structure * * @bp: driver handle diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index d84efcd34fac..a3ce9f2a2335 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -52,7 +52,7 @@ static const struct { { Q_STATS_OFFSET32(rx_skb_alloc_failed), 4, "[%s]: rx_skb_alloc_discard" }, { Q_STATS_OFFSET32(hw_csum_err), 4, "[%s]: rx_csum_offload_errors" }, - + { Q_STATS_OFFSET32(driver_xoff), 4, "[%s]: tx_exhaustion_events" }, { Q_STATS_OFFSET32(total_bytes_transmitted_hi), 8, "[%s]: tx_bytes" }, /* 10 */{ Q_STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, "[%s]: tx_ucast_packets" }, @@ -128,7 +128,8 @@ static const struct { 4, STATS_FLAGS_BOTH, "rx_skb_alloc_discard" }, { STATS_OFFSET32(hw_csum_err), 4, STATS_FLAGS_BOTH, "rx_csum_offload_errors" }, - + { STATS_OFFSET32(driver_xoff), + 4, STATS_FLAGS_BOTH, "tx_exhaustion_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), @@ -3068,9 +3069,7 @@ static void bnx2x_self_test(struct net_device *dev, #define IS_PORT_STAT(i) \ ((bnx2x_stats_arr[i].flags & STATS_FLAGS_BOTH) == STATS_FLAGS_PORT) #define IS_FUNC_STAT(i) (bnx2x_stats_arr[i].flags & STATS_FLAGS_FUNC) -#define HIDE_PORT_STAT(bp) \ - ((IS_MF(bp) && !(bp->msg_enable & BNX2X_MSG_STATS)) || \ - IS_VF(bp)) +#define HIDE_PORT_STAT(bp) IS_VF(bp) /* ethtool statistics are displayed for all regular ethernet queues and the * fcoe L2 queue if not disabled diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index cafd5de675cf..27aa0802d87d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -3013,8 +3013,8 @@ struct afex_stats { }; #define BCM_5710_FW_MAJOR_VERSION 7 -#define BCM_5710_FW_MINOR_VERSION 12 -#define BCM_5710_FW_REVISION_VERSION 30 +#define BCM_5710_FW_MINOR_VERSION 13 +#define BCM_5710_FW_REVISION_VERSION 1 #define BCM_5710_FW_ENGINEERING_VERSION 0 #define BCM_5710_FW_COMPILE_FLAGS 1 @@ -3583,7 +3583,7 @@ enum classify_rule { CLASSIFY_RULE_OPCODE_MAC, CLASSIFY_RULE_OPCODE_VLAN, CLASSIFY_RULE_OPCODE_PAIR, - CLASSIFY_RULE_OPCODE_VXLAN, + CLASSIFY_RULE_OPCODE_IMAC_VNI, MAX_CLASSIFY_RULE }; @@ -3826,6 +3826,17 @@ struct eth_classify_header { __le32 echo; }; +/* + * Command for adding/removing a Inner-MAC/VNI classification rule + */ +struct eth_classify_imac_vni_cmd { + struct eth_classify_cmd_header header; + __le32 vni; + __le16 imac_lsb; + __le16 imac_mid; + __le16 imac_msb; + __le16 reserved1; +}; /* * Command for adding/removing a MAC classification rule @@ -3869,14 +3880,6 @@ struct eth_classify_vlan_cmd { /* * Command for adding/removing a VXLAN classification rule */ -struct eth_classify_vxlan_cmd { - struct eth_classify_cmd_header header; - __le32 vni; - __le16 inner_mac_lsb; - __le16 inner_mac_mid; - __le16 inner_mac_msb; - __le16 reserved1; -}; /* * union for eth classification rule @@ -3885,7 +3888,7 @@ union eth_classify_rule_cmd { struct eth_classify_mac_cmd mac; struct eth_classify_vlan_cmd vlan; struct eth_classify_pair_cmd pair; - struct eth_classify_vxlan_cmd vxlan; + struct eth_classify_imac_vni_cmd imac_vni; }; /* @@ -5623,6 +5626,14 @@ enum igu_mode { MAX_IGU_MODE }; +/* + * Inner Headers Classification Type + */ +enum inner_clss_type { + INNER_CLSS_DISABLED, + INNER_CLSS_USE_VLAN, + INNER_CLSS_USE_VNI, + MAX_INNER_CLSS_TYPE}; /* * IP versions @@ -5953,14 +5964,6 @@ enum ts_offset_cmd { MAX_TS_OFFSET_CMD }; -/* Tunnel Mode */ -enum tunnel_mode { - TUNN_MODE_NONE, - TUNN_MODE_VXLAN, - TUNN_MODE_GRE, - MAX_TUNNEL_MODE -}; - /* zone A per-queue data */ struct ustorm_queue_zone_data { struct ustorm_eth_rx_producers eth_rx_producers; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c9b036789184..2273576404b4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13004,9 +13004,6 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn, #endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = bnx2x_low_latency_recv, -#endif .ndo_get_phys_port_id = bnx2x_get_phys_port_id, .ndo_set_vf_link_state = bnx2x_set_vf_link_state, .ndo_features_check = bnx2x_features_check, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index db15c5ee09c5..f2d0dc9b1c41 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4227,12 +4227,10 @@ static void bnxt_init_napi(struct bnxt *bp) bnapi = bp->bnapi[i]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } else { bnapi = bp->bnapi[0]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 17f017ab4dac..b15a60d787c7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2041,11 +2041,11 @@ static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } ring = &priv->tx_rings[DESC_INDEX]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index b7b93e7a643d..48d8fbb1c220 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1864,7 +1864,6 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb->truesize += skb->data_len; skb->ip_summed = CHECKSUM_UNNECESSARY; skb_record_rx_queue(skb, rxq->rspq.idx); - skb_mark_napi_id(skb, &rxq->rspq.napi); pi = netdev_priv(skb->dev); if (pi->rxtstamp) cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb), @@ -2528,7 +2527,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto err; netif_napi_add(dev, &iq->napi, napi_rx_handler, 64); - napi_hash_add(&iq->napi); iq->cur_desc = iq->desc; iq->cidx = 0; iq->gen = 1; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index b36643ef0593..b2182d3ba3cc 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2458,13 +2458,11 @@ static int enic_dev_init(struct enic *enic) switch (vnic_dev_get_intr_mode(enic->vdev)) { default: netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); - napi_hash_add(&enic->napi[0]); break; case VNIC_DEV_INTR_MODE_MSIX: for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], enic_poll_msix_rq, NAPI_POLL_WEIGHT); - napi_hash_add(&enic->napi[i]); } for (i = 0; i < enic->wq_count; i++) netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)], diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index ccca4799c27b..f92b6d948398 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -70,7 +70,6 @@ static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); static void rio_timer (unsigned long data); static void rio_tx_timeout (struct net_device *dev); -static void alloc_list (struct net_device *dev); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); @@ -253,19 +252,6 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_out_unmap_rx; - if (np->chip_id == CHIP_IP1000A && - (np->pdev->revision == 0x40 || np->pdev->revision == 0x41)) { - /* PHY magic taken from ipg driver, undocumented registers */ - mii_write(dev, np->phy_addr, 31, 0x0001); - mii_write(dev, np->phy_addr, 27, 0x01e0); - mii_write(dev, np->phy_addr, 31, 0x0002); - mii_write(dev, np->phy_addr, 27, 0xeb8e); - mii_write(dev, np->phy_addr, 31, 0x0000); - mii_write(dev, np->phy_addr, 30, 0x005e); - /* advertise 1000BASE-T half & full duplex, prefer MASTER */ - mii_write(dev, np->phy_addr, MII_CTRL1000, 0x0700); - } - /* Fiber device? */ np->phy_media = (dr16(ASICCtrl) & PhyMedia) ? 1 : 0; np->link_status = 0; @@ -275,13 +261,11 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) if (np->an_enable == 2) { np->an_enable = 1; } - mii_set_media_pcs (dev); } else { /* Auto-Negotiation is mandatory for 1000BASE-T, IEEE 802.3ab Annex 28D page 14 */ if (np->speed == 1000) np->an_enable = 1; - mii_set_media (dev); } err = register_netdev (dev); @@ -446,19 +430,106 @@ static void rio_set_led_mode(struct net_device *dev) dw32(ASICCtrl, mode); } -static int -rio_open (struct net_device *dev) +static inline dma_addr_t desc_to_dma(struct netdev_desc *desc) +{ + return le64_to_cpu(desc->fraginfo) & DMA_BIT_MASK(48); +} + +static void free_list(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + struct sk_buff *skb; + int i; + + /* Free all the skbuffs in the queue. */ + for (i = 0; i < RX_RING_SIZE; i++) { + skb = np->rx_skbuff[i]; + if (skb) { + pci_unmap_single(np->pdev, desc_to_dma(&np->rx_ring[i]), + skb->len, PCI_DMA_FROMDEVICE); + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + } + np->rx_ring[i].status = 0; + np->rx_ring[i].fraginfo = 0; + } + for (i = 0; i < TX_RING_SIZE; i++) { + skb = np->tx_skbuff[i]; + if (skb) { + pci_unmap_single(np->pdev, desc_to_dma(&np->tx_ring[i]), + skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); + np->tx_skbuff[i] = NULL; + } + } +} + +static void rio_reset_ring(struct netdev_private *np) +{ + int i; + + np->cur_rx = 0; + np->cur_tx = 0; + np->old_rx = 0; + np->old_tx = 0; + + for (i = 0; i < TX_RING_SIZE; i++) + np->tx_ring[i].status = cpu_to_le64(TFDDone); + + for (i = 0; i < RX_RING_SIZE; i++) + np->rx_ring[i].status = 0; +} + + /* allocate and initialize Tx and Rx descriptors */ +static int alloc_list(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int i; + + rio_reset_ring(np); + np->rx_buf_sz = (dev->mtu <= 1500 ? PACKET_SIZE : dev->mtu + 32); + + /* Initialize Tx descriptors, TFDListPtr leaves in start_xmit(). */ + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_skbuff[i] = NULL; + np->tx_ring[i].next_desc = cpu_to_le64(np->tx_ring_dma + + ((i + 1) % TX_RING_SIZE) * + sizeof(struct netdev_desc)); + } + + /* Initialize Rx descriptors & allocate buffers */ + for (i = 0; i < RX_RING_SIZE; i++) { + /* Allocated fixed size of skbuff */ + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(dev, np->rx_buf_sz); + np->rx_skbuff[i] = skb; + if (!skb) { + free_list(dev); + return -ENOMEM; + } + + np->rx_ring[i].next_desc = cpu_to_le64(np->rx_ring_dma + + ((i + 1) % RX_RING_SIZE) * + sizeof(struct netdev_desc)); + /* Rubicon now supports 40 bits of addressing space. */ + np->rx_ring[i].fraginfo = + cpu_to_le64(pci_map_single( + np->pdev, skb->data, np->rx_buf_sz, + PCI_DMA_FROMDEVICE)); + np->rx_ring[i].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); + } + + return 0; +} + +static void rio_hw_init(struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; - const int irq = np->pdev->irq; int i; u16 macctrl; - i = request_irq(irq, rio_interrupt, IRQF_SHARED, dev->name, dev); - if (i) - return i; - /* Reset all logic functions */ dw16(ASICCtrl + 2, GlobalReset | DMAReset | FIFOReset | NetworkReset | HostReset); @@ -469,11 +540,31 @@ rio_open (struct net_device *dev) /* DebugCtrl bit 4, 5, 9 must set */ dw32(DebugCtrl, dr32(DebugCtrl) | 0x0230); + if (np->chip_id == CHIP_IP1000A && + (np->pdev->revision == 0x40 || np->pdev->revision == 0x41)) { + /* PHY magic taken from ipg driver, undocumented registers */ + mii_write(dev, np->phy_addr, 31, 0x0001); + mii_write(dev, np->phy_addr, 27, 0x01e0); + mii_write(dev, np->phy_addr, 31, 0x0002); + mii_write(dev, np->phy_addr, 27, 0xeb8e); + mii_write(dev, np->phy_addr, 31, 0x0000); + mii_write(dev, np->phy_addr, 30, 0x005e); + /* advertise 1000BASE-T half & full duplex, prefer MASTER */ + mii_write(dev, np->phy_addr, MII_CTRL1000, 0x0700); + } + + if (np->phy_media) + mii_set_media_pcs(dev); + else + mii_set_media(dev); + /* Jumbo frame */ if (np->jumbo != 0) dw16(MaxFrameSize, MAX_JUMBO+14); - alloc_list (dev); + /* Set RFDListPtr */ + dw32(RFDListPtr0, np->rx_ring_dma); + dw32(RFDListPtr1, 0); /* Set station address */ /* 16 or 32-bit access is required by TC9020 datasheet but 8-bit works @@ -509,10 +600,6 @@ rio_open (struct net_device *dev) dw32(MACCtrl, dr32(MACCtrl) | AutoVLANuntagging); } - setup_timer(&np->timer, rio_timer, (unsigned long)dev); - np->timer.expires = jiffies + 1*HZ; - add_timer (&np->timer); - /* Start Tx/Rx */ dw32(MACCtrl, dr32(MACCtrl) | StatsEnable | RxEnable | TxEnable); @@ -522,6 +609,42 @@ rio_open (struct net_device *dev) macctrl |= (np->tx_flow) ? TxFlowControlEnable : 0; macctrl |= (np->rx_flow) ? RxFlowControlEnable : 0; dw16(MACCtrl, macctrl); +} + +static void rio_hw_stop(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->ioaddr; + + /* Disable interrupts */ + dw16(IntEnable, 0); + + /* Stop Tx and Rx logics */ + dw32(MACCtrl, TxDisable | RxDisable | StatsDisable); +} + +static int rio_open(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + const int irq = np->pdev->irq; + int i; + + i = alloc_list(dev); + if (i) + return i; + + rio_hw_init(dev); + + i = request_irq(irq, rio_interrupt, IRQF_SHARED, dev->name, dev); + if (i) { + rio_hw_stop(dev); + free_list(dev); + return i; + } + + setup_timer(&np->timer, rio_timer, (unsigned long)dev); + np->timer.expires = jiffies + 1 * HZ; + add_timer(&np->timer); netif_start_queue (dev); @@ -586,60 +709,6 @@ rio_tx_timeout (struct net_device *dev) dev->trans_start = jiffies; /* prevent tx timeout */ } - /* allocate and initialize Tx and Rx descriptors */ -static void -alloc_list (struct net_device *dev) -{ - struct netdev_private *np = netdev_priv(dev); - void __iomem *ioaddr = np->ioaddr; - int i; - - np->cur_rx = np->cur_tx = 0; - np->old_rx = np->old_tx = 0; - np->rx_buf_sz = (dev->mtu <= 1500 ? PACKET_SIZE : dev->mtu + 32); - - /* Initialize Tx descriptors, TFDListPtr leaves in start_xmit(). */ - for (i = 0; i < TX_RING_SIZE; i++) { - np->tx_skbuff[i] = NULL; - np->tx_ring[i].status = cpu_to_le64 (TFDDone); - np->tx_ring[i].next_desc = cpu_to_le64 (np->tx_ring_dma + - ((i+1)%TX_RING_SIZE) * - sizeof (struct netdev_desc)); - } - - /* Initialize Rx descriptors */ - for (i = 0; i < RX_RING_SIZE; i++) { - np->rx_ring[i].next_desc = cpu_to_le64 (np->rx_ring_dma + - ((i + 1) % RX_RING_SIZE) * - sizeof (struct netdev_desc)); - np->rx_ring[i].status = 0; - np->rx_ring[i].fraginfo = 0; - np->rx_skbuff[i] = NULL; - } - - /* Allocate the rx buffers */ - for (i = 0; i < RX_RING_SIZE; i++) { - /* Allocated fixed size of skbuff */ - struct sk_buff *skb; - - skb = netdev_alloc_skb_ip_align(dev, np->rx_buf_sz); - np->rx_skbuff[i] = skb; - if (skb == NULL) - break; - - /* Rubicon now supports 40 bits of addressing space. */ - np->rx_ring[i].fraginfo = - cpu_to_le64 ( pci_map_single ( - np->pdev, skb->data, np->rx_buf_sz, - PCI_DMA_FROMDEVICE)); - np->rx_ring[i].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); - } - - /* Set RFDListPtr */ - dw32(RFDListPtr0, np->rx_ring_dma); - dw32(RFDListPtr1, 0); -} - static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev) { @@ -748,11 +817,6 @@ rio_interrupt (int irq, void *dev_instance) return IRQ_RETVAL(handled); } -static inline dma_addr_t desc_to_dma(struct netdev_desc *desc) -{ - return le64_to_cpu(desc->fraginfo) & DMA_BIT_MASK(48); -} - static void rio_free_tx (struct net_device *dev, int irq) { @@ -1730,44 +1794,16 @@ static int rio_close (struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); - void __iomem *ioaddr = np->ioaddr; - struct pci_dev *pdev = np->pdev; - struct sk_buff *skb; - int i; netif_stop_queue (dev); - /* Disable interrupts */ - dw16(IntEnable, 0); - - /* Stop Tx and Rx logics */ - dw32(MACCtrl, TxDisable | RxDisable | StatsDisable); + rio_hw_stop(dev); free_irq(pdev->irq, dev); del_timer_sync (&np->timer); - /* Free all the skbuffs in the queue. */ - for (i = 0; i < RX_RING_SIZE; i++) { - skb = np->rx_skbuff[i]; - if (skb) { - pci_unmap_single(pdev, desc_to_dma(&np->rx_ring[i]), - skb->len, PCI_DMA_FROMDEVICE); - dev_kfree_skb (skb); - np->rx_skbuff[i] = NULL; - } - np->rx_ring[i].status = 0; - np->rx_ring[i].fraginfo = 0; - } - for (i = 0; i < TX_RING_SIZE; i++) { - skb = np->tx_skbuff[i]; - if (skb) { - pci_unmap_single(pdev, desc_to_dma(&np->tx_ring[i]), - skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb (skb); - np->tx_skbuff[i] = NULL; - } - } + free_list(dev); return 0; } @@ -1795,11 +1831,55 @@ rio_remove1 (struct pci_dev *pdev) } } +#ifdef CONFIG_PM_SLEEP +static int rio_suspend(struct device *device) +{ + struct net_device *dev = dev_get_drvdata(device); + struct netdev_private *np = netdev_priv(dev); + + if (!netif_running(dev)) + return 0; + + netif_device_detach(dev); + del_timer_sync(&np->timer); + rio_hw_stop(dev); + + return 0; +} + +static int rio_resume(struct device *device) +{ + struct net_device *dev = dev_get_drvdata(device); + struct netdev_private *np = netdev_priv(dev); + + if (!netif_running(dev)) + return 0; + + rio_reset_ring(np); + rio_hw_init(dev); + np->timer.expires = jiffies + 1 * HZ; + add_timer(&np->timer); + netif_device_attach(dev); + dl2k_enable_int(np); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(rio_pm_ops, rio_suspend, rio_resume); +#define RIO_PM_OPS (&rio_pm_ops) + +#else + +#define RIO_PM_OPS NULL + +#endif /* CONFIG_PM_SLEEP */ + static struct pci_driver rio_driver = { .name = "dl2k", .id_table = rio_pci_tbl, .probe = rio_probe1, .remove = rio_remove1, + .driver.pm = RIO_PM_OPS, }; module_pci_driver(rio_driver); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b6ad02909d6b..4cab8879f5ae 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2184,7 +2184,6 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo, skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3); skb->csum_level = rxcp->tunneled; - skb_mark_napi_id(skb, napi); if (rxcp->vlanf) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag); @@ -2631,7 +2630,6 @@ static int be_evt_queues_create(struct be_adapter *adapter) eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); - napi_hash_add(&eqo->napi); } return 0; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index b2a32209ffbf..d2328fc5da57 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3277,7 +3277,6 @@ static void fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) { struct device_node *np = pdev->dev.of_node; - int err; *num_tx = *num_rx = 1; @@ -3285,13 +3284,9 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) return; /* parse the num of tx and rx queues */ - err = of_property_read_u32(np, "fsl,num-tx-queues", num_tx); - if (err) - *num_tx = 1; + of_property_read_u32(np, "fsl,num-tx-queues", num_tx); - err = of_property_read_u32(np, "fsl,num-rx-queues", num_rx); - if (err) - *num_rx = 1; + of_property_read_u32(np, "fsl,num-rx-queues", num_rx); if (*num_tx < 1 || *num_tx > FEC_ENET_MAX_TX_QS) { dev_warn(&pdev->dev, "Invalid num_tx(=%d), fall back to 1\n", diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index cf8e54652df9..48a9c176e0d1 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1050,7 +1050,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); - netif_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); + netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3e6b9b437497..67b1850c034e 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -738,7 +738,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) struct gfar_private *priv = NULL; struct device_node *np = ofdev->dev.of_node; struct device_node *child = NULL; - struct property *stash; u32 stash_len = 0; u32 stash_idx = 0; unsigned int num_tx_qs, num_rx_qs; @@ -854,9 +853,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) goto err_grp_init; } - stash = of_find_property(np, "bd-stash", NULL); - - if (stash) { + if (of_property_read_bool(np, "bd-stash")) { priv->device_flags |= FSL_GIANFAR_DEV_HAS_BD_STASHING; priv->bd_stash_en = 1; } @@ -1347,12 +1344,12 @@ static int gfar_probe(struct platform_device *ofdev) if (priv->poll_mode == GFAR_SQ_POLLING) { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx_sq, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx_sq, 2); } else { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx, 2); } } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 14440200499b..48809e5d3f79 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -33,7 +33,7 @@ #include "fm10k_pf.h" #include "fm10k_vf.h" -#define FM10K_MAX_JUMBO_FRAME_SIZE 15358 /* Maximum supported size 15K */ +#define FM10K_MAX_JUMBO_FRAME_SIZE 15342 /* Maximum supported size 15K */ #define MAX_QUEUES FM10K_MAX_QUEUES_PF diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index e76a44cf330c..746a1986690b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1428,6 +1428,10 @@ static int fm10k_poll(struct napi_struct *napi, int budget) fm10k_for_each_ring(ring, q_vector->tx) clean_complete &= fm10k_clean_tx_irq(q_vector, ring); + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) + return budget; + /* attempt to distribute budget to each queue fairly, but don't * allow the budget to go below 1 because we'll exit polling */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 318a212f0a78..35afd711d144 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -77,6 +77,7 @@ struct fm10k_hw; #define FM10K_PCIE_SRIOV_CTRL_VFARI 0x10 #define FM10K_ERR_PARAM -2 +#define FM10K_ERR_NO_RESOURCES -3 #define FM10K_ERR_REQUESTS_PENDING -4 #define FM10K_ERR_RESET_REQUESTED -5 #define FM10K_ERR_DMA_PENDING -6 diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index 36c8b0aa08fd..3a18ef1cc017 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -103,7 +103,12 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) s32 err; u16 i; - /* assume we always have at least 1 queue */ + /* verify we have at least 1 queue */ + if (!~fm10k_read_reg(hw, FM10K_TXQCTL(0)) || + !~fm10k_read_reg(hw, FM10K_RXQCTL(0))) + return FM10K_ERR_NO_RESOURCES; + + /* determine how many queues we have */ for (i = 1; tqdloc0 && (i < FM10K_MAX_QUEUES_POOL); i++) { /* verify the Descriptor cache offsets are increasing */ tqdloc = ~fm10k_read_reg(hw, FM10K_TQDLOC(i)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d4b7af9a2fc8..d1a91c8178d6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -103,8 +103,8 @@ static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer, len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos)); bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos += len; return len; @@ -353,8 +353,8 @@ static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, bytes_not_copied = copy_to_user(buffer, buf, len); kfree(buf); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos = len; return len; @@ -981,12 +981,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp, if (!cmd_buf) return count; bytes_not_copied = copy_from_user(cmd_buf, buffer, count); - if (bytes_not_copied < 0) { + if (bytes_not_copied) { kfree(cmd_buf); - return bytes_not_copied; + return -EFAULT; } - if (bytes_not_copied > 0) - count -= bytes_not_copied; cmd_buf[count] = '\0'; cmd_buf_tmp = strchr(cmd_buf, '\n'); @@ -2034,8 +2032,8 @@ static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, bytes_not_copied = copy_to_user(buffer, buf, len); kfree(buf); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos = len; return len; @@ -2068,10 +2066,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf)); bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf, buffer, count); - if (bytes_not_copied < 0) - return bytes_not_copied; - else if (bytes_not_copied > 0) - count -= bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; i40e_dbg_netdev_ops_buf[count] = '\0'; buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n'); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 635b3ac17877..6649ce4ba2de 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1632,7 +1632,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) continue; } #endif - skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); rx_desc->wb.qword1.status_error_len = 0; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 47e9a90d6b10..77968b184b1f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1090,7 +1090,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) continue; } #endif - skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); rx_desc->wb.qword1.status_error_len = 0; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index d962164dfb0f..6ad62656c75e 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -307,10 +307,9 @@ static irqreturn_t i40evf_msix_aq(int irq, void *data) struct i40e_hw *hw = &adapter->hw; u32 val; - /* handle non-queue interrupts */ - rd32(hw, I40E_VFINT_ICR01); - rd32(hw, I40E_VFINT_ICR0_ENA1); - + /* handle non-queue interrupts, these reads clear the registers */ + val = rd32(hw, I40E_VFINT_ICR01); + val = rd32(hw, I40E_VFINT_ICR0_ENA1); val = rd32(hw, I40E_VFINT_DYN_CTL01) | I40E_VFINT_DYN_CTL01_CLEARPBA_MASK; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 631c603fc966..5f988703e1b7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -620,8 +620,7 @@ static void ixgbe_fcoe_dma_pool_free(struct ixgbe_fcoe *fcoe, unsigned int cpu) struct ixgbe_fcoe_ddp_pool *ddp_pool; ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu); - if (ddp_pool->pool) - dma_pool_destroy(ddp_pool->pool); + dma_pool_destroy(ddp_pool->pool); ddp_pool->pool = NULL; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index f3168bcc7d87..e771e764daa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -844,7 +844,6 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); - napi_hash_add(&q_vector->napi); #ifdef CONFIG_NET_RX_BUSY_POLL /* initialize busy poll */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 47395ff5d908..c95042ee30de 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -65,9 +65,6 @@ #include "ixgbe_common.h" #include "ixgbe_dcb_82599.h" #include "ixgbe_sriov.h" -#ifdef CONFIG_IXGBE_VXLAN -#include <net/vxlan.h> -#endif char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = @@ -1659,6 +1656,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, struct sk_buff *skb) { + skb_mark_napi_id(skb, &q_vector->napi); if (ixgbe_qv_busy_polling(q_vector)) netif_receive_skb(skb); else @@ -2123,7 +2121,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } #endif /* IXGBE_FCOE */ - skb_mark_napi_id(skb, &q_vector->napi); ixgbe_rx_skb(q_vector, skb); /* update budget accounting */ @@ -2757,7 +2754,7 @@ static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data) /* EIAM disabled interrupts (on this vector) for us */ if (q_vector->rx.ring || q_vector->tx.ring) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -2786,7 +2783,8 @@ int ixgbe_poll(struct napi_struct *napi, int budget) ixgbe_for_each_ring(ring, q_vector->tx) clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); - if (!ixgbe_qv_lock_napi(q_vector)) + /* Exit if we are called by netpoll or busy polling is active */ + if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector)) return budget; /* attempt to distribute budget to each queue fairly, but don't allow @@ -2950,7 +2948,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data) ixgbe_ptp_check_pps_event(adapter, eicr); /* would disable interrupts here but EIAM disabled it */ - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); /* * re-enable link(maybe) and non-queue interrupts, no flush. @@ -3315,8 +3313,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, } /** - * Return a number of entries in the RSS indirection table - * + * ixgbe_rss_indir_tbl_entries - Return RSS indirection table entries * @adapter: device handle * * - 82598/82599/X540: 128 @@ -3334,8 +3331,7 @@ u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter) } /** - * Write the RETA table to HW - * + * ixgbe_store_reta - Write the RETA table to HW * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW. @@ -3374,8 +3370,7 @@ void ixgbe_store_reta(struct ixgbe_adapter *adapter) } /** - * Write the RETA table to HW (for x550 devices in SRIOV mode) - * + * ixgbe_store_vfreta - Write the RETA table to HW (x550 devices in SRIOV mode) * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index ebe0ac950b14..bf2ae8daf717 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -26,6 +26,8 @@ #include "ixgbe_common.h" #include "ixgbe_phy.h" +static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed); + static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw) { struct ixgbe_mac_info *mac = &hw->mac; @@ -85,79 +87,6 @@ static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value) } /** - * ixgbe_check_cs4227_reg - Perform diag on a CS4227 register - * @hw: pointer to hardware structure - * @reg: the register to check - * - * Performs a diagnostic on a register in the CS4227 chip. Returns an error - * if it is not operating correctly. - * This function assumes that the caller has acquired the proper semaphore. - */ -static s32 ixgbe_check_cs4227_reg(struct ixgbe_hw *hw, u16 reg) -{ - s32 status; - u32 retry; - u16 reg_val; - - reg_val = (IXGBE_CS4227_EDC_MODE_DIAG << 1) | 1; - status = ixgbe_write_cs4227(hw, reg, reg_val); - if (status) - return status; - for (retry = 0; retry < IXGBE_CS4227_RETRIES; retry++) { - msleep(IXGBE_CS4227_CHECK_DELAY); - reg_val = 0xFFFF; - ixgbe_read_cs4227(hw, reg, ®_val); - if (!reg_val) - break; - } - if (reg_val) { - hw_err(hw, "CS4227 reg 0x%04X failed diagnostic\n", reg); - return status; - } - - return 0; -} - -/** - * ixgbe_get_cs4227_status - Return CS4227 status - * @hw: pointer to hardware structure - * - * Performs a diagnostic on the CS4227 chip. Returns an error if it is - * not operating correctly. - * This function assumes that the caller has acquired the proper semaphore. - */ -static s32 ixgbe_get_cs4227_status(struct ixgbe_hw *hw) -{ - s32 status; - u16 value = 0; - - /* Exit if the diagnostic has already been performed. */ - status = ixgbe_read_cs4227(hw, IXGBE_CS4227_SCRATCH, &value); - if (status) - return status; - if (value == IXGBE_CS4227_RESET_COMPLETE) - return 0; - - /* Check port 0. */ - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB); - if (status) - return status; - - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB); - if (status) - return status; - - /* Check port 1. */ - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB + - (1 << 12)); - if (status) - return status; - - return ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB + - (1 << 12)); -} - -/** * ixgbe_read_pe - Read register from port expander * @hw: pointer to hardware structure * @reg: register number to read @@ -326,13 +255,6 @@ static void ixgbe_check_cs4227(struct ixgbe_hw *hw) return; } - /* Is the CS4227 working correctly? */ - status = ixgbe_get_cs4227_status(hw); - if (status) { - hw_err(hw, "CS4227 status failed: %d", status); - goto out; - } - /* Record completion for next time. */ status = ixgbe_write_cs4227(hw, IXGBE_CS4227_SCRATCH, IXGBE_CS4227_RESET_COMPLETE); @@ -1257,31 +1179,71 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, if (status) return status; - /* Configure CS4227 LINE side to 10G SR. */ - slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); - value = IXGBE_CS4227_SPEED_10G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); - - /* Configure CS4227 for HOST connection rate then type. */ - slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); - value = speed & IXGBE_LINK_SPEED_10GB_FULL ? - IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); + if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + /* Configure CS4227 LINE side to 10G SR. */ + slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); + value = IXGBE_CS4227_SPEED_10G; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; - slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); - if (setup_linear) - value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; - else + slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + + /* Configure CS4227 for HOST connection rate then type. */ + slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); + value = speed & IXGBE_LINK_SPEED_10GB_FULL ? + IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; - /* If internal link mode is XFI, then setup XFI internal link. */ - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) + slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + else + value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + + /* Setup XFI internal link. */ status = ixgbe_setup_ixfi_x550em(hw, &speed); + if (status) { + hw_dbg(hw, "setup_ixfi failed with %d\n", status); + return status; + } + } else { + /* Configure internal PHY for KR/KX. */ + status = ixgbe_setup_kr_speed_x550em(hw, speed); + if (status) { + hw_dbg(hw, "setup_kr_speed failed with %d\n", status); + return status; + } + + /* Configure CS4227 LINE side to proper mode. */ + slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + else + value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + } + return 0; + +i2c_err: + hw_dbg(hw, "combined i2c access failed with %d\n", status); return status; } @@ -1982,12 +1944,9 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) * to determine internal PHY mode. */ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL); - - /* If internal PHY mode is KR, then initialize KR link */ if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { speed = IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); } } diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index d3e5f5b37999..c48aef613b0a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -774,7 +774,7 @@ static int ixgbevf_set_coalesce(struct net_device *netdev, adapter->tx_itr_setting = ec->tx_coalesce_usecs; if (adapter->tx_itr_setting == 1) - tx_itr_param = IXGBE_10K_ITR; + tx_itr_param = IXGBE_12K_ITR; else tx_itr_param = adapter->tx_itr_setting; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index ec3147279621..68ec7daa04fd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -326,8 +326,7 @@ static inline bool ixgbevf_qv_disable(struct ixgbevf_q_vector *q_vector) #define IXGBE_MIN_RSC_ITR 24 #define IXGBE_100K_ITR 40 #define IXGBE_20K_ITR 200 -#define IXGBE_10K_ITR 400 -#define IXGBE_8K_ITR 500 +#define IXGBE_12K_ITR 336 /* Helper macros to switch between ints/sec and what the register uses. * And yes, it's the same math going both ways. The lowest value diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 592ff237d692..dbbd1be47462 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1138,7 +1138,7 @@ static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter) if (q_vector->tx.ring && !q_vector->rx.ring) { /* Tx only vector */ if (adapter->tx_itr_setting == 1) - q_vector->itr = IXGBE_10K_ITR; + q_vector->itr = IXGBE_12K_ITR; else q_vector->itr = adapter->tx_itr_setting; } else { @@ -1196,7 +1196,7 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector, /* simple throttle rate management * 0-20MB/s lowest (100000 ints/s) * 20-100MB/s low (20000 ints/s) - * 100-1249MB/s bulk (8000 ints/s) + * 100-1249MB/s bulk (12000 ints/s) */ /* what was last interrupt timeslice? */ timepassed_us = q_vector->itr >> 2; @@ -1247,7 +1247,7 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) break; case bulk_latency: default: - new_itr = IXGBE_8K_ITR; + new_itr = IXGBE_12K_ITR; break; } @@ -1288,7 +1288,7 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data) /* EIAM disabled interrupts (on this vector) for us */ if (q_vector->rx.ring || q_vector->tx.ring) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -2260,10 +2260,8 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) } if (is_valid_ether_addr(adapter->hw.mac.addr)) { - memcpy(netdev->dev_addr, adapter->hw.mac.addr, - netdev->addr_len); - memcpy(netdev->perm_addr, adapter->hw.mac.addr, - netdev->addr_len); + ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } adapter->last_reset = jiffies; @@ -2483,9 +2481,6 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) q_vector->v_idx = q_idx; netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64); -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_add(&q_vector->napi); -#endif adapter->q_vector[q_idx] = q_vector; } @@ -2662,13 +2657,14 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) else if (is_zero_ether_addr(adapter->hw.mac.addr)) dev_info(&pdev->dev, "MAC address not assigned by administrator.\n"); - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + ether_addr_copy(netdev->dev_addr, hw->mac.addr); } if (!is_valid_ether_addr(netdev->dev_addr)) { dev_info(&pdev->dev, "Assigning random MAC address\n"); eth_hw_addr_random(netdev); - memcpy(hw->mac.addr, netdev->dev_addr, netdev->addr_len); + ether_addr_copy(hw->mac.addr, netdev->dev_addr); + ether_addr_copy(hw->mac.perm_addr, netdev->dev_addr); } /* Enable dynamic interrupt throttling rates */ @@ -3698,8 +3694,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + ether_addr_copy(netdev->dev_addr, addr->sa_data); + ether_addr_copy(hw->mac.addr, addr->sa_data); spin_lock_bh(&adapter->mbx_lock); diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 427f3605cbfc..61a98f4c5746 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -117,7 +117,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_NACK)) return IXGBE_ERR_INVALID_MAC_ADDR; - ether_addr_copy(hw->mac.perm_addr, addr); + if (msgbuf[0] == (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK)) + ether_addr_copy(hw->mac.perm_addr, addr); + hw->mac.mc_filter_type = msgbuf[IXGBE_VF_MC_TYPE_WORD]; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index eb8a4988de63..af975a2b74c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -155,13 +155,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) { - netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, - NAPI_POLL_WEIGHT); - } else { + if (cq->is_tx) + netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, + NAPI_POLL_WEIGHT); + else netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); - napi_hash_add(&cq->napi); - } napi_enable(&cq->napi); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ddb5541882f5..dd84cabb2a51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -337,11 +337,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return bitmap_iterator_count(&it) + (priv->tx_ring_num * 2) + -#ifdef CONFIG_NET_RX_BUSY_POLL - (priv->rx_ring_num * 5); -#else (priv->rx_ring_num * 2); -#endif case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; @@ -408,11 +404,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i]->packets; data[index++] = priv->rx_ring[i]->bytes; -#ifdef CONFIG_NET_RX_BUSY_POLL - data[index++] = priv->rx_ring[i]->yields; - data[index++] = priv->rx_ring[i]->misses; - data[index++] = priv->rx_ring[i]->cleaned; -#endif } spin_unlock_bh(&priv->stats_lock); @@ -486,14 +477,6 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); -#ifdef CONFIG_NET_RX_BUSY_POLL - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_napi_yield", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_misses", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_cleaned", i); -#endif } break; case ETH_SS_PRIV_FLAGS: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 886e1bc86374..659209ff7af6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,34 +69,6 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -#ifdef CONFIG_NET_RX_BUSY_POLL -/* must be called with local_bh_disable()d */ -static int mlx4_en_low_latency_recv(struct napi_struct *napi) -{ - struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); - struct net_device *dev = cq->dev; - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; - int done; - - if (!priv->port_up) - return LL_FLUSH_FAILED; - - if (!mlx4_en_cq_lock_poll(cq)) - return LL_FLUSH_BUSY; - - done = mlx4_en_process_rx_cq(dev, cq, 4); - if (likely(done)) - rx_ring->cleaned += done; - else - rx_ring->misses++; - - mlx4_en_cq_unlock_poll(cq); - - return done; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -1561,8 +1533,6 @@ int mlx4_en_start_port(struct net_device *dev) for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; - mlx4_en_cq_init_lock(cq); - err = mlx4_en_init_affinity_hint(priv, i); if (err) { en_err(priv, "Failed preparing IRQ affinity hint\n"); @@ -1859,13 +1829,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) for (i = 0; i < priv->rx_ring_num; i++) { struct mlx4_en_cq *cq = priv->rx_cq[i]; - local_bh_disable(); - while (!mlx4_en_cq_lock_napi(cq)) { - pr_info("CQ %d locked\n", i); - mdelay(1); - } - local_bh_enable(); - napi_synchronize(&cq->napi); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); @@ -2504,9 +2467,6 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = mlx4_en_low_latency_recv, -#endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, #ifdef CONFIG_MLX4_EN_VXLAN .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e7a5000aa12c..41440b2b20a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -873,10 +873,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * - TCP/IP (v4) * - without IP options * - not an IP fragment - * - no LLS polling in progress */ - if (!mlx4_en_cq_busy_polling(cq) && - (dev->features & NETIF_F_GRO)) { + if (dev->features & NETIF_F_GRO) { struct sk_buff *gro_skb = napi_get_frags(&cq->napi); if (!gro_skb) goto next; @@ -927,7 +925,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud PKT_HASH_TYPE_L3); skb_record_rx_queue(gro_skb, cq->ring); - skb_mark_napi_id(gro_skb, &cq->napi); if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { timestamp = mlx4_en_get_cqe_ts(cqe); @@ -990,13 +987,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud timestamp); } - skb_mark_napi_id(skb, &cq->napi); - - if (!mlx4_en_cq_busy_polling(cq)) - napi_gro_receive(&cq->napi, skb); - else - netif_receive_skb(skb); - + napi_gro_receive(&cq->napi, skb); next: for (nr = 0; nr < priv->num_frags; nr++) mlx4_en_free_frag(priv, frags, nr); @@ -1038,13 +1029,8 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) struct mlx4_en_priv *priv = netdev_priv(dev); int done; - if (!mlx4_en_cq_lock_napi(cq)) - return budget; - done = mlx4_en_process_rx_cq(dev, cq, budget); - mlx4_en_cq_unlock_napi(cq); - /* If we used up all the quota - we're probably not done yet... */ if (done == budget) { const struct cpumask *aff; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index c41f15102ae0..35de7d2e6b34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -320,11 +320,6 @@ struct mlx4_en_rx_ring { void *rx_info; unsigned long bytes; unsigned long packets; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned long yields; - unsigned long misses; - unsigned long cleaned; -#endif unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; @@ -347,18 +342,6 @@ struct mlx4_en_cq { struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int state; -#define MLX4_EN_CQ_STATE_IDLE 0 -#define MLX4_EN_CQ_STATE_NAPI 1 /* NAPI owns this CQ */ -#define MLX4_EN_CQ_STATE_POLL 2 /* poll owns this CQ */ -#define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATE_NAPI | MLX4_EN_CQ_STATE_POLL) -#define MLX4_EN_CQ_STATE_NAPI_YIELD 4 /* NAPI yielded this CQ */ -#define MLX4_EN_CQ_STATE_POLL_YIELD 8 /* poll yielded this CQ */ -#define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD) -#define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) - spinlock_t poll_lock; /* protects from LLS/napi conflicts */ -#endif /* CONFIG_NET_RX_BUSY_POLL */ struct irq_desc *irq_desc; }; @@ -622,115 +605,6 @@ static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz) return buf + idx * cqe_sz; } -#ifdef CONFIG_NET_RX_BUSY_POLL -static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) -{ - spin_lock_init(&cq->poll_lock); - cq->state = MLX4_EN_CQ_STATE_IDLE; -} - -/* called from the device poll rutine to get ownership of a cq */ -static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) -{ - int rc = true; - spin_lock(&cq->poll_lock); - if (cq->state & MLX4_CQ_LOCKED) { - WARN_ON(cq->state & MLX4_EN_CQ_STATE_NAPI); - cq->state |= MLX4_EN_CQ_STATE_NAPI_YIELD; - rc = false; - } else - /* we don't care if someone yielded */ - cq->state = MLX4_EN_CQ_STATE_NAPI; - spin_unlock(&cq->poll_lock); - return rc; -} - -/* returns true is someone tried to get the cq while napi had it */ -static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) -{ - int rc = false; - spin_lock(&cq->poll_lock); - WARN_ON(cq->state & (MLX4_EN_CQ_STATE_POLL | - MLX4_EN_CQ_STATE_NAPI_YIELD)); - - if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) - rc = true; - cq->state = MLX4_EN_CQ_STATE_IDLE; - spin_unlock(&cq->poll_lock); - return rc; -} - -/* called from mlx4_en_low_latency_poll() */ -static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) -{ - int rc = true; - spin_lock_bh(&cq->poll_lock); - if ((cq->state & MLX4_CQ_LOCKED)) { - struct net_device *dev = cq->dev; - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; - - cq->state |= MLX4_EN_CQ_STATE_POLL_YIELD; - rc = false; - rx_ring->yields++; - } else - /* preserve yield marks */ - cq->state |= MLX4_EN_CQ_STATE_POLL; - spin_unlock_bh(&cq->poll_lock); - return rc; -} - -/* returns true if someone tried to get the cq while it was locked */ -static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) -{ - int rc = false; - spin_lock_bh(&cq->poll_lock); - WARN_ON(cq->state & (MLX4_EN_CQ_STATE_NAPI)); - - if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) - rc = true; - cq->state = MLX4_EN_CQ_STATE_IDLE; - spin_unlock_bh(&cq->poll_lock); - return rc; -} - -/* true if a socket is polling, even if it did not get the lock */ -static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq) -{ - WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); - return cq->state & CQ_USER_PEND; -} -#else -static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) -{ -} - -static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) -{ - return true; -} - -static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq) -{ - return false; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void mlx4_en_update_loopback_state(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 22e72bf1ae48..69f1c1a412b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -564,7 +564,7 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq); -bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1e52db32c73d..f6a8cc787603 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1020,6 +1020,7 @@ err_close_tx_cqs: err_napi_del: netif_napi_del(&c->napi); + napi_hash_del(&c->napi); kfree(c); return err; @@ -1033,6 +1034,10 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); netif_napi_del(&c->napi); + + napi_hash_del(&c->napi); + synchronize_rcu(); + kfree(c); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index cf0098596e85..7c8c4088d1be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -33,6 +33,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <net/busy_poll.h> #include "en.h" static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, @@ -215,16 +216,16 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, be16_to_cpu(cqe->vlan_info)); } -bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - int i; + int work_done; /* avoid accessing cq (dma coherent memory) if not needed */ if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags)) - return false; + return 0; - for (i = 0; i < budget; i++) { + for (work_done = 0; work_done < budget; work_done++) { struct mlx5e_rx_wqe *wqe; struct mlx5_cqe64 *cqe; struct sk_buff *skb; @@ -269,10 +270,8 @@ wq_ll_pop: /* ensure cq space is freed before enabling more cqes */ wmb(); - if (i == budget) { + if (work_done == budget) set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); - return true; - } - return false; + return work_done; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 2c7cb6755d1d..4ac8d716dbdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -54,6 +54,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); bool busy = false; + int work_done; int i; clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); @@ -61,26 +62,26 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq); - busy |= mlx5e_poll_rx_cq(&c->rq.cq, budget); - + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + busy |= work_done == budget; busy |= mlx5e_post_rx_wqes(&c->rq); if (busy) return budget; - napi_complete(napi); + napi_complete_done(napi, work_done); /* avoid losing completion event during/after polling cqs */ if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { napi_schedule(napi); - return 0; + return work_done; } for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); mlx5e_cq_arm(&c->rq.cq); - return 0; + return work_done; } void mlx5e_completion_event(struct mlx5_core_cq *mcq) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 617fb22b5d81..f21e23983a1a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -342,14 +342,35 @@ err_port_add_vid: return err; } +static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, bool is_member, + bool untagged) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, + is_member, untagged); + if (err) + return err; + } + + return 0; +} + static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool flag_untagged, bool flag_pvid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; + u16 vid, last_visited_vid, old_pvid; enum mlxsw_reg_svfa_mt mt; - u16 vid, vid_e; int err; /* In case this is invoked with BRIDGE_FLAGS_SELF and port is @@ -377,15 +398,18 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, if (err) { netdev_err(dev, "Failed to create FID=VID=%d mapping\n", vid); - return err; + goto err_port_vid_to_fid_set; } } + } - /* Set FID mapping according to port's mode */ + /* Set FID mapping according to port's mode */ + for (vid = vid_begin; vid <= vid_end; vid++) { err = mlxsw_sp_port_fid_map(mlxsw_sp_port, vid); if (err) { netdev_err(dev, "Failed to map FID=%d", vid); - return err; + last_visited_vid = --vid; + goto err_port_fid_map; } } @@ -393,40 +417,62 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, true, false); if (err) { netdev_err(dev, "Failed to configure flooding\n"); - return err; + goto err_port_flood_set; } - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, true, - flag_untagged); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to add VIDs %d-%d\n", - vid, vid_e); - return err; - } + err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, + true, flag_untagged); + if (err) { + netdev_err(dev, "Unable to add VIDs %d-%d\n", vid_begin, + vid_end); + goto err_port_vlans_set; } - vid = vid_begin; - if (flag_pvid && mlxsw_sp_port->pvid != vid) { - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid); + old_pvid = mlxsw_sp_port->pvid; + if (flag_pvid && old_pvid != vid_begin) { + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid_begin); if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to add PVID %d\n", - vid); - return err; + netdev_err(dev, "Unable to add PVID %d\n", vid_begin); + goto err_port_pvid_set; } - mlxsw_sp_port->pvid = vid; + mlxsw_sp_port->pvid = vid_begin; } /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) set_bit(vid, mlxsw_sp_port->active_vlans); - return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, - mlxsw_sp_port->stp_state); + /* STP state change must be done after we set active VLANs */ + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, + mlxsw_sp_port->stp_state); + if (err) { + netdev_err(dev, "Failed to set STP state\n"); + goto err_port_stp_state_set; + } + + return 0; + +err_port_vid_to_fid_set: + mlxsw_sp_fid_destroy(mlxsw_sp, vid); + return err; + +err_port_stp_state_set: + for (vid = vid_begin; vid <= vid_end; vid++) + clear_bit(vid, mlxsw_sp_port->active_vlans); + if (old_pvid != mlxsw_sp_port->pvid) + mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); +err_port_pvid_set: + __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); +err_port_vlans_set: + __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); +err_port_flood_set: + last_visited_vid = vid_end; +err_port_fid_map: + for (vid = last_visited_vid; vid >= vid_begin; vid--) + mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); + return err; } static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -532,7 +578,7 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool init) { struct net_device *dev = mlxsw_sp_port->dev; - u16 vid, vid_e; + u16 vid, pvid; int err; /* In case this is invoked with BRIDGE_FLAGS_SELF and port is @@ -542,30 +588,23 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (!init && !mlxsw_sp_port->bridged) return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end); - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, false, - false); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to del VIDs %d-%d\n", - vid, vid_e); - return err; - } + err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); + if (err) { + netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin, + vid_end); + return err; } - if ((mlxsw_sp_port->pvid >= vid_begin) && - (mlxsw_sp_port->pvid <= vid_end)) { + pvid = mlxsw_sp_port->pvid; + if (pvid >= vid_begin && pvid <= vid_end && pvid != 1) { /* Default VLAN is always 1 */ - mlxsw_sp_port->pvid = 1; - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, - mlxsw_sp_port->pvid); + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to del PVID %d\n", - vid); + netdev_err(dev, "Unable to del PVID %d\n", pvid); return err; } + mlxsw_sp_port->pvid = 1; } if (init) diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 2056b719c262..7df318346b05 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -600,22 +600,11 @@ static void encx24j600_set_rxfilter_mode(struct encx24j600_priv *priv) static int encx24j600_hw_init(struct encx24j600_priv *priv) { - struct net_device *dev = priv->ndev; int ret = 0; - u16 eidled; u16 macon2; priv->hw_enabled = false; - eidled = encx24j600_read_reg(priv, EIDLED); - if (((eidled & DEVID_MASK) >> DEVID_SHIFT) != ENCX24J600_DEV_ID) { - ret = -EINVAL; - goto err_out; - } - - netif_info(priv, drv, dev, "Silicon rev ID: 0x%02x\n", - (eidled & REVID_MASK) >> REVID_SHIFT); - /* PHY Leds: link status, * LEDA: Link State + collision events * LEDB: Link State + transmit/receive events @@ -655,7 +644,6 @@ static int encx24j600_hw_init(struct encx24j600_priv *priv) if (netif_msg_hw(priv)) encx24j600_dump_config(priv, "Hw is initialized"); -err_out: return ret; } @@ -1004,6 +992,7 @@ static int encx24j600_spi_probe(struct spi_device *spi) struct net_device *ndev; struct encx24j600_priv *priv; + u16 eidled; ndev = alloc_etherdev(sizeof(struct encx24j600_priv)); @@ -1072,10 +1061,21 @@ static int encx24j600_spi_probe(struct spi_device *spi) goto out_free; } + eidled = encx24j600_read_reg(priv, EIDLED); + if (((eidled & DEVID_MASK) >> DEVID_SHIFT) != ENCX24J600_DEV_ID) { + ret = -EINVAL; + goto out_unregister; + } + + netif_info(priv, probe, ndev, "Silicon rev ID: 0x%02x\n", + (eidled & REVID_MASK) >> REVID_SHIFT); + netif_info(priv, drv, priv->ndev, "MAC address %pM\n", ndev->dev_addr); return ret; +out_unregister: + unregister_netdev(priv->ndev); out_free: free_netdev(ndev); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 83651ac8ddb9..270c9eeb7ab6 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1488,7 +1488,6 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum) } myri10ge_vlan_rx(mgp->dev, va, skb); skb_record_rx_queue(skb, ss - &mgp->ss[0]); - skb_mark_napi_id(skb, &ss->napi); if (polling) { int hlen; @@ -1506,6 +1505,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum) skb->data_len -= hlen; skb->tail += hlen; skb->protocol = eth_type_trans(skb, dev); + skb_mark_napi_id(skb, &ss->napi); netif_receive_skb(skb); } else @@ -3814,7 +3814,6 @@ static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) ss->dev = mgp->dev; netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, myri10ge_napi_weight); - napi_hash_add(&ss->napi); } return 0; abort: diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index d4b5085a21fa..7bd6f25b4625 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -1604,7 +1604,7 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, NAPI_POLL_WEIGHT); } } @@ -2135,7 +2135,7 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_83xx_msix_tx_poll, NAPI_POLL_WEIGHT); } diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 0623fff932e4..f9dee7436e81 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -576,6 +576,9 @@ enum GTI_BIT { GTI_TIV = 0x0FFFFFFF, }; +#define GTI_TIV_MAX GTI_TIV +#define GTI_TIV_MIN 0x20 + /* GIC */ enum GIC_BIT { GIC_PTCE = 0x00000001, /* Undocumented? */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ee8d1ec61fab..990dc55cdada 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -32,6 +32,8 @@ #include <linux/slab.h> #include <linux/spinlock.h> +#include <asm/div64.h> + #include "ravb.h" #define RAVB_DEF_MSG_ENABLE \ @@ -1659,6 +1661,38 @@ static const struct of_device_id ravb_match_table[] = { }; MODULE_DEVICE_TABLE(of, ravb_match_table); +static int ravb_set_gti(struct net_device *ndev) +{ + + struct device *dev = ndev->dev.parent; + struct device_node *np = dev->of_node; + unsigned long rate; + struct clk *clk; + uint64_t inc; + + clk = of_clk_get(np, 0); + if (IS_ERR(clk)) { + dev_err(dev, "could not get clock\n"); + return PTR_ERR(clk); + } + + rate = clk_get_rate(clk); + clk_put(clk); + + inc = 1000000000ULL << 20; + do_div(inc, rate); + + if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) { + dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n", + inc, GTI_TIV_MIN, GTI_TIV_MAX); + return -EINVAL; + } + + ravb_write(ndev, inc, GTI); + + return 0; +} + static int ravb_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1755,7 +1789,9 @@ static int ravb_probe(struct platform_device *pdev) CCC); /* Set GTI value */ - ravb_write(ndev, ((1000 << 20) / 130) & GTI_TIV, GTI); + error = ravb_set_gti(ndev); + if (error) + goto out_release; /* Request GTI loading */ ravb_write(ndev, ravb_read(ndev, GCCR) | GCCR_LTI, GCCR); diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index e9f2349e98bc..a4ab71d43e4e 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4998,7 +4998,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) dev->netdev_ops = &rocker_port_netdev_ops; dev->ethtool_ops = &rocker_port_ethtool_ops; dev->switchdev_ops = &rocker_port_switchdev_ops; - netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, + netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, NAPI_POLL_WEIGHT); netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, NAPI_POLL_WEIGHT); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index a3c42a376741..4e82bcfbe3e0 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2059,7 +2059,6 @@ static void efx_init_napi_channel(struct efx_channel *channel) channel->napi_dev = efx->net_dev; netif_napi_add(channel->napi_dev, &channel->napi_str, efx_poll, napi_weight); - napi_hash_add(&channel->napi_str); efx_channel_busy_poll_init(channel); } diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 809ea4610a77..8956995b2fe7 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -463,7 +463,6 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, skb_record_rx_queue(skb, channel->rx_queue.core_index); - skb_mark_napi_id(skb, &channel->napi_str); gro_result = napi_gro_frags(napi); if (gro_result != GRO_DROP) channel->irq_mod_score += 2; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 48b92c9de12a..15322c08de80 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2469,7 +2469,7 @@ static int cpsw_probe(struct platform_device *pdev) ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); - netif_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); + netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); /* register the network device */ SET_NETDEV_DEV(ndev, &pdev->dev); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 37b9b39192ec..e5e20e734f21 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1990,7 +1990,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, /* NAPI register */ netif_napi_add(ndev, &netcp->rx_napi, netcp_rx_poll, NETCP_NAPI_WEIGHT); - netif_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT); + netif_tx_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT); /* Register the network device */ ndev->dev_id = 0; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 5e0b43283bce..277e6827d7cd 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -311,7 +311,7 @@ static void pppoe_flush_dev(struct net_device *dev) lock_sock(sk); if (po->pppoe_dev == dev && - sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { + sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { pppox_unbind_sock(sk); sk->sk_state_change(sk); po->pppoe_dev = NULL; @@ -500,27 +500,9 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, pn = pppoe_pernet(dev_net(dev)); po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); - if (po) { - struct sock *sk = sk_pppox(po); - - bh_lock_sock(sk); - - /* If the user has locked the socket, just ignore - * the packet. With the way two rcv protocols hook into - * one socket family type, we cannot (easily) distinguish - * what kind of SKB it is during backlog rcv. - */ - if (sock_owned_by_user(sk) == 0) { - /* We're no longer connect at the PPPOE layer, - * and must wait for ppp channel to disconnect us. - */ - sk->sk_state = PPPOX_ZOMBIE; - } - - bh_unlock_sock(sk); + if (po) if (!schedule_work(&po->proto.pppoe.padt_work)) - sock_put(sk); - } + sock_put(sk_pppox(po)); abort: kfree_skb(skb); @@ -793,7 +775,7 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, struct pppox_sock *relay_po; err = -EBUSY; - if (sk->sk_state & (PPPOX_BOUND | PPPOX_ZOMBIE | PPPOX_DEAD)) + if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD)) break; err = -ENOTCONN; diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 0e1b30622477..0200de74eebc 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -58,7 +58,7 @@ void pppox_unbind_sock(struct sock *sk) { /* Clear connection to ppp device, if attached. */ - if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED | PPPOX_ZOMBIE)) { + if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED)) { ppp_unregister_channel(&pppox_sk(sk)->chan); sk->sk_state = PPPOX_DEAD; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d8838dedb7a4..b1ae4cbf2453 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -516,8 +516,6 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, skb_shinfo(skb)->gso_segs = 0; } - skb_mark_napi_id(skb, &rq->napi); - napi_gro_receive(&rq->napi, skb); return; @@ -1612,7 +1610,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) vi->rq[i].pages = NULL; netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight); - napi_hash_add(&vi->rq[i].napi); sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index e3b3c8fb4605..56aaa2d4fb0e 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -183,7 +183,7 @@ void *wil_if_alloc(struct device *dev) netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, WIL6210_NAPI_BUDGET); - netif_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, + netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, WIL6210_NAPI_BUDGET); netif_tx_stop_all_queues(ndev); diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 519b6e2d769e..661e5c2a8e2a 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -16,6 +16,10 @@ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } +#define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] __read_mostly = \ + { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index b49cf923becc..ba7a9b0c7c57 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -91,7 +91,6 @@ enum { PPPOX_CONNECTED = 1, /* connection established ==TCP_ESTABLISHED */ PPPOX_BOUND = 2, /* bound to ppp device */ PPPOX_RELAY = 4, /* forwarding is enabled */ - PPPOX_ZOMBIE = 8, /* dead, but still bound to ppp device */ PPPOX_DEAD = 16 /* dead, useless, please clean me up!*/ }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..7d2d1d7aaec7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -326,7 +326,8 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ - NAPI_STATE_HASHED, /* In NAPI hash */ + NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ + NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ }; enum gro_result { @@ -461,19 +462,13 @@ static inline void napi_complete(struct napi_struct *n) } /** - * napi_by_id - lookup a NAPI by napi_id - * @napi_id: hashed napi_id - * - * lookup @napi_id in napi_hash table - * must be called under rcu_read_lock() - */ -struct napi_struct *napi_by_id(unsigned int napi_id); - -/** * napi_hash_add - add a NAPI to global hashtable * @napi: napi context * * generate a new napi_id and store a @napi under it in napi_hash + * Used for busy polling (CONFIG_NET_RX_BUSY_POLL) + * Note: This is normally automatically done from netif_napi_add(), + * so might disappear in a future linux version. */ void napi_hash_add(struct napi_struct *napi); @@ -482,9 +477,14 @@ void napi_hash_add(struct napi_struct *napi); * @napi: napi context * * Warning: caller must observe rcu grace period - * before freeing memory containing @napi + * before freeing memory containing @napi, if + * this function returns true. + * Note: core networking stack automatically calls it + * from netif_napi_del() + * Drivers might want to call this helper to combine all + * the needed rcu grace periods into a single one. */ -void napi_hash_del(struct napi_struct *napi); +bool napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling @@ -1948,6 +1948,26 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); /** + * netif_tx_napi_add - initialize a napi context + * @dev: network device + * @napi: napi context + * @poll: polling function + * @weight: default weight + * + * This variant of netif_napi_add() should be used from drivers using NAPI + * to exclusively poll a TX queue. + * This will avoid we add it into napi_hash[], thus polluting this hash table. + */ +static inline void netif_tx_napi_add(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) +{ + set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); + netif_napi_add(dev, napi, poll, weight); +} + +/** * netif_napi_del - remove a napi context * @napi: napi context * diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4355129fff91..c9c394bf0771 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1082,9 +1082,6 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) static inline void skb_sender_cpu_clear(struct sk_buff *skb) { -#ifdef CONFIG_XPS - skb->sender_cpu = 0; -#endif } #ifdef NET_SKBUFF_DATA_USES_OFFSET diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 1d67fb6b23a0..2fbeb1313c0f 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -72,50 +72,7 @@ static inline bool busy_loop_timeout(unsigned long end_time) return time_after(now, end_time); } -/* when used in sock_poll() nonblock is known at compile time to be true - * so the loop and end_time will be optimized out - */ -static inline bool sk_busy_loop(struct sock *sk, int nonblock) -{ - unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; - const struct net_device_ops *ops; - struct napi_struct *napi; - int rc = false; - - /* - * rcu read lock for napi hash - * bh so we don't race with net_rx_action - */ - rcu_read_lock_bh(); - - napi = napi_by_id(sk->sk_napi_id); - if (!napi) - goto out; - - ops = napi->dev->netdev_ops; - if (!ops->ndo_busy_poll) - goto out; - - do { - rc = ops->ndo_busy_poll(napi); - - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - - if (rc > 0) - /* local bh are disabled so it is ok to use _BH */ - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); - cpu_relax(); - - } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && - !need_resched() && !busy_loop_timeout(end_time)); - - rc = !skb_queue_empty(&sk->sk_receive_queue); -out: - rcu_read_unlock_bh(); - return rc; -} +bool sk_busy_loop(struct sock *sk, int nonblock); /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, diff --git a/include/net/dsa.h b/include/net/dsa.h index 82a4c6011173..3f23dd9d6a69 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -16,6 +16,7 @@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/of.h> +#include <linux/of_gpio.h> #include <linux/phy.h> #include <linux/phy_fixed.h> #include <linux/ethtool.h> @@ -64,6 +65,13 @@ struct dsa_chip_data { * NULL if there is only one switch chip. */ s8 *rtable; + + /* + * A switch may have a GPIO line tied to its reset pin. Parse + * this from the device tree, and use it before performing + * switch soft reset. + */ + struct gpio_desc *reset; }; struct dsa_platform_data { diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h new file mode 100644 index 000000000000..4cf6bac4686d --- /dev/null +++ b/include/trace/events/fib6.h @@ -0,0 +1,76 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fib6 + +#if !defined(_TRACE_FIB6_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FIB6_H + +#include <linux/in6.h> +#include <net/flow.h> +#include <net/ip6_fib.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(fib6_table_lookup, + + TP_PROTO(const struct net *net, const struct rt6_info *rt, + u32 tb_id, const struct flowi6 *flp), + + TP_ARGS(net, rt, tb_id, flp), + + TP_STRUCT__entry( + __field( u32, tb_id ) + + __field( int, oif ) + __field( int, iif ) + __field( __u8, tos ) + __field( __u8, scope ) + __field( __u8, flags ) + __array( __u8, src, 16 ) + __array( __u8, dst, 16 ) + + __dynamic_array( char, name, IFNAMSIZ ) + __array( __u8, gw, 16 ) + ), + + TP_fast_assign( + struct in6_addr *in6; + + __entry->tb_id = tb_id; + __entry->oif = flp->flowi6_oif; + __entry->iif = flp->flowi6_iif; + __entry->tos = flp->flowi6_tos; + __entry->scope = flp->flowi6_scope; + __entry->flags = flp->flowi6_flags; + + in6 = (struct in6_addr *)__entry->src; + *in6 = flp->saddr; + + in6 = (struct in6_addr *)__entry->dst; + *in6 = flp->daddr; + + if (rt->rt6i_idev) { + __assign_str(name, rt->rt6i_idev->dev->name); + } else { + __assign_str(name, ""); + } + if (rt == net->ipv6.ip6_null_entry) { + struct in6_addr in6_zero = {}; + + in6 = (struct in6_addr *)__entry->gw; + *in6 = in6_zero; + + } else if (rt) { + in6 = (struct in6_addr *)__entry->gw; + *in6 = rt->rt6i_gateway; + } + ), + + TP_printk("table %3u oif %d iif %d src %pI6c dst %pI6c tos %d scope %d flags %x ==> dev %s gw %pI6c", + __entry->tb_id, __entry->oif, __entry->iif, + __entry->src, __entry->dst, __entry->tos, __entry->scope, + __entry->flags, __get_str(name), __entry->gw) +); + +#endif /* _TRACE_FIB6_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index a382d2c04a42..cf943016930f 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -4,15 +4,13 @@ #include <linux/sockios.h> #include <linux/types.h> -/* - * Based on the MROUTING 3.5 defines primarily to keep - * source compatibility with BSD. +/* Based on the MROUTING 3.5 defines primarily to keep + * source compatibility with BSD. * - * See the mrouted code for the original history. - * - * Protocol Independent Multicast (PIM) data structures included - * Carlos Picoto (cap@di.fc.ul.pt) + * See the mrouted code for the original history. * + * Protocol Independent Multicast (PIM) data structures included + * Carlos Picoto (cap@di.fc.ul.pt) */ #define MRT_BASE 200 @@ -34,15 +32,13 @@ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) -#define MAXVIFS 32 +#define MAXVIFS 32 typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; #define ALL_VIFS ((vifi_t)(-1)) -/* - * Same idea as select - */ - +/* Same idea as select */ + #define VIFM_SET(n,m) ((m)|=(1<<(n))) #define VIFM_CLR(n,m) ((m)&=~(1<<(n))) #define VIFM_ISSET(n,m) ((m)&(1<<(n))) @@ -50,11 +46,9 @@ typedef unsigned short vifi_t; #define VIFM_COPY(mfrom,mto) ((mto)=(mfrom)) #define VIFM_SAME(m1,m2) ((m1)==(m2)) -/* - * Passed by mrouted for an MRT_ADD_VIF - again we use the - * mrouted 3.6 structures for compatibility +/* Passed by mrouted for an MRT_ADD_VIF - again we use the + * mrouted 3.6 structures for compatibility */ - struct vifctl { vifi_t vifc_vifi; /* Index of VIF */ unsigned char vifc_flags; /* VIFF_ flags */ @@ -73,10 +67,7 @@ struct vifctl { #define VIFF_USE_IFINDEX 0x8 /* use vifc_lcl_ifindex instead of vifc_lcl_addr to find an interface */ -/* - * Cache manipulation structures for mrouted and PIMd - */ - +/* Cache manipulation structures for mrouted and PIMd */ struct mfcctl { struct in_addr mfcc_origin; /* Origin of mcast */ struct in_addr mfcc_mcastgrp; /* Group in question */ @@ -88,10 +79,7 @@ struct mfcctl { int mfcc_expire; }; -/* - * Group count retrieval for mrouted - */ - +/* Group count retrieval for mrouted */ struct sioc_sg_req { struct in_addr src; struct in_addr grp; @@ -100,10 +88,7 @@ struct sioc_sg_req { unsigned long wrong_if; }; -/* - * To get vif packet counts - */ - +/* To get vif packet counts */ struct sioc_vif_req { vifi_t vifi; /* Which iface */ unsigned long icount; /* In packets */ @@ -112,11 +97,9 @@ struct sioc_vif_req { unsigned long obytes; /* Out bytes */ }; -/* - * This is the format the mroute daemon expects to see IGMP control - * data. Magically happens to be like an IP packet as per the original +/* This is the format the mroute daemon expects to see IGMP control + * data. Magically happens to be like an IP packet as per the original */ - struct igmpmsg { __u32 unused1,unused2; unsigned char im_msgtype; /* What is this */ @@ -126,21 +109,13 @@ struct igmpmsg { struct in_addr im_src,im_dst; }; -/* - * That's all usermode folks - */ - - +/* That's all usermode folks */ #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ -/* - * Pseudo messages used by mrouted - */ - +/* Pseudo messages used by mrouted */ #define IGMPMSG_NOCACHE 1 /* Kern cache fill request to mrouted */ #define IGMPMSG_WRONGVIF 2 /* For PIM assert processing (unused) */ #define IGMPMSG_WHOLEPKT 3 /* For PIM Register processing */ - #endif /* _UAPI__LINUX_MROUTE_H */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0d3313d02a7e..6d1407bc1531 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -93,6 +93,23 @@ void bpf_map_put(struct bpf_map *map) } } +#ifdef CONFIG_PROC_FS +static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) +{ + const struct bpf_map *map = filp->private_data; + + seq_printf(m, + "map_type:\t%u\n" + "key_size:\t%u\n" + "value_size:\t%u\n" + "max_entries:\t%u\n", + map->map_type, + map->key_size, + map->value_size, + map->max_entries); +} +#endif + static int bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; @@ -108,7 +125,10 @@ static int bpf_map_release(struct inode *inode, struct file *filp) } static const struct file_operations bpf_map_fops = { - .release = bpf_map_release, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_map_show_fdinfo, +#endif + .release = bpf_map_release, }; int bpf_map_new_fd(struct bpf_map *map) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 8c1ad1ced72c..270bf7289b1e 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -36,9 +36,9 @@ static int runs = 4; module_param(runs, int, 0); MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)"); -static int max_size = 65536; +static int max_size = 0; module_param(max_size, int, 0); -MODULE_PARM_DESC(runs, "Maximum table size (default: 65536)"); +MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)"); static bool shrinking = false; module_param(shrinking, bool, 0); @@ -52,6 +52,10 @@ static int tcount = 10; module_param(tcount, int, 0); MODULE_PARM_DESC(tcount, "Number of threads to spawn (default: 10)"); +static bool enomem_retry = false; +module_param(enomem_retry, bool, 0); +MODULE_PARM_DESC(enomem_retry, "Retry insert even if -ENOMEM was returned (default: off)"); + struct test_obj { int value; struct rhash_head node; @@ -76,6 +80,28 @@ static struct rhashtable_params test_rht_params = { static struct semaphore prestart_sem; static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0); +static int insert_retry(struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + int err, retries = -1, enomem_retries = 0; + + do { + retries++; + cond_resched(); + err = rhashtable_insert_fast(ht, obj, params); + if (err == -ENOMEM && enomem_retry) { + enomem_retries++; + err = -EBUSY; + } + } while (err == -EBUSY); + + if (enomem_retries) + pr_info(" %u insertions retried after -ENOMEM\n", + enomem_retries); + + return err ? : retries; +} + static int __init test_rht_lookup(struct rhashtable *ht) { unsigned int i; @@ -157,7 +183,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht) { struct test_obj *obj; int err; - unsigned int i, insert_fails = 0; + unsigned int i, insert_retries = 0; s64 start, end; /* @@ -170,22 +196,16 @@ static s64 __init test_rhashtable(struct rhashtable *ht) struct test_obj *obj = &array[i]; obj->value = i * 2; - - err = rhashtable_insert_fast(ht, &obj->node, test_rht_params); - if (err == -ENOMEM || err == -EBUSY) { - /* Mark failed inserts but continue */ - obj->value = TEST_INSERT_FAIL; - insert_fails++; - } else if (err) { + err = insert_retry(ht, &obj->node, test_rht_params); + if (err > 0) + insert_retries += err; + else if (err) return err; - } - - cond_resched(); } - if (insert_fails) - pr_info(" %u insertions failed due to memory pressure\n", - insert_fails); + if (insert_retries) + pr_info(" %u insertions retried due to memory pressure\n", + insert_retries); test_bucket_stats(ht); rcu_read_lock(); @@ -236,13 +256,15 @@ static int thread_lookup_test(struct thread_data *tdata) obj->value, key); err++; } + + cond_resched(); } return err; } static int threadfunc(void *data) { - int i, step, err = 0, insert_fails = 0; + int i, step, err = 0, insert_retries = 0; struct thread_data *tdata = data; up(&prestart_sem); @@ -251,20 +273,18 @@ static int threadfunc(void *data) for (i = 0; i < entries; i++) { tdata->objs[i].value = (tdata->id << 16) | i; - err = rhashtable_insert_fast(&ht, &tdata->objs[i].node, - test_rht_params); - if (err == -ENOMEM || err == -EBUSY) { - tdata->objs[i].value = TEST_INSERT_FAIL; - insert_fails++; + err = insert_retry(&ht, &tdata->objs[i].node, test_rht_params); + if (err > 0) { + insert_retries += err; } else if (err) { pr_err(" thread[%d]: rhashtable_insert_fast failed\n", tdata->id); goto out; } } - if (insert_fails) - pr_info(" thread[%d]: %d insert failures\n", - tdata->id, insert_fails); + if (insert_retries) + pr_info(" thread[%d]: %u insertions retried due to memory pressure\n", + tdata->id, insert_retries); err = thread_lookup_test(tdata); if (err) { @@ -285,6 +305,8 @@ static int threadfunc(void *data) goto out; } tdata->objs[i].value = TEST_INSERT_FAIL; + + cond_resched(); } err = thread_lookup_test(tdata); if (err) { @@ -311,7 +333,7 @@ static int __init test_rht_init(void) entries = min(entries, MAX_ENTRIES); test_rht_params.automatic_shrinking = shrinking; - test_rht_params.max_size = max_size; + test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries); test_rht_params.nelem_hint = size; pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n", @@ -357,6 +379,8 @@ static int __init test_rht_init(void) return -ENOMEM; } + test_rht_params.max_size = max_size ? : + roundup_pow_of_two(tcount * entries); err = rhashtable_init(&ht, &test_rht_params); if (err < 0) { pr_warn("Test failed: Unable to initialize hashtable: %d\n", diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 0919a88bbc70..cfc7b745aa91 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -21,11 +21,11 @@ struct mpoa_client { uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */ rwlock_t ingress_lock; - struct in_cache_ops *in_ops; /* ingress cache operations */ + const struct in_cache_ops *in_ops; /* ingress cache operations */ in_cache_entry *in_cache; /* the ingress cache of this MPC */ rwlock_t egress_lock; - struct eg_cache_ops *eg_ops; /* egress cache operations */ + const struct eg_cache_ops *eg_ops; /* egress cache operations */ eg_cache_entry *eg_cache; /* the egress cache of this MPC */ uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index d1b2d9a03144..9e60e74c807d 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -534,7 +534,7 @@ static void eg_destroy_cache(struct mpoa_client *mpc) } -static struct in_cache_ops ingress_ops = { +static const struct in_cache_ops ingress_ops = { in_cache_add_entry, /* add_entry */ in_cache_get, /* get */ in_cache_get_with_mask, /* get_with_mask */ @@ -548,7 +548,7 @@ static struct in_cache_ops ingress_ops = { in_destroy_cache /* destroy_cache */ }; -static struct eg_cache_ops egress_ops = { +static const struct eg_cache_ops egress_ops = { eg_cache_add_entry, /* add_entry */ eg_cache_get_by_cache_id, /* get_by_cache_id */ eg_cache_get_by_tag, /* get_by_tag */ diff --git a/net/core/dev.c b/net/core/dev.c index ae00b894e675..5df6cbce727c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -96,6 +96,7 @@ #include <linux/skbuff.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <net/busy_poll.h> #include <linux/rtnetlink.h> #include <linux/stat.h> #include <net/dst.h> @@ -182,8 +183,8 @@ EXPORT_SYMBOL(dev_base_lock); /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); -static unsigned int napi_gen_id; -static DEFINE_HASHTABLE(napi_hash, 8); +static unsigned int napi_gen_id = NR_CPUS; +static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; @@ -3021,7 +3022,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, int queue_index = 0; #ifdef CONFIG_XPS - if (skb->sender_cpu == 0) + u32 sender_cpu = skb->sender_cpu - 1; + + if (sender_cpu >= (u32)NR_CPUS) skb->sender_cpu = raw_smp_processor_id() + 1; #endif @@ -4353,6 +4356,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + skb_mark_napi_id(skb, napi); trace_napi_gro_receive_entry(skb); skb_gro_reset_offset(skb); @@ -4386,7 +4390,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = napi_alloc_skb(napi, GRO_MAX_HEAD); - napi->skb = skb; + if (skb) { + napi->skb = skb; + skb_mark_napi_id(skb, napi); + } } return skb; } @@ -4661,7 +4668,7 @@ void napi_complete_done(struct napi_struct *n, int work_done) EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ -struct napi_struct *napi_by_id(unsigned int napi_id) +static struct napi_struct *napi_by_id(unsigned int napi_id) { unsigned int hash = napi_id % HASH_SIZE(napi_hash); struct napi_struct *napi; @@ -4672,43 +4679,101 @@ struct napi_struct *napi_by_id(unsigned int napi_id) return NULL; } -EXPORT_SYMBOL_GPL(napi_by_id); -void napi_hash_add(struct napi_struct *napi) +#if defined(CONFIG_NET_RX_BUSY_POLL) +#define BUSY_POLL_BUDGET 8 +bool sk_busy_loop(struct sock *sk, int nonblock) { - if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + int (*busy_poll)(struct napi_struct *dev); + struct napi_struct *napi; + int rc = false; - spin_lock(&napi_hash_lock); + rcu_read_lock(); - /* 0 is not a valid id, we also skip an id that is taken - * we expect both events to be extremely rare - */ - napi->napi_id = 0; - while (!napi->napi_id) { - napi->napi_id = ++napi_gen_id; - if (napi_by_id(napi->napi_id)) - napi->napi_id = 0; + napi = napi_by_id(sk->sk_napi_id); + if (!napi) + goto out; + + /* Note: ndo_busy_poll method is optional in linux-4.5 */ + busy_poll = napi->dev->netdev_ops->ndo_busy_poll; + + do { + rc = 0; + local_bh_disable(); + if (busy_poll) { + rc = busy_poll(napi); + } else if (napi_schedule_prep(napi)) { + void *have = netpoll_poll_lock(napi); + + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + rc = napi->poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi); + if (rc == BUSY_POLL_BUDGET) { + napi_complete_done(napi, rc); + napi_schedule(napi); + } + } + netpoll_poll_unlock(have); } + if (rc > 0) + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); + local_bh_enable(); - hlist_add_head_rcu(&napi->napi_hash_node, - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + if (rc == LL_FLUSH_FAILED) + break; /* permanent failure */ - spin_unlock(&napi_hash_lock); - } + cpu_relax(); + } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && + !need_resched() && !busy_loop_timeout(end_time)); + + rc = !skb_queue_empty(&sk->sk_receive_queue); +out: + rcu_read_unlock(); + return rc; +} +EXPORT_SYMBOL(sk_busy_loop); + +#endif /* CONFIG_NET_RX_BUSY_POLL */ + +void napi_hash_add(struct napi_struct *napi) +{ + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || + test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + return; + + spin_lock(&napi_hash_lock); + + /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + do { + if (unlikely(++napi_gen_id < NR_CPUS + 1)) + napi_gen_id = NR_CPUS + 1; + } while (napi_by_id(napi_gen_id)); + napi->napi_id = napi_gen_id; + + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + + spin_unlock(&napi_hash_lock); } EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi */ -void napi_hash_del(struct napi_struct *napi) +bool napi_hash_del(struct napi_struct *napi) { + bool rcu_sync_needed = false; + spin_lock(&napi_hash_lock); - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { + rcu_sync_needed = true; hlist_del_rcu(&napi->napi_hash_node); - + } spin_unlock(&napi_hash_lock); + return rcu_sync_needed; } EXPORT_SYMBOL_GPL(napi_hash_del); @@ -4744,6 +4809,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state); + napi_hash_add(napi); } EXPORT_SYMBOL(netif_napi_add); @@ -4763,8 +4829,12 @@ void napi_disable(struct napi_struct *n) } EXPORT_SYMBOL(napi_disable); +/* Must be called in process context */ void netif_napi_del(struct napi_struct *napi) { + might_sleep(); + if (napi_hash_del(napi)) + synchronize_net(); list_del_init(&napi->dev_list); napi_free_frags(napi); @@ -7164,11 +7234,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs); * This function does the last stage of destroying an allocated device * interface. The reference to the device object is released. * If this is the last reference then it will be freed. + * Must be called in process context. */ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; + might_sleep(); netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); diff --git a/net/core/net-traces.c b/net/core/net-traces.c index adef015b2f41..92da5e4ceb4f 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -32,6 +32,10 @@ #include <trace/events/sock.h> #include <trace/events/udp.h> #include <trace/events/fib.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <trace/events/fib6.h> +EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1eba07feb34a..b7448c8490ac 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -21,8 +21,10 @@ #include <linux/of_mdio.h> #include <linux/of_platform.h> #include <linux/of_net.h> +#include <linux/of_gpio.h> #include <linux/sysfs.h> #include <linux/phy_fixed.h> +#include <linux/gpio/consumer.h> #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; @@ -688,6 +690,9 @@ static int dsa_of_probe(struct device *dev) const char *port_name; int chip_index, port_index; const unsigned int *sw_addr, *port_reg; + int gpio; + enum of_gpio_flags of_flags; + unsigned long flags; u32 eeprom_len; int ret; @@ -766,6 +771,19 @@ static int dsa_of_probe(struct device *dev) put_device(cd->host_dev); cd->host_dev = &mdio_bus_switch->dev; } + gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, + &of_flags); + if (gpio_is_valid(gpio)) { + flags = (of_flags == OF_GPIO_ACTIVE_LOW ? + GPIOF_ACTIVE_LOW : 0); + ret = devm_gpio_request_one(dev, gpio, flags, + "switch_reset"); + if (ret) + goto out_free_chip; + + cd->reset = gpio_to_desc(gpio); + gpiod_direction_output(cd->reset, 0); + } for_each_available_child_of_node(child, port) { port_reg = of_get_property(port, "reg", NULL); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e86e8a9738ea..67f7c9de0b16 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1239,13 +1239,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - pr_notice(","); + pr_cont(","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - pr_notice(" OK\n"); + pr_cont(" OK\n"); break; } @@ -1253,7 +1253,7 @@ static int __init ic_dynamic(void) continue; if (! --retries) { - pr_notice(" timed out!\n"); + pr_cont(" timed out!\n"); break; } @@ -1263,7 +1263,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - pr_notice("."); + pr_cont("."); } #ifdef IPCONFIG_BOOTP @@ -1280,11 +1280,10 @@ static int __init ic_dynamic(void) return -1; } - pr_info("IP-Config: Got %s answer from %pI4, ", + pr_info("IP-Config: Got %s answer from %pI4, my address is %pI4\n", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_addrservaddr); - pr_info("my address is %pI4\n", &ic_myaddr); + &ic_addrservaddr, &ic_myaddr); return 0; } @@ -1527,14 +1526,14 @@ static int __init ip_auto_config(void) pr_cont(", mtu=%d", ic_dev_mtu); for (i = 0; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) { - pr_info(" nameserver%u=%pI4", + pr_cont(" nameserver%u=%pI4", i, &ic_nameservers[i]); break; } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_info(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_info("\n"); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92dd4b74d513..a2d248d9c35c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,10 +67,6 @@ #include <net/fib_rules.h> #include <linux/netconf.h> -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) -#define CONFIG_IP_PIMSM 1 -#endif - struct mr_table { struct list_head list; possible_net_t net; @@ -84,9 +80,7 @@ struct mr_table { atomic_t cache_resolve_queue_len; bool mroute_do_assert; bool mroute_do_pim; -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) int mroute_reg_vif_num; -#endif }; struct ipmr_rule { @@ -97,15 +91,18 @@ struct ipmr_result { struct mr_table *mrt; }; +static inline bool pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ static DEFINE_RWLOCK(mrt_lock); -/* - * Multicast router control variables - */ +/* Multicast router control variables */ #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -252,8 +249,8 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - if (!mrt) { - err = -ENOMEM; + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); goto err1; } @@ -301,8 +298,13 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, static int __net_init ipmr_rules_init(struct net *net) { - net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - return net->ipv4.mrt ? 0 : -ENOMEM; + struct mr_table *mrt; + + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) + return PTR_ERR(mrt); + net->ipv4.mrt = mrt; + return 0; } static void __net_exit ipmr_rules_exit(struct net *net) @@ -319,13 +321,17 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) struct mr_table *mrt; unsigned int i; + /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (id != RT_TABLE_DEFAULT && id >= 1000000000) + return ERR_PTR(-EINVAL); + mrt = ipmr_get_table(net, id); if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); if (!mrt) - return NULL; + return ERR_PTR(-ENOMEM); write_pnet(&mrt->net, net); mrt->id = id; @@ -338,9 +344,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, (unsigned long)mrt); -#ifdef CONFIG_IP_PIMSM mrt->mroute_reg_vif_num = -1; -#endif #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); #endif @@ -387,8 +391,24 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } } -static -struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) +/* Initialize ipmr pimreg/tunnel in_device */ +static bool ipmr_init_vif_indev(const struct net_device *dev) +{ + struct in_device *in_dev; + + ASSERT_RTNL(); + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return false; + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; + + return true; +} + +static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; @@ -399,7 +419,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) int err; struct ifreq ifr; struct ip_tunnel_parm p; - struct in_device *in_dev; memset(&p, 0, sizeof(p)); p.iph.daddr = v->vifc_rmt_addr.s_addr; @@ -424,15 +443,8 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) + if (!ipmr_init_vif_indev(dev)) goto failure; - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - if (dev_open(dev)) goto failure; dev_hold(dev); @@ -449,8 +461,7 @@ failure: return NULL; } -#ifdef CONFIG_IP_PIMSM - +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); @@ -500,7 +511,6 @@ static void reg_vif_setup(struct net_device *dev) static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; - struct in_device *in_dev; char name[IFNAMSIZ]; if (mrt->id == RT_TABLE_DEFAULT) @@ -520,18 +530,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) return NULL; } - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) { - rcu_read_unlock(); + if (!ipmr_init_vif_indev(dev)) goto failure; - } - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - rcu_read_unlock(); - if (dev_open(dev)) goto failure; @@ -547,13 +547,56 @@ failure: unregister_netdevice(dev); return NULL; } + +/* called with rcu_read_lock() */ +static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, + unsigned int pimlen) +{ + struct net_device *reg_dev = NULL; + struct iphdr *encap; + + encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); + /* Check that: + * a. packet is really sent to a multicast group + * b. packet is not a NULL-REGISTER + * c. packet is not truncated + */ + if (!ipv4_is_multicast(encap->daddr) || + encap->tot_len == 0 || + ntohs(encap->tot_len) + pimlen > skb->len) + return 1; + + read_lock(&mrt_lock); + if (mrt->mroute_reg_vif_num >= 0) + reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; + read_unlock(&mrt_lock); + + if (!reg_dev) + return 1; + + skb->mac_header = skb->network_header; + skb_pull(skb, (u8 *)encap - skb->data); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + skb->ip_summed = CHECKSUM_NONE; + + skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); + + netif_rx(skb); + + return NET_RX_SUCCESS; +} +#else +static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) +{ + return NULL; +} #endif /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call */ - static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { @@ -575,10 +618,8 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, return -EADDRNOTAVAIL; } -#ifdef CONFIG_IP_PIMSM if (vifi == mrt->mroute_reg_vif_num) mrt->mroute_reg_vif_num = -1; -#endif if (vifi + 1 == mrt->maxvif) { int tmp; @@ -625,7 +666,6 @@ static inline void ipmr_cache_free(struct mfc_cache *c) /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. */ - static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { struct net *net = read_pnet(&mrt->net); @@ -653,9 +693,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) ipmr_cache_free(c); } - /* Timer process for the unresolved queue. */ - static void ipmr_expire_process(unsigned long arg) { struct mr_table *mrt = (struct mr_table *)arg; @@ -695,7 +733,6 @@ out: } /* Fill oifs list. It is called under write locked mrt_lock. */ - static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, unsigned char *ttls) { @@ -731,10 +768,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRINUSE; switch (vifc->vifc_flags) { -#ifdef CONFIG_IP_PIMSM case VIFF_REGISTER: - /* - * Special Purpose VIF in PIM + if (!pimsm_enabled()) + return -EINVAL; + /* Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) @@ -749,7 +786,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; -#endif case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); if (!dev) @@ -761,7 +797,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; - case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { @@ -815,10 +850,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); v->dev = dev; -#ifdef CONFIG_IP_PIMSM if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; -#endif if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); @@ -883,9 +916,7 @@ skip: return ipmr_cache_find_any_parent(mrt, vifi); } -/* - * Allocate a multicast cache entry - */ +/* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); @@ -906,10 +937,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) return c; } -/* - * A cache entry has gone into a resolved state from queued - */ - +/* A cache entry has gone into a resolved state from queued */ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c) { @@ -917,7 +945,6 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct nlmsgerr *e; /* Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); @@ -941,34 +968,29 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, } } -/* - * Bounce a cache query up to mrouted. We could use netlink for this but mrouted - * expects the following bizarre scheme. +/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted + * expects the following bizarre scheme. * - * Called under mrt_lock. + * Called under mrt_lock. */ - static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { - struct sk_buff *skb; const int ihl = ip_hdrlen(pkt); + struct sock *mroute_sk; struct igmphdr *igmp; struct igmpmsg *msg; - struct sock *mroute_sk; + struct sk_buff *skb; int ret; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else -#endif skb = alloc_skb(128, GFP_ATOMIC); if (!skb) return -ENOBUFS; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. @@ -986,28 +1008,23 @@ static int ipmr_cache_report(struct mr_table *mrt, ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); - } else -#endif - { - - /* Copy the IP header */ - - skb_set_network_header(skb, skb->len); - skb_put(skb, ihl); - skb_copy_to_linear_data(skb, pkt->data, ihl); - ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ - msg = (struct igmpmsg *)skb_network_header(skb); - msg->im_vif = vifi; - skb_dst_set(skb, dst_clone(skb_dst(pkt))); - - /* Add our header */ - - igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - igmp->type = - msg->im_msgtype = assert; - igmp->code = 0; - ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ - skb->transport_header = skb->network_header; + } else { + /* Copy the IP header */ + skb_set_network_header(skb, skb->len); + skb_put(skb, ihl); + skb_copy_to_linear_data(skb, pkt->data, ihl); + /* Flag to the kernel this is a route add */ + ip_hdr(skb)->protocol = 0; + msg = (struct igmpmsg *)skb_network_header(skb); + msg->im_vif = vifi; + skb_dst_set(skb, dst_clone(skb_dst(pkt))); + /* Add our header */ + igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + igmp->type = assert; + msg->im_msgtype = assert; + igmp->code = 0; + ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ + skb->transport_header = skb->network_header; } rcu_read_lock(); @@ -1019,7 +1036,6 @@ static int ipmr_cache_report(struct mr_table *mrt, } /* Deliver to mrouted */ - ret = sock_queue_rcv_skb(mroute_sk, skb); rcu_read_unlock(); if (ret < 0) { @@ -1030,12 +1046,9 @@ static int ipmr_cache_report(struct mr_table *mrt, return ret; } -/* - * Queue a packet for resolution. It gets locked cache entry! - */ - -static int -ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) +/* Queue a packet for resolution. It gets locked cache entry! */ +static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, + struct sk_buff *skb) { bool found = false; int err; @@ -1053,7 +1066,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) if (!found) { /* Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -1063,13 +1075,11 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* Fill in the new cache entry */ - c->mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ - err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry @@ -1091,7 +1101,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* See if we can append the packet */ - if (c->mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; @@ -1104,9 +1113,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) return err; } -/* - * MFC cache manipulation by user space mroute daemon - */ +/* MFC cache manipulation by user space mroute daemon */ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { @@ -1177,9 +1184,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); - /* - * Check to see if we resolved a queued list. If so we - * need to send on the frames and tidy up. + /* Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); @@ -1204,10 +1210,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } -/* - * Close the multicast socket, and clear the vif tables etc - */ - +/* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt) { int i; @@ -1215,7 +1218,6 @@ static void mroute_clean_tables(struct mr_table *mrt) struct mfc_cache *c, *next; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { if (!(mrt->vif_table[i].flags & VIFF_STATIC)) vif_delete(mrt, i, 0, &list); @@ -1223,7 +1225,6 @@ static void mroute_clean_tables(struct mr_table *mrt) unregister_netdevice_many(&list); /* Wipe the cache */ - for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { if (c->mfc_flags & MFC_STATIC) @@ -1267,45 +1268,51 @@ static void mrtsock_destruct(struct sock *sk) rtnl_unlock(); } -/* - * Socket options and virtual interface manipulation. The whole - * virtual interface system is a complete heap, but unfortunately - * that's how BSD mrouted happens to think. Maybe one day with a proper - * MOSPF/PIM router set up we can clean this up. +/* Socket options and virtual interface manipulation. The whole + * virtual interface system is a complete heap, but unfortunately + * that's how BSD mrouted happens to think. Maybe one day with a proper + * MOSPF/PIM router set up we can clean this up. */ -int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) { - int ret, parent = 0; - struct vifctl vif; - struct mfcctl mfc; struct net *net = sock_net(sk); + int val, ret = 0, parent = 0; struct mr_table *mrt; + struct vifctl vif; + struct mfcctl mfc; + u32 uval; + /* There's one exception to the lock - MRT_DONE which needs to unlock */ + rtnl_lock(); if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_IGMP) - return -EOPNOTSUPP; + inet_sk(sk)->inet_num != IPPROTO_IGMP) { + ret = -EOPNOTSUPP; + goto out_unlock; + } mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (!mrt) - return -ENOENT; - + if (!mrt) { + ret = -ENOENT; + goto out_unlock; + } if (optname != MRT_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && - !ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EACCES; + !ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EACCES; + goto out_unlock; + } } switch (optname) { case MRT_INIT: if (optlen != sizeof(int)) - return -EINVAL; - - rtnl_lock(); - if (rtnl_dereference(mrt->mroute_sk)) { - rtnl_unlock(); - return -EADDRINUSE; - } + ret = -EINVAL; + if (rtnl_dereference(mrt->mroute_sk)) + ret = -EADDRINUSE; + if (ret) + break; ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { @@ -1315,129 +1322,133 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } - rtnl_unlock(); - return ret; + break; case MRT_DONE: - if (sk != rcu_access_pointer(mrt->mroute_sk)) - return -EACCES; - return ip_ra_control(sk, 0, NULL); + if (sk != rcu_access_pointer(mrt->mroute_sk)) { + ret = -EACCES; + } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. + */ + rtnl_unlock(); + ret = ip_ra_control(sk, 0, NULL); + goto out; + } + break; case MRT_ADD_VIF: case MRT_DEL_VIF: - if (optlen != sizeof(vif)) - return -EINVAL; - if (copy_from_user(&vif, optval, sizeof(vif))) - return -EFAULT; - if (vif.vifc_vifi >= MAXVIFS) - return -ENFILE; - rtnl_lock(); + if (optlen != sizeof(vif)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&vif, optval, sizeof(vif))) { + ret = -EFAULT; + break; + } + if (vif.vifc_vifi >= MAXVIFS) { + ret = -ENFILE; + break; + } if (optname == MRT_ADD_VIF) { ret = vif_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); } else { ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } - rtnl_unlock(); - return ret; - - /* - * Manipulate the forwarding caches. These live - * in a sort of kernel/user symbiosis. - */ + break; + /* Manipulate the forwarding caches. These live + * in a sort of kernel/user symbiosis. + */ case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: - if (optlen != sizeof(mfc)) - return -EINVAL; - if (copy_from_user(&mfc, optval, sizeof(mfc))) - return -EFAULT; + if (optlen != sizeof(mfc)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&mfc, optval, sizeof(mfc))) { + ret = -EFAULT; + break; + } if (parent == 0) parent = mfc.mfcc_parent; - rtnl_lock(); if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); - rtnl_unlock(); - return ret; - /* - * Control PIM assert. - */ + break; + /* Control PIM assert. */ case MRT_ASSERT: - { - int v; - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - mrt->mroute_do_assert = v; - return 0; - } -#ifdef CONFIG_IP_PIMSM + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } + mrt->mroute_do_assert = val; + break; case MRT_PIM: - { - int v; - - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - v = !!v; + if (!pimsm_enabled()) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; - if (v != mrt->mroute_do_pim) { - mrt->mroute_do_pim = v; - mrt->mroute_do_assert = v; + val = !!val; + if (val != mrt->mroute_do_pim) { + mrt->mroute_do_pim = val; + mrt->mroute_do_assert = val; } - rtnl_unlock(); - return ret; - } -#endif -#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + break; case MRT_TABLE: - { - u32 v; - - if (optlen != sizeof(u32)) - return -EINVAL; - if (get_user(v, (u32 __user *)optval)) - return -EFAULT; - - /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ - if (v != RT_TABLE_DEFAULT && v >= 1000000000) - return -EINVAL; + if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(uval)) { + ret = -EINVAL; + break; + } + if (get_user(uval, (u32 __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - if (!ipmr_new_table(net, v)) - ret = -ENOMEM; + mrt = ipmr_new_table(net, uval); + if (IS_ERR(mrt)) + ret = PTR_ERR(mrt); else - raw_sk(sk)->ipmr_table = v; + raw_sk(sk)->ipmr_table = uval; } - rtnl_unlock(); - return ret; - } -#endif - /* - * Spurious command, or MRT_VERSION which you cannot - * set. - */ + break; + /* Spurious command, or MRT_VERSION which you cannot set. */ default: - return -ENOPROTOOPT; + ret = -ENOPROTOOPT; } +out_unlock: + rtnl_unlock(); +out: + return ret; } -/* - * Getsock opt support for the multicast routing system. - */ - +/* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int olr; @@ -1453,39 +1464,35 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int if (!mrt) return -ENOENT; - if (optname != MRT_VERSION && -#ifdef CONFIG_IP_PIMSM - optname != MRT_PIM && -#endif - optname != MRT_ASSERT) + switch (optname) { + case MRT_VERSION: + val = 0x0305; + break; + case MRT_PIM: + if (!pimsm_enabled()) + return -ENOPROTOOPT; + val = mrt->mroute_do_pim; + break; + case MRT_ASSERT: + val = mrt->mroute_do_assert; + break; + default: return -ENOPROTOOPT; + } if (get_user(olr, optlen)) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; - if (put_user(olr, optlen)) return -EFAULT; - if (optname == MRT_VERSION) - val = 0x0305; -#ifdef CONFIG_IP_PIMSM - else if (optname == MRT_PIM) - val = mrt->mroute_do_pim; -#endif - else - val = mrt->mroute_do_assert; if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; } -/* - * The IP multicast ioctl support routines. - */ - +/* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { struct sioc_sg_req sr; @@ -1618,7 +1625,6 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif - static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -1640,17 +1646,14 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v return NOTIFY_DONE; } - static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; -/* - * Encapsulate a packet by attaching a valid IPIP header to it. - * This avoids tunnel drivers and other mess and gives us the speed so - * important for multicast video. +/* Encapsulate a packet by attaching a valid IPIP header to it. + * This avoids tunnel drivers and other mess and gives us the speed so + * important for multicast video. */ - static void ip_encap(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { @@ -1692,9 +1695,7 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -/* - * Processing handlers for ipmr_forward - */ +/* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, int vifi) @@ -1709,7 +1710,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (!vif->dev) goto out_free; -#ifdef CONFIG_IP_PIMSM if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; @@ -1718,7 +1718,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } -#endif if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, @@ -1745,7 +1744,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * allow to send ICMP, so that packets will disappear * to blackhole. */ - IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; @@ -1777,8 +1775,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, IPCB(skb)->flags |= IPSKB_FORWARDED; - /* - * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface @@ -1809,7 +1806,6 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ - static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local) @@ -1834,9 +1830,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, goto forward; } - /* - * Wrong interface: drop packet and (maybe) send PIM assert. - */ + /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. @@ -1875,9 +1869,7 @@ forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; - /* - * Forward the frame - */ + /* Forward the frame */ if (cache->mfc_origin == htonl(INADDR_ANY) && cache->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && @@ -1951,11 +1943,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) return mrt; } -/* - * Multicast packets for forwarding arrive here - * Called with rcu_read_lock(); +/* Multicast packets for forwarding arrive here + * Called with rcu_read_lock(); */ - int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; @@ -2006,9 +1996,7 @@ int ip_mr_input(struct sk_buff *skb) vif); } - /* - * No usable cache entry - */ + /* No usable cache entry */ if (!cache) { int vif; @@ -2049,53 +2037,8 @@ dont_forward: return 0; } -#ifdef CONFIG_IP_PIMSM -/* called with rcu_read_lock() */ -static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, - unsigned int pimlen) -{ - struct net_device *reg_dev = NULL; - struct iphdr *encap; - - encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); - /* - * Check that: - * a. packet is really sent to a multicast group - * b. packet is not a NULL-REGISTER - * c. packet is not truncated - */ - if (!ipv4_is_multicast(encap->daddr) || - encap->tot_len == 0 || - ntohs(encap->tot_len) + pimlen > skb->len) - return 1; - - read_lock(&mrt_lock); - if (mrt->mroute_reg_vif_num >= 0) - reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; - read_unlock(&mrt_lock); - - if (!reg_dev) - return 1; - - skb->mac_header = skb->network_header; - skb_pull(skb, (u8 *)encap - skb->data); - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->ip_summed = CHECKSUM_NONE; - - skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); - - netif_rx(skb); - - return NET_RX_SUCCESS; -} -#endif - #ifdef CONFIG_IP_PIMSM_V1 -/* - * Handle IGMP messages of PIMv1 - */ - +/* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) { struct igmphdr *pim; @@ -2420,9 +2363,8 @@ done: } #ifdef CONFIG_PROC_FS -/* - * The /proc interfaces to multicast routing : - * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif +/* The /proc interfaces to multicast routing : + * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ struct ipmr_vif_iter { struct seq_net_private p; @@ -2706,10 +2648,7 @@ static const struct net_protocol pim_protocol = { }; #endif - -/* - * Setup for IP multicast routing - */ +/* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; @@ -2759,8 +2698,6 @@ int __init ip_mr_init(void) sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - if (!mrt_cachep) - return -ENOMEM; err = register_pernet_subsys(&ipmr_net_ops); if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f01fe122abd..89758be9c6a6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -62,6 +62,7 @@ #include <net/lwtunnel.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include <trace/events/fib6.h> #include <asm/uaccess.h> @@ -865,6 +866,9 @@ restart: } dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + return rt; } @@ -1078,6 +1082,8 @@ redo_rt6_select: read_unlock_bh(&table->tb6_lock); rt6_dst_from_metrics_check(rt); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { @@ -1101,6 +1107,8 @@ redo_rt6_select: uncached_rt = net->ipv6.ip6_null_entry; dst_hold(&uncached_rt->dst); + + trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); return uncached_rt; } else { @@ -1125,6 +1133,7 @@ redo_rt6_select: dst_release(&rt->dst); } + trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); return pcpu_rt; } @@ -1474,6 +1483,7 @@ out: read_unlock_bh(&table->tb6_lock); + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; }; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9dc239dfe192..e401108360a2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -332,131 +332,15 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) tipc_sk_rcv(net, inputq); } -static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, - struct tipc_stats *stats) -{ - int i; - struct nlattr *nest; - - struct nla_map { - __u32 key; - __u32 val; - }; - - struct nla_map map[] = { - {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, - {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, - {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, - {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, - {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, - {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, - {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, - {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, - {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, - {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, - {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, - {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, - {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, - {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, - {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, - {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, - {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, - {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, - {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? - (stats->accu_queue_sz / stats->queue_sz_counts) : 0} - }; - - nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); - if (!nest) - return -EMSGSIZE; - - for (i = 0; i < ARRAY_SIZE(map); i++) - if (nla_put_u32(skb, map[i].key, map[i].val)) - goto msg_full; - - nla_nest_end(skb, nest); - - return 0; -msg_full: - nla_nest_cancel(skb, nest); - - return -EMSGSIZE; -} - -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) -{ - int err; - void *hdr; - struct nlattr *attrs; - struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - - if (!bcl) - return 0; - - tipc_bcast_lock(net); - - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_LINK_GET); - if (!hdr) - return -EMSGSIZE; - - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); - if (!attrs) - goto msg_full; - - /* The broadcast link is always up */ - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) - goto attr_msg_full; - - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) - goto attr_msg_full; - if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) - goto attr_msg_full; - - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); - if (!prop) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) - goto prop_msg_full; - nla_nest_end(msg->skb, prop); - - err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); - if (err) - goto attr_msg_full; - - tipc_bcast_unlock(net); - nla_nest_end(msg->skb, attrs); - genlmsg_end(msg->skb, hdr); - - return 0; - -prop_msg_full: - nla_nest_cancel(msg->skb, prop); -attr_msg_full: - nla_nest_cancel(msg->skb, attrs); -msg_full: - tipc_bcast_unlock(net); - genlmsg_cancel(msg->skb, hdr); - - return -EMSGSIZE; -} - int tipc_bclink_reset_stats(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_link *l = tipc_bc_sndlink(net); - if (!bcl) + if (!l) return -ENOPROTOOPT; tipc_bcast_lock(net); - memset(&bcl->stats, 0, sizeof(bcl->stats)); + tipc_link_reset_stats(l); tipc_bcast_unlock(net); return 0; } @@ -530,9 +414,7 @@ enomem: void tipc_bcast_reinit(struct net *net) { - struct tipc_bc_base *b = tipc_bc_base(net); - - msg_set_prevnode(b->link->pmsg, tipc_own_addr(net)); + tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net)); } void tipc_bcast_stop(struct net *net) diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 2855b9356a15..1944c6c00bb9 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -43,6 +43,7 @@ struct tipc_node; struct tipc_msg; struct tipc_nl_msg; struct tipc_node_map; +extern const char tipc_bclink_name[]; int tipc_bcast_init(struct net *net); void tipc_bcast_reinit(struct net *net); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 648f2a67f314..802ffad3200d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -71,7 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr); +static void bearer_disable(struct net *net, struct tipc_bearer *b); /** * tipc_media_find - locates specified media object by name @@ -107,13 +107,13 @@ static struct tipc_media *media_find_id(u8 type) void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; - struct tipc_media *m_ptr; + struct tipc_media *m; int ret; - m_ptr = media_find_id(a->media_id); + m = media_find_id(a->media_id); - if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) + ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; @@ -175,13 +175,13 @@ static int bearer_name_validate(const char *name, struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr && (!strcmp(b_ptr->name, name))) - return b_ptr; + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (!strcmp(b->name, name))) + return b; } return NULL; } @@ -189,24 +189,24 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_add_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_add_dest(b->link_req); rcu_read_unlock(); } void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_remove_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_remove_dest(b->link_req); rcu_read_unlock(); } @@ -218,8 +218,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; + struct tipc_bearer *b; + struct tipc_media *m; struct tipc_bearer_names b_names; char addr_string[16]; u32 bearer_id; @@ -255,31 +255,31 @@ static int tipc_enable_bearer(struct net *net, const char *name, return -EINVAL; } - m_ptr = tipc_media_find(b_names.media_name); - if (!m_ptr) { + m = tipc_media_find(b_names.media_name); + if (!m) { pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) - priority = m_ptr->priority; + priority = m->priority; restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (!b_ptr) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { bearer_id = i; continue; } - if (!strcmp(name, b_ptr->name)) { + if (!strcmp(name, b->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); return -EINVAL; } - if ((b_ptr->priority == priority) && + if ((b->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", @@ -297,35 +297,35 @@ restart: return -EINVAL; } - b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) + b = kzalloc(sizeof(*b), GFP_ATOMIC); + if (!b) return -ENOMEM; - strcpy(b_ptr->name, name); - b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr, attr); + strcpy(b->name, name); + b->media = m; + res = m->enable_media(net, b, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); return -EINVAL; } - b_ptr->identity = bearer_id; - b_ptr->tolerance = m_ptr->tolerance; - b_ptr->window = m_ptr->window; - b_ptr->domain = disc_domain; - b_ptr->net_plane = bearer_id + 'A'; - b_ptr->priority = priority; + b->identity = bearer_id; + b->tolerance = m->tolerance; + b->window = m->window; + b->domain = disc_domain; + b->net_plane = bearer_id + 'A'; + b->priority = priority; - res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b, &b->bcast_addr); if (res) { - bearer_disable(net, b_ptr); + bearer_disable(net, b); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; } - rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -336,11 +336,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { - pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_node_delete_links(net, b_ptr->identity); - tipc_disc_reset(net, b_ptr); + pr_info("Resetting bearer <%s>\n", b->name); + tipc_node_delete_links(net, b->identity); + tipc_disc_reset(net, b); return 0; } @@ -349,26 +349,26 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. */ -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) +static void bearer_disable(struct net *net, struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; - pr_info("Disabling bearer <%s>\n", b_ptr->name); - b_ptr->media->disable_media(b_ptr); + pr_info("Disabling bearer <%s>\n", b->name); + b->media->disable_media(b); - tipc_node_delete_links(net, b_ptr->identity); - RCU_INIT_POINTER(b_ptr->media_ptr, NULL); - if (b_ptr->link_req) - tipc_disc_delete(b_ptr->link_req); + tipc_node_delete_links(net, b->identity); + RCU_INIT_POINTER(b->media_ptr, NULL); + if (b->link_req) + tipc_disc_delete(b->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + if (b == rtnl_dereference(tn->bearer_list[i])) { RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } - kfree_rcu(b_ptr, rcu); + kfree_rcu(b, rcu); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, @@ -411,7 +411,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @buf: the packet to be sent - * @b_ptr: the bearer through which the packet is to be sent + * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, @@ -532,14 +532,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b_ptr)) { + b = rcu_dereference_rtnl(dev->tipc_ptr); + if (likely(b)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(dev_net(dev), buf, b_ptr); + tipc_rcv(dev_net(dev), buf, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -564,13 +564,13 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; - b_ptr = rtnl_dereference(dev->tipc_ptr); - if (!b_ptr) + b = rtnl_dereference(dev->tipc_ptr); + if (!b) return NOTIFY_DONE; - b_ptr->mtu = dev->mtu; + b->mtu = dev->mtu; switch (evt) { case NETDEV_CHANGE: @@ -578,16 +578,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_GOING_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: - b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(dev_net(dev), b_ptr); + bearer_disable(dev_net(dev), b); break; } return NOTIFY_OK; @@ -623,13 +623,13 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr) { - bearer_disable(net, b_ptr); + b = rtnl_dereference(tn->bearer_list[i]); + if (b) { + bearer_disable(net, b); tn->bearer_list[i] = NULL; } } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 552185bc4773..e31820516774 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -103,11 +103,11 @@ struct tipc_bearer; */ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, - struct tipc_bearer *b_ptr, + struct tipc_bearer *b, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + int (*enable_media)(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]); - void (*disable_media)(struct tipc_bearer *b_ptr); + void (*disable_media)(struct tipc_bearer *b); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, int bufsz); @@ -176,7 +176,7 @@ struct tipc_bearer_names { * TIPC routines available to supported media types */ -void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b); /* * Routines made available to TIPC by supported media types diff --git a/net/tipc/core.h b/net/tipc/core.h index 18e95a8020cd..5504d63503df 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -118,6 +118,11 @@ static inline int tipc_netid(struct net *net) return tipc_net(net)->net_id; } +static inline struct list_head *tipc_nodes(struct net *net) +{ + return &tipc_net(net)->node_list; +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index afe8c47c4085..f1e738e80535 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -75,14 +75,14 @@ struct tipc_link_req { * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace * @type: message type (request or response) - * @b_ptr: ptr to bearer issuing message + * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, - struct tipc_bearer *b_ptr) + struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; - u32 dest_domain = b_ptr->domain; + u32 dest_domain = b->domain; msg = buf_msg(buf); tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, @@ -92,16 +92,16 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); - b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); + b->media->addr2msg(msg_media_addr(msg), &b->addr); } /** * disc_dupl_alert - issue node address duplication alert - * @b_ptr: pointer to bearer detecting duplication + * @b: pointer to bearer detecting duplication * @node_addr: duplicated node address * @media_addr: media address advertised by duplicated node */ -static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, +static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, struct tipc_media_addr *media_addr) { char node_addr_str[16]; @@ -111,7 +111,7 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), media_addr); pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str, - media_addr_str, b_ptr->name); + media_addr_str, b->name); } /** @@ -261,13 +261,13 @@ exit: /** * tipc_disc_create - create object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, +int tipc_disc_create(struct net *net, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -282,17 +282,17 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, return -ENOMEM; } - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); memcpy(&req->dest, dest, sizeof(*dest)); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); - b_ptr->link_req = req; + b->link_req = req; skb = skb_clone(req->buf, GFP_ATOMIC); if (skb) tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); @@ -313,19 +313,19 @@ void tipc_disc_delete(struct tipc_link_req *req) /** * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { - struct tipc_link_req *req = b_ptr->link_req; + struct tipc_link_req *req = b->link_req; struct sk_buff *skb; spin_lock_bh(&req->lock); - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); diff --git a/net/tipc/link.c b/net/tipc/link.c index 9efbdbde2b08..b11afe71dfc1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -45,28 +45,156 @@ #include <linux/pkt_sched.h> +struct tipc_stats { + u32 sent_info; /* used in counting # sent packets */ + u32 recv_info; /* used in counting # recv'd packets */ + u32 sent_states; + u32 recv_states; + u32 sent_probes; + u32 recv_probes; + u32 sent_nacks; + u32 recv_nacks; + u32 sent_acks; + u32 sent_bundled; + u32 sent_bundles; + u32 recv_bundled; + u32 recv_bundles; + u32 retransmitted; + u32 sent_fragmented; + u32 sent_fragments; + u32 recv_fragmented; + u32 recv_fragments; + u32 link_congs; /* # port sends blocked by congestion */ + u32 deferred_recv; + u32 duplicates; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. length profiling */ +}; + +/** + * struct tipc_link - TIPC link data structure + * @addr: network address of link's peer node + * @name: link name character string + * @media_addr: media address to use when sending messages over link + * @timer: link timer + * @net: pointer to namespace struct + * @refcnt: reference counter for permanent references (owner node & timer) + * @peer_session: link session # being used by peer end of link + * @peer_bearer_id: bearer id used by link's peer endpoint + * @bearer_id: local bearer id used by link + * @tolerance: minimum link continuity loss needed to reset link [in ms] + * @keepalive_intv: link keepalive timer interval + * @abort_limit: # of unacknowledged continuity probes needed to reset link + * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node + * @silent_intv_cnt: # of timer intervals without any reception from peer + * @proto_msg: template for control messages generated by link + * @pmsg: convenience pointer to "proto_msg" field + * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) + * @exp_msg_count: # of tunnelled messages expected during link changeover + * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent + * @snt_nxt: next sequence number to use for outbound messages + * @last_retransmitted: sequence number of most recently retransmitted message + * @stale_count: # of identical retransmit requests made by peer + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. + * @rcv_nxt: next sequence number to expect for inbound messages + * @deferred_queue: deferred queue saved OOS b'cast message received from node + * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards + * @next_out: ptr to first unsent outbound message in queue + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate + * @long_msg_seq_no: next identifier to use for outbound fragmented messages + * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvr: marks that this is a broadcast receiver link + * @stats: collects statistics regarding link activity + */ +struct tipc_link { + u32 addr; + char name[TIPC_MAX_LINK_NAME]; + struct tipc_media_addr *media_addr; + struct net *net; + + /* Management and link supervision data */ + u32 peer_session; + u32 peer_bearer_id; + u32 bearer_id; + u32 tolerance; + unsigned long keepalive_intv; + u32 abort_limit; + u32 state; + u16 peer_caps; + bool active; + u32 silent_intv_cnt; + struct { + unchar hdr[INT_H_SIZE]; + unchar body[TIPC_MAX_IF_NAME]; + } proto_msg; + struct tipc_msg *pmsg; + u32 priority; + char net_plane; + + /* Failover/synch */ + u16 drop_point; + struct sk_buff *failover_reasm_skb; + + /* Max packet negotiation */ + u16 mtu; + u16 advertised_mtu; + + /* Sending */ + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; + u16 snd_nxt; + u16 last_retransm; + u16 window; + u32 stale_count; + + /* Reception */ + u16 rcv_nxt; + u32 rcv_unacked; + struct sk_buff_head deferdq; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; + + /* Congestion handling */ + struct sk_buff_head wakeupq; + + /* Fragmentation/reassembly */ + struct sk_buff *reasm_buf; + + /* Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + int nack_state; + bool bc_peer_is_up; + + /* Statistics */ + struct tipc_stats stats; +}; + /* * Error message prefixes */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; -static const char tipc_bclink_name[] = "broadcast-link"; - -static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { - [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { - .type = NLA_STRING, - .len = TIPC_MAX_LINK_NAME - }, - [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } -}; /* Properties valid for media, bearar and link */ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { @@ -117,8 +245,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); -static void link_reset_statistics(struct tipc_link *l_ptr); -static void link_print(struct tipc_link *l_ptr, const char *str); +static void link_print(struct tipc_link *l, const char *str); static void tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, @@ -183,6 +310,36 @@ void tipc_link_set_active(struct tipc_link *l, bool active) l->active = active; } +u32 tipc_link_id(struct tipc_link *l) +{ + return l->peer_bearer_id << 16 | l->bearer_id; +} + +int tipc_link_window(struct tipc_link *l) +{ + return l->window; +} + +int tipc_link_prio(struct tipc_link *l) +{ + return l->priority; +} + +unsigned long tipc_link_tolerance(struct tipc_link *l) +{ + return l->tolerance; +} + +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) +{ + return l->inputq; +} + +char tipc_link_plane(struct tipc_link *l) +{ + return l->net_plane; +} + void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq) @@ -225,11 +382,31 @@ int tipc_link_mtu(struct tipc_link *l) return l->mtu; } +u16 tipc_link_rcv_nxt(struct tipc_link *l) +{ + return l->rcv_nxt; +} + +u16 tipc_link_acked(struct tipc_link *l) +{ + return l->acked; +} + +char *tipc_link_name(struct tipc_link *l) +{ + return l->name; +} + static u32 link_own_addr(struct tipc_link *l) { return msg_prevnode(l->pmsg); } +void tipc_link_reinit(struct tipc_link *l, u32 addr) +{ + msg_set_prevnode(l->pmsg, addr); +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -692,7 +869,7 @@ void tipc_link_reset(struct tipc_link *l) l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; - link_reset_statistics(l); + tipc_link_reset_stats(l); } /** @@ -1085,8 +1262,9 @@ drop: /* * Send protocol message to the other endpoint. */ -void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority) +static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, + int probe_msg, u32 gap, u32 tolerance, + u32 priority) { struct sk_buff *skb = NULL; struct sk_buff_head xmitq; @@ -1260,6 +1438,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* fall thru' */ case ACTIVATE_MSG: + skb_linearize(skb); + hdr = buf_msg(skb); /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; @@ -1525,53 +1705,17 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } -/* tipc_link_find_owner - locate owner node of link by link's name - * @net: the applicable net namespace - * @name: pointer to link name string - * @bearer_id: pointer to index in 'node->links' array where the link was found. - * - * Returns pointer to node owning the link, or 0 if no matching link is found. - */ -static struct tipc_node *tipc_link_find_owner(struct net *net, - const char *link_name, - unsigned int *bearer_id) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - struct tipc_node *found_node = NULL; - int i; - - *bearer_id = 0; - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - l_ptr = n_ptr->links[i].link; - if (l_ptr && !strcmp(l_ptr->name, link_name)) { - *bearer_id = i; - found_node = n_ptr; - break; - } - } - tipc_node_unlock(n_ptr); - if (found_node) - break; - } - rcu_read_unlock(); - - return found_node; -} - /** - * link_reset_statistics - reset link statistics - * @l_ptr: pointer to link + * link_reset_stats - reset link statistics + * @l: pointer to link */ -static void link_reset_statistics(struct tipc_link *l_ptr) +void tipc_link_reset_stats(struct tipc_link *l) { - memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); - l_ptr->stats.sent_info = l_ptr->snd_nxt; - l_ptr->stats.recv_info = l_ptr->rcv_nxt; + memset(&l->stats, 0, sizeof(l->stats)); + if (!link_is_bc_sndlink(l)) { + l->stats.sent_info = l->snd_nxt; + l->stats.recv_info = l->rcv_nxt; + } } static void link_print(struct tipc_link *l, const char *str) @@ -1624,84 +1768,6 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) return 0; } -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) -{ - int err; - int res = 0; - int bearer_id; - char *name; - struct tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = sock_net(skb->sk); - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); - if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - - if (strcmp(name, tipc_bclink_name) == 0) - return tipc_nl_bc_link_set(net, attrs); - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - - tipc_node_lock(node); - - link = node->links[bearer_id].link; - if (!link) { - res = -EINVAL; - goto out; - } - - if (attrs[TIPC_NLA_LINK_PROP]) { - struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; - - err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], - props); - if (err) { - res = err; - goto out; - } - - if (props[TIPC_NLA_PROP_TOL]) { - u32 tol; - - tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - link->tolerance = tol; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); - } - if (props[TIPC_NLA_PROP_PRIO]) { - u32 prio; - - prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); - } - if (props[TIPC_NLA_PROP_WIN]) { - u32 win; - - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - tipc_link_set_queue_limits(link, win); - } - } - -out: - tipc_node_unlock(node); - - return res; -} - static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; @@ -1768,8 +1834,8 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, - struct tipc_link *link, int nlflags) +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags) { int err; void *hdr; @@ -1838,198 +1904,134 @@ msg_full: return -EMSGSIZE; } -/* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, - struct tipc_node *node, u32 *prev_link) +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) { - u32 i; - int err; - - for (i = *prev_link; i < MAX_BEARERS; i++) { - *prev_link = i; - - if (!node->links[i].link) - continue; + int i; + struct nlattr *nest; - err = __tipc_nl_add_link(net, msg, - node->links[i].link, NLM_F_MULTI); - if (err) - return err; - } - *prev_link = 0; + struct nla_map { + __u32 key; + __u32 val; + }; - return 0; -} + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? + (stats->accu_queue_sz / stats->queue_sz_counts) : 0} + }; -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node; - struct tipc_nl_msg msg; - u32 prev_node = cb->args[0]; - u32 prev_link = cb->args[1]; - int done = cb->args[2]; - int err; + nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!nest) + return -EMSGSIZE; - if (done) - return 0; + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; - msg.skb = skb; - msg.portid = NETLINK_CB(cb->skb).portid; - msg.seq = cb->nlh->nlmsg_seq; - - rcu_read_lock(); - if (prev_node) { - node = tipc_node_find(net, prev_node); - if (!node) { - /* We never set seq or call nl_dump_check_consistent() - * this means that setting prev_seq here will cause the - * consistence check to fail in the netlink callback - * handler. Resulting in the last NLMSG_DONE message - * having the NLM_F_DUMP_INTR flag set. - */ - cb->prev_seq = 1; - goto out; - } - tipc_node_put(node); - - list_for_each_entry_continue_rcu(node, &tn->node_list, - list) { - tipc_node_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } else { - err = tipc_nl_add_bc_link(net, &msg); - if (err) - goto out; - - list_for_each_entry_rcu(node, &tn->node_list, list) { - tipc_node_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } - done = 1; -out: - rcu_read_unlock(); + nla_nest_end(skb, nest); - cb->args[0] = prev_node; - cb->args[1] = prev_link; - cb->args[2] = done; + return 0; +msg_full: + nla_nest_cancel(skb, nest); - return skb->len; + return -EMSGSIZE; } -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) { - struct net *net = genl_info_net(info); - struct tipc_nl_msg msg; - char *name; int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; - msg.portid = info->snd_portid; - msg.seq = info->snd_seq; - - if (!info->attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + if (!bcl) + return 0; - msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!msg.skb) - return -ENOMEM; + tipc_bcast_lock(net); - if (strcmp(name, tipc_bclink_name) == 0) { - err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } else { - int bearer_id; - struct tipc_node *node; - struct tipc_link *link; + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; - tipc_node_lock(node); - link = node->links[bearer_id].link; - if (!link) { - tipc_node_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; - } + /* The broadcast link is always up */ + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; - err = __tipc_nl_add_link(net, &msg, link, 0); - tipc_node_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) + goto attr_msg_full; - return genlmsg_reply(msg.skb, info); -} + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); -int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) -{ - int err; - char *link_name; - unsigned int bearer_id; - struct tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = sock_net(skb->sk); - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); + err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + goto attr_msg_full; - if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(net); - if (err) - return err; - return 0; - } + tipc_bcast_unlock(net); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); - node = tipc_link_find_owner(net, link_name, &bearer_id); - if (!node) - return -EINVAL; + return 0; - tipc_node_lock(node); +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + tipc_bcast_unlock(net); + genlmsg_cancel(msg->skb, hdr); - link = node->links[bearer_id].link; - if (!link) { - tipc_node_unlock(node); - return -EINVAL; - } + return -EMSGSIZE; +} - link_reset_statistics(link); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol) +{ + l->tolerance = tol; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0); +} - tipc_node_unlock(node); +void tipc_link_set_prio(struct tipc_link *l, u32 prio) +{ + l->priority = prio; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio); +} - return 0; +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) +{ + l->abort_limit = limit; } diff --git a/net/tipc/link.h b/net/tipc/link.h index 66d859b66c84..b2ae0f4276af 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -45,10 +45,6 @@ */ #define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ -/* Out-of-range value for link sequence numbers - */ -#define INVALID_LINK_SEQ 0x10000 - /* Link FSM events: */ enum { @@ -75,151 +71,6 @@ enum { */ #define MAX_PKT_DEFAULT 1500 -struct tipc_stats { - u32 sent_info; /* used in counting # sent packets */ - u32 recv_info; /* used in counting # recv'd packets */ - u32 sent_states; - u32 recv_states; - u32 sent_probes; - u32 recv_probes; - u32 sent_nacks; - u32 recv_nacks; - u32 sent_acks; - u32 sent_bundled; - u32 sent_bundles; - u32 recv_bundled; - u32 recv_bundles; - u32 retransmitted; - u32 sent_fragmented; - u32 sent_fragments; - u32 recv_fragmented; - u32 recv_fragments; - u32 link_congs; /* # port sends blocked by congestion */ - u32 deferred_recv; - u32 duplicates; - u32 max_queue_sz; /* send queue size high water mark */ - u32 accu_queue_sz; /* used for send queue size profiling */ - u32 queue_sz_counts; /* used for send queue size profiling */ - u32 msg_length_counts; /* used for message length profiling */ - u32 msg_lengths_total; /* used for message length profiling */ - u32 msg_length_profile[7]; /* used for msg. length profiling */ -}; - -/** - * struct tipc_link - TIPC link data structure - * @addr: network address of link's peer node - * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer - * @net: pointer to namespace struct - * @refcnt: reference counter for permanent references (owner node & timer) - * @peer_session: link session # being used by peer end of link - * @peer_bearer_id: bearer id used by link's peer endpoint - * @bearer_id: local bearer id used by link - * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @keepalive_intv: link keepalive timer interval - * @abort_limit: # of unacknowledged continuity probes needed to reset link - * @state: current state of link FSM - * @peer_caps: bitmap describing capabilities of peer node - * @silent_intv_cnt: # of timer intervals without any reception from peer - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field - * @priority: current link priority - * @net_plane: current link network plane ('A' through 'H') - * @backlog_limit: backlog queue congestion thresholds (indexed by importance) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset - * @mtu: current maximum packet size for this link - * @advertised_mtu: advertised own mtu when link is being established - * @transmitq: queue for sent, non-acked messages - * @backlogq: queue for messages waiting to be sent - * @snt_nxt: next sequence number to use for outbound messages - * @last_retransmitted: sequence number of most recently retransmitted message - * @stale_count: # of identical retransmit requests made by peer - * @ackers: # of peers that needs to ack each packet before it can be released - * @acked: # last packet acked by a certain peer. Used for broadcast. - * @rcv_nxt: next sequence number to expect for inbound messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer - * @inputq: buffer queue for messages to be delivered upwards - * @namedq: buffer queue for name table messages to be delivered upwards - * @next_out: ptr to first unsent outbound message in queue - * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @reasm_buf: head of partially reassembled inbound message fragments - * @bc_rcvr: marks that this is a broadcast receiver link - * @stats: collects statistics regarding link activity - */ -struct tipc_link { - u32 addr; - char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr *media_addr; - struct net *net; - - /* Management and link supervision data */ - u32 peer_session; - u32 peer_bearer_id; - u32 bearer_id; - u32 tolerance; - unsigned long keepalive_intv; - u32 abort_limit; - u32 state; - u16 peer_caps; - bool active; - u32 silent_intv_cnt; - struct { - unchar hdr[INT_H_SIZE]; - unchar body[TIPC_MAX_IF_NAME]; - } proto_msg; - struct tipc_msg *pmsg; - u32 priority; - char net_plane; - - /* Failover/synch */ - u16 drop_point; - struct sk_buff *failover_reasm_skb; - - /* Max packet negotiation */ - u16 mtu; - u16 advertised_mtu; - - /* Sending */ - struct sk_buff_head transmq; - struct sk_buff_head backlogq; - struct { - u16 len; - u16 limit; - } backlog[5]; - u16 snd_nxt; - u16 last_retransm; - u16 window; - u32 stale_count; - - /* Reception */ - u16 rcv_nxt; - u32 rcv_unacked; - struct sk_buff_head deferdq; - struct sk_buff_head *inputq; - struct sk_buff_head *namedq; - - /* Congestion handling */ - struct sk_buff_head wakeupq; - - /* Fragmentation/reassembly */ - struct sk_buff *reasm_buf; - - /* Broadcast */ - u16 ackers; - u16 acked; - struct tipc_link *bc_rcvlink; - struct tipc_link *bc_sndlink; - int nack_state; - bool bc_peer_is_up; - - /* Statistics */ - struct tipc_stats stats; -}; - bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, int window, u32 session, u32 ownnode, u32 peer, @@ -235,11 +86,11 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, struct tipc_link **link); +void tipc_link_reinit(struct tipc_link *l, u32 addr); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); -void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); bool tipc_link_peer_is_down(struct tipc_link *l); bool tipc_link_is_reset(struct tipc_link *l); @@ -248,15 +99,25 @@ bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); -void tipc_link_reset(struct tipc_link *l_ptr); -int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, +void tipc_link_reset(struct tipc_link *l); +void tipc_link_reset_stats(struct tipc_link *l); +int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l); +u16 tipc_link_rcv_nxt(struct tipc_link *l); +u16 tipc_link_acked(struct tipc_link *l); +u32 tipc_link_id(struct tipc_link *l); +char *tipc_link_name(struct tipc_link *l); +char tipc_link_plane(struct tipc_link *l); +int tipc_link_prio(struct tipc_link *l); +int tipc_link_window(struct tipc_link *l); +unsigned long tipc_link_tolerance(struct tipc_link *l); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol); +void tipc_link_set_prio(struct tipc_link *l, u32 prio); +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); - -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); -int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c07612bab95c..ebe9d0ff6e9e 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -84,31 +84,6 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, return buf; } -void named_cluster_distribute(struct net *net, struct sk_buff *skb) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *oskb; - struct tipc_node *node; - u32 dnode; - - rcu_read_lock(); - list_for_each_entry_rcu(node, &tn->node_list, list) { - dnode = node->addr; - if (in_own_node(net, dnode)) - continue; - if (!tipc_node_is_up(node)) - continue; - oskb = pskb_copy(skb, GFP_ATOMIC); - if (!oskb) - break; - msg_set_destnode(buf_msg(oskb), dnode); - tipc_node_xmit_skb(net, oskb, dnode, 0); - } - rcu_read_unlock(); - - kfree_skb(skb); -} - /** * tipc_named_publish - tell other nodes about a new publication by this node */ @@ -226,42 +201,6 @@ void tipc_named_node_up(struct net *net, u32 dnode) tipc_node_xmit(net, &head, dnode, 0); } -static void tipc_publ_subscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - if (in_own_node(net, addr)) - return; - - node = tipc_node_find(net, addr); - if (!node) { - pr_warn("Node subscription rejected, unknown node 0x%x\n", - addr); - return; - } - - tipc_node_lock(node); - list_add_tail(&publ->nodesub_list, &node->publ_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - -static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - node = tipc_node_find(net, addr); - if (!node) - return; - - tipc_node_lock(node); - list_del_init(&publ->nodesub_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - /** * tipc_publ_purge - remove publication associated with a failed node * @@ -277,7 +216,7 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(net, p, addr); + tipc_node_unsubscribe(net, &p->nodesub_list, addr); spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { @@ -317,7 +256,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(net, publ, node); + tipc_node_subscribe(net, &publ->nodesub_list, node); return true; } } else if (dtype == WITHDRAWAL) { @@ -326,7 +265,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(net, publ, node); + tipc_node_unsubscribe(net, &publ->nodesub_list, node); kfree_rcu(publ, rcu); return true; } @@ -397,6 +336,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) spin_lock_bh(&tn->nametbl_lock); for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + skb_linearize(skb); msg = buf_msg(skb); mtype = msg_type(msg); item = (struct distr_item *)msg_data(msg); diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index dd2d9fd80da2..1264ba0af937 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -69,7 +69,6 @@ struct distr_item { struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); -void named_cluster_distribute(struct net *net, struct sk_buff *buf); void tipc_named_node_up(struct net *net, u32 dnode); void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); void tipc_named_reinit(struct net *net); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 0f47f08bf38f..91fce70291a8 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -42,6 +42,7 @@ #include "subscr.h" #include "bcast.h" #include "addr.h" +#include "node.h" #include <net/genetlink.h> #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -677,7 +678,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, spin_unlock_bh(&tn->nametbl_lock); if (buf) - named_cluster_distribute(net, buf); + tipc_node_broadcast(net, buf); return publ; } @@ -709,7 +710,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, spin_unlock_bh(&tn->nametbl_lock); if (skb) { - named_cluster_distribute(net, skb); + tipc_node_broadcast(net, skb); return 1; } return 0; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 7f6475efc984..8975b0135b76 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -101,18 +101,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_LINK_GET, - .doit = tipc_nl_link_get, - .dumpit = tipc_nl_link_dump, + .doit = tipc_nl_node_get_link, + .dumpit = tipc_nl_node_dump_link, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, - .doit = tipc_nl_link_set, + .doit = tipc_nl_node_set_link, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, - .doit = tipc_nl_link_reset_stats, + .doit = tipc_nl_node_reset_link_stats, .policy = tipc_nl_policy, }, { diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1eadc95e1132..2c016fdefe97 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1023,25 +1023,25 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) msg->req_type = TIPC_TLV_LINK_NAME; msg->rep_size = ULTRA_STRING_MAX_LEN; msg->rep_type = TIPC_TLV_ULTRA_STRING; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_stat_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_GET_LINKS: msg->req_type = TIPC_TLV_NET_ADDR; msg->rep_size = ULTRA_STRING_MAX_LEN; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_SET_LINK_TOL: case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: msg->req_type = TIPC_TLV_LINK_CONFIG; - doit.doit = tipc_nl_link_set; + doit.doit = tipc_nl_node_set_link; doit.transcode = tipc_nl_compat_link_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_RESET_LINK_STATS: msg->req_type = TIPC_TLV_LINK_NAME; - doit.doit = tipc_nl_link_reset_stats; + doit.doit = tipc_nl_node_reset_link_stats; doit.transcode = tipc_nl_compat_link_reset_stats; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SHOW_NAME_TABLE: diff --git a/net/tipc/node.c b/net/tipc/node.c index 20cddec0a43c..3f7a4ed71990 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,84 @@ #include "bcast.h" #include "discover.h" +#define INVALID_NODE_SIG 0x10000 + +/* Flags used to take different actions according to flag type + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type + */ +enum { + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7) +}; + +struct tipc_link_entry { + struct tipc_link *link; + spinlock_t lock; /* per link */ + u32 mtu; + struct sk_buff_head inputq; + struct tipc_media_addr maddr; +}; + +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + +/** + * struct tipc_node - TIPC node structure + * @addr: network address of node + * @ref: reference counter to node object + * @lock: rwlock governing access to structure + * @net: the applicable net namespace + * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @active_links: bearer ids of active links, used as index into links[] array + * @links: array containing references to all links to node + * @action_flags: bit mask of different types of node actions + * @state: connectivity state vs peer node + * @sync_point: sequence number where synch/failover is finished + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) + * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities + * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any + * @publ_list: list of publications + * @rcu: rcu struct for tipc_node + */ +struct tipc_node { + u32 addr; + struct kref kref; + rwlock_t lock; + struct net *net; + struct hlist_node hash; + int active_links[2]; + struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; + int action_flags; + struct list_head list; + int state; + u16 sync_point; + int link_cnt; + u16 working_links; + u16 capabilities; + u32 signature; + u32 link_id; + struct list_head publ_list; + struct list_head conn_sks; + unsigned long keepalive_intv; + struct timer_list timer; + struct rcu_head rcu; +}; + /* Node FSM states and events: */ enum { @@ -75,6 +153,9 @@ static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); +static struct tipc_node *tipc_node_find(struct net *net, u32 addr); +static void tipc_node_put(struct tipc_node *node); +static bool tipc_node_is_up(struct tipc_node *n); struct tipc_sock_conn { u32 port; @@ -83,12 +164,54 @@ struct tipc_sock_conn { struct list_head list; }; +static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_LINK_NAME + }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } }; +static struct tipc_link *node_active_link(struct tipc_node *n, int sel) +{ + int bearer_id = n->active_links[sel & 1]; + + if (unlikely(bearer_id == INVALID_BEARER_ID)) + return NULL; + + return n->links[bearer_id].link; +} + +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) +{ + struct tipc_node *n; + int bearer_id; + unsigned int mtu = MAX_MSG_SIZE; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return mtu; + + bearer_id = n->active_links[sel & 1]; + if (likely(bearer_id != INVALID_BEARER_ID)) + mtu = n->links[bearer_id].mtu; + tipc_node_put(n); + return mtu; +} /* * A trivial power-of-two bitmask technique is used for speed, since this * operation is done for every incoming TIPC packet. The number of hash table @@ -107,7 +230,7 @@ static void tipc_node_kref_release(struct kref *kref) tipc_node_delete(node); } -void tipc_node_put(struct tipc_node *node) +static void tipc_node_put(struct tipc_node *node) { kref_put(&node->kref, tipc_node_kref_release); } @@ -120,7 +243,7 @@ static void tipc_node_get(struct tipc_node *node) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(struct net *net, u32 addr) +static struct tipc_node *tipc_node_find(struct net *net, u32 addr) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; @@ -141,66 +264,122 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) return NULL; } +static void tipc_node_read_lock(struct tipc_node *n) +{ + read_lock_bh(&n->lock); +} + +static void tipc_node_read_unlock(struct tipc_node *n) +{ + read_unlock_bh(&n->lock); +} + +static void tipc_node_write_lock(struct tipc_node *n) +{ + write_lock_bh(&n->lock); +} + +static void tipc_node_write_unlock(struct tipc_node *n) +{ + struct net *net = n->net; + u32 addr = 0; + u32 flags = n->action_flags; + u32 link_id = 0; + struct list_head *publ_list; + + if (likely(!flags)) { + write_unlock_bh(&n->lock); + return; + } + + addr = n->addr; + link_id = n->link_id; + publ_list = &n->publ_list; + + n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); + + write_unlock_bh(&n->lock); + + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, addr); + + if (flags & TIPC_NOTIFY_NODE_UP) + tipc_named_node_up(net, addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, + link_id, addr); +} + struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *n_ptr, *temp_node; + struct tipc_node *n, *temp_node; + int i; spin_lock_bh(&tn->node_list_lock); - n_ptr = tipc_node_find(net, addr); - if (n_ptr) + n = tipc_node_find(net, addr); + if (n) goto exit; - n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); - if (!n_ptr) { + n = kzalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { pr_warn("Node creation failed, no memory\n"); goto exit; } - n_ptr->addr = addr; - n_ptr->net = net; - n_ptr->capabilities = capabilities; - kref_init(&n_ptr->kref); - spin_lock_init(&n_ptr->lock); - INIT_HLIST_NODE(&n_ptr->hash); - INIT_LIST_HEAD(&n_ptr->list); - INIT_LIST_HEAD(&n_ptr->publ_list); - INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bc_entry.namedq); - skb_queue_head_init(&n_ptr->bc_entry.inputq1); - __skb_queue_head_init(&n_ptr->bc_entry.arrvq); - skb_queue_head_init(&n_ptr->bc_entry.inputq2); - hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + n->addr = addr; + n->net = net; + n->capabilities = capabilities; + kref_init(&n->kref); + rwlock_init(&n->lock); + INIT_HLIST_NODE(&n->hash); + INIT_LIST_HEAD(&n->list); + INIT_LIST_HEAD(&n->publ_list); + INIT_LIST_HEAD(&n->conn_sks); + skb_queue_head_init(&n->bc_entry.namedq); + skb_queue_head_init(&n->bc_entry.inputq1); + __skb_queue_head_init(&n->bc_entry.arrvq); + skb_queue_head_init(&n->bc_entry.inputq2); + for (i = 0; i < MAX_BEARERS; i++) + spin_lock_init(&n->links[i].lock); + hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { - if (n_ptr->addr < temp_node->addr) + if (n->addr < temp_node->addr) break; } - list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->state = SELF_DOWN_PEER_LEAVING; - n_ptr->signature = INVALID_NODE_SIG; - n_ptr->active_links[0] = INVALID_BEARER_ID; - n_ptr->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, - U16_MAX, tipc_bc_sndlink(net)->window, - n_ptr->capabilities, - &n_ptr->bc_entry.inputq1, - &n_ptr->bc_entry.namedq, + list_add_tail_rcu(&n->list, &temp_node->list); + n->state = SELF_DOWN_PEER_LEAVING; + n->signature = INVALID_NODE_SIG; + n->active_links[0] = INVALID_BEARER_ID; + n->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr, + U16_MAX, + tipc_link_window(tipc_bc_sndlink(net)), + n->capabilities, + &n->bc_entry.inputq1, + &n->bc_entry.namedq, tipc_bc_sndlink(net), - &n_ptr->bc_entry.link)) { + &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - kfree(n_ptr); - n_ptr = NULL; + kfree(n); + n = NULL; goto exit; } - tipc_node_get(n_ptr); - setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); - n_ptr->keepalive_intv = U32_MAX; + tipc_node_get(n); + setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); + n->keepalive_intv = U32_MAX; exit: spin_unlock_bh(&tn->node_list_lock); - return n_ptr; + return n; } static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) { - unsigned long tol = l->tolerance; + unsigned long tol = tipc_link_tolerance(l); unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; unsigned long keepalive_intv = msecs_to_jiffies(intv); @@ -209,7 +388,7 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) n->keepalive_intv = keepalive_intv; /* Ensure link's abort limit corresponds to current interval */ - l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv); + tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv)); } static void tipc_node_delete(struct tipc_node *node) @@ -234,6 +413,42 @@ void tipc_node_stop(struct net *net) spin_unlock_bh(&tn->node_list_lock); } +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_write_lock(n); + list_add_tail(subscr, &n->publ_list); + tipc_node_write_unlock(n); + tipc_node_put(n); +} + +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_write_lock(n); + list_del_init(subscr); + tipc_node_write_unlock(n); + tipc_node_put(n); +} + int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; @@ -257,9 +472,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) conn->port = port; conn->peer_port = peer_port; - tipc_node_lock(node); + tipc_node_write_lock(node); list_add_tail(&conn->list, &node->conn_sks); - tipc_node_unlock(node); + tipc_node_write_unlock(node); exit: tipc_node_put(node); return err; @@ -277,14 +492,14 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) if (!node) return; - tipc_node_lock(node); + tipc_node_write_lock(node); list_for_each_entry_safe(conn, safe, &node->conn_sks, list) { if (port != conn->port) continue; list_del(&conn->list); kfree(conn); } - tipc_node_unlock(node); + tipc_node_write_unlock(node); tipc_node_put(node); } @@ -301,14 +516,16 @@ static void tipc_node_timeout(unsigned long data) __skb_queue_head_init(&xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { - tipc_node_lock(n); + tipc_node_read_lock(n); le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); if (le->link) { /* Link tolerance may change asynchronously: */ tipc_node_calculate_timer(n, le->link); rc = tipc_link_timeout(le->link, &xmitq); } - tipc_node_unlock(n); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); @@ -340,16 +557,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; - n->link_id = nl->peer_bearer_id << 16 | bearer_id; + n->link_id = tipc_link_id(nl); /* Leave room for tunnel header when returning 'mtu' to users: */ - n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; + n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", - nl->name, nl->net_plane); + tipc_link_name(nl), tipc_link_plane(nl)); /* First link? => give it both slots */ if (!ol) { @@ -362,17 +579,17 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, } /* Second link => redistribute slots */ - if (nl->priority > ol->priority) { - pr_debug("Old link <%s> becomes standby\n", ol->name); + if (tipc_link_prio(nl) > tipc_link_prio(ol)) { + pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol)); *slot0 = bearer_id; *slot1 = bearer_id; tipc_link_set_active(nl, true); tipc_link_set_active(ol, false); - } else if (nl->priority == ol->priority) { + } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) { tipc_link_set_active(nl, true); *slot1 = bearer_id; } else { - pr_debug("New link <%s> is standby\n", nl->name); + pr_debug("New link <%s> is standby\n", tipc_link_name(nl)); } /* Prepare synchronization with first link */ @@ -387,9 +604,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { - tipc_node_lock(n); + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); - tipc_node_unlock(n); + tipc_node_write_unlock(n); } /** @@ -402,7 +619,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct tipc_link_entry *le = &n->links[*bearer_id]; int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; - int i, highest = 0; + int i, highest = 0, prio; struct tipc_link *l, *_l, *tnl; l = n->links[*bearer_id].link; @@ -411,12 +628,12 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; - n->link_id = l->peer_bearer_id << 16 | *bearer_id; + n->link_id = tipc_link_id(l); tipc_bearer_remove_dest(n->net, *bearer_id, n->addr); pr_debug("Lost link <%s> on network plane %c\n", - l->name, l->net_plane); + tipc_link_name(l), tipc_link_plane(l)); /* Select new active link if any available */ *slot0 = INVALID_BEARER_ID; @@ -427,10 +644,11 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, continue; if (_l == l) continue; - if (_l->priority < highest) + prio = tipc_link_prio(_l); + if (prio < highest) continue; - if (_l->priority > highest) { - highest = _l->priority; + if (prio > highest) { + highest = prio; *slot0 = i; *slot1 = i; continue; @@ -453,17 +671,17 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ - tnl = node_active_link(n, 0); + *bearer_id = n->active_links[0]; + tnl = n->links[*bearer_id].link; tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); - n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); - *maddr = &n->links[tnl->bearer_id].maddr; - *bearer_id = tnl->bearer_id; + *maddr = &n->links[*bearer_id].maddr; } static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) @@ -478,7 +696,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) __skb_queue_head_init(&xmitq); - tipc_node_lock(n); + tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); if (delete) { @@ -490,12 +708,12 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) /* Defuse pending tipc_node_link_up() */ tipc_link_fsm_evt(l, LINK_RESET_EVT); } - tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } -bool tipc_node_is_up(struct tipc_node *n) +static bool tipc_node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } @@ -523,7 +741,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (!n) return; - tipc_node_lock(n); + tipc_node_write_lock(n); le = &n->links[b->identity]; @@ -626,7 +844,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, } memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: - tipc_node_unlock(n); + tipc_node_write_unlock(n); if (reset && !tipc_link_is_reset(l)) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); @@ -834,24 +1052,6 @@ illegal_evt: pr_err("Illegal node fsm evt %x in state %x\n", evt, state); } -bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) -{ - int state = n->state; - - if (likely(state == SELF_UP_PEER_UP)) - return true; - - if (state == SELF_LEAVING_PEER_DOWN) - return false; - - if (state == SELF_DOWN_PEER_LEAVING) { - if (msg_peer_node_is_up(hdr)) - return false; - } - - return true; -} - static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { @@ -913,56 +1113,18 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, if (bearer_id >= MAX_BEARERS) goto exit; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { - strncpy(linkname, link->name, len); + strncpy(linkname, tipc_link_name(link), len); err = 0; } exit: - tipc_node_unlock(node); + tipc_node_read_unlock(node); tipc_node_put(node); return err; } -void tipc_node_unlock(struct tipc_node *node) -{ - struct net *net = node->net; - u32 addr = 0; - u32 flags = node->action_flags; - u32 link_id = 0; - struct list_head *publ_list; - - if (likely(!flags)) { - spin_unlock_bh(&node->lock); - return; - } - - addr = node->addr; - link_id = node->link_id; - publ_list = &node->publ_list; - - node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); - - spin_unlock_bh(&node->lock); - - if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr); - - if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr); - - if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, addr); - - if (flags & TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - link_id, addr); - -} - /* Caller should hold node lock for the passed node */ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) { @@ -997,20 +1159,6 @@ msg_full: return -EMSGSIZE; } -static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, - int *bearer_id, - struct tipc_media_addr **maddr) -{ - int id = n->active_links[sel & 1]; - - if (unlikely(id < 0)) - return NULL; - - *bearer_id = id; - *maddr = &n->links[id].maddr; - return n->links[id].link; -} - /** * tipc_node_xmit() is the general link level function for message sending * @net: the applicable net namespace @@ -1023,34 +1171,38 @@ static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) { - struct tipc_link *l = NULL; + struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; - struct tipc_media_addr *maddr; - int bearer_id; + int bearer_id = -1; int rc = -EHOSTUNREACH; __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); if (likely(n)) { - tipc_node_lock(n); - l = tipc_node_select_link(n, selector, &bearer_id, &maddr); - if (likely(l)) - rc = tipc_link_xmit(l, list, &xmitq); - tipc_node_unlock(n); + tipc_node_read_lock(n); + bearer_id = n->active_links[selector & 1]; + if (bearer_id >= 0) { + le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); + rc = tipc_link_xmit(le->link, list, &xmitq); + spin_unlock_bh(&le->lock); + } + tipc_node_read_unlock(n); + if (likely(!skb_queue_empty(&xmitq))) { + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + return 0; + } if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); + return rc; } - if (likely(!rc)) { - tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); - return 0; - } - if (likely(in_own_node(net, dnode))) { - tipc_sk_rcv(net, list); - return 0; - } - return rc; + + if (unlikely(!in_own_node(net, dnode))) + return rc; + tipc_sk_rcv(net, list); + return 0; } /* tipc_node_xmit_skb(): send single buffer to destination @@ -1075,6 +1227,30 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +void tipc_node_broadcast(struct net *net, struct sk_buff *skb) +{ + struct sk_buff *txskb; + struct tipc_node *n; + u32 dst; + + rcu_read_lock(); + list_for_each_entry_rcu(n, tipc_nodes(net), list) { + dst = n->addr; + if (in_own_node(net, dst)) + continue; + if (!tipc_node_is_up(n)) + continue; + txskb = pskb_copy(skb, GFP_ATOMIC); + if (!txskb) + break; + msg_set_destnode(buf_msg(txskb), dst); + tipc_node_xmit_skb(net, txskb, dst, 0); + } + rcu_read_unlock(); + + kfree_skb(skb); +} + /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace @@ -1116,9 +1292,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { - tipc_node_lock(n); + tipc_node_read_lock(n); tipc_link_build_ack_msg(le->link, &xmitq); - tipc_node_unlock(n); + tipc_node_read_unlock(n); } if (!skb_queue_empty(&xmitq)) @@ -1151,30 +1327,30 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, u16 oseqno = msg_seqno(hdr); u16 iseqno = msg_seqno(msg_get_wrapped(hdr)); u16 exp_pkts = msg_msgcnt(hdr); - u16 rcv_nxt, syncpt, dlv_nxt; + u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; struct tipc_link *l, *tnl, *pl = NULL; struct tipc_media_addr *maddr; - int i, pb_id; + int pb_id; l = n->links[bearer_id].link; if (!l) return false; - rcv_nxt = l->rcv_nxt; + rcv_nxt = tipc_link_rcv_nxt(l); if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) return true; /* Find parallel link, if any */ - for (i = 0; i < MAX_BEARERS; i++) { - if ((i != bearer_id) && n->links[i].link) { - pl = n->links[i].link; + for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) { + if ((pb_id != bearer_id) && n->links[pb_id].link) { + pl = n->links[pb_id].link; break; } } - /* Update node accesibility if applicable */ + /* Check and update node accesibility if applicable */ if (state == SELF_UP_PEER_COMING) { if (!tipc_link_is_up(l)) return true; @@ -1187,8 +1363,12 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if (msg_peer_node_is_up(hdr)) return false; tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + return true; } + if (state == SELF_LEAVING_PEER_DOWN) + return false; + /* Ignore duplicate packets */ if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) return true; @@ -1197,9 +1377,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; if (pl && tipc_link_is_up(pl)) { - pb_id = pl->bearer_id; __tipc_node_link_down(n, &pb_id, xmitq, &maddr); - tipc_skb_queue_splice_tail_init(pl->inputq, l->inputq); + tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), + tipc_link_inputq(l)); } /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) @@ -1232,19 +1412,18 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); } - if (less(syncpt, n->sync_point)) - n->sync_point = syncpt; } /* Open tunnel link when parallel link reaches synch point */ - if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) { + if (n->state == NODE_SYNCHING) { if (tipc_link_is_synching(l)) { tnl = l; } else { tnl = pl; pl = l; } - dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq)); + inputq_len = skb_queue_len(tipc_link_inputq(pl)); + dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len; if (more(dlv_nxt, n->sync_point)) { tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); @@ -1304,22 +1483,32 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); - else if (unlikely(n->bc_entry.link->acked != bc_ack)) + else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); - tipc_node_lock(n); - - /* Is reception permitted at the moment ? */ - if (!tipc_node_filter_pkt(n, hdr)) - goto unlock; - - /* Check and if necessary update node state */ - if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { - rc = tipc_link_rcv(le->link, skb, &xmitq); - skb = NULL; + /* Receive packet directly if conditions permit */ + tipc_node_read_lock(n); + if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) { + spin_lock_bh(&le->lock); + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + spin_unlock_bh(&le->lock); + } + tipc_node_read_unlock(n); + + /* Check/update node state before receiving */ + if (unlikely(skb)) { + tipc_node_write_lock(n); + if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + } + tipc_node_write_unlock(n); } -unlock: - tipc_node_unlock(n); if (unlikely(rc & TIPC_LINK_UP_EVT)) tipc_node_link_up(n, bearer_id, &xmitq); @@ -1384,15 +1573,15 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node(&msg, node); if (err) { last_addr = node->addr; - tipc_node_unlock(node); + tipc_node_read_unlock(node); goto out; } - tipc_node_unlock(node); + tipc_node_read_unlock(node); } done = 1; out: @@ -1402,3 +1591,314 @@ out: return skb->len; } + +/* tipc_node_find_by_name - locate owner node of link by link's name + * @net: the applicable net namespace + * @name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. + * + * Returns pointer to node owning the link, or 0 if no matching link is found. + */ +static struct tipc_node *tipc_node_find_by_name(struct net *net, + const char *link_name, + unsigned int *bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l; + struct tipc_node *n; + struct tipc_node *found_node = NULL; + int i; + + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_read_lock(n); + for (i = 0; i < MAX_BEARERS; i++) { + l = n->links[i].link; + if (l && !strcmp(tipc_link_name(l), link_name)) { + *bearer_id = i; + found_node = n; + break; + } + } + tipc_node_read_unlock(n); + if (found_node) + break; + } + rcu_read_unlock(); + + return found_node; +} + +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); + + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + + link = node->links[bearer_id].link; + if (!link) { + res = -EINVAL; + goto out; + } + + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], + props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_link_set_tolerance(link, tol); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + tipc_link_set_prio(link, prio); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, win); + } + } + +out: + tipc_node_read_unlock(node); + + return res; +} + +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct tipc_nl_msg msg; + char *name; + int err; + + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + if (!info->attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); + nlmsg_free(msg.skb); + return -EINVAL; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_link_entry *le; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(link_name, tipc_bclink_name) == 0) { + err = tipc_bclink_reset_stats(net); + if (err) + return err; + return 0; + } + + node = tipc_node_find_by_name(net, link_name, &bearer_id); + if (!node) + return -EINVAL; + + le = &node->links[bearer_id]; + tipc_node_read_lock(node); + spin_lock_bh(&le->lock); + link = node->links[bearer_id].link; + if (!link) { + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return -EINVAL; + } + tipc_link_reset_stats(link); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return 0; +} + +/* Caller should hold node lock */ +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) +{ + u32 i; + int err; + + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if (!node->links[i].link) + continue; + + err = __tipc_nl_add_link(net, msg, + node->links[i].link, NLM_F_MULTI); + if (err) + return err; + } + *prev_link = 0; + + return 0; +} + +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (prev_node) { + node = tipc_node_find(net, prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + goto out; + } + tipc_node_put(node); + + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(net, &msg); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 6734562d3c6e..f39d9d06e8bb 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,23 +42,6 @@ #include "bearer.h" #include "msg.h" -/* Out-of-range value for node signature */ -#define INVALID_NODE_SIG 0x10000 - -#define INVALID_BEARER_ID -1 - -/* Flags used to take different actions according to flag type - * TIPC_NOTIFY_NODE_DOWN: notify node is down - * TIPC_NOTIFY_NODE_UP: notify node is up - * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type - */ -enum { - TIPC_NOTIFY_NODE_DOWN = (1 << 3), - TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_NOTIFY_LINK_UP = (1 << 6), - TIPC_NOTIFY_LINK_DOWN = (1 << 7) -}; - /* Optional capabilities supported by this code version */ enum { @@ -66,72 +49,8 @@ enum { }; #define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH +#define INVALID_BEARER_ID -1 -struct tipc_link_entry { - struct tipc_link *link; - u32 mtu; - struct sk_buff_head inputq; - struct tipc_media_addr maddr; -}; - -struct tipc_bclink_entry { - struct tipc_link *link; - struct sk_buff_head inputq1; - struct sk_buff_head arrvq; - struct sk_buff_head inputq2; - struct sk_buff_head namedq; -}; - -/** - * struct tipc_node - TIPC node structure - * @addr: network address of node - * @ref: reference counter to node object - * @lock: spinlock governing access to structure - * @net: the applicable net namespace - * @hash: links to adjacent nodes in unsorted hash chain - * @inputq: pointer to input queue containing messages for msg event - * @namedq: pointer to name table input queue with name table messages - * @active_links: bearer ids of active links, used as index into links[] array - * @links: array containing references to all links to node - * @action_flags: bit mask of different types of node actions - * @state: connectivity state vs peer node - * @sync_point: sequence number where synch/failover is finished - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @working_links: number of working links to node (both active and standby) - * @link_cnt: number of links to node - * @capabilities: bitmap, indicating peer node's functional capabilities - * @signature: node instance identifier - * @link_id: local and remote bearer ids of changing link, if any - * @publ_list: list of publications - * @rcu: rcu struct for tipc_node - */ -struct tipc_node { - u32 addr; - struct kref kref; - spinlock_t lock; - struct net *net; - struct hlist_node hash; - int active_links[2]; - struct tipc_link_entry links[MAX_BEARERS]; - struct tipc_bclink_entry bc_entry; - int action_flags; - struct list_head list; - int state; - u16 sync_point; - int link_cnt; - u16 working_links; - u16 capabilities; - u32 signature; - u32 link_id; - struct list_head publ_list; - struct list_head conn_sks; - unsigned long keepalive_intv; - struct timer_list timer; - struct rcu_head rcu; -}; - -struct tipc_node *tipc_node_find(struct net *net, u32 addr); -void tipc_node_put(struct tipc_node *node); void tipc_node_stop(struct net *net); void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_bearer *bearer, @@ -139,50 +58,22 @@ void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); -void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); -void tipc_node_unlock(struct tipc_node *node); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); - -static inline void tipc_node_lock(struct tipc_node *node) -{ - spin_lock_bh(&node->lock); -} - -static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) -{ - int bearer_id = n->active_links[sel & 1]; - - if (unlikely(bearer_id == INVALID_BEARER_ID)) - return NULL; - - return n->links[bearer_id].link; -} - -static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) -{ - struct tipc_node *n; - int bearer_id; - unsigned int mtu = MAX_MSG_SIZE; - - n = tipc_node_find(net, addr); - if (unlikely(!n)) - return mtu; - - bearer_id = n->active_links[sel & 1]; - if (likely(bearer_id != INVALID_BEARER_ID)) - mtu = n->links[bearer_id].mtu; - tipc_node_put(n); - return mtu; -} +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ad2719ad4c1b..816914ef228d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,7 +48,6 @@ #include <linux/tipc_netlink.h> #include "core.h" #include "bearer.h" -#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -221,10 +220,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; - int usr = msg_user(buf_msg(skb)); - - if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) - skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index 2ad46f39649f..1820e74a5752 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -121,7 +121,7 @@ struct vmci_transport { u64 queue_pair_max_size; u32 detach_sub_id; union vmci_transport_notify notify; - struct vmci_transport_notify_ops *notify_ops; + const struct vmci_transport_notify_ops *notify_ops; struct list_head elem; struct sock *sk; spinlock_t lock; /* protects sk. */ diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 9b7f207f2bee..fd8cf0214d51 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) } /* Socket control packet based operations. */ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h index 7df793249b6c..3c464d394a8f 100644 --- a/net/vmw_vsock/vmci_transport_notify.h +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { void (*process_negotiate) (struct sock *sk); }; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; +extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern const +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index dc9c7929a2f9..21e591dafb03 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( } /* Socket always on control packet based operations. */ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, |
