diff options
39 files changed, 426 insertions, 322 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml b/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml index b99d7a694b70..51cf574249be 100644 --- a/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml +++ b/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml @@ -52,6 +52,25 @@ properties: allOf: - $ref: dsa.yaml#/$defs/ethernet-ports +patternProperties: + "^(ethernet-)?ports$": + additionalProperties: true + patternProperties: + "^(ethernet-)?port@6$": + allOf: + - if: + properties: + phy-mode: + contains: + enum: + - rgmii + then: + properties: + rx-internal-delay-ps: + $ref: "#/$defs/internal-delay-ps" + tx-internal-delay-ps: + $ref: "#/$defs/internal-delay-ps" + # This checks if reg is a chipselect so the device is on an SPI # bus, the if-clause will fail if reg is a tuple such as for a # platform device. @@ -67,6 +86,15 @@ required: - compatible - reg +$defs: + internal-delay-ps: + description: + Disable tunable delay lines using 0 ps, or enable them and select + the phase between 1400 ps and 2000 ps in increments of 300 ps. + default: 2000 + enum: + [0, 1400, 1700, 2000] + unevaluatedProperties: false examples: @@ -108,6 +136,8 @@ examples: reg = <6>; ethernet = <&gmac1>; phy-mode = "rgmii"; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; fixed-link { speed = <1000>; full-duplex; @@ -150,6 +180,8 @@ examples: ethernet-port@6 { reg = <6>; ethernet = <&enet0>; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; phy-mode = "rgmii"; fixed-link { speed = <1000>; diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 3616389c8c2d..eacf8983e230 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2362,6 +2362,20 @@ ra_honor_pio_life - BOOLEAN Default: 0 (disabled) +ra_honor_pio_pflag - BOOLEAN + The Prefix Information Option P-flag indicates the network can + allocate a unique IPv6 prefix per client using DHCPv6-PD. + This sysctl can be enabled when a userspace DHCPv6-PD client + is running to cause the P-flag to take effect: i.e. the + P-flag suppresses any effects of the A-flag within the same + PIO. For a given PIO, P=1 and A=1 is treated as A=0. + + - If disabled, the P-flag is ignored. + - If enabled, the P-flag will disable SLAAC autoconfiguration + for the given Prefix Information Option. + + Default: 0 (disabled) + accept_ra_rt_info_min_plen - INTEGER Minimum prefix length of Route Information in RA. diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index d9d3e30fd47a..07b704a1557e 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -684,6 +684,67 @@ vsc73xx_update_vlan_table(struct vsc73xx *vsc, int port, u16 vid, bool set) return vsc73xx_write_vlan_table_entry(vsc, vid, portmap); } +static int vsc73xx_configure_rgmii_port_delay(struct dsa_switch *ds) +{ + /* Keep 2.0 ns delay for backward complatibility */ + u32 tx_delay = VSC73XX_GMIIDELAY_GMII0_GTXDELAY_2_0_NS; + u32 rx_delay = VSC73XX_GMIIDELAY_GMII0_RXDELAY_2_0_NS; + struct dsa_port *dp = dsa_to_port(ds, CPU_PORT); + struct device_node *port_dn = dp->dn; + struct vsc73xx *vsc = ds->priv; + u32 delay; + + if (!of_property_read_u32(port_dn, "tx-internal-delay-ps", &delay)) { + switch (delay) { + case 0: + tx_delay = VSC73XX_GMIIDELAY_GMII0_GTXDELAY_NONE; + break; + case 1400: + tx_delay = VSC73XX_GMIIDELAY_GMII0_GTXDELAY_1_4_NS; + break; + case 1700: + tx_delay = VSC73XX_GMIIDELAY_GMII0_GTXDELAY_1_7_NS; + break; + case 2000: + break; + default: + dev_err(vsc->dev, + "Unsupported RGMII Transmit Clock Delay\n"); + return -EINVAL; + } + } else { + dev_dbg(vsc->dev, + "RGMII Transmit Clock Delay isn't configured, set to 2.0 ns\n"); + } + + if (!of_property_read_u32(port_dn, "rx-internal-delay-ps", &delay)) { + switch (delay) { + case 0: + rx_delay = VSC73XX_GMIIDELAY_GMII0_RXDELAY_NONE; + break; + case 1400: + rx_delay = VSC73XX_GMIIDELAY_GMII0_RXDELAY_1_4_NS; + break; + case 1700: + rx_delay = VSC73XX_GMIIDELAY_GMII0_RXDELAY_1_7_NS; + break; + case 2000: + break; + default: + dev_err(vsc->dev, + "Unsupported RGMII Receive Clock Delay value\n"); + return -EINVAL; + } + } else { + dev_dbg(vsc->dev, + "RGMII Receive Clock Delay isn't configured, set to 2.0 ns\n"); + } + + /* MII delay, set both GTX and RX delay */ + return vsc73xx_write(vsc, VSC73XX_BLOCK_SYSTEM, 0, VSC73XX_GMIIDELAY, + tx_delay | rx_delay); +} + static int vsc73xx_setup(struct dsa_switch *ds) { struct vsc73xx *vsc = ds->priv; @@ -746,10 +807,11 @@ static int vsc73xx_setup(struct dsa_switch *ds) VSC73XX_MAC_CFG, VSC73XX_MAC_CFG_RESET); } - /* MII delay, set both GTX and RX delay to 2 ns */ - vsc73xx_write(vsc, VSC73XX_BLOCK_SYSTEM, 0, VSC73XX_GMIIDELAY, - VSC73XX_GMIIDELAY_GMII0_GTXDELAY_2_0_NS | - VSC73XX_GMIIDELAY_GMII0_RXDELAY_2_0_NS); + /* Configure RGMII delay */ + ret = vsc73xx_configure_rgmii_port_delay(ds); + if (ret) + return ret; + /* Ingess VLAN reception mask (table 145) */ vsc73xx_write(vsc, VSC73XX_BLOCK_ANALYZER, 0, VSC73XX_VLANMASK, 0xff); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 3d8ac63132fb..9e6f91df2ba0 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -1560,9 +1560,9 @@ static void ace_watchdog(struct net_device *data, unsigned int txqueue) } -static void ace_tasklet(struct tasklet_struct *t) +static void ace_bh_work(struct work_struct *work) { - struct ace_private *ap = from_tasklet(ap, t, ace_tasklet); + struct ace_private *ap = from_work(ap, work, ace_bh_work); struct net_device *dev = ap->ndev; int cur_size; @@ -1595,7 +1595,7 @@ static void ace_tasklet(struct tasklet_struct *t) #endif ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size); } - ap->tasklet_pending = 0; + ap->bh_work_pending = 0; } @@ -1617,7 +1617,7 @@ static void ace_dump_trace(struct ace_private *ap) * * Loading rings is safe without holding the spin lock since this is * done only before the device is enabled, thus no interrupts are - * generated and by the interrupt handler/tasklet handler. + * generated and by the interrupt handler/bh handler. */ static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs) { @@ -2160,7 +2160,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) */ if (netif_running(dev)) { int cur_size; - int run_tasklet = 0; + int run_bh_work = 0; cur_size = atomic_read(&ap->cur_rx_bufs); if (cur_size < RX_LOW_STD_THRES) { @@ -2172,7 +2172,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size); } else - run_tasklet = 1; + run_bh_work = 1; } if (!ACE_IS_TIGON_I(ap)) { @@ -2188,7 +2188,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size); } else - run_tasklet = 1; + run_bh_work = 1; } } @@ -2205,12 +2205,12 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size); } else - run_tasklet = 1; + run_bh_work = 1; } } - if (run_tasklet && !ap->tasklet_pending) { - ap->tasklet_pending = 1; - tasklet_schedule(&ap->ace_tasklet); + if (run_bh_work && !ap->bh_work_pending) { + ap->bh_work_pending = 1; + queue_work(system_bh_wq, &ap->ace_bh_work); } } @@ -2267,7 +2267,7 @@ static int ace_open(struct net_device *dev) /* * Setup the bottom half rx ring refill handler */ - tasklet_setup(&ap->ace_tasklet, ace_tasklet); + INIT_WORK(&ap->ace_bh_work, ace_bh_work); return 0; } @@ -2301,7 +2301,7 @@ static int ace_close(struct net_device *dev) cmd.idx = 0; ace_issue_cmd(regs, &cmd); - tasklet_kill(&ap->ace_tasklet); + cancel_work_sync(&ap->ace_bh_work); /* * Make sure one CPU is not processing packets while diff --git a/drivers/net/ethernet/alteon/acenic.h b/drivers/net/ethernet/alteon/acenic.h index ca5ce0cbbad1..0e45a97b9c9b 100644 --- a/drivers/net/ethernet/alteon/acenic.h +++ b/drivers/net/ethernet/alteon/acenic.h @@ -2,7 +2,7 @@ #ifndef _ACENIC_H_ #define _ACENIC_H_ #include <linux/interrupt.h> - +#include <linux/workqueue.h> /* * Generate TX index update each time, when TX ring is closed. @@ -667,8 +667,8 @@ struct ace_private struct rx_desc *rx_mini_ring; struct rx_desc *rx_return_ring; - int tasklet_pending, jumbo; - struct tasklet_struct ace_tasklet; + int bh_work_pending, jumbo; + struct work_struct ace_bh_work; struct event *evt_ring; @@ -776,7 +776,7 @@ static int ace_open(struct net_device *dev); static netdev_tx_t ace_start_xmit(struct sk_buff *skb, struct net_device *dev); static int ace_close(struct net_device *dev); -static void ace_tasklet(struct tasklet_struct *t); +static void ace_bh_work(struct work_struct *work); static void ace_dump_trace(struct ace_private *ap); static void ace_set_multicast_list(struct net_device *dev); static int ace_change_mtu(struct net_device *dev, int new_mtu); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index c4a4e316683f..5475867708f4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -403,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period, return false; } -static void xgbe_ecc_isr_task(struct tasklet_struct *t) +static void xgbe_ecc_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_ecc); + struct xgbe_prv_data *pdata = from_work(pdata, work, ecc_bh_work); unsigned int ecc_isr; bool stop = false; @@ -465,17 +465,17 @@ static irqreturn_t xgbe_ecc_isr(int irq, void *data) { struct xgbe_prv_data *pdata = data; - if (pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_ecc); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->ecc_bh_work); else - xgbe_ecc_isr_task(&pdata->tasklet_ecc); + xgbe_ecc_isr_bh_work(&pdata->ecc_bh_work); return IRQ_HANDLED; } -static void xgbe_isr_task(struct tasklet_struct *t) +static void xgbe_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_dev); + struct xgbe_prv_data *pdata = from_work(pdata, work, dev_bh_work); struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; unsigned int dma_isr, dma_ch_isr; @@ -582,7 +582,7 @@ isr_done: /* If there is not a separate ECC irq, handle it here */ if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq)) - xgbe_ecc_isr_task(&pdata->tasklet_ecc); + xgbe_ecc_isr_bh_work(&pdata->ecc_bh_work); /* If there is not a separate I2C irq, handle it here */ if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq)) @@ -604,10 +604,10 @@ static irqreturn_t xgbe_isr(int irq, void *data) { struct xgbe_prv_data *pdata = data; - if (pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_dev); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->dev_bh_work); else - xgbe_isr_task(&pdata->tasklet_dev); + xgbe_isr_bh_work(&pdata->dev_bh_work); return IRQ_HANDLED; } @@ -1007,8 +1007,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) unsigned int i; int ret; - tasklet_setup(&pdata->tasklet_dev, xgbe_isr_task); - tasklet_setup(&pdata->tasklet_ecc, xgbe_ecc_isr_task); + INIT_WORK(&pdata->dev_bh_work, xgbe_isr_bh_work); + INIT_WORK(&pdata->ecc_bh_work, xgbe_ecc_isr_bh_work); ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, netdev_name(netdev), pdata); @@ -1078,8 +1078,8 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) devm_free_irq(pdata->dev, pdata->dev_irq, pdata); - tasklet_kill(&pdata->tasklet_dev); - tasklet_kill(&pdata->tasklet_ecc); + cancel_work_sync(&pdata->dev_bh_work); + cancel_work_sync(&pdata->ecc_bh_work); if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index a9ccc4258ee5..7a833894f52a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -274,9 +274,9 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata, XI2C_IOREAD(pdata, IC_CLR_STOP_DET); } -static void xgbe_i2c_isr_task(struct tasklet_struct *t) +static void xgbe_i2c_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_i2c); + struct xgbe_prv_data *pdata = from_work(pdata, work, i2c_bh_work); struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; unsigned int isr; @@ -321,10 +321,10 @@ static irqreturn_t xgbe_i2c_isr(int irq, void *data) { struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; - if (pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_i2c); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->i2c_bh_work); else - xgbe_i2c_isr_task(&pdata->tasklet_i2c); + xgbe_i2c_isr_bh_work(&pdata->i2c_bh_work); return IRQ_HANDLED; } @@ -369,7 +369,7 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr) static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata) { - xgbe_i2c_isr_task(&pdata->tasklet_i2c); + xgbe_i2c_isr_bh_work(&pdata->i2c_bh_work); return IRQ_HANDLED; } @@ -449,7 +449,7 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) if (pdata->dev_irq != pdata->i2c_irq) { devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); - tasklet_kill(&pdata->tasklet_i2c); + cancel_work_sync(&pdata->i2c_bh_work); } } @@ -464,7 +464,7 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata) /* If we have a separate I2C irq, enable it */ if (pdata->dev_irq != pdata->i2c_irq) { - tasklet_setup(&pdata->tasklet_i2c, xgbe_i2c_isr_task); + INIT_WORK(&pdata->i2c_bh_work, xgbe_i2c_isr_bh_work); ret = devm_request_irq(pdata->dev, pdata->i2c_irq, xgbe_i2c_isr, 0, pdata->i2c_name, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 4a2dc705b528..07f4f3418d01 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -703,9 +703,9 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata) } } -static void xgbe_an_isr_task(struct tasklet_struct *t) +static void xgbe_an_isr_bh_work(struct work_struct *work) { - struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_an); + struct xgbe_prv_data *pdata = from_work(pdata, work, an_bh_work); netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); @@ -727,17 +727,17 @@ static irqreturn_t xgbe_an_isr(int irq, void *data) { struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; - if (pdata->isr_as_tasklet) - tasklet_schedule(&pdata->tasklet_an); + if (pdata->isr_as_bh_work) + queue_work(system_bh_wq, &pdata->an_bh_work); else - xgbe_an_isr_task(&pdata->tasklet_an); + xgbe_an_isr_bh_work(&pdata->an_bh_work); return IRQ_HANDLED; } static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata) { - xgbe_an_isr_task(&pdata->tasklet_an); + xgbe_an_isr_bh_work(&pdata->an_bh_work); return IRQ_HANDLED; } @@ -1454,7 +1454,7 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) if (pdata->dev_irq != pdata->an_irq) { devm_free_irq(pdata->dev, pdata->an_irq, pdata); - tasklet_kill(&pdata->tasklet_an); + cancel_work_sync(&pdata->an_bh_work); } pdata->phy_if.phy_impl.stop(pdata); @@ -1477,7 +1477,7 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) /* If we have a separate AN irq, enable it */ if (pdata->dev_irq != pdata->an_irq) { - tasklet_setup(&pdata->tasklet_an, xgbe_an_isr_task); + INIT_WORK(&pdata->an_bh_work, xgbe_an_isr_bh_work); ret = devm_request_irq(pdata->dev, pdata->an_irq, xgbe_an_isr, 0, pdata->an_name, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index c5e5fac49779..c636999a6a84 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -139,7 +139,7 @@ static int xgbe_config_multi_msi(struct xgbe_prv_data *pdata) return ret; } - pdata->isr_as_tasklet = 1; + pdata->isr_as_bh_work = 1; pdata->irq_count = ret; pdata->dev_irq = pci_irq_vector(pdata->pcidev, 0); @@ -176,7 +176,7 @@ static int xgbe_config_irqs(struct xgbe_prv_data *pdata) return ret; } - pdata->isr_as_tasklet = pdata->pcidev->msi_enabled ? 1 : 0; + pdata->isr_as_bh_work = pdata->pcidev->msi_enabled ? 1 : 0; pdata->irq_count = 1; pdata->channel_irq_count = 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index f01a1e566da6..d85386cac8d1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -1298,11 +1298,11 @@ struct xgbe_prv_data { unsigned int lpm_ctrl; /* CTRL1 for resume */ - unsigned int isr_as_tasklet; - struct tasklet_struct tasklet_dev; - struct tasklet_struct tasklet_ecc; - struct tasklet_struct tasklet_i2c; - struct tasklet_struct tasklet_an; + unsigned int isr_as_bh_work; + struct work_struct dev_bh_work; + struct work_struct ecc_bh_work; + struct work_struct i2c_bh_work; + struct work_struct an_bh_work; struct dentry *xgbe_debugfs; diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index c2b4188a1ef1..a9040c42d2ff 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -31,6 +31,7 @@ #include <linux/if_vlan.h> #include <linux/prefetch.h> #include <linux/random.h> +#include <linux/workqueue.h> #if IS_ENABLED(CONFIG_VLAN_8021Q) #define BCM_VLAN 1 #endif @@ -3015,9 +3016,9 @@ static int cnic_service_bnx2(void *data, void *status_blk) return cnic_service_bnx2_queues(dev); } -static void cnic_service_bnx2_msix(struct tasklet_struct *t) +static void cnic_service_bnx2_msix(struct work_struct *work) { - struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task); + struct cnic_local *cp = from_work(cp, work, cnic_irq_bh_work); struct cnic_dev *dev = cp->dev; cp->last_status_idx = cnic_service_bnx2_queues(dev); @@ -3036,7 +3037,7 @@ static void cnic_doirq(struct cnic_dev *dev) prefetch(cp->status_blk.gen); prefetch(&cp->kcq1.kcq[KCQ_PG(prod)][KCQ_IDX(prod)]); - tasklet_schedule(&cp->cnic_irq_task); + queue_work(system_bh_wq, &cp->cnic_irq_bh_work); } } @@ -3140,9 +3141,9 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info) return last_status; } -static void cnic_service_bnx2x_bh(struct tasklet_struct *t) +static void cnic_service_bnx2x_bh_work(struct work_struct *work) { - struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task); + struct cnic_local *cp = from_work(cp, work, cnic_irq_bh_work); struct cnic_dev *dev = cp->dev; struct bnx2x *bp = netdev_priv(dev->netdev); u32 status_idx, new_status_idx; @@ -4428,7 +4429,7 @@ static void cnic_free_irq(struct cnic_dev *dev) if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) { cp->disable_int_sync(dev); - tasklet_kill(&cp->cnic_irq_task); + cancel_work_sync(&cp->cnic_irq_bh_work); free_irq(ethdev->irq_arr[0].vector, dev); } } @@ -4441,7 +4442,7 @@ static int cnic_request_irq(struct cnic_dev *dev) err = request_irq(ethdev->irq_arr[0].vector, cnic_irq, 0, "cnic", dev); if (err) - tasklet_disable(&cp->cnic_irq_task); + disable_work_sync(&cp->cnic_irq_bh_work); return err; } @@ -4464,7 +4465,7 @@ static int cnic_init_bnx2_irq(struct cnic_dev *dev) CNIC_WR(dev, base + BNX2_HC_CMD_TICKS_OFF, (64 << 16) | 220); cp->last_status_idx = cp->status_blk.bnx2->status_idx; - tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2_msix); + INIT_WORK(&cp->cnic_irq_bh_work, cnic_service_bnx2_msix); err = cnic_request_irq(dev); if (err) return err; @@ -4873,7 +4874,7 @@ static int cnic_init_bnx2x_irq(struct cnic_dev *dev) struct cnic_eth_dev *ethdev = cp->ethdev; int err = 0; - tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2x_bh); + INIT_WORK(&cp->cnic_irq_bh_work, cnic_service_bnx2x_bh_work); if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) err = cnic_request_irq(dev); diff --git a/drivers/net/ethernet/broadcom/cnic.h b/drivers/net/ethernet/broadcom/cnic.h index fedc84ada937..1a314a75d2d2 100644 --- a/drivers/net/ethernet/broadcom/cnic.h +++ b/drivers/net/ethernet/broadcom/cnic.h @@ -268,7 +268,7 @@ struct cnic_local { u32 bnx2x_igu_sb_id; u32 int_num; u32 last_status_idx; - struct tasklet_struct cnic_irq_task; + struct work_struct cnic_irq_bh_work; struct kcqe *completed_kcq[MAX_COMPLETED_KCQE]; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index ea71612f6b36..5740c98d8c9f 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -13,6 +13,7 @@ #include <linux/net_tstamp.h> #include <linux/interrupt.h> #include <linux/phy/phy.h> +#include <linux/workqueue.h> #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP) #define MACB_EXT_DESC @@ -1330,7 +1331,7 @@ struct macb { spinlock_t rx_fs_lock; unsigned int max_tuples; - struct tasklet_struct hresp_err_tasklet; + struct work_struct hresp_err_bh_work; int rx_bd_rd_prefetch; int tx_bd_rd_prefetch; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 11665be3a22c..95e8742dce1d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1792,9 +1792,9 @@ static int macb_tx_poll(struct napi_struct *napi, int budget) return work_done; } -static void macb_hresp_error_task(struct tasklet_struct *t) +static void macb_hresp_error_task(struct work_struct *work) { - struct macb *bp = from_tasklet(bp, t, hresp_err_tasklet); + struct macb *bp = from_work(bp, work, hresp_err_bh_work); struct net_device *dev = bp->dev; struct macb_queue *queue; unsigned int q; @@ -1994,7 +1994,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) } if (status & MACB_BIT(HRESP)) { - tasklet_schedule(&bp->hresp_err_tasklet); + queue_work(system_bh_wq, &bp->hresp_err_bh_work); netdev_err(dev, "DMA bus error: HRESP not OK\n"); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) @@ -5172,7 +5172,7 @@ static int macb_probe(struct platform_device *pdev) goto err_out_unregister_mdio; } - tasklet_setup(&bp->hresp_err_tasklet, macb_hresp_error_task); + INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task); netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n", macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID), @@ -5216,7 +5216,7 @@ static void macb_remove(struct platform_device *pdev) mdiobus_free(bp->mii_bus); unregister_netdev(dev); - tasklet_kill(&bp->hresp_err_tasklet); + cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); if (!pm_runtime_suspended(&pdev->dev)) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index eb1708b43aa3..0d5225f1d3ee 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -724,12 +724,8 @@ enum mtk_clks_map { MTK_CLK_ETHWARP_WOCPU2, MTK_CLK_ETHWARP_WOCPU1, MTK_CLK_ETHWARP_WOCPU0, - MTK_CLK_TOP_USXGMII_SBUS_0_SEL, - MTK_CLK_TOP_USXGMII_SBUS_1_SEL, MTK_CLK_TOP_SGM_0_SEL, MTK_CLK_TOP_SGM_1_SEL, - MTK_CLK_TOP_XFI_PHY_0_XTAL_SEL, - MTK_CLK_TOP_XFI_PHY_1_XTAL_SEL, MTK_CLK_TOP_ETH_GMII_SEL, MTK_CLK_TOP_ETH_REFCK_50M_SEL, MTK_CLK_TOP_ETH_SYS_200M_SEL, @@ -800,19 +796,9 @@ enum mtk_clks_map { BIT_ULL(MTK_CLK_GP3) | BIT_ULL(MTK_CLK_XGP1) | \ BIT_ULL(MTK_CLK_XGP2) | BIT_ULL(MTK_CLK_XGP3) | \ BIT_ULL(MTK_CLK_CRYPTO) | \ - BIT_ULL(MTK_CLK_SGMII_TX_250M) | \ - BIT_ULL(MTK_CLK_SGMII_RX_250M) | \ - BIT_ULL(MTK_CLK_SGMII2_TX_250M) | \ - BIT_ULL(MTK_CLK_SGMII2_RX_250M) | \ BIT_ULL(MTK_CLK_ETHWARP_WOCPU2) | \ BIT_ULL(MTK_CLK_ETHWARP_WOCPU1) | \ BIT_ULL(MTK_CLK_ETHWARP_WOCPU0) | \ - BIT_ULL(MTK_CLK_TOP_USXGMII_SBUS_0_SEL) | \ - BIT_ULL(MTK_CLK_TOP_USXGMII_SBUS_1_SEL) | \ - BIT_ULL(MTK_CLK_TOP_SGM_0_SEL) | \ - BIT_ULL(MTK_CLK_TOP_SGM_1_SEL) | \ - BIT_ULL(MTK_CLK_TOP_XFI_PHY_0_XTAL_SEL) | \ - BIT_ULL(MTK_CLK_TOP_XFI_PHY_1_XTAL_SEL) | \ BIT_ULL(MTK_CLK_TOP_ETH_GMII_SEL) | \ BIT_ULL(MTK_CLK_TOP_ETH_REFCK_50M_SEL) | \ BIT_ULL(MTK_CLK_TOP_ETH_SYS_200M_SEL) | \ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 943d6918c2ec..cd17a3f4faf8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2036,20 +2036,20 @@ static int mlx4_en_get_module_info(struct net_device *dev, switch (data[0] /* identifier */) { case MLX4_MODULE_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; break; case MLX4_MODULE_ID_QSFP_PLUS: if (data[1] >= 0x3) { /* revision id */ modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; } else { modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; } break; case MLX4_MODULE_ID_QSFP28: modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index d894a88fa9f2..972e8e9df585 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -608,6 +608,11 @@ enum { RELEASE_ALL_PAGES_MASK = 0x4000, }; +/* This limit is based on the capability of the firmware as it cannot release + * more than 50000 back to the host in one go. + */ +#define MAX_RECLAIM_NPAGES (-50000) + static int req_pages_handler(struct notifier_block *nb, unsigned long type, void *data) { @@ -639,7 +644,16 @@ static int req_pages_handler(struct notifier_block *nb, req->dev = dev; req->func_id = func_id; - req->npages = npages; + + /* npages > 0 means HCA asking host to allocate/give pages, + * npages < 0 means HCA asking host to reclaim back the pages allocated. + * Here we are restricting the maximum number of pages that can be + * reclaimed to be MAX_RECLAIM_NPAGES. Note that MAX_RECLAIM_NPAGES is + * a negative value. + * Since MAX_RECLAIM is negative, we are using max() to restrict + * req->npages (and not min ()). + */ + req->npages = max_t(s32, npages, MAX_RECLAIM_NPAGES); req->ec_function = ec_function; req->release_all = release_all; INIT_WORK(&req->work, pages_work_handler); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index d61478c0c632..303d2ce4dc1e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -411,7 +411,7 @@ static const struct thermal_cooling_device_ops mlxsw_cooling_ops = { static int mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) { - char tz_name[THERMAL_NAME_LENGTH]; + char tz_name[40]; int err; if (module_tz->slot_index) @@ -445,17 +445,13 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) thermal_zone_device_unregister(tzdev); } -static void -mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal *thermal, +static int +mlxsw_thermal_module_init(struct mlxsw_thermal *thermal, struct mlxsw_thermal_area *area, u8 module) { struct mlxsw_thermal_module *module_tz; module_tz = &area->tz_module_arr[module]; - /* Skip if parent is already set (case of port split). */ - if (module_tz->parent) - return; module_tz->module = module; module_tz->slot_index = area->slot_index; module_tz->parent = thermal; @@ -465,15 +461,13 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, sizeof(thermal->trips)); memcpy(module_tz->cooling_states, default_cooling_states, sizeof(thermal->cooling_states)); + + return mlxsw_thermal_module_tz_init(module_tz); } static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) { - if (module_tz && module_tz->tzdev) { - mlxsw_thermal_module_tz_fini(module_tz->tzdev); - module_tz->tzdev = NULL; - module_tz->parent = NULL; - } + mlxsw_thermal_module_tz_fini(module_tz->tzdev); } static int @@ -481,7 +475,6 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal, struct mlxsw_thermal_area *area) { - struct mlxsw_thermal_module *module_tz; char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; @@ -503,22 +496,16 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, if (!area->tz_module_arr) return -ENOMEM; - for (i = 0; i < area->tz_module_num; i++) - mlxsw_thermal_module_init(dev, core, thermal, area, i); - for (i = 0; i < area->tz_module_num; i++) { - module_tz = &area->tz_module_arr[i]; - if (!module_tz->parent) - continue; - err = mlxsw_thermal_module_tz_init(module_tz); + err = mlxsw_thermal_module_init(thermal, area, i); if (err) - goto err_thermal_module_tz_init; + goto err_thermal_module_init; } return 0; -err_thermal_module_tz_init: - for (i = area->tz_module_num - 1; i >= 0; i--) +err_thermal_module_init: + for (i--; i >= 0; i--) mlxsw_thermal_module_fini(&area->tz_module_arr[i]); kfree(area->tz_module_arr); return err; @@ -821,10 +808,7 @@ err_linecards_event_ops_register: err_thermal_gearboxes_init: mlxsw_thermal_modules_fini(thermal, &thermal->line_cards[0]); err_thermal_modules_init: - if (thermal->tzdev) { - thermal_zone_device_unregister(thermal->tzdev); - thermal->tzdev = NULL; - } + thermal_zone_device_unregister(thermal->tzdev); err_thermal_zone_device_register: err_thermal_cooling_device_register: for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) @@ -845,10 +829,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) thermal); mlxsw_thermal_gearboxes_fini(thermal, &thermal->line_cards[0]); mlxsw_thermal_modules_fini(thermal, &thermal->line_cards[0]); - if (thermal->tzdev) { - thermal_zone_device_unregister(thermal->tzdev); - thermal->tzdev = NULL; - } + thermal_zone_device_unregister(thermal->tzdev); for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) thermal_cooling_device_unregister(thermal->cdevs[i].cdev); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 1eecba984f3b..2b3d6586f44a 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -251,10 +251,7 @@ static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring, rx_buffer->page_offset; /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + net_prefetch(page_addr); /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, WX_RXBUFFER_256); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 383a0ea2ab91..a6e2aadbb91b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -89,6 +89,7 @@ struct ipv6_devconf { __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; __u8 ra_honor_pio_life; + __u8 ra_honor_pio_pflag; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 29c3ea5b6e93..cf8f6ce06742 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1225,7 +1225,7 @@ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; - if (likely(refcount_read(&skb->users) == 1)) + if (!IS_ENABLED(CONFIG_DEBUG_NET) && likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 62a407db1bf5..b18e81f0c9e1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -37,10 +37,14 @@ struct prefix_info { struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) __u8 onlink : 1, - autoconf : 1, - reserved : 6; + autoconf : 1, + routeraddr : 1, + preferpd : 1, + reserved : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 4, + preferpd : 1, + routeraddr : 1, autoconf : 1, onlink : 1; #else diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index 5c317d23787b..30b8ce275159 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -35,7 +35,7 @@ static int linkinfo_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; ret = __ethtool_get_link_ksettings(dev, &data->ksettings); - if (ret < 0 && info) + if (ret < 0) GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); ethnl_ops_complete(dev); diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index b2591db49f7d..259cd9ef1f2a 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -40,7 +40,7 @@ static int linkmodes_prepare_data(const struct ethnl_req_info *req_base, return ret; ret = __ethtool_get_link_ksettings(dev, &data->ksettings); - if (ret < 0 && info) { + if (ret < 0) { GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); goto out; } diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index c678b484a079..56b99606f00b 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -289,8 +289,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, for (i = 0; i < ETH_SS_COUNT; i++) { if ((req_info->req_ids & (1U << i)) && data->sets[i].per_dev) { - if (info) - GENL_SET_ERR_MSG(info, "requested per device strings without dev"); + GENL_SET_ERR_MSG(info, "requested per device strings without dev"); return -EINVAL; } } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e94ed7c56a0..6d2c97f8e9ef 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -661,7 +661,8 @@ config TCP_CONG_CDG For further details see: D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using - delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg + delay gradients." In Networking 2011. Preprint: + http://caia.swin.edu.au/cv/dahayes/content/networking2011-cdg-preprint.pdf config TCP_CONG_BBR tristate "BBR TCP" diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b90d0f78ac80..8a10a7c67834 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1534,6 +1534,7 @@ void ip_flush_pending_frames(struct sock *sk) { __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base); } +EXPORT_SYMBOL_GPL(ip_flush_pending_frames); struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f70d8757af1a..c87d008aefa4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -239,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, + .ra_honor_pio_pflag = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -302,6 +303,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, + .ra_honor_pio_pflag = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -2762,6 +2764,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) u32 addr_flags = 0; struct inet6_dev *in6_dev; struct net *net = dev_net(dev); + bool ignore_autoconf = false; pinfo = (struct prefix_info *) opt; @@ -2864,7 +2867,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) /* Try to figure out our local address for this prefix */ - if (pinfo->autoconf && in6_dev->cnf.autoconf) { + ignore_autoconf = READ_ONCE(in6_dev->cnf.ra_honor_pio_pflag) && pinfo->preferpd; + if (pinfo->autoconf && in6_dev->cnf.autoconf && !ignore_autoconf) { struct in6_addr addr; bool tokenized = false, dev_addr_generated = false; @@ -6926,6 +6930,15 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "ra_honor_pio_pflag", + .data = &ipv6_devconf.ra_honor_pio_pflag, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #ifdef CONFIG_IPV6_ROUTER_PREF { .procname = "accept_ra_rtr_pref", diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c80ab3f26084..5d2068b6c778 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -137,9 +137,28 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net) static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { + struct sock *sk = tunnel->sock; + trace_free_tunnel(tunnel); - sock_put(tunnel->sock); - /* the tunnel is freed in the socket destructor */ + + if (sk) { + /* Disable udp encapsulation */ + switch (tunnel->encap) { + case L2TP_ENCAPTYPE_UDP: + /* No longer an encapsulation socket. See net/ipv4/udp.c */ + WRITE_ONCE(udp_sk(sk)->encap_type, 0); + udp_sk(sk)->encap_rcv = NULL; + udp_sk(sk)->encap_destroy = NULL; + break; + case L2TP_ENCAPTYPE_IP: + break; + } + + tunnel->sock = NULL; + sock_put(sk); + } + + kfree_rcu(tunnel, rcu); } static void l2tp_session_free(struct l2tp_session *session) @@ -147,18 +166,29 @@ static void l2tp_session_free(struct l2tp_session *session) trace_free_session(session); if (session->tunnel) l2tp_tunnel_dec_refcount(session->tunnel); - kfree(session); + kfree_rcu(session, rcu); } -struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk) +struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk) { - struct l2tp_tunnel *tunnel = sk->sk_user_data; + const struct net *net = sock_net(sk); + unsigned long tunnel_id, tmp; + struct l2tp_tunnel *tunnel; + struct l2tp_net *pn; - if (tunnel) - if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC)) - return NULL; + rcu_read_lock_bh(); + pn = l2tp_pernet(net); + idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { + if (tunnel && + tunnel->sock == sk && + refcount_inc_not_zero(&tunnel->ref_count)) { + rcu_read_unlock_bh(); + return tunnel; + } + } + rcu_read_unlock_bh(); - return tunnel; + return NULL; } EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel); @@ -249,7 +279,14 @@ struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session, hlist, key) { - if (session->tunnel->sock == sk && + /* session->tunnel may be NULL if another thread is in + * l2tp_session_register and has added an item to + * l2tp_v3_session_htable but hasn't yet added the + * session to its tunnel's session_list. + */ + struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); + + if (tunnel && tunnel->sock == sk && refcount_inc_not_zero(&session->ref_count)) { rcu_read_unlock_bh(); return session; @@ -382,12 +419,12 @@ static int l2tp_session_collision_add(struct l2tp_net *pn, /* If existing session isn't already in the session hlist, add it. */ if (!hash_hashed(&session2->hlist)) - hash_add(pn->l2tp_v3_session_htable, &session2->hlist, - session2->hlist_key); + hash_add_rcu(pn->l2tp_v3_session_htable, &session2->hlist, + session2->hlist_key); /* Add new session to the hlist and collision list */ - hash_add(pn->l2tp_v3_session_htable, &session1->hlist, - session1->hlist_key); + hash_add_rcu(pn->l2tp_v3_session_htable, &session1->hlist, + session1->hlist_key); refcount_inc(&clist->ref_count); l2tp_session_coll_list_add(clist, session1); @@ -403,7 +440,7 @@ static void l2tp_session_collision_del(struct l2tp_net *pn, lockdep_assert_held(&pn->l2tp_session_idr_lock); - hash_del(&session->hlist); + hash_del_rcu(&session->hlist); if (clist) { /* Remove session from its collision list. If there @@ -437,6 +474,7 @@ int l2tp_session_register(struct l2tp_session *session, { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); struct l2tp_session *other_session = NULL; + void *old = NULL; u32 session_key; int err; @@ -477,15 +515,22 @@ int l2tp_session_register(struct l2tp_session *session, } l2tp_tunnel_inc_refcount(tunnel); - list_add(&session->list, &tunnel->session_list); + WRITE_ONCE(session->tunnel, tunnel); + list_add_rcu(&session->list, &tunnel->session_list); + /* this makes session available to lockless getters */ if (tunnel->version == L2TP_HDR_VER_3) { if (!other_session) - idr_replace(&pn->l2tp_v3_session_idr, session, session_key); + old = idr_replace(&pn->l2tp_v3_session_idr, session, session_key); } else { - idr_replace(&pn->l2tp_v2_session_idr, session, session_key); + old = idr_replace(&pn->l2tp_v2_session_idr, session, session_key); } + /* old should be NULL, unless something removed or modified + * the IDR entry after our idr_alloc_32 above (which shouldn't + * happen). + */ + WARN_ON_ONCE(old); out: spin_unlock_bh(&pn->l2tp_session_idr_lock); spin_unlock_bh(&tunnel->list_lock); @@ -792,7 +837,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, if (!session->lns_mode && !session->send_seq) { trace_session_seqnum_lns_enable(session); session->send_seq = 1; - l2tp_session_set_header_len(session, tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, + tunnel->encap); } } else { /* No sequence numbers. @@ -813,7 +859,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, if (!session->lns_mode && session->send_seq) { trace_session_seqnum_lns_disable(session); session->send_seq = 0; - l2tp_session_set_header_len(session, tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, + tunnel->encap); } else if (session->send_seq) { pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", session->name); @@ -1207,44 +1254,6 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); * Tinnel and session create/destroy. *****************************************************************************/ -/* Tunnel socket destruct hook. - * The tunnel context is deleted only when all session sockets have been - * closed. - */ -static void l2tp_tunnel_destruct(struct sock *sk) -{ - struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk); - - if (!tunnel) - goto end; - - /* Disable udp encapsulation */ - switch (tunnel->encap) { - case L2TP_ENCAPTYPE_UDP: - /* No longer an encapsulation socket. See net/ipv4/udp.c */ - WRITE_ONCE(udp_sk(sk)->encap_type, 0); - udp_sk(sk)->encap_rcv = NULL; - udp_sk(sk)->encap_destroy = NULL; - break; - case L2TP_ENCAPTYPE_IP: - break; - } - - /* Remove hooks into tunnel socket */ - write_lock_bh(&sk->sk_callback_lock); - sk->sk_destruct = tunnel->old_sk_destruct; - sk->sk_user_data = NULL; - write_unlock_bh(&sk->sk_callback_lock); - - /* Call the original destructor */ - if (sk->sk_destruct) - (*sk->sk_destruct)(sk); - - kfree_rcu(tunnel, rcu); -end: - return; -} - /* Remove an l2tp session from l2tp_core's lists. */ static void l2tp_session_unhash(struct l2tp_session *session) { @@ -1277,8 +1286,6 @@ static void l2tp_session_unhash(struct l2tp_session *session) spin_unlock_bh(&pn->l2tp_session_idr_lock); spin_unlock_bh(&tunnel->list_lock); - - synchronize_rcu(); } } @@ -1290,28 +1297,21 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) spin_lock_bh(&tunnel->list_lock); tunnel->acpt_newsess = false; - for (;;) { - session = list_first_entry_or_null(&tunnel->session_list, - struct l2tp_session, list); - if (!session) - break; - l2tp_session_inc_refcount(session); - list_del_init(&session->list); - spin_unlock_bh(&tunnel->list_lock); + list_for_each_entry(session, &tunnel->session_list, list) l2tp_session_delete(session); - spin_lock_bh(&tunnel->list_lock); - l2tp_session_dec_refcount(session); - } spin_unlock_bh(&tunnel->list_lock); } /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk); + struct l2tp_tunnel *tunnel; - if (tunnel) + tunnel = l2tp_sk_to_tunnel(sk); + if (tunnel) { l2tp_tunnel_delete(tunnel); + l2tp_tunnel_dec_refcount(tunnel); + } } static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel) @@ -1494,7 +1494,6 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, tunnel->tunnel_id = tunnel_id; tunnel->peer_tunnel_id = peer_tunnel_id; - tunnel->magic = L2TP_TUNNEL_MAGIC; sprintf(&tunnel->name[0], "tunl %u", tunnel_id); spin_lock_init(&tunnel->list_lock); tunnel->acpt_newsess = true; @@ -1520,6 +1519,8 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); static int l2tp_validate_socket(const struct sock *sk, const struct net *net, enum l2tp_encap_type encap) { + struct l2tp_tunnel *tunnel; + if (!net_eq(sock_net(sk), net)) return -EINVAL; @@ -1533,8 +1534,11 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net, (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP)) return -EPROTONOSUPPORT; - if (sk->sk_user_data) + tunnel = l2tp_sk_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_dec_refcount(tunnel); return -EBUSY; + } return 0; } @@ -1573,12 +1577,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) goto err_inval_sock; - rcu_assign_sk_user_data(sk, tunnel); write_unlock_bh(&sk->sk_callback_lock); if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { struct udp_tunnel_sock_cfg udp_cfg = { - .sk_user_data = tunnel, .encap_type = UDP_ENCAP_L2TPINUDP, .encap_rcv = l2tp_udp_encap_recv, .encap_err_rcv = l2tp_udp_encap_err_recv, @@ -1588,8 +1590,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, setup_udp_tunnel_sock(net, sock, &udp_cfg); } - tunnel->old_sk_destruct = sk->sk_destruct; - sk->sk_destruct = &l2tp_tunnel_destruct; sk->sk_allocation = GFP_ATOMIC; release_sock(sk); @@ -1636,23 +1636,37 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); void l2tp_session_delete(struct l2tp_session *session) { - if (test_and_set_bit(0, &session->dead)) - return; + if (!test_and_set_bit(0, &session->dead)) { + trace_delete_session(session); + l2tp_session_inc_refcount(session); + queue_work(l2tp_wq, &session->del_work); + } +} +EXPORT_SYMBOL_GPL(l2tp_session_delete); + +/* Workqueue session deletion function */ +static void l2tp_session_del_work(struct work_struct *work) +{ + struct l2tp_session *session = container_of(work, struct l2tp_session, + del_work); - trace_delete_session(session); l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close) (*session->session_close)(session); + /* drop initial ref */ + l2tp_session_dec_refcount(session); + + /* drop workqueue ref */ l2tp_session_dec_refcount(session); } -EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or * l2specific_type parameters are set. */ -void l2tp_session_set_header_len(struct l2tp_session *session, int version) +void l2tp_session_set_header_len(struct l2tp_session *session, int version, + enum l2tp_encap_type encap) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; @@ -1661,7 +1675,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) } else { session->hdr_len = 4 + session->cookie_len; session->hdr_len += l2tp_get_l2specific_len(session); - if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) + if (encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } } @@ -1675,7 +1689,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL); if (session) { session->magic = L2TP_SESSION_MAGIC; - session->tunnel = tunnel; session->session_id = session_id; session->peer_session_id = peer_session_id; @@ -1699,6 +1712,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn INIT_HLIST_NODE(&session->hlist); INIT_LIST_HEAD(&session->clist); INIT_LIST_HEAD(&session->list); + INIT_WORK(&session->del_work, l2tp_session_del_work); if (cfg) { session->pwtype = cfg->pw_type; @@ -1713,7 +1727,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len); } - l2tp_session_set_header_len(session, tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); refcount_set(&session->ref_count, 1); @@ -1742,7 +1756,7 @@ static __net_init int l2tp_init_net(struct net *net) return 0; } -static __net_exit void l2tp_exit_net(struct net *net) +static __net_exit void l2tp_pre_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; @@ -1756,8 +1770,12 @@ static __net_exit void l2tp_exit_net(struct net *net) rcu_read_unlock_bh(); if (l2tp_wq) - flush_workqueue(l2tp_wq); - rcu_barrier(); + drain_workqueue(l2tp_wq); +} + +static __net_exit void l2tp_exit_net(struct net *net) +{ + struct l2tp_net *pn = l2tp_pernet(net); idr_destroy(&pn->l2tp_v2_session_idr); idr_destroy(&pn->l2tp_v3_session_idr); @@ -1767,6 +1785,7 @@ static __net_exit void l2tp_exit_net(struct net *net) static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, .exit = l2tp_exit_net, + .pre_exit = l2tp_pre_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8ac81bc1bc6f..c907687705b9 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -16,7 +16,6 @@ #endif /* Random numbers used for internal consistency checks of tunnel and session structures */ -#define L2TP_TUNNEL_MAGIC 0x42114DDA #define L2TP_SESSION_MAGIC 0x0C04EB7D struct sk_buff; @@ -67,6 +66,7 @@ struct l2tp_session_coll_list { struct l2tp_session { int magic; /* should be L2TP_SESSION_MAGIC */ long dead; + struct rcu_head rcu; struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */ u32 session_id; @@ -103,6 +103,7 @@ struct l2tp_session { int reorder_skip; /* set if skip to next nr */ enum l2tp_pwtype pwtype; struct l2tp_stats stats; + struct work_struct del_work; /* Session receive handler for data packets. * Each pseudowire implementation should implement this callback in order to @@ -155,8 +156,6 @@ struct l2tp_tunnel_cfg { */ #define L2TP_TUNNEL_NAME_MAX 20 struct l2tp_tunnel { - int magic; /* Should be L2TP_TUNNEL_MAGIC */ - unsigned long dead; struct rcu_head rcu; @@ -176,7 +175,6 @@ struct l2tp_tunnel { struct net *l2tp_net; /* the net we belong to */ refcount_t ref_count; - void (*old_sk_destruct)(struct sock *sk); struct sock *sock; /* parent socket */ int fd; /* parent fd, if tunnel socket was created * by userspace @@ -260,7 +258,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); /* Transmit path helpers for sending packets over the tunnel socket. */ -void l2tp_session_set_header_len(struct l2tp_session *session, int version); +void l2tp_session_set_header_len(struct l2tp_session *session, int version, + enum l2tp_encap_type encap); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb); /* Pseudowire management. @@ -273,10 +272,7 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); /* IOCTL helper for IP encap modules. */ int l2tp_ioctl(struct sock *sk, int cmd, int *karg); -/* Extract the tunnel structure from a socket's sk_user_data pointer, - * validating the tunnel magic feather. - */ -struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk); +struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk); static inline int l2tp_get_l2specific_len(struct l2tp_session *session) { diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8ba00ad433c2..cc8a3ce716e9 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -322,7 +322,7 @@ err_sess_dev: l2tp_session_dec_refcount(session); free_netdev(dev); err_sess: - kfree(session); + l2tp_session_dec_refcount(session); err: return rc; } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index e48aa177d74c..f21dcbf3efd5 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -235,14 +235,17 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk); - struct sk_buff *skb; + struct l2tp_tunnel *tunnel; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) - kfree_skb(skb); + lock_sock(sk); + ip_flush_pending_frames(sk); + release_sock(sk); - if (tunnel) + tunnel = l2tp_sk_to_tunnel(sk); + if (tunnel) { l2tp_tunnel_delete(tunnel); + l2tp_tunnel_dec_refcount(tunnel); + } } static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d217ff1f229e..3b0465f2d60d 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -246,14 +246,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk); + struct l2tp_tunnel *tunnel; lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - if (tunnel) + tunnel = l2tp_sk_to_tunnel(sk); + if (tunnel) { l2tp_tunnel_delete(tunnel); + l2tp_tunnel_dec_refcount(tunnel); + } } static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d105030520f9..fc43ecbd128c 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -692,8 +692,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); if (info->attrs[L2TP_ATTR_SEND_SEQ]) { + struct l2tp_tunnel *tunnel = session->tunnel; + session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); - l2tp_session_set_header_len(session, session->tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); } if (info->attrs[L2TP_ATTR_LNS_MODE]) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 3596290047b2..90bf3a8ccab6 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -119,7 +119,6 @@ struct pppol2tp_session { struct mutex sk_lock; /* Protects .sk */ struct sock __rcu *sk; /* Pointer to the session PPPoX socket */ struct sock *__sk; /* Copy of .sk, for cleanup */ - struct rcu_head rcu; /* For asynchronous release */ }; static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb); @@ -157,20 +156,16 @@ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) if (!sk) return NULL; - sock_hold(sk); - session = (struct l2tp_session *)(sk->sk_user_data); - if (!session) { - sock_put(sk); - goto out; - } - if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) { - session = NULL; - sock_put(sk); - goto out; + rcu_read_lock(); + session = rcu_dereference_sk_user_data(sk); + if (session && refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock(); + WARN_ON_ONCE(session->magic != L2TP_SESSION_MAGIC); + return session; } + rcu_read_unlock(); -out: - return session; + return NULL; } /***************************************************************************** @@ -318,12 +313,12 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, l2tp_xmit_skb(session, skb); local_bh_enable(); - sock_put(sk); + l2tp_session_dec_refcount(session); return total_len; error_put_sess: - sock_put(sk); + l2tp_session_dec_refcount(session); error: return error; } @@ -377,12 +372,12 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) l2tp_xmit_skb(session, skb); local_bh_enable(); - sock_put(sk); + l2tp_session_dec_refcount(session); return 1; abort_put_sess: - sock_put(sk); + l2tp_session_dec_refcount(session); abort: /* Free the original skb */ kfree_skb(skb); @@ -393,28 +388,31 @@ abort: * Session (and tunnel control) socket create/destroy. *****************************************************************************/ -static void pppol2tp_put_sk(struct rcu_head *head) -{ - struct pppol2tp_session *ps; - - ps = container_of(head, typeof(*ps), rcu); - sock_put(ps->__sk); -} - /* Really kill the session socket. (Called from sock_put() if * refcnt == 0.) */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session = sk->sk_user_data; - skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); +} - if (session) { - sk->sk_user_data = NULL; - if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) - return; +static void pppol2tp_session_close(struct l2tp_session *session) +{ + struct pppol2tp_session *ps; + + ps = l2tp_session_priv(session); + mutex_lock(&ps->sk_lock); + ps->__sk = rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock)); + RCU_INIT_POINTER(ps->sk, NULL); + mutex_unlock(&ps->sk_lock); + if (ps->__sk) { + /* detach socket */ + rcu_assign_sk_user_data(ps->__sk, NULL); + sock_put(ps->__sk); + + /* drop ref taken when we referenced socket via sk_user_data */ l2tp_session_dec_refcount(session); } } @@ -444,30 +442,13 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); if (session) { - struct pppol2tp_session *ps; - l2tp_session_delete(session); - - ps = l2tp_session_priv(session); - mutex_lock(&ps->sk_lock); - ps->__sk = rcu_dereference_protected(ps->sk, - lockdep_is_held(&ps->sk_lock)); - RCU_INIT_POINTER(ps->sk, NULL); - mutex_unlock(&ps->sk_lock); - call_rcu(&ps->rcu, pppol2tp_put_sk); - - /* Rely on the sock_put() call at the end of the function for - * dropping the reference held by pppol2tp_sock_to_session(). - * The last reference will be dropped by pppol2tp_put_sk(). - */ + /* drop ref taken by pppol2tp_sock_to_session */ + l2tp_session_dec_refcount(session); } release_sock(sk); - /* This will delete the session context via - * pppol2tp_session_destruct() if the socket's refcnt drops to - * zero. - */ sock_put(sk); return 0; @@ -506,6 +487,7 @@ static int pppol2tp_create(struct net *net, struct socket *sock, int kern) goto out; sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); sock->state = SS_UNCONNECTED; sock->ops = &pppol2tp_ops; @@ -542,6 +524,7 @@ static void pppol2tp_session_init(struct l2tp_session *session) struct pppol2tp_session *ps; session->recv_skb = pppol2tp_recv; + session->session_close = pppol2tp_session_close; if (IS_ENABLED(CONFIG_L2TP_DEBUGFS)) session->show = pppol2tp_show; @@ -787,6 +770,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, goto end; } + drop_refcnt = true; + pppol2tp_session_init(session); ps = l2tp_session_priv(session); l2tp_session_inc_refcount(session); @@ -795,10 +780,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, error = l2tp_session_register(session, tunnel); if (error < 0) { mutex_unlock(&ps->sk_lock); - kfree(session); + l2tp_session_dec_refcount(session); goto end; } - drop_refcnt = true; + new_session = true; } @@ -830,12 +815,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, out_no_ppp: /* This is how we get the session context from the socket. */ - sk->sk_user_data = session; + sock_hold(sk); + rcu_assign_sk_user_data(sk, session); rcu_assign_pointer(ps->sk, sk); mutex_unlock(&ps->sk_lock); /* Keep the reference we've grabbed on the session: sk doesn't expect - * the session to disappear. pppol2tp_session_destruct() is responsible + * the session to disappear. pppol2tp_session_close() is responsible * for dropping it. */ drop_refcnt = false; @@ -891,7 +877,7 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, return 0; err_sess: - kfree(session); + l2tp_session_dec_refcount(session); err: return error; } @@ -1002,7 +988,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, error = len; - sock_put(sk); + l2tp_session_dec_refcount(session); end: return error; } @@ -1205,7 +1191,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk, po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } - l2tp_session_set_header_len(session, session->tunnel->version); + l2tp_session_set_header_len(session, session->tunnel->version, + session->tunnel->encap); break; case PPPOL2TP_SO_LNSMODE: @@ -1274,7 +1261,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, err = pppol2tp_session_setsockopt(sk, session, optname, val); } - sock_put(sk); + l2tp_session_dec_refcount(session); end: return err; } @@ -1395,7 +1382,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, err = 0; end_put_sess: - sock_put(sk); + l2tp_session_dec_refcount(session); end: return err; } @@ -1513,7 +1500,7 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) seq_printf(m, "\nTUNNEL '%s', %c %d\n", tunnel->name, - (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N', + tunnel->sock ? 'Y' : 'N', refcount_read(&tunnel->ref_count) - 1); seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", 0, diff --git a/net/rds/ib.h b/net/rds/ib.h index 2ba71102b1f1..8ef3178ed4d6 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -369,9 +369,6 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); void rds_ib_conn_free(void *arg); int rds_ib_conn_path_connect(struct rds_conn_path *cp); void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); -void rds_ib_state_change(struct sock *sk); -int rds_ib_listen_init(void); -void rds_ib_listen_stop(void); __printf(2, 3) void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, @@ -402,7 +399,6 @@ void rds_ib_inc_free(struct rds_incoming *inc); int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc, struct rds_ib_ack_state *state); -void rds_ib_recv_tasklet_fn(unsigned long data); void rds_ib_recv_init_ring(struct rds_ib_connection *ic); void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); void rds_ib_recv_init_ack(struct rds_ib_connection *ic); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8e3093938cd2..0316217b7687 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1466,10 +1466,6 @@ connect_abort: static int smc_connect_check_aclc(struct smc_init_info *ini, struct smc_clc_msg_accept_confirm *aclc) { - if (aclc->hdr.typev1 != SMC_TYPE_R && - aclc->hdr.typev1 != SMC_TYPE_D) - return SMC_CLC_DECL_MODEUNSUPP; - if (aclc->hdr.version >= SMC_V2) { if ((aclc->hdr.typev1 == SMC_TYPE_R && !smcr_indicated(ini->smc_type_v2)) || @@ -1523,10 +1519,6 @@ static int __smc_connect(struct smc_sock *smc) ini->smcd_version &= ~SMC_V1; ini->smcr_version = 0; ini->smc_type_v1 = SMC_TYPE_N; - if (!ini->smcd_version) { - rc = SMC_CLC_DECL_GETVLANERR; - goto fallback; - } } rc = smc_find_proposal_devices(smc, ini); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 3b95828d9976..71fb334d8234 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -2250,7 +2250,7 @@ int smcr_buf_reg_lgr(struct smc_link *lnk) } static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, - bool is_rmb, int bufsize) + int bufsize) { struct smc_buf_desc *buf_desc; @@ -2398,7 +2398,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (is_smcd) buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize); else - buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); + buf_desc = smcr_new_buf_create(lgr, bufsize); if (PTR_ERR(buf_desc) == -ENOMEM) break; diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h index 6dd4292dae56..04dc6808d2e1 100644 --- a/net/smc/smc_loopback.h +++ b/net/smc/smc_loopback.h @@ -15,7 +15,6 @@ #define _SMC_LOOPBACK_H #include <linux/device.h> -#include <linux/err.h> #include <net/smc.h> #if IS_ENABLED(CONFIG_SMC_LO) |