diff options
Diffstat (limited to 'drivers/net/ethernet/ibm')
| -rw-r--r-- | drivers/net/ethernet/ibm/Kconfig | 13 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/ehea/ehea_main.c | 19 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/emac/core.c | 275 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/emac/core.h | 11 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/emac/mal.c | 114 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/emac/mal.h | 2 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/emac/rgmii.c | 55 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/emac/tah.c | 53 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/emac/zmii.c | 53 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/ibmveth.c | 696 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/ibmveth.h | 86 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/ibmvnic.c | 552 | ||||
| -rw-r--r-- | drivers/net/ethernet/ibm/ibmvnic.h | 21 |
13 files changed, 1172 insertions, 778 deletions
diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig index c0c112d95b89..4f4b23465c47 100644 --- a/drivers/net/ethernet/ibm/Kconfig +++ b/drivers/net/ethernet/ibm/Kconfig @@ -27,6 +27,19 @@ config IBMVETH To compile this driver as a module, choose M here. The module will be called ibmveth. +config IBMVETH_KUNIT_TEST + bool "KUnit test for IBM LAN Virtual Ethernet support" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on KUNIT=y && IBMVETH=y + default KUNIT_ALL_TESTS + help + This builds unit tests for the IBM LAN Virtual Ethernet driver. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + source "drivers/net/ethernet/ibm/emac/Kconfig" config EHEA diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index b4aff59b3eb4..9b006bc353a1 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -31,6 +31,7 @@ #include <linux/prefetch.h> #include <linux/of.h> #include <linux/of_device.h> +#include <linux/platform_device.h> #include <net/ip.h> @@ -89,7 +90,7 @@ static struct ehea_bcmc_reg_array ehea_bcmc_regs; static int ehea_probe_adapter(struct platform_device *dev); -static int ehea_remove(struct platform_device *dev); +static void ehea_remove(struct platform_device *dev); static const struct of_device_id ehea_module_device_table[] = { { @@ -899,7 +900,7 @@ static int ehea_poll(struct napi_struct *napi, int budget) if (!cqe && !cqe_skb) return rx; - if (!napi_reschedule(napi)) + if (!napi_schedule(napi)) return rx; cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); @@ -3062,14 +3063,13 @@ static void ehea_shutdown_single_port(struct ehea_port *port) static int ehea_setup_ports(struct ehea_adapter *adapter) { struct device_node *lhea_dn; - struct device_node *eth_dn = NULL; + struct device_node *eth_dn; const u32 *dn_log_port_id; int i = 0; lhea_dn = adapter->ofdev->dev.of_node; - while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) { - + for_each_child_of_node(lhea_dn, eth_dn) { dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", NULL); if (!dn_log_port_id) { @@ -3101,12 +3101,11 @@ static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter, u32 logical_port_id) { struct device_node *lhea_dn; - struct device_node *eth_dn = NULL; + struct device_node *eth_dn; const u32 *dn_log_port_id; lhea_dn = adapter->ofdev->dev.of_node; - while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) { - + for_each_child_of_node(lhea_dn, eth_dn) { dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", NULL); if (dn_log_port_id) @@ -3470,7 +3469,7 @@ out: return ret; } -static int ehea_remove(struct platform_device *dev) +static void ehea_remove(struct platform_device *dev) { struct ehea_adapter *adapter = platform_get_drvdata(dev); int i; @@ -3491,8 +3490,6 @@ static int ehea_remove(struct platform_device *dev) list_del(&adapter->list); ehea_update_firmware_handles(); - - return 0; } static int check_module_parm(void) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 9b08e41ccc29..417dfa18daae 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -32,12 +32,12 @@ #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/bitops.h> -#include <linux/workqueue.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -95,11 +95,6 @@ MODULE_LICENSE("GPL"); static u32 busy_phy_map; static DEFINE_MUTEX(emac_phy_map_lock); -/* This is the wait queue used to wait on any event related to probe, that - * is discovery of MALs, other EMACs, ZMII/RGMIIs, etc... - */ -static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); - /* Having stable interface names is a doomed idea. However, it would be nice * if we didn't have completely random interface names at boot too :-) It's * just a matter of making everybody's life easier. Since we are doing @@ -115,9 +110,6 @@ static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); #define EMAC_BOOT_LIST_SIZE 4 static struct device_node *emac_boot_list[EMAC_BOOT_LIST_SIZE]; -/* How long should I wait for dependent devices ? */ -#define EMAC_PROBE_DEP_TIMEOUT (HZ * 5) - /* I don't want to litter system log with timeout errors * when we have brain-damaged PHY. */ @@ -417,8 +409,8 @@ do_retry: static void emac_hash_mc(struct emac_instance *dev) { + u32 __iomem *gaht_base = emac_gaht_base(dev); const int regs = EMAC_XAHT_REGS(dev); - u32 *gaht_base = emac_gaht_base(dev); u32 gaht_temp[EMAC_XAHT_MAX_REGS]; struct netdev_hw_addr *ha; int i; @@ -972,8 +964,6 @@ static void __emac_set_multicast_list(struct emac_instance *dev) * we need is just to stop RX channel. This seems to work on all * tested SoCs. --ebs * - * If we need the full reset, we might just trigger the workqueue - * and do it async... a bit nasty but should work --BenH */ dev->mcast_pending = 0; emac_rx_disable(dev); @@ -1097,7 +1087,7 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) /* This is to prevent starting RX channel in emac_rx_enable() */ set_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); - dev->ndev->mtu = new_mtu; + WRITE_ONCE(dev->ndev->mtu, new_mtu); emac_full_tx_reset(dev); } @@ -1129,7 +1119,7 @@ static int emac_change_mtu(struct net_device *ndev, int new_mtu) } if (!ret) { - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); dev->rx_skb_size = emac_rx_skb_size(new_mtu); dev->rx_sync_size = emac_rx_sync_size(new_mtu); } @@ -1227,18 +1217,10 @@ static void emac_print_link_status(struct emac_instance *dev) static int emac_open(struct net_device *ndev) { struct emac_instance *dev = netdev_priv(ndev); - int err, i; + int i; DBG(dev, "open" NL); - /* Setup error IRQ handler */ - err = request_irq(dev->emac_irq, emac_irq, 0, "EMAC", dev); - if (err) { - printk(KERN_ERR "%s: failed to request IRQ %d\n", - ndev->name, dev->emac_irq); - return err; - } - /* Allocate RX ring */ for (i = 0; i < NUM_RX_BUFF; ++i) if (emac_alloc_rx_skb(dev, i)) { @@ -1292,8 +1274,6 @@ static int emac_open(struct net_device *ndev) return 0; oom: emac_clean_rx_ring(dev); - free_irq(dev->emac_irq, dev); - return -ENOMEM; } @@ -1407,8 +1387,6 @@ static int emac_close(struct net_device *ndev) emac_clean_tx_ring(dev); emac_clean_rx_ring(dev); - free_irq(dev->emac_irq, dev); - netif_carrier_off(ndev); return 0; @@ -1749,6 +1727,7 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) /* NAPI poll context */ static int emac_poll_rx(void *param, int budget) { + LIST_HEAD(rx_list); struct emac_instance *dev = param; int slot = dev->rx_slot, received = 0; @@ -1805,8 +1784,7 @@ static int emac_poll_rx(void *param, int budget) skb->protocol = eth_type_trans(skb, dev->ndev); emac_rx_csum(dev, skb, ctrl); - if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) - ++dev->estats.rx_dropped_stack; + list_add_tail(&skb->list, &rx_list); next: ++dev->stats.rx_packets; skip: @@ -1850,6 +1828,8 @@ static int emac_poll_rx(void *param, int budget) goto next; } + netif_receive_skb_list(&rx_list); + if (received) { DBG2(dev, "rx %d BDs" NL, received); dev->rx_slot = slot; @@ -2389,7 +2369,9 @@ static int emac_check_deps(struct emac_instance *dev, if (deps[i].drvdata != NULL) there++; } - return there == EMAC_DEP_COUNT; + if (there != EMAC_DEP_COUNT) + return -EPROBE_DEFER; + return 0; } static void emac_put_deps(struct emac_instance *dev) @@ -2401,19 +2383,6 @@ static void emac_put_deps(struct emac_instance *dev) platform_device_put(dev->tah_dev); } -static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action, - void *data) -{ - /* We are only intereted in device addition */ - if (action == BUS_NOTIFY_BOUND_DRIVER) - wake_up_all(&emac_probe_wait); - return 0; -} - -static struct notifier_block emac_of_bus_notifier = { - .notifier_call = emac_of_bus_notify -}; - static int emac_wait_deps(struct emac_instance *dev) { struct emac_depentry deps[EMAC_DEP_COUNT]; @@ -2430,18 +2399,13 @@ static int emac_wait_deps(struct emac_instance *dev) deps[EMAC_DEP_MDIO_IDX].phandle = dev->mdio_ph; if (dev->blist && dev->blist > emac_boot_list) deps[EMAC_DEP_PREV_IDX].phandle = 0xffffffffu; - bus_register_notifier(&platform_bus_type, &emac_of_bus_notifier); - wait_event_timeout(emac_probe_wait, - emac_check_deps(dev, deps), - EMAC_PROBE_DEP_TIMEOUT); - bus_unregister_notifier(&platform_bus_type, &emac_of_bus_notifier); - err = emac_check_deps(dev, deps) ? 0 : -ENODEV; + err = emac_check_deps(dev, deps); for (i = 0; i < EMAC_DEP_COUNT; i++) { of_node_put(deps[i].node); if (err) platform_device_put(deps[i].ofdev); } - if (err == 0) { + if (!err) { dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; dev->zmii_dev = deps[EMAC_DEP_ZMII_IDX].ofdev; dev->rgmii_dev = deps[EMAC_DEP_RGMII_IDX].ofdev; @@ -2455,22 +2419,21 @@ static int emac_wait_deps(struct emac_instance *dev) static int emac_read_uint_prop(struct device_node *np, const char *name, u32 *val, int fatal) { - int len; - const u32 *prop = of_get_property(np, name, &len); - if (prop == NULL || len < sizeof(u32)) { + int err; + + err = of_property_read_u32(np, name, val); + if (err) { if (fatal) - printk(KERN_ERR "%pOF: missing %s property\n", - np, name); - return -ENODEV; + pr_err("%pOF: missing %s property", np, name); + return err; } - *val = *prop; return 0; } static void emac_adjust_link(struct net_device *ndev) { struct emac_instance *dev = netdev_priv(ndev); - struct phy_device *phy = dev->phy_dev; + struct phy_device *phy = ndev->phydev; dev->phy.autoneg = phy->autoneg; dev->phy.speed = phy->speed; @@ -2521,22 +2484,20 @@ static int emac_mdio_phy_start_aneg(struct mii_phy *phy, static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); phy->autoneg = AUTONEG_ENABLE; phy->advertising = advertise; - return emac_mdio_phy_start_aneg(phy, dev->phy_dev); + return emac_mdio_phy_start_aneg(phy, ndev->phydev); } static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); phy->autoneg = AUTONEG_DISABLE; phy->speed = speed; phy->duplex = fd; - return emac_mdio_phy_start_aneg(phy, dev->phy_dev); + return emac_mdio_phy_start_aneg(phy, ndev->phydev); } static int emac_mdio_poll_link(struct mii_phy *phy) @@ -2545,20 +2506,19 @@ static int emac_mdio_poll_link(struct mii_phy *phy) struct emac_instance *dev = netdev_priv(ndev); int res; - res = phy_read_status(dev->phy_dev); + res = phy_read_status(ndev->phydev); if (res) { dev_err(&dev->ofdev->dev, "link update failed (%d).", res); return ethtool_op_get_link(ndev); } - return dev->phy_dev->link; + return ndev->phydev->link; } static int emac_mdio_read_link(struct mii_phy *phy) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); - struct phy_device *phy_dev = dev->phy_dev; + struct phy_device *phy_dev = ndev->phydev; int res; res = phy_read_status(phy_dev); @@ -2575,10 +2535,9 @@ static int emac_mdio_read_link(struct mii_phy *phy) static int emac_mdio_init_phy(struct mii_phy *phy) { struct net_device *ndev = phy->dev; - struct emac_instance *dev = netdev_priv(ndev); - phy_start(dev->phy_dev); - return phy_init_hw(dev->phy_dev); + phy_start(ndev->phydev); + return phy_init_hw(ndev->phydev); } static const struct mii_phy_ops emac_dt_mdio_phy_ops = { @@ -2592,36 +2551,32 @@ static const struct mii_phy_ops emac_dt_mdio_phy_ops = { static int emac_dt_mdio_probe(struct emac_instance *dev) { struct device_node *mii_np; + struct mii_bus *bus; int res; - mii_np = of_get_child_by_name(dev->ofdev->dev.of_node, "mdio"); + mii_np = of_get_available_child_by_name(dev->ofdev->dev.of_node, "mdio"); if (!mii_np) { dev_err(&dev->ofdev->dev, "no mdio definition found."); return -ENODEV; } - if (!of_device_is_available(mii_np)) { - res = -ENODEV; - goto put_node; - } - - dev->mii_bus = devm_mdiobus_alloc(&dev->ofdev->dev); - if (!dev->mii_bus) { + bus = devm_mdiobus_alloc(&dev->ofdev->dev); + if (!bus) { res = -ENOMEM; goto put_node; } - dev->mii_bus->priv = dev->ndev; - dev->mii_bus->parent = dev->ndev->dev.parent; - dev->mii_bus->name = "emac_mdio"; - dev->mii_bus->read = &emac_mii_bus_read; - dev->mii_bus->write = &emac_mii_bus_write; - dev->mii_bus->reset = &emac_mii_bus_reset; - snprintf(dev->mii_bus->id, MII_BUS_ID_SIZE, "%s", dev->ofdev->name); - res = of_mdiobus_register(dev->mii_bus, mii_np); + bus->priv = dev->ndev; + bus->parent = dev->ndev->dev.parent; + bus->name = "emac_mdio"; + bus->read = &emac_mii_bus_read; + bus->write = &emac_mii_bus_write; + bus->reset = &emac_mii_bus_reset; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev->ofdev->name); + res = devm_of_mdiobus_register(&dev->ofdev->dev, bus, mii_np); if (res) { dev_err(&dev->ofdev->dev, "cannot register MDIO bus %s (%d)", - dev->mii_bus->name, res); + bus->name, res); } put_node: @@ -2632,26 +2587,28 @@ static int emac_dt_mdio_probe(struct emac_instance *dev) static int emac_dt_phy_connect(struct emac_instance *dev, struct device_node *phy_handle) { + struct phy_device *phy_dev; + dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def), GFP_KERNEL); if (!dev->phy.def) return -ENOMEM; - dev->phy_dev = of_phy_connect(dev->ndev, phy_handle, &emac_adjust_link, - 0, dev->phy_mode); - if (!dev->phy_dev) { + phy_dev = of_phy_connect(dev->ndev, phy_handle, &emac_adjust_link, 0, + dev->phy_mode); + if (!phy_dev) { dev_err(&dev->ofdev->dev, "failed to connect to PHY.\n"); return -ENODEV; } - dev->phy.def->phy_id = dev->phy_dev->drv->phy_id; - dev->phy.def->phy_id_mask = dev->phy_dev->drv->phy_id_mask; - dev->phy.def->name = dev->phy_dev->drv->name; + dev->phy.def->phy_id = phy_dev->drv->phy_id; + dev->phy.def->phy_id_mask = phy_dev->drv->phy_id_mask; + dev->phy.def->name = phy_dev->drv->name; dev->phy.def->ops = &emac_dt_mdio_phy_ops; ethtool_convert_link_mode_to_legacy_u32(&dev->phy.features, - dev->phy_dev->supported); - dev->phy.address = dev->phy_dev->mdio.addr; - dev->phy.mode = dev->phy_dev->interface; + phy_dev->supported); + dev->phy.address = phy_dev->mdio.addr; + dev->phy.mode = phy_dev->interface; return 0; } @@ -2667,8 +2624,6 @@ static int emac_dt_phy_probe(struct emac_instance *dev) res = emac_dt_mdio_probe(dev); if (!res) { res = emac_dt_phy_connect(dev, phy_handle); - if (res) - mdiobus_unregister(dev->mii_bus); } } @@ -2707,13 +2662,11 @@ static int emac_init_phy(struct emac_instance *dev) return res; res = of_phy_register_fixed_link(np); - dev->phy_dev = of_phy_find_device(np); - if (res || !dev->phy_dev) { - mdiobus_unregister(dev->mii_bus); + ndev->phydev = of_phy_find_device(np); + if (res || !ndev->phydev) return res ? res : -EINVAL; - } emac_adjust_link(dev->ndev); - put_device(&dev->phy_dev->mdio.dev); + put_device(&ndev->phydev->mdio.dev); } return 0; } @@ -2939,9 +2892,9 @@ static int emac_init_config(struct emac_instance *dev) } /* Fixup some feature bits based on the device tree */ - if (of_get_property(np, "has-inverted-stacr-oc", NULL)) + if (of_property_read_bool(np, "has-inverted-stacr-oc")) dev->features |= EMAC_FTR_STACR_OC_INVERT; - if (of_get_property(np, "has-new-stacr-staopc", NULL)) + if (of_property_read_bool(np, "has-new-stacr-staopc")) dev->features |= EMAC_FTR_HAS_NEW_STACR; /* CAB lacks the appropriate properties */ @@ -2979,9 +2932,12 @@ static int emac_init_config(struct emac_instance *dev) /* Read MAC-address */ err = of_get_ethdev_address(np, dev->ndev); - if (err) - return dev_err_probe(&dev->ofdev->dev, err, - "Can't get valid [local-]mac-address from OF !\n"); + if (err == -EPROBE_DEFER) + return err; + if (err) { + dev_warn(&dev->ofdev->dev, "Can't get valid mac-address. Generating random."); + eth_hw_addr_random(dev->ndev); + } /* IAHT and GAHT filter parameterization */ if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { @@ -3042,7 +2998,7 @@ static int emac_probe(struct platform_device *ofdev) * property here for now, but new flat device trees should set a * status property to "disabled" instead. */ - if (of_get_property(np, "unused", NULL) || !of_device_is_available(np)) + if (of_property_read_bool(np, "unused") || !of_device_is_available(np)) return -ENODEV; /* Find ourselves in the bootlist if we are there */ @@ -3052,7 +3008,7 @@ static int emac_probe(struct platform_device *ofdev) /* Allocate our net_device structure */ err = -ENOMEM; - ndev = alloc_etherdev(sizeof(struct emac_instance)); + ndev = devm_alloc_etherdev(&ofdev->dev, sizeof(struct emac_instance)); if (!ndev) goto err_gone; @@ -3063,43 +3019,45 @@ static int emac_probe(struct platform_device *ofdev) SET_NETDEV_DEV(ndev, &ofdev->dev); /* Initialize some embedded data structures */ - mutex_init(&dev->mdio_lock); - mutex_init(&dev->link_lock); + err = devm_mutex_init(&ofdev->dev, &dev->mdio_lock); + if (err) + goto err_gone; + + err = devm_mutex_init(&ofdev->dev, &dev->link_lock); + if (err) + goto err_gone; + spin_lock_init(&dev->lock); INIT_WORK(&dev->reset_work, emac_reset_work); /* Init various config data based on device-tree */ err = emac_init_config(dev); if (err) - goto err_free; + goto err_gone; - /* Get interrupts. EMAC irq is mandatory, WOL irq is optional */ - dev->emac_irq = irq_of_parse_and_map(np, 0); - dev->wol_irq = irq_of_parse_and_map(np, 1); - if (!dev->emac_irq) { - printk(KERN_ERR "%pOF: Can't map main interrupt\n", np); - err = -ENODEV; - goto err_free; + /* Setup error IRQ handler */ + dev->emac_irq = platform_get_irq(ofdev, 0); + err = devm_request_irq(&ofdev->dev, dev->emac_irq, emac_irq, 0, "EMAC", + dev); + if (err) { + dev_err_probe(&ofdev->dev, err, "failed to request IRQ %d", + dev->emac_irq); + goto err_gone; } + ndev->irq = dev->emac_irq; - /* Map EMAC regs */ - // TODO : platform_get_resource() and devm_ioremap_resource() - dev->emacp = of_iomap(np, 0); - if (dev->emacp == NULL) { - printk(KERN_ERR "%pOF: Can't map device registers!\n", np); - err = -ENOMEM; - goto err_irq_unmap; + dev->emacp = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->emacp)) { + dev_err(&ofdev->dev, "can't map device registers"); + err = PTR_ERR(dev->emacp); + goto err_gone; } /* Wait for dependent devices */ err = emac_wait_deps(dev); - if (err) { - printk(KERN_ERR - "%pOF: Timeout waiting for dependent devices\n", np); - /* display more info about what's missing ? */ - goto err_reg_unmap; - } + if (err) + goto err_gone; dev->mal = platform_get_drvdata(dev->mal_dev); if (dev->mdio_dev != NULL) dev->mdio_instance = platform_get_drvdata(dev->mdio_dev); @@ -3186,7 +3144,7 @@ static int emac_probe(struct platform_device *ofdev) netif_carrier_off(ndev); - err = register_netdev(ndev); + err = devm_register_netdev(&ofdev->dev, ndev); if (err) { printk(KERN_ERR "%pOF: failed to register net device (%d)!\n", np, err); @@ -3199,10 +3157,6 @@ static int emac_probe(struct platform_device *ofdev) wmb(); platform_set_drvdata(ofdev, dev); - /* There's a new kid in town ! Let's tell everybody */ - wake_up_all(&emac_probe_wait); - - printk(KERN_INFO "%s: EMAC-%d %pOF, MAC %pM\n", ndev->name, dev->cell_index, np, ndev->dev_addr); @@ -3231,35 +3185,18 @@ static int emac_probe(struct platform_device *ofdev) mal_unregister_commac(dev->mal, &dev->commac); err_rel_deps: emac_put_deps(dev); - err_reg_unmap: - iounmap(dev->emacp); - err_irq_unmap: - if (dev->wol_irq) - irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq) - irq_dispose_mapping(dev->emac_irq); - err_free: - free_netdev(ndev); err_gone: - /* if we were on the bootlist, remove us as we won't show up and - * wake up all waiters to notify them in case they were waiting - * on us - */ - if (blist) { + if (blist) *blist = NULL; - wake_up_all(&emac_probe_wait); - } return err; } -static int emac_remove(struct platform_device *ofdev) +static void emac_remove(struct platform_device *ofdev) { struct emac_instance *dev = platform_get_drvdata(ofdev); DBG(dev, "remove" NL); - unregister_netdev(dev->ndev); - cancel_work_sync(&dev->reset_work); if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) @@ -3269,28 +3206,11 @@ static int emac_remove(struct platform_device *ofdev) if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) zmii_detach(dev->zmii_dev, dev->zmii_port); - if (dev->phy_dev) - phy_disconnect(dev->phy_dev); - - if (dev->mii_bus) - mdiobus_unregister(dev->mii_bus); - busy_phy_map &= ~(1 << dev->phy.address); DBG(dev, "busy_phy_map now %#x" NL, busy_phy_map); mal_unregister_commac(dev->mal, &dev->commac); emac_put_deps(dev); - - iounmap(dev->emacp); - - if (dev->wol_irq) - irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq) - irq_dispose_mapping(dev->emac_irq); - - free_netdev(dev->ndev); - - return 0; } /* XXX Features in here should be replaced by properties... */ @@ -3329,16 +3249,15 @@ static void __init emac_make_bootlist(void) /* Collect EMACs */ while((np = of_find_all_nodes(np)) != NULL) { - const u32 *idx; + u32 idx; if (of_match_node(emac_match, np) == NULL) continue; - if (of_get_property(np, "unused", NULL)) + if (of_property_read_bool(np, "unused")) continue; - idx = of_get_property(np, "cell-index", NULL); - if (idx == NULL) + if (of_property_read_u32(np, "cell-index", &idx)) continue; - cell_indices[i] = *idx; + cell_indices[i] = idx; emac_boot_list[i++] = of_node_get(np); if (i >= EMAC_BOOT_LIST_SIZE) { of_node_put(np); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 89a1b0fea158..89fa1683ec3c 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -27,7 +27,6 @@ #include <linux/netdevice.h> #include <linux/dma-mapping.h> #include <linux/spinlock.h> -#include <linux/of_platform.h> #include <linux/slab.h> #include <asm/io.h> @@ -189,10 +188,6 @@ struct emac_instance { struct emac_instance *mdio_instance; struct mutex mdio_lock; - /* Device-tree based phy configuration */ - struct mii_bus *mii_bus; - struct phy_device *phy_dev; - /* ZMII infos if any */ u32 zmii_ph; u32 zmii_port; @@ -401,7 +396,7 @@ static inline int emac_has_feature(struct emac_instance *dev, ((u32)(1 << (EMAC_XAHT_WIDTH(dev) - 1)) >> \ ((slot) & (u32)(EMAC_XAHT_WIDTH(dev) - 1))) -static inline u32 *emac_xaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_xaht_base(struct emac_instance *dev) { struct emac_regs __iomem *p = dev->emacp; int offset; @@ -414,10 +409,10 @@ static inline u32 *emac_xaht_base(struct emac_instance *dev) else offset = offsetof(struct emac_regs, u0.emac4.iaht1); - return (u32 *)((ptrdiff_t)p + offset); + return (u32 __iomem *)((__force ptrdiff_t)p + offset); } -static inline u32 *emac_gaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_gaht_base(struct emac_instance *dev) { /* GAHT registers always come after an identical number of * IAHT registers. diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index ff5487bbebe3..7d70056e9008 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -22,7 +22,9 @@ #include <linux/delay.h> #include <linux/slab.h> +#include <linux/of.h> #include <linux/of_irq.h> +#include <linux/platform_device.h> #include "core.h" #include <asm/dcr-regs.h> @@ -440,7 +442,7 @@ static int mal_poll(struct napi_struct *napi, int budget) if (unlikely(mc->ops->peek_rx(mc->dev) || test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) { MAL_DBG2(mal, "rotting packet" NL); - if (!napi_reschedule(napi)) + if (!napi_schedule(napi)) goto more_work; spin_lock_irqsave(&mal->lock, flags); @@ -522,7 +524,8 @@ static int mal_probe(struct platform_device *ofdev) unsigned long irqflags; irq_handler_t hdlr_serr, hdlr_txde, hdlr_rxde; - mal = kzalloc(sizeof(struct mal_instance), GFP_KERNEL); + mal = devm_kzalloc(&ofdev->dev, sizeof(struct mal_instance), + GFP_KERNEL); if (!mal) return -ENOMEM; @@ -537,8 +540,7 @@ static int mal_probe(struct platform_device *ofdev) printk(KERN_ERR "mal%d: can't find MAL num-tx-chans property!\n", index); - err = -ENODEV; - goto fail; + return -ENODEV; } mal->num_tx_chans = prop[0]; @@ -547,8 +549,7 @@ static int mal_probe(struct platform_device *ofdev) printk(KERN_ERR "mal%d: can't find MAL num-rx-chans property!\n", index); - err = -ENODEV; - goto fail; + return -ENODEV; } mal->num_rx_chans = prop[0]; @@ -556,15 +557,13 @@ static int mal_probe(struct platform_device *ofdev) if (dcr_base == 0) { printk(KERN_ERR "mal%d: can't find DCR resource!\n", index); - err = -ENODEV; - goto fail; + return -ENODEV; } mal->dcr_host = dcr_map(ofdev->dev.of_node, dcr_base, 0x100); if (!DCR_MAP_OK(mal->dcr_host)) { printk(KERN_ERR "mal%d: failed to map DCRs !\n", index); - err = -ENODEV; - goto fail; + return -ENODEV; } if (of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal-405ez")) { @@ -576,36 +575,21 @@ static int mal_probe(struct platform_device *ofdev) printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n", ofdev->dev.of_node); err = -ENODEV; - goto fail; -#endif - } - - mal->txeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - mal->rxeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 1); - mal->serr_irq = irq_of_parse_and_map(ofdev->dev.of_node, 2); - - if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { - mal->txde_irq = mal->rxde_irq = mal->serr_irq; - } else { - mal->txde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 3); - mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4); - } - - if (!mal->txeob_irq || !mal->rxeob_irq || !mal->serr_irq || - !mal->txde_irq || !mal->rxde_irq) { - printk(KERN_ERR - "mal%d: failed to map interrupts !\n", index); - err = -ENODEV; goto fail_unmap; +#endif } INIT_LIST_HEAD(&mal->poll_list); INIT_LIST_HEAD(&mal->list); spin_lock_init(&mal->lock); - init_dummy_netdev(&mal->dummy_dev); + mal->dummy_dev = alloc_netdev_dummy(0); + if (!mal->dummy_dev) { + err = -ENOMEM; + goto fail_unmap; + } - netif_napi_add_weight(&mal->dummy_dev, &mal->napi, mal_poll, + netif_napi_add_weight(mal->dummy_dev, &mal->napi, mal_poll, CONFIG_IBM_EMAC_POLL_WEIGHT); /* Load power-on reset defaults */ @@ -635,7 +619,7 @@ static int mal_probe(struct platform_device *ofdev) GFP_KERNEL); if (mal->bd_virt == NULL) { err = -ENOMEM; - goto fail_unmap; + goto fail_dummy; } for (i = 0; i < mal->num_tx_chans; ++i) @@ -648,31 +632,43 @@ static int mal_probe(struct platform_device *ofdev) sizeof(struct mal_descriptor) * mal_rx_bd_offset(mal, i)); + mal->txeob_irq = platform_get_irq(ofdev, 0); + mal->rxeob_irq = platform_get_irq(ofdev, 1); + mal->serr_irq = platform_get_irq(ofdev, 2); + if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { + mal->txde_irq = mal->rxde_irq = mal->serr_irq; irqflags = IRQF_SHARED; hdlr_serr = hdlr_txde = hdlr_rxde = mal_int; } else { + mal->txde_irq = platform_get_irq(ofdev, 3); + mal->rxde_irq = platform_get_irq(ofdev, 4); irqflags = 0; hdlr_serr = mal_serr; hdlr_txde = mal_txde; hdlr_rxde = mal_rxde; } - err = request_irq(mal->serr_irq, hdlr_serr, irqflags, "MAL SERR", mal); + err = devm_request_irq(&ofdev->dev, mal->serr_irq, hdlr_serr, irqflags, + "MAL SERR", mal); if (err) goto fail2; - err = request_irq(mal->txde_irq, hdlr_txde, irqflags, "MAL TX DE", mal); + err = devm_request_irq(&ofdev->dev, mal->txde_irq, hdlr_txde, irqflags, + "MAL TX DE", mal); if (err) - goto fail3; - err = request_irq(mal->txeob_irq, mal_txeob, 0, "MAL TX EOB", mal); + goto fail2; + err = devm_request_irq(&ofdev->dev, mal->txeob_irq, mal_txeob, 0, + "MAL TX EOB", mal); if (err) - goto fail4; - err = request_irq(mal->rxde_irq, hdlr_rxde, irqflags, "MAL RX DE", mal); + goto fail2; + err = devm_request_irq(&ofdev->dev, mal->rxde_irq, hdlr_rxde, irqflags, + "MAL RX DE", mal); if (err) - goto fail5; - err = request_irq(mal->rxeob_irq, mal_rxeob, 0, "MAL RX EOB", mal); + goto fail2; + err = devm_request_irq(&ofdev->dev, mal->rxeob_irq, mal_rxeob, 0, + "MAL RX EOB", mal); if (err) - goto fail6; + goto fail2; /* Enable all MAL SERR interrupt sources */ set_mal_dcrn(mal, MAL_IER, MAL_IER_EVENTS); @@ -691,25 +687,16 @@ static int mal_probe(struct platform_device *ofdev) return 0; - fail6: - free_irq(mal->rxde_irq, mal); - fail5: - free_irq(mal->txeob_irq, mal); - fail4: - free_irq(mal->txde_irq, mal); - fail3: - free_irq(mal->serr_irq, mal); fail2: dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma); + fail_dummy: + free_netdev(mal->dummy_dev); fail_unmap: dcr_unmap(mal->dcr_host, 0x100); - fail: - kfree(mal); - return err; } -static int mal_remove(struct platform_device *ofdev) +static void mal_remove(struct platform_device *ofdev) { struct mal_instance *mal = platform_get_drvdata(ofdev); @@ -724,22 +711,17 @@ static int mal_remove(struct platform_device *ofdev) "mal%d: commac list is not empty on remove!\n", mal->index); - free_irq(mal->serr_irq, mal); - free_irq(mal->txde_irq, mal); - free_irq(mal->txeob_irq, mal); - free_irq(mal->rxde_irq, mal); - free_irq(mal->rxeob_irq, mal); - mal_reset(mal); + free_netdev(mal->dummy_dev); + + dcr_unmap(mal->dcr_host, 0x100); + dma_free_coherent(&ofdev->dev, sizeof(struct mal_descriptor) * - (NUM_TX_BUFF * mal->num_tx_chans + - NUM_RX_BUFF * mal->num_rx_chans), mal->bd_virt, - mal->bd_dma); - kfree(mal); - - return 0; + (NUM_TX_BUFF * mal->num_tx_chans + + NUM_RX_BUFF * mal->num_rx_chans), + mal->bd_virt, mal->bd_dma); } static const struct of_device_id mal_platform_match[] = diff --git a/drivers/net/ethernet/ibm/emac/mal.h b/drivers/net/ethernet/ibm/emac/mal.h index d212373a72e7..e0ddc41186a2 100644 --- a/drivers/net/ethernet/ibm/emac/mal.h +++ b/drivers/net/ethernet/ibm/emac/mal.h @@ -205,7 +205,7 @@ struct mal_instance { int index; spinlock_t lock; - struct net_device dummy_dev; + struct net_device *dummy_dev; unsigned int features; }; diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c index 242ef976fd15..b544dd8633b7 100644 --- a/drivers/net/ethernet/ibm/emac/rgmii.c +++ b/drivers/net/ethernet/ibm/emac/rgmii.c @@ -19,7 +19,9 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/ethtool.h> +#include <linux/of.h> #include <linux/of_address.h> +#include <linux/platform_device.h> #include <asm/io.h> #include "emac.h" @@ -214,35 +216,28 @@ void *rgmii_dump_regs(struct platform_device *ofdev, void *buf) static int rgmii_probe(struct platform_device *ofdev) { - struct device_node *np = ofdev->dev.of_node; struct rgmii_instance *dev; - struct resource regs; - int rc; + int err; - rc = -ENOMEM; - dev = kzalloc(sizeof(struct rgmii_instance), GFP_KERNEL); - if (dev == NULL) - goto err_gone; + dev = devm_kzalloc(&ofdev->dev, sizeof(struct rgmii_instance), + GFP_KERNEL); + if (!dev) + return -ENOMEM; - mutex_init(&dev->lock); - dev->ofdev = ofdev; + err = devm_mutex_init(&ofdev->dev, &dev->lock); + if (err) + return err; - rc = -ENXIO; - if (of_address_to_resource(np, 0, ®s)) { - printk(KERN_ERR "%pOF: Can't get registers address\n", np); - goto err_free; - } + dev->ofdev = ofdev; - rc = -ENOMEM; - dev->base = (struct rgmii_regs __iomem *)ioremap(regs.start, - sizeof(struct rgmii_regs)); - if (dev->base == NULL) { - printk(KERN_ERR "%pOF: Can't map device registers!\n", np); - goto err_free; + dev->base = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->base)) { + dev_err(&ofdev->dev, "can't map device registers"); + return PTR_ERR(dev->base); } /* Check for RGMII flags */ - if (of_get_property(ofdev->dev.of_node, "has-mdio", NULL)) + if (of_property_read_bool(ofdev->dev.of_node, "has-mdio")) dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO; /* CAB lacks the right properties, fix this up */ @@ -264,23 +259,6 @@ static int rgmii_probe(struct platform_device *ofdev) platform_set_drvdata(ofdev, dev); return 0; - - err_free: - kfree(dev); - err_gone: - return rc; -} - -static int rgmii_remove(struct platform_device *ofdev) -{ - struct rgmii_instance *dev = platform_get_drvdata(ofdev); - - WARN_ON(dev->users != 0); - - iounmap(dev->base); - kfree(dev); - - return 0; } static const struct of_device_id rgmii_match[] = @@ -300,7 +278,6 @@ static struct platform_driver rgmii_driver = { .of_match_table = rgmii_match, }, .probe = rgmii_probe, - .remove = rgmii_remove, }; int __init rgmii_init(void) diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c index 008bbdaf1204..09f6373ed2f9 100644 --- a/drivers/net/ethernet/ibm/emac/tah.c +++ b/drivers/net/ethernet/ibm/emac/tah.c @@ -14,7 +14,9 @@ * * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net> */ +#include <linux/mod_devicetable.h> #include <linux/of_address.h> +#include <linux/platform_device.h> #include <asm/io.h> #include "emac.h" @@ -85,31 +87,24 @@ void *tah_dump_regs(struct platform_device *ofdev, void *buf) static int tah_probe(struct platform_device *ofdev) { - struct device_node *np = ofdev->dev.of_node; struct tah_instance *dev; - struct resource regs; - int rc; + int err; - rc = -ENOMEM; - dev = kzalloc(sizeof(struct tah_instance), GFP_KERNEL); - if (dev == NULL) - goto err_gone; + dev = devm_kzalloc(&ofdev->dev, sizeof(struct tah_instance), + GFP_KERNEL); + if (!dev) + return -ENOMEM; - mutex_init(&dev->lock); - dev->ofdev = ofdev; + err = devm_mutex_init(&ofdev->dev, &dev->lock); + if (err) + return err; - rc = -ENXIO; - if (of_address_to_resource(np, 0, ®s)) { - printk(KERN_ERR "%pOF: Can't get registers address\n", np); - goto err_free; - } + dev->ofdev = ofdev; - rc = -ENOMEM; - dev->base = (struct tah_regs __iomem *)ioremap(regs.start, - sizeof(struct tah_regs)); - if (dev->base == NULL) { - printk(KERN_ERR "%pOF: Can't map device registers!\n", np); - goto err_free; + dev->base = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->base)) { + dev_err(&ofdev->dev, "can't map device registers"); + return PTR_ERR(dev->base); } platform_set_drvdata(ofdev, dev); @@ -121,23 +116,6 @@ static int tah_probe(struct platform_device *ofdev) wmb(); return 0; - - err_free: - kfree(dev); - err_gone: - return rc; -} - -static int tah_remove(struct platform_device *ofdev) -{ - struct tah_instance *dev = platform_get_drvdata(ofdev); - - WARN_ON(dev->users != 0); - - iounmap(dev->base); - kfree(dev); - - return 0; } static const struct of_device_id tah_match[] = @@ -158,7 +136,6 @@ static struct platform_driver tah_driver = { .of_match_table = tah_match, }, .probe = tah_probe, - .remove = tah_remove, }; int __init tah_init(void) diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c index 57a25c7a9e70..69ca6065de1c 100644 --- a/drivers/net/ethernet/ibm/emac/zmii.c +++ b/drivers/net/ethernet/ibm/emac/zmii.c @@ -19,7 +19,9 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/ethtool.h> +#include <linux/mod_devicetable.h> #include <linux/of_address.h> +#include <linux/platform_device.h> #include <asm/io.h> #include "emac.h" @@ -230,32 +232,25 @@ void *zmii_dump_regs(struct platform_device *ofdev, void *buf) static int zmii_probe(struct platform_device *ofdev) { - struct device_node *np = ofdev->dev.of_node; struct zmii_instance *dev; - struct resource regs; - int rc; + int err; - rc = -ENOMEM; - dev = kzalloc(sizeof(struct zmii_instance), GFP_KERNEL); - if (dev == NULL) - goto err_gone; + dev = devm_kzalloc(&ofdev->dev, sizeof(struct zmii_instance), + GFP_KERNEL); + if (!dev) + return -ENOMEM; + + err = devm_mutex_init(&ofdev->dev, &dev->lock); + if (err) + return err; - mutex_init(&dev->lock); dev->ofdev = ofdev; dev->mode = PHY_INTERFACE_MODE_NA; - rc = -ENXIO; - if (of_address_to_resource(np, 0, ®s)) { - printk(KERN_ERR "%pOF: Can't get registers address\n", np); - goto err_free; - } - - rc = -ENOMEM; - dev->base = (struct zmii_regs __iomem *)ioremap(regs.start, - sizeof(struct zmii_regs)); - if (dev->base == NULL) { - printk(KERN_ERR "%pOF: Can't map device registers!\n", np); - goto err_free; + dev->base = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->base)) { + dev_err(&ofdev->dev, "can't map device registers"); + return PTR_ERR(dev->base); } /* We may need FER value for autodetection later */ @@ -269,23 +264,6 @@ static int zmii_probe(struct platform_device *ofdev) platform_set_drvdata(ofdev, dev); return 0; - - err_free: - kfree(dev); - err_gone: - return rc; -} - -static int zmii_remove(struct platform_device *ofdev) -{ - struct zmii_instance *dev = platform_get_drvdata(ofdev); - - WARN_ON(dev->users != 0); - - iounmap(dev->base); - kfree(dev); - - return 0; } static const struct of_device_id zmii_match[] = @@ -306,7 +284,6 @@ static struct platform_driver zmii_driver = { .of_match_table = zmii_match, }, .probe = zmii_probe, - .remove = zmii_remove, }; int __init zmii_init(void) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 113fcb3e353e..6f0821f1e798 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -39,7 +39,6 @@ #include "ibmveth.h" static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; @@ -203,7 +202,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length) unsigned long offset; for (offset = 0; offset < length; offset += SMP_CACHE_BYTES) - asm("dcbfl %0,%1" :: "b" (addr), "r" (offset)); + asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset)); } /* replenish the buffers for a pool. note that we don't need to @@ -212,88 +211,169 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length) static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struct ibmveth_buff_pool *pool) { - u32 i; - u32 count = pool->size - atomic_read(&pool->available); - u32 buffers_added = 0; - struct sk_buff *skb; - unsigned int free_index, index; - u64 correlator; + union ibmveth_buf_desc descs[IBMVETH_MAX_RX_PER_HCALL] = {0}; + u32 remaining = pool->size - atomic_read(&pool->available); + u64 correlators[IBMVETH_MAX_RX_PER_HCALL] = {0}; unsigned long lpar_rc; + u32 buffers_added = 0; + u32 i, filled, batch; + struct vio_dev *vdev; dma_addr_t dma_addr; + struct device *dev; + u32 index; - mb(); + vdev = adapter->vdev; + dev = &vdev->dev; - for (i = 0; i < count; ++i) { - union ibmveth_buf_desc desc; + mb(); - skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); + batch = adapter->rx_buffers_per_hcall; + + while (remaining > 0) { + unsigned int free_index = pool->consumer_index; + + /* Fill a batch of descriptors */ + for (filled = 0; filled < min(remaining, batch); filled++) { + index = pool->free_map[free_index]; + if (WARN_ON(index == IBM_VETH_INVALID_MAP)) { + adapter->replenish_add_buff_failure++; + netdev_info(adapter->netdev, + "Invalid map index %u, reset\n", + index); + schedule_work(&adapter->work); + break; + } - if (!skb) { - netdev_dbg(adapter->netdev, - "replenish: unable to allocate skb\n"); - adapter->replenish_no_mem++; - break; - } + if (!pool->skbuff[index]) { + struct sk_buff *skb = NULL; - free_index = pool->consumer_index; - pool->consumer_index++; - if (pool->consumer_index >= pool->size) - pool->consumer_index = 0; - index = pool->free_map[free_index]; + skb = netdev_alloc_skb(adapter->netdev, + pool->buff_size); + if (!skb) { + adapter->replenish_no_mem++; + adapter->replenish_add_buff_failure++; + break; + } - BUG_ON(index == IBM_VETH_INVALID_MAP); - BUG_ON(pool->skbuff[index] != NULL); + dma_addr = dma_map_single(dev, skb->data, + pool->buff_size, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + break; + } - dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, - pool->buff_size, DMA_FROM_DEVICE); + pool->dma_addr[index] = dma_addr; + pool->skbuff[index] = skb; + } else { + /* re-use case */ + dma_addr = pool->dma_addr[index]; + } - if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) - goto failure; + if (rx_flush) { + unsigned int len; - pool->free_map[free_index] = IBM_VETH_INVALID_MAP; - pool->dma_addr[index] = dma_addr; - pool->skbuff[index] = skb; + len = adapter->netdev->mtu + IBMVETH_BUFF_OH; + len = min(pool->buff_size, len); + ibmveth_flush_buffer(pool->skbuff[index]->data, + len); + } - correlator = ((u64)pool->index << 32) | index; - *(u64 *)skb->data = correlator; + descs[filled].fields.flags_len = IBMVETH_BUF_VALID | + pool->buff_size; + descs[filled].fields.address = dma_addr; - desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size; - desc.fields.address = dma_addr; + correlators[filled] = ((u64)pool->index << 32) | index; + *(u64 *)pool->skbuff[index]->data = correlators[filled]; - if (rx_flush) { - unsigned int len = min(pool->buff_size, - adapter->netdev->mtu + - IBMVETH_BUFF_OH); - ibmveth_flush_buffer(skb->data, len); + free_index++; + if (free_index >= pool->size) + free_index = 0; } - lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, - desc.desc); + if (!filled) + break; + + /* single buffer case*/ + if (filled == 1) + lpar_rc = h_add_logical_lan_buffer(vdev->unit_address, + descs[0].desc); + else + /* Multi-buffer hcall */ + lpar_rc = h_add_logical_lan_buffers(vdev->unit_address, + descs[0].desc, + descs[1].desc, + descs[2].desc, + descs[3].desc, + descs[4].desc, + descs[5].desc, + descs[6].desc, + descs[7].desc); if (lpar_rc != H_SUCCESS) { - goto failure; - } else { - buffers_added++; - adapter->replenish_add_buff_success++; + dev_warn_ratelimited(dev, + "RX h_add_logical_lan failed: filled=%u, rc=%lu, batch=%u\n", + filled, lpar_rc, batch); + goto hcall_failure; } - } - mb(); - atomic_add(buffers_added, &(pool->available)); - return; + /* Only update pool state after hcall succeeds */ + for (i = 0; i < filled; i++) { + free_index = pool->consumer_index; + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; -failure: - pool->free_map[free_index] = index; - pool->skbuff[index] = NULL; - if (pool->consumer_index == 0) - pool->consumer_index = pool->size - 1; - else - pool->consumer_index--; - if (!dma_mapping_error(&adapter->vdev->dev, dma_addr)) - dma_unmap_single(&adapter->vdev->dev, - pool->dma_addr[index], pool->buff_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; + pool->consumer_index++; + if (pool->consumer_index >= pool->size) + pool->consumer_index = 0; + } + + buffers_added += filled; + adapter->replenish_add_buff_success += filled; + remaining -= filled; + + memset(&descs, 0, sizeof(descs)); + memset(&correlators, 0, sizeof(correlators)); + continue; + +hcall_failure: + for (i = 0; i < filled; i++) { + index = correlators[i] & 0xffffffffUL; + dma_addr = pool->dma_addr[index]; + + if (pool->skbuff[index]) { + if (dma_addr && + !dma_mapping_error(dev, dma_addr)) + dma_unmap_single(dev, dma_addr, + pool->buff_size, + DMA_FROM_DEVICE); + + dev_kfree_skb_any(pool->skbuff[index]); + pool->skbuff[index] = NULL; + } + } + adapter->replenish_add_buff_failure += filled; + + /* + * If multi rx buffers hcall is no longer supported by FW + * e.g. in the case of Live Parttion Migration + */ + if (batch > 1 && lpar_rc == H_FUNCTION) { + /* + * Instead of retry submit single buffer individually + * here just set the max rx buffer per hcall to 1 + * buffers will be respleshed next time + * when ibmveth_replenish_buffer_pool() is called again + * with single-buffer case + */ + netdev_info(adapter->netdev, + "RX Multi buffers not supported by FW, rc=%lu\n", + lpar_rc); + adapter->rx_buffers_per_hcall = 1; + netdev_info(adapter->netdev, + "Next rx replesh will fall back to single-buffer hcall\n"); + } + break; + } mb(); atomic_add(buffers_added, &(pool->available)); @@ -363,28 +443,52 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, } } -/* remove a buffer from a pool */ -static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, - u64 correlator) +/** + * ibmveth_remove_buffer_from_pool - remove a buffer from a pool + * @adapter: adapter instance + * @correlator: identifies pool and index + * @reuse: whether to reuse buffer + * + * Return: + * * %0 - success + * * %-EINVAL - correlator maps to pool or index out of range + * * %-EFAULT - pool and index map to null skb + */ +static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, + u64 correlator, bool reuse) { unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; unsigned int free_index; struct sk_buff *skb; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return -EINVAL; + } skb = adapter->rx_buff_pool[pool].skbuff[index]; + if (WARN_ON(!skb)) { + schedule_work(&adapter->work); + return -EFAULT; + } - BUG_ON(skb == NULL); - - adapter->rx_buff_pool[pool].skbuff[index] = NULL; + /* if we are going to reuse the buffer then keep the pointers around + * but mark index as available. replenish will see the skb pointer and + * assume it is to be recycled. + */ + if (!reuse) { + /* remove the skb pointer to mark free. actual freeing is done + * by upper level networking after gro_recieve + */ + adapter->rx_buff_pool[pool].skbuff[index] = NULL; - dma_unmap_single(&adapter->vdev->dev, - adapter->rx_buff_pool[pool].dma_addr[index], - adapter->rx_buff_pool[pool].buff_size, - DMA_FROM_DEVICE); + dma_unmap_single(&adapter->vdev->dev, + adapter->rx_buff_pool[pool].dma_addr[index], + adapter->rx_buff_pool[pool].buff_size, + DMA_FROM_DEVICE); + } free_index = adapter->rx_buff_pool[pool].producer_index; adapter->rx_buff_pool[pool].producer_index++; @@ -396,6 +500,8 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, mb(); atomic_dec(&(adapter->rx_buff_pool[pool].available)); + + return 0; } /* get the current buffer on the rx queue */ @@ -405,62 +511,44 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return NULL; + } return adapter->rx_buff_pool[pool].skbuff[index]; } -/* recycle the current buffer on the rx queue */ -static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) +/** + * ibmveth_rxq_harvest_buffer - Harvest buffer from pool + * + * @adapter: pointer to adapter + * @reuse: whether to reuse buffer + * + * Context: called from ibmveth_poll + * + * Return: + * * %0 - success + * * other - non-zero return from ibmveth_remove_buffer_from_pool + */ +static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, + bool reuse) { - u32 q_index = adapter->rx_queue.index; - u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator; - unsigned int pool = correlator >> 32; - unsigned int index = correlator & 0xffffffffUL; - union ibmveth_buf_desc desc; - unsigned long lpar_rc; - int ret = 1; - - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); - - if (!adapter->rx_buff_pool[pool].active) { - ibmveth_rxq_harvest_buffer(adapter); - ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]); - goto out; - } - - desc.fields.flags_len = IBMVETH_BUF_VALID | - adapter->rx_buff_pool[pool].buff_size; - desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index]; - - lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); + u64 cor; + int rc; - if (lpar_rc != H_SUCCESS) { - netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed " - "during recycle rc=%ld", lpar_rc); - ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); - ret = 0; - } + cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; + rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + if (unlikely(rc)) + return rc; if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; adapter->rx_queue.toggle = !adapter->rx_queue.toggle; } -out: - return ret; -} - -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) -{ - ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); - - if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { - adapter->rx_queue.index = 0; - adapter->rx_queue.toggle = !adapter->rx_queue.toggle; - } + return 0; } static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx) @@ -732,6 +820,35 @@ static int ibmveth_close(struct net_device *netdev) return 0; } +/** + * ibmveth_reset - Handle scheduled reset work + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: This routine acquires rtnl_mutex and disables its NAPI through + * ibmveth_close. It can't be called directly in a context that has + * already acquired rtnl_mutex or disabled its NAPI, or directly from + * a poll routine. + * + * Return: void + */ +static void ibmveth_reset(struct work_struct *w) +{ + struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work); + struct net_device *netdev = adapter->netdev; + + netdev_dbg(netdev, "reset starting\n"); + + rtnl_lock(); + + dev_close(adapter->netdev); + dev_open(adapter->netdev, NULL); + + rtnl_unlock(); + + netdev_dbg(netdev, "reset complete\n"); +} + static int ibmveth_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { @@ -1303,24 +1420,23 @@ static void ibmveth_rx_csum_helper(struct sk_buff *skb, * the user space for finding a flow. During this process, OVS computes * checksum on the first packet when CHECKSUM_PARTIAL flag is set. * - * So, re-compute TCP pseudo header checksum when configured for - * trunk mode. + * So, re-compute TCP pseudo header checksum. */ + if (iph_proto == IPPROTO_TCP) { struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen); + if (tcph->check == 0x0000) { /* Recompute TCP pseudo header checksum */ - if (adapter->is_active_trunk) { - tcphdrlen = skb->len - iphlen; - if (skb_proto == ETH_P_IP) - tcph->check = - ~csum_tcpudp_magic(iph->saddr, - iph->daddr, tcphdrlen, iph_proto, 0); - else if (skb_proto == ETH_P_IPV6) - tcph->check = - ~csum_ipv6_magic(&iph6->saddr, - &iph6->daddr, tcphdrlen, iph_proto, 0); - } + tcphdrlen = skb->len - iphlen; + if (skb_proto == ETH_P_IP) + tcph->check = + ~csum_tcpudp_magic(iph->saddr, + iph->daddr, tcphdrlen, iph_proto, 0); + else if (skb_proto == ETH_P_IPV6) + tcph->check = + ~csum_ipv6_magic(&iph6->saddr, + &iph6->daddr, tcphdrlen, iph_proto, 0); /* Setup SKB fields for checksum offload */ skb_partial_csum_set(skb, iphlen, offsetof(struct tcphdr, check)); @@ -1338,6 +1454,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) unsigned long lpar_rc; u16 mss = 0; +restart_poll: while (frames_processed < budget) { if (!ibmveth_rxq_pending_buffer(adapter)) break; @@ -1347,7 +1464,8 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) wmb(); /* suggested by larson1 */ adapter->rx_invalid_buffer++; netdev_dbg(netdev, "recycling invalid buffer\n"); - ibmveth_rxq_recycle_buffer(adapter); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; } else { struct sk_buff *skb, *new_skb; int length = ibmveth_rxq_frame_length(adapter); @@ -1357,6 +1475,8 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) __sum16 iph_check = 0; skb = ibmveth_rxq_get_buffer(adapter); + if (unlikely(!skb)) + break; /* if the large packet bit is set in the rx queue * descriptor, the mss will be written by PHYP eight @@ -1380,11 +1500,12 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) if (rx_flush) ibmveth_flush_buffer(skb->data, length + offset); - if (!ibmveth_rxq_recycle_buffer(adapter)) - kfree_skb(skb); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; skb = new_skb; } else { - ibmveth_rxq_harvest_buffer(adapter); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false))) + break; skb_reserve(skb, offset); } @@ -1421,24 +1542,28 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) ibmveth_replenish_task(adapter); - if (frames_processed < budget) { - napi_complete_done(napi, frames_processed); + if (frames_processed == budget) + goto out; - /* We think we are done - reenable interrupts, - * then check once more to make sure we are done. - */ - lpar_rc = h_vio_signal(adapter->vdev->unit_address, - VIO_IRQ_ENABLE); + if (!napi_complete_done(napi, frames_processed)) + goto out; - BUG_ON(lpar_rc != H_SUCCESS); + /* We think we are done - reenable interrupts, + * then check once more to make sure we are done. + */ + lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE); + if (WARN_ON(lpar_rc != H_SUCCESS)) { + schedule_work(&adapter->work); + goto out; + } - if (ibmveth_rxq_pending_buffer(adapter) && - napi_reschedule(napi)) { - lpar_rc = h_vio_signal(adapter->vdev->unit_address, - VIO_IRQ_DISABLE); - } + if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + goto restart_poll; } +out: return frames_processed; } @@ -1451,7 +1576,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) if (napi_schedule_prep(&adapter->napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - BUG_ON(lpar_rc != H_SUCCESS); + WARN_ON(lpar_rc != H_SUCCESS); __napi_schedule(&adapter->napi); } return IRQ_HANDLED; @@ -1538,7 +1663,7 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) adapter->rx_buff_pool[i].active = 1; if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); vio_cmo_set_dev_desired(viodev, ibmveth_get_desired_dma (viodev)); @@ -1693,6 +1818,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->vdev = dev; adapter->netdev = netdev; + INIT_WORK(&adapter->work, ibmveth_reset); adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); ibmveth_init_link_settings(netdev); @@ -1728,6 +1854,19 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netdev->features |= NETIF_F_FRAGLIST; } + if (ret == H_SUCCESS && + (ret_attr & IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT)) { + adapter->rx_buffers_per_hcall = IBMVETH_MAX_RX_PER_HCALL; + netdev_dbg(netdev, + "RX Multi-buffer hcall supported by FW, batch set to %u\n", + adapter->rx_buffers_per_hcall); + } else { + adapter->rx_buffers_per_hcall = 1; + netdev_dbg(netdev, + "RX Single-buffer hcall mode, batch set to %u\n", + adapter->rx_buffers_per_hcall); + } + netdev->min_mtu = IBMVETH_MIN_MTU; netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; @@ -1785,6 +1924,8 @@ static void ibmveth_remove(struct vio_dev *dev) struct ibmveth_adapter *adapter = netdev_priv(netdev); int i; + cancel_work_sync(&adapter->work); + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) kobject_put(&adapter->rx_buff_pool[i].kobj); @@ -1814,6 +1955,26 @@ static ssize_t veth_pool_show(struct kobject *kobj, return 0; } +/** + * veth_pool_store - sysfs store handler for pool attributes + * @kobj: kobject embedded in pool + * @attr: attribute being changed + * @buf: value being stored + * @count: length of @buf in bytes + * + * Stores new value in pool attribute. Verifies the range of the new value for + * size and buff_size. Verifies that at least one pool remains available to + * receive MTU-sized packets. + * + * Context: Process context. + * Takes and releases rtnl_mutex to ensure correct ordering of close + * and open calls. + * Return: + * * %-EPERM - Not allowed to disabled all MTU-sized buffer pools + * * %-EINVAL - New pool size or buffer size is out of range + * * count - Return count for success + * * other - Return value from a failed ibmveth_open call + */ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { @@ -1823,24 +1984,30 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent)); struct ibmveth_adapter *adapter = netdev_priv(netdev); long value = simple_strtol(buf, NULL, 10); + bool change = false; + u32 newbuff_size; + u32 oldbuff_size; + int newactive; + int oldactive; + u32 newsize; + u32 oldsize; long rc; + rtnl_lock(); + + oldbuff_size = pool->buff_size; + oldactive = pool->active; + oldsize = pool->size; + + newbuff_size = oldbuff_size; + newactive = oldactive; + newsize = oldsize; + if (attr == &veth_active_attr) { - if (value && !pool->active) { - if (netif_running(netdev)) { - if (ibmveth_alloc_buffer_pool(pool)) { - netdev_err(netdev, - "unable to alloc pool\n"); - return -ENOMEM; - } - pool->active = 1; - ibmveth_close(netdev); - if ((rc = ibmveth_open(netdev))) - return rc; - } else { - pool->active = 1; - } - } else if (!value && pool->active) { + if (value && !oldactive) { + newactive = 1; + change = true; + } else if (!value && oldactive) { int mtu = netdev->mtu + IBMVETH_BUFF_OH; int i; /* Make sure there is a buffer pool with buffers that @@ -1856,48 +2023,60 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, if (i == IBMVETH_NUM_BUFF_POOLS) { netdev_err(netdev, "no active pool >= MTU\n"); - return -EPERM; + rc = -EPERM; + goto unlock_err; } - if (netif_running(netdev)) { - ibmveth_close(netdev); - pool->active = 0; - if ((rc = ibmveth_open(netdev))) - return rc; - } - pool->active = 0; + newactive = 0; + change = true; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) { - return -EINVAL; - } else { - if (netif_running(netdev)) { - ibmveth_close(netdev); - pool->size = value; - if ((rc = ibmveth_open(netdev))) - return rc; - } else { - pool->size = value; - } + rc = -EINVAL; + goto unlock_err; + } + if (value != oldsize) { + newsize = value; + change = true; } } else if (attr == &veth_size_attr) { if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) { - return -EINVAL; - } else { - if (netif_running(netdev)) { - ibmveth_close(netdev); - pool->buff_size = value; - if ((rc = ibmveth_open(netdev))) - return rc; - } else { - pool->buff_size = value; + rc = -EINVAL; + goto unlock_err; + } + if (value != oldbuff_size) { + newbuff_size = value; + change = true; + } + } + + if (change) { + if (netif_running(netdev)) + ibmveth_close(netdev); + + pool->active = newactive; + pool->buff_size = newbuff_size; + pool->size = newsize; + + if (netif_running(netdev)) { + rc = ibmveth_open(netdev); + if (rc) { + pool->active = oldactive; + pool->buff_size = oldbuff_size; + pool->size = oldsize; + goto unlock_err; } } } + rtnl_unlock(); /* kick the interrupt handler to allocate/deallocate pools */ ibmveth_interrupt(netdev->irq, netdev); return count; + +unlock_err: + rtnl_unlock(); + return rc; } @@ -1970,3 +2149,132 @@ static void __exit ibmveth_module_exit(void) module_init(ibmveth_module_init); module_exit(ibmveth_module_exit); + +#ifdef CONFIG_IBMVETH_KUNIT_TEST +#include <kunit/test.h> + +/** + * ibmveth_reset_kunit - reset routine for running in KUnit environment + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: Called in the KUnit environment. Does nothing. + * + * Return: void + */ +static void ibmveth_reset_kunit(struct work_struct *w) +{ + netdev_dbg(NULL, "reset_kunit starting\n"); + netdev_dbg(NULL, "reset_kunit complete\n"); +} + +/** + * ibmveth_remove_buffer_from_pool_test - unit test for some of + * ibmveth_remove_buffer_from_pool + * @test: pointer to kunit structure + * + * Tests the error returns from ibmveth_remove_buffer_from_pool. + * ibmveth_remove_buffer_from_pool also calls WARN_ON, so dmesg should be + * checked to see that these warnings happened. + * + * Return: void + */ +static void ibmveth_remove_buffer_from_pool_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + u64 correlator; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + correlator = ((u64)IBMVETH_NUM_BUFF_POOLS << 32) | 0; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = ((u64)0 << 32) | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = (u64)0 | 0; + pool->skbuff[0] = NULL; + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + flush_work(&adapter->work); +} + +/** + * ibmveth_rxq_get_buffer_test - unit test for ibmveth_rxq_get_buffer + * @test: pointer to kunit structure + * + * Tests ibmveth_rxq_get_buffer. ibmveth_rxq_get_buffer also calls WARN_ON for + * the NULL returns, so dmesg should be checked to see that these warnings + * happened. + * + * Return: void + */ +static void ibmveth_rxq_get_buffer_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct sk_buff *skb = kunit_kzalloc(test, sizeof(*skb), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + adapter->rx_queue.queue_len = 1; + adapter->rx_queue.index = 0; + adapter->rx_queue.queue_addr = kunit_kzalloc(test, sizeof(struct ibmveth_rx_q_entry), + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter->rx_queue.queue_addr); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + adapter->rx_queue.queue_addr[0].correlator = (u64)IBMVETH_NUM_BUFF_POOLS << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + pool->skbuff[0] = skb; + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, skb, ibmveth_rxq_get_buffer(adapter)); + + flush_work(&adapter->work); +} + +static struct kunit_case ibmveth_test_cases[] = { + KUNIT_CASE(ibmveth_remove_buffer_from_pool_test), + KUNIT_CASE(ibmveth_rxq_get_buffer_test), + {} +}; + +static struct kunit_suite ibmveth_test_suite = { + .name = "ibmveth-kunit-test", + .test_cases = ibmveth_test_cases, +}; + +kunit_test_suite(ibmveth_test_suite); +#endif diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 8468e2c59d7a..068f99df133e 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -28,6 +28,7 @@ #define IbmVethMcastRemoveFilter 0x2UL #define IbmVethMcastClearFilterTable 0x3UL +#define IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT 0x0000000000040000UL #define IBMVETH_ILLAN_LRG_SR_ENABLED 0x0000000000010000UL #define IBMVETH_ILLAN_LRG_SND_SUPPORT 0x0000000000008000UL #define IBMVETH_ILLAN_PADDED_PKT_CSUM 0x0000000000002000UL @@ -46,6 +47,24 @@ #define h_add_logical_lan_buffer(ua, buf) \ plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf) +static inline long h_add_logical_lan_buffers(unsigned long unit_address, + unsigned long desc1, + unsigned long desc2, + unsigned long desc3, + unsigned long desc4, + unsigned long desc5, + unsigned long desc6, + unsigned long desc7, + unsigned long desc8) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + return plpar_hcall9(H_ADD_LOGICAL_LAN_BUFFERS, + retbuf, unit_address, + desc1, desc2, desc3, desc4, + desc5, desc6, desc7, desc8); +} + /* FW allows us to send 6 descriptors but we only use one so mark * the other 5 as unused (0) */ @@ -101,6 +120,7 @@ static inline long h_illan_attributes(unsigned long unit_address, #define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64) #define IBMVETH_MAX_QUEUES 16U #define IBMVETH_DEFAULT_QUEUES 8U +#define IBMVETH_MAX_RX_PER_HCALL 8U static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; static int pool_count[] = { 256, 512, 256, 256, 256 }; @@ -134,38 +154,40 @@ struct ibmveth_rx_q { }; struct ibmveth_adapter { - struct vio_dev *vdev; - struct net_device *netdev; - struct napi_struct napi; - unsigned int mcastFilterSize; - void * buffer_list_addr; - void * filter_list_addr; - void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; - unsigned int tx_ltb_size; - dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; - dma_addr_t buffer_list_dma; - dma_addr_t filter_list_dma; - struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; - struct ibmveth_rx_q rx_queue; - int rx_csum; - int large_send; - bool is_active_trunk; - - u64 fw_ipv6_csum_support; - u64 fw_ipv4_csum_support; - u64 fw_large_send_support; - /* adapter specific stats */ - u64 replenish_task_cycles; - u64 replenish_no_mem; - u64 replenish_add_buff_failure; - u64 replenish_add_buff_success; - u64 rx_invalid_buffer; - u64 rx_no_buffer; - u64 tx_map_failed; - u64 tx_send_failed; - u64 tx_large_packets; - u64 rx_large_packets; - /* Ethtool settings */ + struct vio_dev *vdev; + struct net_device *netdev; + struct napi_struct napi; + struct work_struct work; + unsigned int mcastFilterSize; + void *buffer_list_addr; + void *filter_list_addr; + void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; + unsigned int tx_ltb_size; + dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; + dma_addr_t buffer_list_dma; + dma_addr_t filter_list_dma; + struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; + struct ibmveth_rx_q rx_queue; + int rx_csum; + int large_send; + bool is_active_trunk; + unsigned int rx_buffers_per_hcall; + + u64 fw_ipv6_csum_support; + u64 fw_ipv4_csum_support; + u64 fw_large_send_support; + /* adapter specific stats */ + u64 replenish_task_cycles; + u64 replenish_no_mem; + u64 replenish_add_buff_failure; + u64 replenish_add_buff_success; + u64 rx_invalid_buffer; + u64 rx_no_buffer; + u64 tx_map_failed; + u64 tx_send_failed; + u64 tx_large_packets; + u64 rx_large_packets; + /* Ethtool settings */ u8 duplex; u32 speed; }; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index e19a6bb3f444..3808148c1fc7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *, static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, struct ibmvnic_sub_crq_queue *); static int ibmvnic_poll(struct napi_struct *napi, int data); +static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter); +static inline void reinit_init_done(struct ibmvnic_adapter *adapter); static void send_query_map(struct ibmvnic_adapter *adapter); static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8); static int send_request_unmap(struct ibmvnic_adapter *, u8); @@ -114,6 +116,8 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, static void free_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb); static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); +static void flush_reset_queue(struct ibmvnic_adapter *adapter); +static void print_subcrq_error(struct device *dev, int rc, const char *func); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -191,9 +195,8 @@ static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter) struct ibmvnic_sub_crq_queue **rxqs; struct ibmvnic_sub_crq_queue **txqs; int num_rxqs, num_txqs; - int rc, i; + int i; - rc = 0; rxqs = adapter->rx_scrq; txqs = adapter->tx_scrq; num_txqs = adapter->num_active_tx_scrqs; @@ -231,11 +234,17 @@ static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue, (*stragglers)--; } /* atomic write is safer than writing bit by bit directly */ - for (i = 0; i < stride; i++) { - cpumask_set_cpu(*cpu, mask); - *cpu = cpumask_next_wrap(*cpu, cpu_online_mask, - nr_cpu_ids, false); + for_each_online_cpu_wrap(i, *cpu) { + if (!stride--) { + /* For the next queue we start from the first + * unused CPU in this queue + */ + *cpu = i; + break; + } + cpumask_set_cpu(i, mask); } + /* set queue affinity mask */ cpumask_copy(queue->affinity_mask, mask); rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask); @@ -250,10 +259,11 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter) struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq; struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq; struct ibmvnic_sub_crq_queue *queue; - int num_rxqs = adapter->num_active_rx_scrqs; - int num_txqs = adapter->num_active_tx_scrqs; + int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0; + int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0; int total_queues, stride, stragglers, i; - unsigned int num_cpu, cpu; + unsigned int num_cpu, cpu = 0; + bool is_rx_queue; int rc = 0; netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__); @@ -270,33 +280,33 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter) stride = max_t(int, num_cpu / total_queues, 1); /* number of leftover cpu's */ stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0; - /* next available cpu to assign irq to */ - cpu = cpumask_next(-1, cpu_online_mask); - for (i = 0; i < num_txqs; i++) { - queue = txqs[i]; + for (i = 0; i < total_queues; i++) { + is_rx_queue = false; + /* balance core load by alternating rx and tx assignments + * ex: TX0 -> RX0 -> TX1 -> RX1 etc. + */ + if ((i % 2 == 1 && i_rxqs < num_rxqs) || i_txqs == num_txqs) { + queue = rxqs[i_rxqs++]; + is_rx_queue = true; + } else { + queue = txqs[i_txqs++]; + } + rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers, stride); if (rc) goto out; - if (!queue) + if (!queue || is_rx_queue) continue; rc = __netif_set_xps_queue(adapter->netdev, cpumask_bits(queue->affinity_mask), - i, XPS_CPUS); + i_txqs - 1, XPS_CPUS); if (rc) netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n", - __func__, i, rc); - } - - for (i = 0; i < num_rxqs; i++) { - queue = rxqs[i]; - rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers, - stride); - if (rc) - goto out; + __func__, i_txqs - 1, rc); } out: @@ -746,6 +756,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) adapter->rx_pool[i].active = 0; } +static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter) +{ + if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) { + netdev_info(adapter->netdev, + "set max ind descs from %u to safe limit %u\n", + adapter->cur_max_ind_descs, + IBMVNIC_SAFE_IND_DESC); + adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC; + } +} + static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct ibmvnic_rx_pool *pool) { @@ -833,7 +854,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); /* if send_subcrq_indirect queue is full, flush to VIOS */ - if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + if (ind_bufp->index == adapter->cur_max_ind_descs || i == count - 1) { lpar_rc = send_subcrq_indirect(adapter, handle, @@ -852,6 +873,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); + + /* Detect platform limit H_PARAMETER */ + if (lpar_rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ for (i = ind_bufp->index - 1; i >= 0; --i) { struct ibmvnic_rx_buff *rx_buff; @@ -1502,8 +1531,8 @@ static const char *adapter_state_to_string(enum vnic_state state) static int ibmvnic_login(struct net_device *netdev) { + unsigned long flags, timeout = msecs_to_jiffies(20000); struct ibmvnic_adapter *adapter = netdev_priv(netdev); - unsigned long timeout = msecs_to_jiffies(20000); int retry_count = 0; int retries = 10; bool retry; @@ -1524,11 +1553,9 @@ static int ibmvnic_login(struct net_device *netdev) if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - netdev_warn(netdev, "Login timed out, retrying...\n"); - retry = true; - adapter->init_done_rc = 0; - retry_count++; - continue; + netdev_warn(netdev, "Login timed out\n"); + adapter->login_pending = false; + goto partial_reset; } if (adapter->init_done_rc == ABORTED) { @@ -1570,10 +1597,69 @@ static int ibmvnic_login(struct net_device *netdev) "SCRQ irq initialization failed\n"); return rc; } + /* Default/timeout error handling, reset and start fresh */ } else if (adapter->init_done_rc) { netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n", adapter->init_done_rc); - return -EIO; + +partial_reset: + /* adapter login failed, so free any CRQs or sub-CRQs + * and register again before attempting to login again. + * If we don't do this then the VIOS may think that + * we are already logged in and reject any subsequent + * attempts + */ + netdev_warn(netdev, + "Freeing and re-registering CRQs before attempting to login again\n"); + retry = true; + adapter->init_done_rc = 0; + release_sub_crqs(adapter, true); + /* Much of this is similar logic as ibmvnic_probe(), + * we are essentially re-initializing communication + * with the server. We really should not run any + * resets/failovers here because this is already a form + * of reset and we do not want parallel resets occurring + */ + do { + reinit_init_done(adapter); + /* Clear any failovers we got in the previous + * pass since we are re-initializing the CRQ + */ + adapter->failover_pending = false; + release_crq_queue(adapter); + /* If we don't sleep here then we risk an + * unnecessary failover event from the VIOS. + * This is a known VIOS issue caused by a vnic + * device freeing and registering a CRQ too + * quickly. + */ + msleep(1500); + /* Avoid any resets, since we are currently + * resetting. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, + flags); + + rc = init_crq_queue(adapter); + if (rc) { + netdev_err(netdev, "login recovery: init CRQ failed %d\n", + rc); + return -EIO; + } + + rc = ibmvnic_reset_init(adapter, false); + if (rc) + netdev_err(netdev, "login recovery: Reset init failed %d\n", + rc); + /* IBMVNIC_CRQ_INIT will return EAGAIN if it + * fails, since ibmvnic_reset_init will free + * irq's in failure, we won't be able to receive + * new CRQs so we need to keep trying. probe() + * handles this similarly. + */ + } while (rc == -EAGAIN && retry_count++ < retries); } } while (retry); @@ -1585,12 +1671,22 @@ static int ibmvnic_login(struct net_device *netdev) static void release_login_buffer(struct ibmvnic_adapter *adapter) { + if (!adapter->login_buf) + return; + + dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token, + adapter->login_buf_sz, DMA_TO_DEVICE); kfree(adapter->login_buf); adapter->login_buf = NULL; } static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter) { + if (!adapter->login_rsp_buf) + return; + + dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token, + adapter->login_rsp_buf_sz, DMA_FROM_DEVICE); kfree(adapter->login_rsp_buf); adapter->login_rsp_buf = NULL; } @@ -1813,7 +1909,14 @@ static int __ibmvnic_open(struct net_device *netdev) if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); enable_scrq_irq(adapter, adapter->tx_scrq[i]); - netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); + /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL + * resets, don't reset the stats because there could be batched + * skb's waiting to be sent. If we reset dql stats, we risk + * num_completed being greater than num_queued. This will cause + * a BUG_ON in dql_completed(). + */ + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); @@ -2061,63 +2164,49 @@ static int ibmvnic_close(struct net_device *netdev) } /** - * build_hdr_data - creates L2/L3/L4 header data buffer + * get_hdr_lens - fills list of L2/L3/L4 hdr lens * @hdr_field: bitfield determining needed headers * @skb: socket buffer - * @hdr_len: array of header lengths - * @hdr_data: buffer to write the header to + * @hdr_len: array of header lengths to be filled * * Reads hdr_field to determine which headers are needed by firmware. * Builds a buffer containing these headers. Saves individual header * lengths and total buffer length to be used to build descriptors. + * + * Return: total len of all headers */ -static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, - int *hdr_len, u8 *hdr_data) +static int get_hdr_lens(u8 hdr_field, struct sk_buff *skb, + int *hdr_len) { int len = 0; - u8 *hdr; - if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb)) - hdr_len[0] = sizeof(struct vlan_ethhdr); - else - hdr_len[0] = sizeof(struct ethhdr); + + if ((hdr_field >> 6) & 1) { + hdr_len[0] = skb_mac_header_len(skb); + len += hdr_len[0]; + } + + if ((hdr_field >> 5) & 1) { + hdr_len[1] = skb_network_header_len(skb); + len += hdr_len[1]; + } + + if (!((hdr_field >> 4) & 1)) + return len; if (skb->protocol == htons(ETH_P_IP)) { - hdr_len[1] = ip_hdr(skb)->ihl * 4; if (ip_hdr(skb)->protocol == IPPROTO_TCP) hdr_len[2] = tcp_hdrlen(skb); else if (ip_hdr(skb)->protocol == IPPROTO_UDP) hdr_len[2] = sizeof(struct udphdr); } else if (skb->protocol == htons(ETH_P_IPV6)) { - hdr_len[1] = sizeof(struct ipv6hdr); if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) hdr_len[2] = tcp_hdrlen(skb); else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) hdr_len[2] = sizeof(struct udphdr); - } else if (skb->protocol == htons(ETH_P_ARP)) { - hdr_len[1] = arp_hdr_len(skb->dev); - hdr_len[2] = 0; } - memset(hdr_data, 0, 120); - if ((hdr_field >> 6) & 1) { - hdr = skb_mac_header(skb); - memcpy(hdr_data, hdr, hdr_len[0]); - len += hdr_len[0]; - } - - if ((hdr_field >> 5) & 1) { - hdr = skb_network_header(skb); - memcpy(hdr_data + len, hdr, hdr_len[1]); - len += hdr_len[1]; - } - - if ((hdr_field >> 4) & 1) { - hdr = skb_transport_header(skb); - memcpy(hdr_data + len, hdr, hdr_len[2]); - len += hdr_len[2]; - } - return len; + return len + hdr_len[2]; } /** @@ -2130,12 +2219,14 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, * * Creates header and, if needed, header extension descriptors and * places them in a descriptor array, scrq_arr + * + * Return: Number of header descs */ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, union sub_crq *scrq_arr) { - union sub_crq hdr_desc; + union sub_crq *hdr_desc; int tmp_len = len; int num_descs = 0; u8 *data, *cur; @@ -2144,28 +2235,26 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, while (tmp_len > 0) { cur = hdr_data + len - tmp_len; - memset(&hdr_desc, 0, sizeof(hdr_desc)); - if (cur != hdr_data) { - data = hdr_desc.hdr_ext.data; + hdr_desc = &scrq_arr[num_descs]; + if (num_descs) { + data = hdr_desc->hdr_ext.data; tmp = tmp_len > 29 ? 29 : tmp_len; - hdr_desc.hdr_ext.first = IBMVNIC_CRQ_CMD; - hdr_desc.hdr_ext.type = IBMVNIC_HDR_EXT_DESC; - hdr_desc.hdr_ext.len = tmp; + hdr_desc->hdr_ext.first = IBMVNIC_CRQ_CMD; + hdr_desc->hdr_ext.type = IBMVNIC_HDR_EXT_DESC; + hdr_desc->hdr_ext.len = tmp; } else { - data = hdr_desc.hdr.data; + data = hdr_desc->hdr.data; tmp = tmp_len > 24 ? 24 : tmp_len; - hdr_desc.hdr.first = IBMVNIC_CRQ_CMD; - hdr_desc.hdr.type = IBMVNIC_HDR_DESC; - hdr_desc.hdr.len = tmp; - hdr_desc.hdr.l2_len = (u8)hdr_len[0]; - hdr_desc.hdr.l3_len = cpu_to_be16((u16)hdr_len[1]); - hdr_desc.hdr.l4_len = (u8)hdr_len[2]; - hdr_desc.hdr.flag = hdr_field << 1; + hdr_desc->hdr.first = IBMVNIC_CRQ_CMD; + hdr_desc->hdr.type = IBMVNIC_HDR_DESC; + hdr_desc->hdr.len = tmp; + hdr_desc->hdr.l2_len = (u8)hdr_len[0]; + hdr_desc->hdr.l3_len = cpu_to_be16((u16)hdr_len[1]); + hdr_desc->hdr.l4_len = (u8)hdr_len[2]; + hdr_desc->hdr.flag = hdr_field << 1; } memcpy(data, cur, tmp); tmp_len -= tmp; - *scrq_arr = hdr_desc; - scrq_arr++; num_descs++; } @@ -2188,13 +2277,11 @@ static void build_hdr_descs_arr(struct sk_buff *skb, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; - u8 hdr_data[140] = {0}; int tot_len; - tot_len = build_hdr_data(hdr_field, skb, hdr_len, - hdr_data); - *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, - indir_arr + 1); + tot_len = get_hdr_lens(hdr_field, skb, hdr_len); + *num_entries += create_hdr_descs(hdr_field, skb_mac_header(skb), + tot_len, hdr_len, indir_arr + 1); } static int ibmvnic_xmit_workarounds(struct sk_buff *skb, @@ -2244,9 +2331,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, tx_pool->num_buffers - 1 : tx_pool->consumer_index - 1; tx_buff = &tx_pool->tx_buff[index]; - adapter->netdev->stats.tx_packets--; - adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; - adapter->tx_stats_buffers[queue_num].packets--; + adapter->tx_stats_buffers[queue_num].batched_packets--; adapter->tx_stats_buffers[queue_num].bytes -= tx_buff->skb->len; dev_kfree_skb_any(tx_buff->skb); @@ -2271,8 +2356,29 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, } } +static int send_subcrq_direct(struct ibmvnic_adapter *adapter, + u64 remote_handle, u64 *entry) +{ + unsigned int ua = adapter->vdev->unit_address; + struct device *dev = &adapter->vdev->dev; + int rc; + + /* Make sure the hypervisor sees the complete request */ + dma_wmb(); + rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua, + cpu_to_be64(remote_handle), + cpu_to_be64(entry[0]), cpu_to_be64(entry[1]), + cpu_to_be64(entry[2]), cpu_to_be64(entry[3])); + + if (rc) + print_subcrq_error(dev, rc, __func__); + + return rc; +} + static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, - struct ibmvnic_sub_crq_queue *tx_scrq) + struct ibmvnic_sub_crq_queue *tx_scrq, + bool indirect) { struct ibmvnic_ind_xmit_queue *ind_bufp; u64 dma_addr; @@ -2287,17 +2393,35 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, if (!entries) return 0; - rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); - if (rc) - ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + + if (indirect) + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); else + rc = send_subcrq_direct(adapter, handle, + (u64 *)ind_bufp->indir_arr); + + if (rc) { + dev_err_ratelimited(&adapter->vdev->dev, + "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n", + rc, entries, &dma_addr, handle); + /* Detect platform limit H_PARAMETER */ + if (rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ + ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + } else { ind_bufp->index = 0; - return 0; + } + return rc; } static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); + u32 cur_max_ind_descs = adapter->cur_max_ind_descs; int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; @@ -2311,13 +2435,16 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int tx_map_failed = 0; union sub_crq indir_arr[16]; unsigned int tx_dropped = 0; - unsigned int tx_packets = 0; + unsigned int tx_dpackets = 0; + unsigned int tx_bpackets = 0; unsigned int tx_bytes = 0; dma_addr_t data_dma_addr; struct netdev_queue *txq; unsigned long lpar_rc; + unsigned int skblen; union sub_crq tx_crq; unsigned int offset; + bool use_scrq_send_direct = false; int num_entries = 1; unsigned char *dst; int bufidx = 0; @@ -2345,7 +2472,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_dropped++; tx_send_failed++; ret = NETDEV_TX_OK; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); + if (lpar_rc != H_SUCCESS) + goto tx_err; goto out; } @@ -2360,8 +2489,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); ret = NETDEV_TX_OK; + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); + if (lpar_rc != H_SUCCESS) + goto tx_err; goto out; } @@ -2373,6 +2504,20 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) memset(dst, 0, tx_pool->buf_size); data_dma_addr = ltb->addr + offset; + /* if we are going to send_subcrq_direct this then we need to + * update the checksum before copying the data into ltb. Essentially + * these packets force disable CSO so that we can guarantee that + * FW does not need header info and we can send direct. Also, vnic + * server must be able to xmit standard packets without header data + */ + if (*hdrs == 0 && !skb_is_gso(skb) && + !ind_bufp->index && !netdev_xmit_more()) { + use_scrq_send_direct = true; + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + use_scrq_send_direct = false; + } + if (skb_shinfo(skb)->nr_frags) { int cur, i; @@ -2392,16 +2537,26 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) skb_copy_from_linear_data(skb, dst, skb->len); } - /* post changes to long_term_buff *dst before VIOS accessing it */ - dma_wmb(); - tx_pool->consumer_index = (tx_pool->consumer_index + 1) % tx_pool->num_buffers; tx_buff = &tx_pool->tx_buff[bufidx]; + + /* Sanity checks on our free map to make sure it points to an index + * that is not being occupied by another skb. If skb memory is + * not freed then we see congestion control kick in and halt tx. + */ + if (unlikely(tx_buff->skb)) { + dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n", + skb_is_gso(skb) ? "tso_pool" : "tx_pool", + queue_num, bufidx); + dev_kfree_skb_any(tx_buff->skb); + } + tx_buff->skb = skb; tx_buff->index = bufidx; tx_buff->pool_index = queue_num; + skblen = skb->len; memset(&tx_crq, 0, sizeof(tx_crq)); tx_crq.v1.first = IBMVNIC_CRQ_CMD; @@ -2445,6 +2600,19 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.flags1 |= IBMVNIC_TX_LSO; tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); hdrs += 2; + } else if (use_scrq_send_direct) { + /* See above comment, CSO disabled with direct xmit */ + tx_crq.v1.flags1 &= ~(IBMVNIC_TX_CHKSUM_OFFLOAD); + ind_bufp->index = 1; + tx_buff->num_entries = 1; + netdev_tx_sent_queue(txq, skb->len); + ind_bufp->indir_arr[0] = tx_crq; + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, false); + if (lpar_rc != H_SUCCESS) + goto tx_err; + + tx_dpackets++; + goto early_exit; } if ((*hdrs >> 7) & 1) @@ -2453,8 +2621,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.n_crq_elem = num_entries; tx_buff->num_entries = num_entries; /* flush buffer if current entry can not fit */ - if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { - lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (num_entries + ind_bufp->index > cur_max_ind_descs) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_flush_err; } @@ -2462,23 +2630,26 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) indir_arr[0] = tx_crq; memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], num_entries * sizeof(struct ibmvnic_generic_scrq)); + ind_bufp->index += num_entries; if (__netdev_tx_sent_queue(txq, skb->len, netdev_xmit_more() && - ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { - lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + ind_bufp->index < cur_max_ind_descs)) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_err; } + tx_bpackets++; + +early_exit: if (atomic_add_return(num_entries, &tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { netdev_dbg(netdev, "Stopping queue %d\n", queue_num); netif_stop_subqueue(netdev, queue_num); } - tx_packets++; - tx_bytes += skb->len; + tx_bytes += skblen; txq_trans_cond_update(txq); ret = NETDEV_TX_OK; goto out; @@ -2505,12 +2676,10 @@ tx_err: } out: rcu_read_unlock(); - netdev->stats.tx_dropped += tx_dropped; - netdev->stats.tx_bytes += tx_bytes; - netdev->stats.tx_packets += tx_packets; adapter->tx_send_failed += tx_send_failed; adapter->tx_map_failed += tx_map_failed; - adapter->tx_stats_buffers[queue_num].packets += tx_packets; + adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets; + adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets; adapter->tx_stats_buffers[queue_num].bytes += tx_bytes; adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped; @@ -3309,6 +3478,25 @@ err: return -ret; } +static void ibmvnic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->req_rx_queues; i++) { + stats->rx_packets += adapter->rx_stats_buffers[i].packets; + stats->rx_bytes += adapter->rx_stats_buffers[i].bytes; + } + + for (i = 0; i < adapter->req_tx_queues; i++) { + stats->tx_packets += adapter->tx_stats_buffers[i].batched_packets; + stats->tx_packets += adapter->tx_stats_buffers[i].direct_packets; + stats->tx_bytes += adapter->tx_stats_buffers[i].bytes; + stats->tx_dropped += adapter->tx_stats_buffers[i].dropped_packets; + } +} + static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); @@ -3424,23 +3612,20 @@ restart_poll: length = skb->len; napi_gro_receive(napi, skb); /* send it up */ - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += length; adapter->rx_stats_buffers[scrq_num].packets++; adapter->rx_stats_buffers[scrq_num].bytes += length; frames_processed++; } if (adapter->state != VNIC_CLOSING && - ((atomic_read(&adapter->rx_pool[scrq_num].available) < - adapter->req_rx_add_entries_per_subcrq / 2) || - frames_processed < budget)) + (atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2)) replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); if (frames_processed < budget) { if (napi_complete_done(napi, frames_processed)) { enable_scrq_irq(adapter, rx_scrq); if (pending_scrq(adapter, rx_scrq)) { - if (napi_reschedule(napi)) { + if (napi_schedule(napi)) { disable_scrq_irq(adapter, rx_scrq); goto restart_poll; } @@ -3536,6 +3721,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = { .ndo_set_rx_mode = ibmvnic_set_multi, .ndo_set_mac_address = ibmvnic_set_mac, .ndo_validate_addr = eth_validate_addr, + .ndo_get_stats64 = ibmvnic_get_stats64, .ndo_tx_timeout = ibmvnic_tx_timeout, .ndo_change_mtu = ibmvnic_change_mtu, .ndo_features_check = ibmvnic_features_check, @@ -3672,29 +3858,20 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) if (stringset != ETH_SS_STATS) return; - for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN) - memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) + ethtool_puts(&data, ibmvnic_stats[i].name); for (i = 0; i < adapter->req_tx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i); - data += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "tx%d_batched_packets", i); + ethtool_sprintf(&data, "tx%d_direct_packets", i); + ethtool_sprintf(&data, "tx%d_bytes", i); + ethtool_sprintf(&data, "tx%d_dropped_packets", i); } for (i = 0; i < adapter->req_rx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); - data += ETH_GSTRING_LEN; - - snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); - data += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "rx%d_packets", i); + ethtool_sprintf(&data, "rx%d_bytes", i); + ethtool_sprintf(&data, "rx%d_interrupts", i); } } @@ -3741,7 +3918,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, (adapter, ibmvnic_stats[i].offset)); for (j = 0; j < adapter->req_tx_queues; j++) { - data[i] = adapter->tx_stats_buffers[j].packets; + data[i] = adapter->tx_stats_buffers[j].batched_packets; + i++; + data[i] = adapter->tx_stats_buffers[j].direct_packets; i++; data[i] = adapter->tx_stats_buffers[j].bytes; i++; @@ -3858,7 +4037,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } dma_free_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, scrq->ind_buf.indir_arr, scrq->ind_buf.indir_dma); @@ -3915,7 +4094,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->ind_buf.indir_arr = dma_alloc_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, &scrq->ind_buf.indir_dma, GFP_KERNEL); @@ -3978,6 +4157,12 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) adapter->num_active_tx_scrqs = 0; } + /* Clean any remaining outstanding SKBs + * we freed the irq so we won't be hearing + * from them + */ + clean_tx_pools(adapter); + if (adapter->rx_scrq) { for (i = 0; i < adapter->num_active_rx_scrqs; i++) { if (!adapter->rx_scrq[i]) @@ -4068,20 +4253,17 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { struct device *dev = &adapter->vdev->dev; + int num_packets = 0, total_bytes = 0; struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_buff *txbuff; struct netdev_queue *txq; union sub_crq *next; - int index; - int i; + int index, i; restart_loop: while (pending_scrq(adapter, scrq)) { unsigned int pool = scrq->pool_index; int num_entries = 0; - int total_bytes = 0; - int num_packets = 0; - next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { index = be32_to_cpu(next->tx_comp.correlators[i]); @@ -4117,8 +4299,6 @@ restart_loop: /* remove tx_comp scrq*/ next->tx_comp.first = 0; - txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); - netdev_tx_completed_queue(txq, num_packets, total_bytes); if (atomic_sub_return(num_entries, &scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && @@ -4143,6 +4323,9 @@ restart_loop: goto restart_loop; } + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + return 0; } @@ -4694,6 +4877,18 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter, strscpy(vlcd->name, adapter->netdev->name, len); } +static void ibmvnic_print_hex_dump(struct net_device *dev, void *buf, + size_t len) +{ + unsigned char hex_str[16 * 3]; + + for (size_t i = 0; i < len; i += 16) { + hex_dump_to_buffer((unsigned char *)buf + i, len - i, 16, 8, + hex_str, sizeof(hex_str), false); + netdev_dbg(dev, "%s\n", hex_str); + } +} + static int send_login(struct ibmvnic_adapter *adapter) { struct ibmvnic_login_rsp_buffer *login_rsp_buffer; @@ -4804,10 +4999,8 @@ static int send_login(struct ibmvnic_adapter *adapter) vnic_add_client_data(adapter, vlcd); netdev_dbg(adapter->netdev, "Login Buffer:\n"); - for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { - netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long *)(adapter->login_buf))[i]); - } + ibmvnic_print_hex_dump(adapter->netdev, adapter->login_buf, + adapter->login_buf_sz); memset(&crq, 0, sizeof(crq)); crq.login.first = IBMVNIC_CRQ_CMD; @@ -4820,11 +5013,14 @@ static int send_login(struct ibmvnic_adapter *adapter) if (rc) { adapter->login_pending = false; netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); - goto buf_rsp_map_failed; + goto buf_send_failed; } return 0; +buf_send_failed: + dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size, + DMA_FROM_DEVICE); buf_rsp_map_failed: kfree(login_rsp_buffer); adapter->login_rsp_buf = NULL; @@ -5165,7 +5361,8 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq, /* copy firmware version string from vpd into adapter */ if ((substr + 3 + fw_level_len) < (adapter->vpd->buff + adapter->vpd->len)) { - strncpy((char *)adapter->fw_version, substr + 3, fw_level_len); + strscpy(adapter->fw_version, substr + 3, + sizeof(adapter->fw_version)); } else { dev_info(dev, "FW substr extrapolated VPD buff\n"); } @@ -5180,15 +5377,13 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; - int i; dma_unmap_single(dev, adapter->ip_offload_tok, sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE); netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); - for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++) - netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long *)(buf))[i]); + ibmvnic_print_hex_dump(adapter->netdev, buf, + sizeof(adapter->ip_offload_buf)); netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum); netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); @@ -5386,6 +5581,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, int num_tx_pools; int num_rx_pools; u64 *size_array; + u32 rsp_len; int i; /* CHECK: Test/set of login_pending does not need to be atomic @@ -5397,11 +5593,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, } adapter->login_pending = false; - dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, - DMA_TO_DEVICE); - dma_unmap_single(dev, adapter->login_rsp_buf_token, - adapter->login_rsp_buf_sz, DMA_FROM_DEVICE); - /* If the number of queues requested can't be allocated by the * server, the login response will return with code 1. We will need * to resend the login buffer with fewer queues requested. @@ -5423,10 +5614,8 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); - for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { - netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long *)(adapter->login_rsp_buf))[i]); - } + ibmvnic_print_hex_dump(netdev, adapter->login_rsp_buf, + adapter->login_rsp_buf_sz); /* Sanity checks */ if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs || @@ -5437,6 +5626,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, ibmvnic_reset(adapter, VNIC_RESET_FATAL); return -EIO; } + + rsp_len = be32_to_cpu(login_rsp->len); + if (be32_to_cpu(login->login_rsp_len) < rsp_len || + rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) || + rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) { + /* This can happen if a login request times out and there are + * 2 outstanding login requests sent, the LOGIN_RSP crq + * could have been for the older login request. So we are + * parsing the newer response buffer which may be incomplete + */ + dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n"); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + return -EIO; + } + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); /* variable buffer sizes are not supported, so just read the @@ -6194,6 +6400,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) rc = reset_sub_crq_queues(adapter); } } else { + if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + /* After an LPM, reset the max number of indirect + * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT + * hcall to the default max (e.g POWER8 -> POWER10) + * + * If the new destination platform does not support + * the higher limit max (e.g. POWER10-> POWER8 LPM) + * H_PARAMETER will trigger automatic fallback to the + * safe minimum limit. + */ + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; + } + rc = init_sub_crqs(adapter); } @@ -6345,6 +6564,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->wait_for_reset = false; adapter->last_reset_time = jiffies; + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; rc = register_netdev(netdev); if (rc) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index b35c9b6f913b..480dc587078f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -29,8 +29,9 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 -#define IBMVNIC_MAX_IND_DESCS 16 -#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) +#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_SAFE_IND_DESC 16 +#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 @@ -48,7 +49,7 @@ * of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus * some padding. * - * But the size of a single DMA region is limited by MAX_ORDER in the + * But the size of a single DMA region is limited by MAX_PAGE_ORDER in the * kernel (about 16MB currently). To support say 4K Jumbo frames, we * use a set of LTBs (struct ltb_set) per pool. * @@ -75,7 +76,7 @@ * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160 * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC. */ -#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << (MAX_ORDER - 1)) * PAGE_SIZE)) +#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << MAX_PAGE_ORDER) * PAGE_SIZE)) #define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX) #define IBMVNIC_LTB_SET_SIZE (38 << 20) @@ -211,20 +212,25 @@ struct ibmvnic_statistics { u8 reserved[72]; } __packed __aligned(8); -#define NUM_TX_STATS 3 struct ibmvnic_tx_queue_stats { - u64 packets; + u64 batched_packets; + u64 direct_packets; u64 bytes; u64 dropped_packets; }; -#define NUM_RX_STATS 3 +#define NUM_TX_STATS \ + (sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64)) + struct ibmvnic_rx_queue_stats { u64 packets; u64 bytes; u64 interrupts; }; +#define NUM_RX_STATS \ + (sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64)) + struct ibmvnic_acl_buffer { __be32 len; __be32 version; @@ -925,6 +931,7 @@ struct ibmvnic_adapter { struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; + u32 cur_max_ind_descs; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; |
