diff options
Diffstat (limited to 'drivers/net/ethernet/ibm')
32 files changed, 10226 insertions, 1960 deletions
diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig index 563a1ac71dbc..4f4b23465c47 100644 --- a/drivers/net/ethernet/ibm/Kconfig +++ b/drivers/net/ethernet/ibm/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # IBM device configuration. # @@ -6,10 +7,8 @@ config NET_VENDOR_IBM bool "IBM devices" default y depends on PPC_PSERIES || PPC_DCR || (IBMEBUS && SPARSEMEM) - ---help--- - If you have a network (Ethernet) card belonging to this class, say Y - and read the Ethernet-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. + help + If you have a network (Ethernet) card belonging to this class, say Y. Note that the answer to this question doesn't directly affect the kernel: saying N will just cause the configurator to skip all @@ -21,22 +20,45 @@ if NET_VENDOR_IBM config IBMVETH tristate "IBM LAN Virtual Ethernet support" depends on PPC_PSERIES - ---help--- + help This driver supports virtual ethernet adapters on newer IBM iSeries and pSeries systems. To compile this driver as a module, choose M here. The module will be called ibmveth. +config IBMVETH_KUNIT_TEST + bool "KUnit test for IBM LAN Virtual Ethernet support" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on KUNIT=y && IBMVETH=y + default KUNIT_ALL_TESTS + help + This builds unit tests for the IBM LAN Virtual Ethernet driver. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + source "drivers/net/ethernet/ibm/emac/Kconfig" config EHEA tristate "eHEA Ethernet support" depends on IBMEBUS && SPARSEMEM - ---help--- + help This driver supports the IBM pSeries eHEA ethernet adapter. To compile the driver as a module, choose M here. The module will be called ehea. +config IBMVNIC + tristate "IBM Virtual NIC support" + depends on PPC_PSERIES + help + This driver supports Virtual NIC adapters on IBM i and IBM System p + systems. 
+ + To compile this driver as a module, choose M here. The module will + be called ibmvnic. + endif # NET_VENDOR_IBM diff --git a/drivers/net/ethernet/ibm/Makefile b/drivers/net/ethernet/ibm/Makefile index 2f04e71a5926..1d17d0c33d4d 100644 --- a/drivers/net/ethernet/ibm/Makefile +++ b/drivers/net/ethernet/ibm/Makefile @@ -1,7 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for th IBM network device drivers. # obj-$(CONFIG_IBMVETH) += ibmveth.o +obj-$(CONFIG_IBMVNIC) += ibmvnic.o obj-$(CONFIG_IBM_EMAC) += emac/ obj-$(CONFIG_EHEA) += ehea/ diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile index 775d9969b5c2..9e1e5c7aafe2 100644 --- a/drivers/net/ethernet/ibm/ehea/Makefile +++ b/drivers/net/ethernet/ibm/ehea/Makefile @@ -1,6 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the eHEA ethernet device driver for IBM eServer System p # -ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o ehea_phyp.o +ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o obj-$(CONFIG_EHEA) += ehea.o diff --git a/drivers/net/ethernet/ibm/ehea/ehea.h b/drivers/net/ethernet/ibm/ehea/ehea.h index 6be7b9839f35..208c440a602b 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea.h +++ b/drivers/net/ethernet/ibm/ehea/ehea.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/net/ethernet/ibm/ehea/ehea.h * @@ -9,21 +10,6 @@ * Christoph Raisch <raisch@de.ibm.com> * Jan-Bernd Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __EHEA_H__ @@ -33,6 +19,7 @@ #include <linux/ethtool.h> #include <linux/vmalloc.h> #include <linux/if_vlan.h> +#include <linux/platform_device.h> #include <asm/ibmebus.h> #include <asm/io.h> diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c index 95837b99a464..1db5b6790a41 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * linux/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c * @@ -9,21 +10,6 @@ * Christoph Raisch <raisch@de.ibm.com> * Jan-Bernd Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -31,9 +17,11 @@ #include "ehea.h" #include "ehea_phyp.h" -static int ehea_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int ehea_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct ehea_port *port = netdev_priv(dev); + u32 supported, advertising; u32 speed; int ret; @@ -60,68 +48,75 @@ static int ehea_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) speed = -1; break; /* BUG */ } - cmd->duplex = port->full_duplex == 1 ? + cmd->base.duplex = port->full_duplex == 1 ? DUPLEX_FULL : DUPLEX_HALF; } else { - speed = ~0; - cmd->duplex = -1; + speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - ethtool_cmd_speed_set(cmd, speed); + cmd->base.speed = speed; - if (cmd->speed == SPEED_10000) { - cmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); - cmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); - cmd->port = PORT_FIBRE; + if (cmd->base.speed == SPEED_10000) { + supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); + advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); + cmd->base.port = PORT_FIBRE; } else { - cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full + supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_10baseT_Half | SUPPORTED_Autoneg | SUPPORTED_TP); - cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg + advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | ADVERTISED_TP); - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; } - cmd->autoneg = port->autoneg == 1 ? AUTONEG_ENABLE : AUTONEG_DISABLE; + cmd->base.autoneg = port->autoneg == 1 ? 
+ AUTONEG_ENABLE : AUTONEG_DISABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } -static int ehea_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int ehea_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct ehea_port *port = netdev_priv(dev); int ret = 0; u32 sp; - if (cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { sp = EHEA_SPEED_AUTONEG; goto doit; } - switch (cmd->speed) { + switch (cmd->base.speed) { case SPEED_10: - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) sp = H_SPEED_10M_F; else sp = H_SPEED_10M_H; break; case SPEED_100: - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) sp = H_SPEED_100M_F; else sp = H_SPEED_100M_H; break; case SPEED_1000: - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) sp = H_SPEED_1G_F; else ret = -EINVAL; break; case SPEED_10000: - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) sp = H_SPEED_10G_F; else ret = -EINVAL; @@ -164,8 +159,8 @@ static int ehea_nway_reset(struct net_device *dev) static void ehea_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->version, DRV_VERSION, sizeof(info->version)); } static u32 ehea_get_msglevel(struct net_device *dev) @@ -264,7 +259,6 @@ static void ehea_get_ethtool_stats(struct net_device *dev, } static const struct ethtool_ops ehea_ethtool_ops = { - .get_settings = ehea_get_settings, .get_drvinfo = ehea_get_drvinfo, .get_msglevel = ehea_get_msglevel, .set_msglevel = ehea_set_msglevel, @@ -272,11 +266,12 @@ static const struct ethtool_ops ehea_ethtool_ops = { 
.get_strings = ehea_get_strings, .get_sset_count = ehea_get_sset_count, .get_ethtool_stats = ehea_get_ethtool_stats, - .set_settings = ehea_set_settings, .nway_reset = ehea_nway_reset, /* Restart autonegotiation */ + .get_link_ksettings = ehea_get_link_ksettings, + .set_link_ksettings = ehea_set_link_ksettings, }; void ehea_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ehea_ethtool_ops); + netdev->ethtool_ops = &ehea_ethtool_ops; } diff --git a/drivers/net/ethernet/ibm/ehea/ehea_hw.h b/drivers/net/ethernet/ibm/ehea/ehea_hw.h index 180d4128a711..590933a45d65 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_hw.h +++ b/drivers/net/ethernet/ibm/ehea/ehea_hw.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/net/ethernet/ibm/ehea/ehea_hw.h * @@ -9,21 +10,6 @@ * Christoph Raisch <raisch@de.ibm.com> * Jan-Bernd Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #ifndef __EHEA_HW_H__ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 35853b43d66e..9b006bc353a1 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * linux/drivers/net/ethernet/ibm/ehea/ehea_main.c * @@ -9,25 +10,11 @@ * Christoph Raisch <raisch@de.ibm.com> * Jan-Bernd Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/device.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -42,6 +29,9 @@ #include <asm/kexec.h> #include <linux/mutex.h> #include <linux/prefetch.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> #include <net/ip.h> @@ -100,9 +90,22 @@ static struct ehea_bcmc_reg_array ehea_bcmc_regs; static int ehea_probe_adapter(struct platform_device *dev); -static int ehea_remove(struct platform_device *dev); +static void ehea_remove(struct platform_device *dev); -static struct of_device_id ehea_device_table[] = { +static const struct of_device_id ehea_module_device_table[] = { + { + .name = "lhea", + .compatible = "IBM,lhea", + }, + { + .type = "network", + .compatible = "IBM,lhea-ethernet", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehea_module_device_table); + +static const struct of_device_id ehea_device_table[] = { { .name = "lhea", .compatible = "IBM,lhea", @@ -315,8 +318,8 @@ out: spin_unlock_irqrestore(&ehea_bcmc_regs.lock, flags); } -static struct rtnl_link_stats64 *ehea_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void ehea_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct ehea_port *port = netdev_priv(dev); u64 rx_packets = 0, tx_packets = 0, rx_bytes = 0, tx_bytes = 0; @@ -339,7 +342,6 @@ static struct rtnl_link_stats64 *ehea_get_stats64(struct net_device *dev, stats->multicast = port->stats.multicast; stats->rx_errors = port->stats.rx_errors; - return stats; } static void ehea_update_stats(struct work_struct *work) @@ -478,7 +480,7 @@ static int ehea_refill_rq_def(struct ehea_port_res *pr, skb_arr[index] = skb; tmp_addr = ehea_map_vaddr(skb->data); if (tmp_addr == -1) { - dev_kfree_skb(skb); + dev_consume_skb_any(skb); q_skba->os_skbs = fill_wqes - i; ret = 0; break; @@ -766,12 +768,11 @@ static void check_sqs(struct ehea_port *port) { struct ehea_swqe *swqe; int swqe_index; - 
int i, k; + int i; for (i = 0; i < port->num_def_qps; i++) { struct ehea_port_res *pr = &port->port_res[i]; int ret; - k = 0; swqe = ehea_get_swqe(pr->qp, &swqe_index); memset(swqe, 0, SWQE_HEADER_SIZE); atomic_dec(&pr->swqe_avail); @@ -844,7 +845,7 @@ static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota) index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, cqe->wr_id); skb = pr->sq_skba.arr[index]; - dev_kfree_skb(skb); + dev_consume_skb_any(skb); pr->sq_skba.arr[index] = NULL; } @@ -899,7 +900,7 @@ static int ehea_poll(struct napi_struct *napi, int budget) if (!cqe && !cqe_skb) return rx; - if (!napi_reschedule(napi)) + if (!napi_schedule(napi)) return rx; cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); @@ -909,17 +910,6 @@ static int ehea_poll(struct napi_struct *napi, int budget) return rx; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ehea_netpoll(struct net_device *dev) -{ - struct ehea_port *port = netdev_priv(dev); - int i; - - for (i = 0; i < port->num_def_qps; i++) - napi_schedule(&port->port_res[i].napi); -} -#endif - static irqreturn_t ehea_recv_irq_handler(int irq, void *param) { struct ehea_port_res *pr = param; @@ -1156,16 +1146,15 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) ec = EHEA_BMASK_GET(NEQE_EVENT_CODE, eqe); portnum = EHEA_BMASK_GET(NEQE_PORTNUM, eqe); port = ehea_get_port(adapter, portnum); + if (!port) { + netdev_err(NULL, "unknown portnum %x\n", portnum); + return; + } dev = port->netdev; switch (ec) { case EHEA_EC_PORTSTATE_CHG: /* port state change */ - if (!port) { - netdev_err(dev, "unknown portnum %x\n", portnum); - break; - } - if (EHEA_BMASK_GET(NEQE_PORT_UP, eqe)) { if (!netif_carrier_ok(dev)) { ret = ehea_sense_port_attr(port); @@ -1227,9 +1216,9 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) } } -static void ehea_neq_tasklet(unsigned long data) +static void ehea_neq_tasklet(struct tasklet_struct *t) { - struct ehea_adapter *adapter = (struct ehea_adapter *)data; + 
struct ehea_adapter *adapter = from_tasklet(adapter, t, neq_tasklet); struct ehea_eqe *eqe; u64 event_mask; @@ -1285,7 +1274,7 @@ static int ehea_reg_interrupts(struct net_device *dev) ret = ibmebus_request_irq(port->qp_eq->attr.ist1, ehea_qp_aff_irq_handler, - IRQF_DISABLED, port->int_aff_name, port); + 0, port->int_aff_name, port); if (ret) { netdev_err(dev, "failed registering irq for qp_aff_irq_handler:ist=%X\n", port->qp_eq->attr.ist1); @@ -1303,8 +1292,7 @@ static int ehea_reg_interrupts(struct net_device *dev) "%s-queue%d", dev->name, i); ret = ibmebus_request_irq(pr->eq->attr.ist1, ehea_recv_irq_handler, - IRQF_DISABLED, pr->int_send_name, - pr); + 0, pr->int_send_name, pr); if (ret) { netdev_err(dev, "failed registering irq for ehea_queue port_res_nr:%d, ist=%X\n", i, pr->eq->attr.ist1); @@ -1465,7 +1453,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr, memset(pr, 0, sizeof(struct ehea_port_res)); - pr->tx_bytes = rx_bytes; + pr->tx_bytes = tx_bytes; pr->tx_packets = tx_packets; pr->rx_bytes = rx_bytes; pr->rx_packets = rx_packets; @@ -1559,7 +1547,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr, kfree(init_attr); - netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll, 64); + netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll); ret = 0; goto out; @@ -1593,20 +1581,16 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr) ehea_destroy_eq(pr->eq); for (i = 0; i < pr->rq1_skba.len; i++) - if (pr->rq1_skba.arr[i]) - dev_kfree_skb(pr->rq1_skba.arr[i]); + dev_kfree_skb(pr->rq1_skba.arr[i]); for (i = 0; i < pr->rq2_skba.len; i++) - if (pr->rq2_skba.arr[i]) - dev_kfree_skb(pr->rq2_skba.arr[i]); + dev_kfree_skb(pr->rq2_skba.arr[i]); for (i = 0; i < pr->rq3_skba.len; i++) - if (pr->rq3_skba.arr[i]) - dev_kfree_skb(pr->rq3_skba.arr[i]); + dev_kfree_skb(pr->rq3_skba.arr[i]); for (i = 0; i < pr->sq_skba.len; i++) - if (pr->sq_skba.arr[i]) - 
dev_kfree_skb(pr->sq_skba.arr[i]); + dev_kfree_skb(pr->sq_skba.arr[i]); vfree(pr->rq1_skba.arr); vfree(pr->rq2_skba.arr); @@ -1634,7 +1618,7 @@ static void write_swqe2_immediate(struct sk_buff *skb, struct ehea_swqe *swqe, * For TSO packets we only copy the headers into the * immediate area. */ - immediate_len = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb); + immediate_len = skb_tcp_all_headers(skb); } if (skb_is_gso(skb) || skb_data_size >= SWQE2_MAX_IMM) { @@ -1760,7 +1744,7 @@ static int ehea_set_mac_addr(struct net_device *dev, void *sa) goto out_free; } - memcpy(dev->dev_addr, mac_addr->sa_data, dev->addr_len); + eth_hw_addr_set(dev, mac_addr->sa_data); /* Deregister old MAC in pHYP */ if (port->state == EHEA_PORT_UP) { @@ -1970,19 +1954,11 @@ out: ehea_update_bcmc_registrations(); } -static int ehea_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > EHEA_MAX_PACKET_SIZE)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static void xmit_common(struct sk_buff *skb, struct ehea_swqe *swqe) { swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT | EHEA_SWQE_CRC; - if (skb->protocol != htons(ETH_P_IP)) + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) return; if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -2033,10 +2009,10 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev, skb_copy_bits(skb, 0, imm_data, skb->len); swqe->immediate_data_length = skb->len; - dev_kfree_skb(skb); + dev_consume_skb_any(skb); } -static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ehea_port *port = netdev_priv(dev); struct ehea_swqe *swqe; @@ -2052,9 +2028,9 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) memset(swqe, 0, SWQE_HEADER_SIZE); atomic_dec(&pr->swqe_avail); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { swqe->tx_control |= EHEA_SWQE_VLAN_INSERT; - swqe->vlan_tag = 
vlan_tx_tag_get(skb); + swqe->vlan_tag = skb_vlan_tag_get(skb); } pr->tx_packets++; @@ -2435,6 +2411,8 @@ static int ehea_open(struct net_device *dev) netif_info(port, ifup, dev, "enabling port\n"); + netif_carrier_off(dev); + ret = ehea_up(dev); if (!ret) { port_napi_enable(port); @@ -2643,10 +2621,8 @@ static int ehea_restart_qps(struct net_device *dev) u16 dummy16 = 0; cb0 = (void *)get_zeroed_page(GFP_KERNEL); - if (!cb0) { - ret = -ENOMEM; - goto out; - } + if (!cb0) + return -ENOMEM; for (i = 0; i < (port->num_def_qps); i++) { struct ehea_port_res *pr = &port->port_res[i]; @@ -2666,6 +2642,7 @@ static int ehea_restart_qps(struct net_device *dev) cb0); if (hret != H_SUCCESS) { netdev_err(dev, "query_ehea_qp failed (1)\n"); + ret = -EFAULT; goto out; } @@ -2678,6 +2655,7 @@ static int ehea_restart_qps(struct net_device *dev) &dummy64, &dummy16, &dummy16); if (hret != H_SUCCESS) { netdev_err(dev, "modify_ehea_qp failed (1)\n"); + ret = -EFAULT; goto out; } @@ -2686,6 +2664,7 @@ static int ehea_restart_qps(struct net_device *dev) cb0); if (hret != H_SUCCESS) { netdev_err(dev, "query_ehea_qp failed (2)\n"); + ret = -EFAULT; goto out; } @@ -2812,7 +2791,7 @@ out: return; } -static void ehea_tx_watchdog(struct net_device *dev) +static void ehea_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct ehea_port *port = netdev_priv(dev); @@ -2892,15 +2871,14 @@ out: return ret; } -static ssize_t ehea_show_port_id(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t log_port_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev); return sprintf(buf, "%d", port->logical_port_id); } -static DEVICE_ATTR(log_port_id, S_IRUSR | S_IRGRP | S_IROTH, ehea_show_port_id, - NULL); +static DEVICE_ATTR_RO(log_port_id); static void logical_port_release(struct device *dev) { @@ -2923,6 +2901,7 @@ static struct device *ehea_register_port(struct ehea_port 
*port, ret = of_device_register(&port->ofdev); if (ret) { pr_err("failed to register device. ret=%d\n", ret); + put_device(&port->ofdev.dev); goto out; } @@ -2950,14 +2929,10 @@ static const struct net_device_ops ehea_netdev_ops = { .ndo_open = ehea_open, .ndo_stop = ehea_stop, .ndo_start_xmit = ehea_start_xmit, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ehea_netpoll, -#endif .ndo_get_stats64 = ehea_get_stats64, .ndo_set_mac_address = ehea_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = ehea_set_multicast_list, - .ndo_change_mtu = ehea_change_mtu, .ndo_vlan_rx_add_vid = ehea_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ehea_vlan_rx_kill_vid, .ndo_tx_timeout = ehea_tx_watchdog, @@ -3015,14 +2990,14 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, SET_NETDEV_DEV(dev, port_dev); /* initialize net_device structure */ - memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN); + eth_hw_addr_set(dev, (u8 *)&port->mac_addr); dev->netdev_ops = &ehea_netdev_ops; ehea_set_ethtool_ops(dev); dev->hw_features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_CTAG_TX; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO | + dev->features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXCSUM; @@ -3030,13 +3005,16 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, NETIF_F_IP_CSUM; dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT; + /* MTU range: 68 - 9022 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = EHEA_MAX_PACKET_SIZE; + INIT_WORK(&port->reset_task, ehea_reset_port); INIT_DELAYED_WORK(&port->stats_work, ehea_update_stats); init_waitqueue_head(&port->swqe_avail_wq); init_waitqueue_head(&port->restart_wq); - memset(&port->stats, 0, sizeof(struct net_device_stats)); ret = register_netdev(dev); if (ret) { pr_err("register_netdev failed. 
ret=%d\n", ret); @@ -3085,19 +3063,17 @@ static void ehea_shutdown_single_port(struct ehea_port *port) static int ehea_setup_ports(struct ehea_adapter *adapter) { struct device_node *lhea_dn; - struct device_node *eth_dn = NULL; + struct device_node *eth_dn; const u32 *dn_log_port_id; int i = 0; lhea_dn = adapter->ofdev->dev.of_node; - while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) { - + for_each_child_of_node(lhea_dn, eth_dn) { dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", NULL); if (!dn_log_port_id) { - pr_err("bad device node: eth_dn name=%s\n", - eth_dn->full_name); + pr_err("bad device node: eth_dn name=%pOF\n", eth_dn); continue; } @@ -3125,12 +3101,11 @@ static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter, u32 logical_port_id) { struct device_node *lhea_dn; - struct device_node *eth_dn = NULL; + struct device_node *eth_dn; const u32 *dn_log_port_id; lhea_dn = adapter->ofdev->dev.of_node; - while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) { - + for_each_child_of_node(lhea_dn, eth_dn) { dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", NULL); if (dn_log_port_id) @@ -3141,7 +3116,7 @@ static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter, return NULL; } -static ssize_t ehea_probe_port(struct device *dev, +static ssize_t probe_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -3171,6 +3146,7 @@ static ssize_t ehea_probe_port(struct device *dev, if (ehea_add_adapter_mr(adapter)) { pr_err("creating MR failed\n"); + of_node_put(eth_dn); return -EIO; } @@ -3195,9 +3171,9 @@ static ssize_t ehea_probe_port(struct device *dev, return (ssize_t) count; } -static ssize_t ehea_remove_port(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t remove_port_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct ehea_adapter *adapter = dev_get_drvdata(dev); struct 
ehea_port *port; @@ -3230,8 +3206,8 @@ static ssize_t ehea_remove_port(struct device *dev, return (ssize_t) count; } -static DEVICE_ATTR(probe_port, S_IWUSR, NULL, ehea_probe_port); -static DEVICE_ATTR(remove_port, S_IWUSR, NULL, ehea_remove_port); +static DEVICE_ATTR_WO(probe_port); +static DEVICE_ATTR_WO(remove_port); static int ehea_create_device_sysfs(struct platform_device *dev) { @@ -3250,6 +3226,141 @@ static void ehea_remove_device_sysfs(struct platform_device *dev) device_remove_file(&dev->dev, &dev_attr_remove_port); } +static int ehea_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + if (action == SYS_RESTART) { + pr_info("Reboot: freeing all eHEA resources\n"); + ibmebus_unregister_driver(&ehea_driver); + } + return NOTIFY_DONE; +} + +static struct notifier_block ehea_reboot_nb = { + .notifier_call = ehea_reboot_notifier, +}; + +static int ehea_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret = NOTIFY_BAD; + struct memory_notify *arg = data; + + mutex_lock(&dlpar_mem_lock); + + switch (action) { + case MEM_CANCEL_OFFLINE: + pr_info("memory offlining canceled"); + fallthrough; /* re-add canceled memory block */ + + case MEM_ONLINE: + pr_info("memory is going online"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + case MEM_GOING_OFFLINE: + pr_info("memory is going offline"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + default: + break; + } + + ehea_update_firmware_handles(); + ret = NOTIFY_OK; + +out_unlock: + mutex_unlock(&dlpar_mem_lock); + return ret; +} + +static struct notifier_block ehea_mem_nb = { + .notifier_call = ehea_mem_notifier, +}; + +static void ehea_crash_handler(void) +{ + int i; + + if (ehea_fw_handles.arr) + for (i = 0; i < 
ehea_fw_handles.num_entries; i++) + ehea_h_free_resource(ehea_fw_handles.arr[i].adh, + ehea_fw_handles.arr[i].fwh, + FORCE_FREE); + + if (ehea_bcmc_regs.arr) + for (i = 0; i < ehea_bcmc_regs.num_entries; i++) + ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, + ehea_bcmc_regs.arr[i].port_id, + ehea_bcmc_regs.arr[i].reg_type, + ehea_bcmc_regs.arr[i].macaddr, + 0, H_DEREG_BCMC); +} + +static atomic_t ehea_memory_hooks_registered; + +/* Register memory hooks on probe of first adapter */ +static int ehea_register_memory_hooks(void) +{ + int ret = 0; + + if (atomic_inc_return(&ehea_memory_hooks_registered) > 1) + return 0; + + ret = ehea_create_busmap(); + if (ret) { + pr_info("ehea_create_busmap failed\n"); + goto out; + } + + ret = register_reboot_notifier(&ehea_reboot_nb); + if (ret) { + pr_info("register_reboot_notifier failed\n"); + goto out; + } + + ret = register_memory_notifier(&ehea_mem_nb); + if (ret) { + pr_info("register_memory_notifier failed\n"); + goto out2; + } + + ret = crash_shutdown_register(ehea_crash_handler); + if (ret) { + pr_info("crash_shutdown_register failed\n"); + goto out3; + } + + return 0; + +out3: + unregister_memory_notifier(&ehea_mem_nb); +out2: + unregister_reboot_notifier(&ehea_reboot_nb); +out: + atomic_dec(&ehea_memory_hooks_registered); + return ret; +} + +static void ehea_unregister_memory_hooks(void) +{ + /* Only remove the hooks if we've registered them */ + if (atomic_read(&ehea_memory_hooks_registered) == 0) + return; + + unregister_reboot_notifier(&ehea_reboot_nb); + if (crash_shutdown_unregister(ehea_crash_handler)) + pr_info("failed unregistering crash handler\n"); + unregister_memory_notifier(&ehea_mem_nb); +} + static int ehea_probe_adapter(struct platform_device *dev) { struct ehea_adapter *adapter; @@ -3257,12 +3368,16 @@ static int ehea_probe_adapter(struct platform_device *dev) int ret; int i; + ret = ehea_register_memory_hooks(); + if (ret) + return ret; + if (!dev || !dev->dev.of_node) { pr_err("Invalid ibmebus 
device probed\n"); return -EINVAL; } - adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + adapter = devm_kzalloc(&dev->dev, sizeof(*adapter), GFP_KERNEL); if (!adapter) { ret = -ENOMEM; dev_err(&dev->dev, "no mem for ehea_adapter\n"); @@ -3280,7 +3395,7 @@ static int ehea_probe_adapter(struct platform_device *dev) if (!adapter->handle) { dev_err(&dev->dev, "failed getting handle for adapter" - " '%s'\n", dev->dev.of_node->full_name); + " '%pOF'\n", dev->dev.of_node); ret = -ENODEV; goto out_free_ad; } @@ -3306,8 +3421,7 @@ static int ehea_probe_adapter(struct platform_device *dev) goto out_free_ad; } - tasklet_init(&adapter->neq_tasklet, ehea_neq_tasklet, - (unsigned long)adapter); + tasklet_setup(&adapter->neq_tasklet, ehea_neq_tasklet); ret = ehea_create_device_sysfs(dev); if (ret) @@ -3320,7 +3434,7 @@ static int ehea_probe_adapter(struct platform_device *dev) } ret = ibmebus_request_irq(adapter->neq->attr.ist1, - ehea_interrupt_neq, IRQF_DISABLED, + ehea_interrupt_neq, 0, "ehea_neq", adapter); if (ret) { dev_err(&dev->dev, "requesting NEQ IRQ failed\n"); @@ -3348,7 +3462,6 @@ out_kill_eq: out_free_ad: list_del(&adapter->list); - kfree(adapter); out: ehea_update_firmware_handles(); @@ -3356,7 +3469,7 @@ out: return ret; } -static int ehea_remove(struct platform_device *dev) +static void ehea_remove(struct platform_device *dev) { struct ehea_adapter *adapter = platform_get_drvdata(dev); int i; @@ -3375,88 +3488,10 @@ static int ehea_remove(struct platform_device *dev) ehea_destroy_eq(adapter->neq); ehea_remove_adapter_mr(adapter); list_del(&adapter->list); - kfree(adapter); - - ehea_update_firmware_handles(); - - return 0; -} - -static void ehea_crash_handler(void) -{ - int i; - - if (ehea_fw_handles.arr) - for (i = 0; i < ehea_fw_handles.num_entries; i++) - ehea_h_free_resource(ehea_fw_handles.arr[i].adh, - ehea_fw_handles.arr[i].fwh, - FORCE_FREE); - - if (ehea_bcmc_regs.arr) - for (i = 0; i < ehea_bcmc_regs.num_entries; i++) - 
ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, - ehea_bcmc_regs.arr[i].port_id, - ehea_bcmc_regs.arr[i].reg_type, - ehea_bcmc_regs.arr[i].macaddr, - 0, H_DEREG_BCMC); -} - -static int ehea_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - int ret = NOTIFY_BAD; - struct memory_notify *arg = data; - - mutex_lock(&dlpar_mem_lock); - - switch (action) { - case MEM_CANCEL_OFFLINE: - pr_info("memory offlining canceled"); - /* Readd canceled memory block */ - case MEM_ONLINE: - pr_info("memory is going online"); - set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); - if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) - goto out_unlock; - ehea_rereg_mrs(); - break; - case MEM_GOING_OFFLINE: - pr_info("memory is going offline"); - set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); - if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) - goto out_unlock; - ehea_rereg_mrs(); - break; - default: - break; - } ehea_update_firmware_handles(); - ret = NOTIFY_OK; - -out_unlock: - mutex_unlock(&dlpar_mem_lock); - return ret; } -static struct notifier_block ehea_mem_nb = { - .notifier_call = ehea_mem_notifier, -}; - -static int ehea_reboot_notifier(struct notifier_block *nb, - unsigned long action, void *unused) -{ - if (action == SYS_RESTART) { - pr_info("Reboot: freeing all eHEA resources\n"); - ibmebus_unregister_driver(&ehea_driver); - } - return NOTIFY_DONE; -} - -static struct notifier_block ehea_reboot_nb = { - .notifier_call = ehea_reboot_notifier, -}; - static int check_module_parm(void) { int ret = 0; @@ -3485,14 +3520,12 @@ static int check_module_parm(void) return ret; } -static ssize_t ehea_show_capabilities(struct device_driver *drv, - char *buf) +static ssize_t capabilities_show(struct device_driver *drv, char *buf) { return sprintf(buf, "%d", EHEA_CAPABILITIES); } -static DRIVER_ATTR(capabilities, S_IRUSR | S_IRGRP | S_IROTH, - ehea_show_capabilities, NULL); +static DRIVER_ATTR_RO(capabilities); static int __init ehea_module_init(void) { 
@@ -3510,26 +3543,10 @@ static int __init ehea_module_init(void) if (ret) goto out; - ret = ehea_create_busmap(); - if (ret) - goto out; - - ret = register_reboot_notifier(&ehea_reboot_nb); - if (ret) - pr_info("failed registering reboot notifier\n"); - - ret = register_memory_notifier(&ehea_mem_nb); - if (ret) - pr_info("failed registering memory remove notifier\n"); - - ret = crash_shutdown_register(ehea_crash_handler); - if (ret) - pr_info("failed registering crash handler\n"); - ret = ibmebus_register_driver(&ehea_driver); if (ret) { pr_err("failed registering eHEA device driver on ebus\n"); - goto out2; + goto out; } ret = driver_create_file(&ehea_driver.driver, @@ -3537,32 +3554,22 @@ static int __init ehea_module_init(void) if (ret) { pr_err("failed to register capabilities attribute, ret=%d\n", ret); - goto out3; + goto out2; } return ret; -out3: - ibmebus_unregister_driver(&ehea_driver); out2: - unregister_memory_notifier(&ehea_mem_nb); - unregister_reboot_notifier(&ehea_reboot_nb); - crash_shutdown_unregister(ehea_crash_handler); + ibmebus_unregister_driver(&ehea_driver); out: return ret; } static void __exit ehea_module_exit(void) { - int ret; - driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities); ibmebus_unregister_driver(&ehea_driver); - unregister_reboot_notifier(&ehea_reboot_nb); - ret = crash_shutdown_unregister(ehea_crash_handler); - if (ret) - pr_info("failed unregistering crash handler\n"); - unregister_memory_notifier(&ehea_mem_nb); + ehea_unregister_memory_hooks(); kfree(ehea_fw_handles.arr); kfree(ehea_bcmc_regs.arr); ehea_destroy_busmap(); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c index d3a130ccdcc8..e63716e139f5 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * linux/drivers/net/ethernet/ibm/ehea/ehea_phyp.c * @@ -9,21 +10,6 @@ * Christoph Raisch 
<raisch@de.ibm.com> * Jan-Bernd Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h index 99b6c2a38dbf..e8b56c103410 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h +++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/net/ethernet/ibm/ehea/ehea_phyp.h * @@ -9,21 +10,6 @@ * Christoph Raisch <raisch@de.ibm.com> * Jan-Bernd Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __EHEA_PHYP_H__ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c index 9b03033bb557..db45373ea31c 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * linux/drivers/net/ethernet/ibm/ehea/ehea_qmr.c * @@ -9,21 +10,6 @@ * Christoph Raisch <raisch@de.ibm.com> * Jan-Bernd Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -103,12 +89,14 @@ out_nomem: static void hw_queue_dtor(struct hw_queue *queue) { - int pages_per_kpage = PAGE_SIZE / queue->pagesize; + int pages_per_kpage; int i, nr_pages; if (!queue || !queue->queue_pages) return; + pages_per_kpage = PAGE_SIZE / queue->pagesize; + nr_pages = queue->queue_length / queue->pagesize; for (i = 0; i < nr_pages; i += pages_per_kpage) @@ -121,9 +109,8 @@ struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, int nr_of_cqe, u64 eq_handle, u32 cq_token) { struct ehea_cq *cq; - struct h_epa epa; - u64 *cq_handle_ref, hret, rpage; - u32 act_nr_of_entries, act_pages, counter; + u64 hret, rpage; + u32 counter; int ret; void *vpage; @@ -137,10 +124,6 @@ struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, cq->adapter = adapter; - cq_handle_ref = &cq->fw_handle; - act_nr_of_entries = 0; - act_pages = 0; - hret = ehea_h_alloc_resource_cq(adapter->handle, &cq->attr, &cq->fw_handle, &cq->epas); if (hret != H_SUCCESS) { @@ -188,7 +171,6 @@ struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, } hw_qeit_reset(&cq->hw_queue); - epa = cq->epas.kernel; ehea_reset_cq_ep(cq); ehea_reset_cq_n1(cq); @@ -688,13 +670,10 @@ int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages) static int ehea_is_hugepage(unsigned long pfn) { - int page_order; - if (pfn & EHEA_HUGEPAGE_PFN_MASK) return 0; - page_order = compound_order(pfn_to_page(pfn)); - if (page_order + PAGE_SHIFT != EHEA_HUGEPAGESHIFT) + if (page_shift(pfn_to_page(pfn)) != EHEA_HUGEPAGESHIFT) return 0; return 1; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.h b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h index 8e4a70c20ab7..7c7cccd820f7 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_qmr.h +++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/net/ethernet/ibm/ehea/ehea_qmr.h * @@ -9,21 +10,6 @@ * Christoph Raisch <raisch@de.ibm.com> * Jan-Bernd 
Themann <themann@de.ibm.com> * Thomas Klein <tklein@de.ibm.com> - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __EHEA_QMR_H__ diff --git a/drivers/net/ethernet/ibm/emac/Kconfig b/drivers/net/ethernet/ibm/emac/Kconfig index 3f44a30e0615..c8e5de5987ac 100644 --- a/drivers/net/ethernet/ibm/emac/Kconfig +++ b/drivers/net/ethernet/ibm/emac/Kconfig @@ -1,7 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only config IBM_EMAC tristate "IBM EMAC Ethernet support" depends on PPC_DCR select CRC32 + select PHYLIB help This driver supports the IBM EMAC family of Ethernet controllers typically found on 4xx embedded PowerPC chips, but also on the @@ -27,18 +29,6 @@ config IBM_EMAC_RX_COPY_THRESHOLD depends on IBM_EMAC default "256" -config IBM_EMAC_RX_SKB_HEADROOM - int "Additional RX skb headroom (bytes)" - depends on IBM_EMAC - default "0" - help - Additional receive skb headroom. Note, that driver - will always reserve at least 2 bytes to make IP header - aligned, so usually there is no need to add any additional - headroom. - - If unsure, set to 0. 
- config IBM_EMAC_DEBUG bool "Debugging" depends on IBM_EMAC diff --git a/drivers/net/ethernet/ibm/emac/Makefile b/drivers/net/ethernet/ibm/emac/Makefile index eba21835d90d..ddf1ce3c8cca 100644 --- a/drivers/net/ethernet/ibm/emac/Makefile +++ b/drivers/net/ethernet/ibm/emac/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the PowerPC 4xx on-chip ethernet driver # @@ -8,4 +9,3 @@ ibm_emac-y := mal.o core.o phy.o ibm_emac-$(CONFIG_IBM_EMAC_ZMII) += zmii.o ibm_emac-$(CONFIG_IBM_EMAC_RGMII) += rgmii.o ibm_emac-$(CONFIG_IBM_EMAC_TAH) += tah.o -ibm_emac-$(CONFIG_IBM_EMAC_DEBUG) += debug.o diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index d300a0c0eafc..417dfa18daae 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/ethernet/ibm/emac/core.c * @@ -16,12 +17,6 @@ * (c) 2003 Benjamin Herrenschmidt <benh@kernel.crashing.org> * Armin Kuster <akuster@mvista.com> * Johnnie Peters <jpeters@mvista.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * */ #include <linux/module.h> @@ -37,15 +32,19 @@ #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/bitops.h> -#include <linux/workqueue.h> #include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <linux/of_net.h> +#include <linux/of_mdio.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> #include <asm/dma.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/dcr.h> #include <asm/dcr-regs.h> @@ -77,13 +76,6 @@ MODULE_AUTHOR ("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>"); MODULE_LICENSE("GPL"); -/* - * PPC64 doesn't (yet) have a cacheable_memcpy - */ -#ifdef CONFIG_PPC64 -#define cacheable_memcpy(d,s,n) memcpy((d),(s),(n)) -#endif - /* minimum number of free TX descriptors required to wake up TX process */ #define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4) @@ -103,11 +95,6 @@ MODULE_LICENSE("GPL"); static u32 busy_phy_map; static DEFINE_MUTEX(emac_phy_map_lock); -/* This is the wait queue used to wait on any event related to probe, that - * is discovery of MALs, other EMACs, ZMII/RGMIIs, etc... - */ -static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); - /* Having stable interface names is a doomed idea. However, it would be nice * if we didn't have completely random interface names at boot too :-) It's * just a matter of making everybody's life easier. Since we are doing @@ -123,9 +110,6 @@ static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); #define EMAC_BOOT_LIST_SIZE 4 static struct device_node *emac_boot_list[EMAC_BOOT_LIST_SIZE]; -/* How long should I wait for dependent devices ? */ -#define EMAC_PROBE_DEP_TIMEOUT (HZ * 5) - /* I don't want to litter system log with timeout errors * when we have brain-damaged PHY. 
*/ @@ -137,8 +121,7 @@ static inline void emac_report_timeout_error(struct emac_instance *dev, EMAC_FTR_440EP_PHY_CLK_FIX)) DBG(dev, "%s" NL, error); else if (net_ratelimit()) - printk(KERN_ERR "%s: %s\n", dev->ofdev->dev.of_node->full_name, - error); + printk(KERN_ERR "%pOF: %s\n", dev->ofdev->dev.of_node, error); } /* EMAC PHY clock workaround: @@ -204,18 +187,18 @@ static void __emac_set_multicast_list(struct emac_instance *dev); static inline int emac_phy_supports_gige(int phy_mode) { - return phy_mode == PHY_MODE_GMII || - phy_mode == PHY_MODE_RGMII || - phy_mode == PHY_MODE_SGMII || - phy_mode == PHY_MODE_TBI || - phy_mode == PHY_MODE_RTBI; + return phy_interface_mode_is_rgmii(phy_mode) || + phy_mode == PHY_INTERFACE_MODE_GMII || + phy_mode == PHY_INTERFACE_MODE_SGMII || + phy_mode == PHY_INTERFACE_MODE_TBI || + phy_mode == PHY_INTERFACE_MODE_RTBI; } static inline int emac_phy_gpcs(int phy_mode) { - return phy_mode == PHY_MODE_SGMII || - phy_mode == PHY_MODE_TBI || - phy_mode == PHY_MODE_RTBI; + return phy_mode == PHY_INTERFACE_MODE_SGMII || + phy_mode == PHY_INTERFACE_MODE_TBI || + phy_mode == PHY_INTERFACE_MODE_RTBI; } static inline void emac_tx_enable(struct emac_instance *dev) @@ -306,7 +289,7 @@ static inline void emac_netif_stop(struct emac_instance *dev) dev->no_mcast = 1; netif_addr_unlock(dev->ndev); netif_tx_unlock_bh(dev->ndev); - dev->ndev->trans_start = jiffies; /* prevent tx timeout */ + netif_trans_update(dev->ndev); /* prevent tx timeout */ mal_poll_disable(dev->mal, &dev->commac); netif_tx_disable(dev->ndev); } @@ -347,6 +330,7 @@ static int emac_reset(struct emac_instance *dev) { struct emac_regs __iomem *p = dev->emacp; int n = 20; + bool __maybe_unused try_internal_clock = false; DBG(dev, "reset" NL); @@ -359,6 +343,7 @@ static int emac_reset(struct emac_instance *dev) } #ifdef CONFIG_PPC_DCR_NATIVE +do_retry: /* * PPC460EX/GT Embedded Processor Advanced User's Manual * section 28.10.1 Mode Register 0 (EMACx_MR0) states: @@ -366,10 +351,19 
@@ static int emac_reset(struct emac_instance *dev) * of the EMAC. If none is present, select the internal clock * (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1). * After a soft reset, select the external clock. + * + * The AR8035-A PHY Meraki MR24 does not provide a TX Clk if the + * ethernet cable is not attached. This causes the reset to timeout + * and the PHY detection code in emac_init_phy() is unable to + * communicate and detect the AR8035-A PHY. As a result, the emac + * driver bails out early and the user has no ethernet. + * In order to stay compatible with existing configurations, the + * driver will temporarily switch to the internal clock, after + * the first reset fails. */ if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { - if (dev->phy_address == 0xffffffff && - dev->phy_map == 0xffffffff) { + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { /* No PHY: select internal loop clock before reset */ dcri_clrset(SDR0, SDR0_ETH_CFG, 0, SDR0_ETH_CFG_ECS << dev->cell_index); @@ -387,8 +381,15 @@ static int emac_reset(struct emac_instance *dev) #ifdef CONFIG_PPC_DCR_NATIVE if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { - if (dev->phy_address == 0xffffffff && - dev->phy_map == 0xffffffff) { + if (!n && !try_internal_clock) { + /* first attempt has timed out. 
*/ + n = 20; + try_internal_clock = true; + goto do_retry; + } + + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { /* No PHY: restore external clock source after reset */ dcri_clrset(SDR0, SDR0_ETH_CFG, SDR0_ETH_CFG_ECS << dev->cell_index, 0); @@ -408,9 +409,9 @@ static int emac_reset(struct emac_instance *dev) static void emac_hash_mc(struct emac_instance *dev) { + u32 __iomem *gaht_base = emac_gaht_base(dev); const int regs = EMAC_XAHT_REGS(dev); - u32 *gaht_base = emac_gaht_base(dev); - u32 gaht_temp[regs]; + u32 gaht_temp[EMAC_XAHT_MAX_REGS]; struct netdev_hw_addr *ha; int i; @@ -503,6 +504,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_ case 16384: ret |= EMAC4_MR1_TFS_16K; break; + case 8192: + ret |= EMAC4_MR1_TFS_8K; + break; case 4096: ret |= EMAC4_MR1_TFS_4K; break; @@ -518,6 +522,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_ case 16384: ret |= EMAC4_MR1_RFS_16K; break; + case 8192: + ret |= EMAC4_MR1_RFS_8K; + break; case 4096: ret |= EMAC4_MR1_RFS_4K; break; @@ -762,7 +769,7 @@ static void emac_reset_work(struct work_struct *work) mutex_unlock(&dev->link_lock); } -static void emac_tx_timeout(struct net_device *ndev) +static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_instance *dev = netdev_priv(ndev); @@ -858,7 +865,7 @@ static void __emac_mdio_write(struct emac_instance *dev, u8 id, u8 reg, { struct emac_regs __iomem *p = dev->emacp; u32 r = 0; - int n, err = -ETIMEDOUT; + int n; mutex_lock(&dev->mdio_lock); @@ -905,7 +912,6 @@ static void __emac_mdio_write(struct emac_instance *dev, u8 id, u8 reg, goto bail; } } - err = 0; bail: if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) rgmii_put_mdio(dev->rgmii_dev, dev->rgmii_port); @@ -958,8 +964,6 @@ static void __emac_set_multicast_list(struct emac_instance *dev) * we need is just to stop RX channel. This seems to work on all * tested SoCs. 
--ebs * - * If we need the full reset, we might just trigger the workqueue - * and do it async... a bit nasty but should work --BenH */ dev->mcast_pending = 0; emac_rx_disable(dev); @@ -982,7 +986,37 @@ static void emac_set_multicast_list(struct net_device *ndev) dev->mcast_pending = 1; return; } + + mutex_lock(&dev->link_lock); __emac_set_multicast_list(dev); + mutex_unlock(&dev->link_lock); +} + +static int emac_set_mac_address(struct net_device *ndev, void *sa) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct sockaddr *addr = sa; + struct emac_regs __iomem *p = dev->emacp; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mutex_lock(&dev->link_lock); + + eth_hw_addr_set(ndev, addr->sa_data); + + emac_rx_disable(dev); + emac_tx_disable(dev); + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + emac_tx_enable(dev); + emac_rx_enable(dev); + + mutex_unlock(&dev->link_lock); + + return 0; } static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) @@ -1022,7 +1056,9 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) /* Second pass, allocate new skbs */ for (i = 0; i < NUM_RX_BUFF; ++i) { - struct sk_buff *skb = alloc_skb(rx_skb_size, GFP_ATOMIC); + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(dev->ndev, rx_skb_size); if (!skb) { ret = -ENOMEM; goto oom; @@ -1031,10 +1067,10 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) BUG_ON(!dev->rx_skb[i]); dev_kfree_skb(dev->rx_skb[i]); - skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); dev->rx_desc[i].data_ptr = - dma_map_single(&dev->ofdev->dev, skb->data - 2, rx_sync_size, - DMA_FROM_DEVICE) + 2; + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + rx_sync_size, DMA_FROM_DEVICE) + + NET_IP_ALIGN; dev->rx_skb[i] = skb; } skip: @@ -1051,7 +1087,7 @@ static int 
emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) /* This is to prevent starting RX channel in emac_rx_enable() */ set_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); - dev->ndev->mtu = new_mtu; + WRITE_ONCE(dev->ndev->mtu, new_mtu); emac_full_tx_reset(dev); } @@ -1074,9 +1110,6 @@ static int emac_change_mtu(struct net_device *ndev, int new_mtu) struct emac_instance *dev = netdev_priv(ndev); int ret = 0; - if (new_mtu < EMAC_MIN_MTU || new_mtu > dev->max_mtu) - return -EINVAL; - DBG(dev, "change_mtu(%d)" NL, new_mtu); if (netif_running(ndev)) { @@ -1086,7 +1119,7 @@ static int emac_change_mtu(struct net_device *ndev, int new_mtu) } if (!ret) { - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); dev->rx_skb_size = emac_rx_skb_size(new_mtu); dev->rx_sync_size = emac_rx_sync_size(new_mtu); } @@ -1128,20 +1161,18 @@ static void emac_clean_rx_ring(struct emac_instance *dev) } } -static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot, - gfp_t flags) +static int +__emac_prepare_rx_skb(struct sk_buff *skb, struct emac_instance *dev, int slot) { - struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags); if (unlikely(!skb)) return -ENOMEM; dev->rx_skb[slot] = skb; dev->rx_desc[slot].data_len = 0; - skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); dev->rx_desc[slot].data_ptr = - dma_map_single(&dev->ofdev->dev, skb->data - 2, dev->rx_sync_size, - DMA_FROM_DEVICE) + 2; + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + dev->rx_sync_size, DMA_FROM_DEVICE) + NET_IP_ALIGN; wmb(); dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | (slot == (NUM_RX_BUFF - 1) ? 
MAL_RX_CTRL_WRAP : 0); @@ -1149,6 +1180,27 @@ static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot, return 0; } +static int +emac_alloc_rx_skb(struct emac_instance *dev, int slot) +{ + struct sk_buff *skb; + + skb = __netdev_alloc_skb_ip_align(dev->ndev, dev->rx_skb_size, + GFP_KERNEL); + + return __emac_prepare_rx_skb(skb, dev, slot); +} + +static int +emac_alloc_rx_skb_napi(struct emac_instance *dev, int slot) +{ + struct sk_buff *skb; + + skb = napi_alloc_skb(&dev->mal->napi, dev->rx_skb_size); + + return __emac_prepare_rx_skb(skb, dev, slot); +} + static void emac_print_link_status(struct emac_instance *dev) { if (netif_carrier_ok(dev->ndev)) @@ -1165,21 +1217,13 @@ static void emac_print_link_status(struct emac_instance *dev) static int emac_open(struct net_device *ndev) { struct emac_instance *dev = netdev_priv(ndev); - int err, i; + int i; DBG(dev, "open" NL); - /* Setup error IRQ handler */ - err = request_irq(dev->emac_irq, emac_irq, 0, "EMAC", dev); - if (err) { - printk(KERN_ERR "%s: failed to request IRQ %d\n", - ndev->name, dev->emac_irq); - return err; - } - /* Allocate RX ring */ for (i = 0; i < NUM_RX_BUFF; ++i) - if (emac_alloc_rx_skb(dev, i, GFP_KERNEL)) { + if (emac_alloc_rx_skb(dev, i)) { printk(KERN_ERR "%s: failed to allocate RX ring\n", ndev->name); goto oom; @@ -1230,8 +1274,6 @@ static int emac_open(struct net_device *ndev) return 0; oom: emac_clean_rx_ring(dev); - free_irq(dev->emac_irq, dev); - return -ENOMEM; } @@ -1345,8 +1387,6 @@ static int emac_close(struct net_device *ndev) emac_clean_tx_ring(dev); emac_clean_rx_ring(dev); - free_irq(dev->emac_irq, dev); - netif_carrier_off(ndev); return 0; @@ -1363,7 +1403,7 @@ static inline u16 emac_tx_csum(struct emac_instance *dev, return 0; } -static inline int emac_xmit_finish(struct emac_instance *dev, int len) +static inline netdev_tx_t emac_xmit_finish(struct emac_instance *dev, int len) { struct emac_regs __iomem *p = dev->emacp; struct net_device *ndev = dev->ndev; @@ 
-1382,7 +1422,7 @@ static inline int emac_xmit_finish(struct emac_instance *dev, int len) DBG2(dev, "stopped TX queue" NL); } - ndev->trans_start = jiffies; + netif_trans_update(ndev); ++dev->stats.tx_packets; dev->stats.tx_bytes += len; @@ -1390,7 +1430,7 @@ static inline int emac_xmit_finish(struct emac_instance *dev, int len) } /* Tx lock BH */ -static int emac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct emac_instance *dev = netdev_priv(ndev); unsigned int len = skb->len; @@ -1448,7 +1488,8 @@ static inline int emac_xmit_split(struct emac_instance *dev, int slot, } /* Tx lock BH disabled (SG version for TAH equipped EMACs) */ -static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t +emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) { struct emac_instance *dev = netdev_priv(ndev); int nr_frags = skb_shinfo(skb)->nr_frags; @@ -1486,7 +1527,7 @@ static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) ctrl); /* skb fragments */ for (i = 0; i < nr_frags; ++i) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; len = skb_frag_size(frag); if (unlikely(dev->tx_cnt + mal_tx_chunks(len) >= NUM_TX_BUFF)) @@ -1613,8 +1654,9 @@ static inline void emac_recycle_rx_skb(struct emac_instance *dev, int slot, DBG2(dev, "recycle %d %d" NL, slot, len); if (len) - dma_map_single(&dev->ofdev->dev, skb->data - 2, - EMAC_DMA_ALIGN(len + 2), DMA_FROM_DEVICE); + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + SKB_DATA_ALIGN(len + NET_IP_ALIGN), + DMA_FROM_DEVICE); dev->rx_desc[slot].data_len = 0; wmb(); @@ -1666,12 +1708,12 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) int len = dev->rx_desc[slot].data_len; int tot_len = dev->rx_sg_skb->len + len; - if (unlikely(tot_len + 2 > dev->rx_skb_size)) { + if 
(unlikely(tot_len + NET_IP_ALIGN > dev->rx_skb_size)) { ++dev->estats.rx_dropped_mtu; dev_kfree_skb(dev->rx_sg_skb); dev->rx_sg_skb = NULL; } else { - cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb), + memcpy(skb_tail_pointer(dev->rx_sg_skb), dev->rx_skb[slot]->data, len); skb_put(dev->rx_sg_skb, len); emac_recycle_rx_skb(dev, slot, len); @@ -1685,6 +1727,7 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) /* NAPI poll context */ static int emac_poll_rx(void *param, int budget) { + LIST_HEAD(rx_list); struct emac_instance *dev = param; int slot = dev->rx_slot, received = 0; @@ -1722,17 +1765,18 @@ static int emac_poll_rx(void *param, int budget) } if (len && len < EMAC_RX_COPY_THRESH) { - struct sk_buff *copy_skb = - alloc_skb(len + EMAC_RX_SKB_HEADROOM + 2, GFP_ATOMIC); + struct sk_buff *copy_skb; + + copy_skb = napi_alloc_skb(&dev->mal->napi, len); if (unlikely(!copy_skb)) goto oom; - skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2); - cacheable_memcpy(copy_skb->data - 2, skb->data - 2, - len + 2); + memcpy(copy_skb->data - NET_IP_ALIGN, + skb->data - NET_IP_ALIGN, + len + NET_IP_ALIGN); emac_recycle_rx_skb(dev, slot, len); skb = copy_skb; - } else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) + } else if (unlikely(emac_alloc_rx_skb_napi(dev, slot))) goto oom; skb_put(skb, len); @@ -1740,8 +1784,7 @@ static int emac_poll_rx(void *param, int budget) skb->protocol = eth_type_trans(skb, dev->ndev); emac_rx_csum(dev, skb, ctrl); - if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) - ++dev->estats.rx_dropped_stack; + list_add_tail(&skb->list, &rx_list); next: ++dev->stats.rx_packets; skip: @@ -1753,7 +1796,7 @@ static int emac_poll_rx(void *param, int budget) sg: if (ctrl & MAL_RX_CTRL_FIRST) { BUG_ON(dev->rx_sg_skb); - if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) { + if (unlikely(emac_alloc_rx_skb_napi(dev, slot))) { DBG(dev, "rx OOM %d" NL, slot); ++dev->estats.rx_dropped_oom; emac_recycle_rx_skb(dev, slot, 0); @@ 
-1785,6 +1828,8 @@ static int emac_poll_rx(void *param, int budget) goto next; } + netif_receive_skb_list(&rx_list); + if (received) { DBG2(dev, "rx %d BDs" NL, received); dev->rx_slot = slot; @@ -1907,7 +1952,7 @@ static struct net_device_stats *emac_stats(struct net_device *ndev) struct emac_instance *dev = netdev_priv(ndev); struct emac_stats *st = &dev->stats; struct emac_error_stats *est = &dev->estats; - struct net_device_stats *nst = &dev->nstats; + struct net_device_stats *nst = &ndev->stats; unsigned long flags; DBG2(dev, "stats" NL); @@ -1970,69 +2015,79 @@ static struct mal_commac_ops emac_commac_sg_ops = { }; /* Ethtool support */ -static int emac_ethtool_get_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) +static int emac_ethtool_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct emac_instance *dev = netdev_priv(ndev); + u32 supported, advertising; - cmd->supported = dev->phy.features; - cmd->port = PORT_MII; - cmd->phy_address = dev->phy.address; - cmd->transceiver = - dev->phy.address >= 0 ? 
XCVR_EXTERNAL : XCVR_INTERNAL; + supported = dev->phy.features; + cmd->base.port = PORT_MII; + cmd->base.phy_address = dev->phy.address; mutex_lock(&dev->link_lock); - cmd->advertising = dev->phy.advertising; - cmd->autoneg = dev->phy.autoneg; - cmd->speed = dev->phy.speed; - cmd->duplex = dev->phy.duplex; + advertising = dev->phy.advertising; + cmd->base.autoneg = dev->phy.autoneg; + cmd->base.speed = dev->phy.speed; + cmd->base.duplex = dev->phy.duplex; mutex_unlock(&dev->link_lock); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } -static int emac_ethtool_set_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) +static int +emac_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct emac_instance *dev = netdev_priv(ndev); u32 f = dev->phy.features; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); DBG(dev, "set_settings(%d, %d, %d, 0x%08x)" NL, - cmd->autoneg, cmd->speed, cmd->duplex, cmd->advertising); + cmd->base.autoneg, cmd->base.speed, cmd->base.duplex, advertising); /* Basic sanity checks */ if (dev->phy.address < 0) return -EOPNOTSUPP; - if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) + if (cmd->base.autoneg == AUTONEG_ENABLE && advertising == 0) return -EINVAL; - if (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) + if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE) { - switch (cmd->speed) { + if (cmd->base.autoneg == AUTONEG_DISABLE) { + switch (cmd->base.speed) { case SPEED_10: - if (cmd->duplex == DUPLEX_HALF && + if (cmd->base.duplex 
== DUPLEX_HALF && !(f & SUPPORTED_10baseT_Half)) return -EINVAL; - if (cmd->duplex == DUPLEX_FULL && + if (cmd->base.duplex == DUPLEX_FULL && !(f & SUPPORTED_10baseT_Full)) return -EINVAL; break; case SPEED_100: - if (cmd->duplex == DUPLEX_HALF && + if (cmd->base.duplex == DUPLEX_HALF && !(f & SUPPORTED_100baseT_Half)) return -EINVAL; - if (cmd->duplex == DUPLEX_FULL && + if (cmd->base.duplex == DUPLEX_FULL && !(f & SUPPORTED_100baseT_Full)) return -EINVAL; break; case SPEED_1000: - if (cmd->duplex == DUPLEX_HALF && + if (cmd->base.duplex == DUPLEX_HALF && !(f & SUPPORTED_1000baseT_Half)) return -EINVAL; - if (cmd->duplex == DUPLEX_FULL && + if (cmd->base.duplex == DUPLEX_FULL && !(f & SUPPORTED_1000baseT_Full)) return -EINVAL; break; @@ -2041,8 +2096,8 @@ static int emac_ethtool_set_settings(struct net_device *ndev, } mutex_lock(&dev->link_lock); - dev->phy.def->ops->setup_forced(&dev->phy, cmd->speed, - cmd->duplex); + dev->phy.def->ops->setup_forced(&dev->phy, cmd->base.speed, + cmd->base.duplex); mutex_unlock(&dev->link_lock); } else { @@ -2051,7 +2106,7 @@ static int emac_ethtool_set_settings(struct net_device *ndev, mutex_lock(&dev->link_lock); dev->phy.def->ops->setup_aneg(&dev->phy, - (cmd->advertising & f) | + (advertising & f) | (dev->phy.advertising & (ADVERTISED_Pause | ADVERTISED_Asym_Pause))); @@ -2062,8 +2117,11 @@ static int emac_ethtool_set_settings(struct net_device *ndev, return 0; } -static void emac_ethtool_get_ringparam(struct net_device *ndev, - struct ethtool_ringparam *rp) +static void +emac_ethtool_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *rp, + struct kernel_ethtool_ringparam *kernel_rp, + struct netlink_ext_ack *extack) { rp->rx_max_pending = rp->rx_pending = NUM_RX_BUFF; rp->tx_max_pending = rp->tx_pending = NUM_TX_BUFF; @@ -2090,12 +2148,8 @@ static void emac_ethtool_get_pauseparam(struct net_device *ndev, static int emac_get_regs_len(struct emac_instance *dev) { - if (emac_has_feature(dev, EMAC_FTR_EMAC4)) - 
return sizeof(struct emac_ethtool_regs_subhdr) + - EMAC4_ETHTOOL_REGS_SIZE(dev); - else return sizeof(struct emac_ethtool_regs_subhdr) + - EMAC_ETHTOOL_REGS_SIZE(dev); + sizeof(struct emac_regs); } static int emac_ethtool_get_regs_len(struct net_device *ndev) @@ -2120,15 +2174,15 @@ static void *emac_dump_regs(struct emac_instance *dev, void *buf) struct emac_ethtool_regs_subhdr *hdr = buf; hdr->index = dev->cell_index; - if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { + hdr->version = EMAC4SYNC_ETHTOOL_REGS_VER; + } else if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { hdr->version = EMAC4_ETHTOOL_REGS_VER; - memcpy_fromio(hdr + 1, dev->emacp, EMAC4_ETHTOOL_REGS_SIZE(dev)); - return (void *)(hdr + 1) + EMAC4_ETHTOOL_REGS_SIZE(dev); } else { hdr->version = EMAC_ETHTOOL_REGS_VER; - memcpy_fromio(hdr + 1, dev->emacp, EMAC_ETHTOOL_REGS_SIZE(dev)); - return (void *)(hdr + 1) + EMAC_ETHTOOL_REGS_SIZE(dev); } + memcpy_fromio(hdr + 1, dev->emacp, sizeof(struct emac_regs)); + return (void *)(hdr + 1) + sizeof(struct emac_regs); } static void emac_ethtool_get_regs(struct net_device *ndev, @@ -2210,16 +2264,13 @@ static void emac_ethtool_get_drvinfo(struct net_device *ndev, { struct emac_instance *dev = netdev_priv(ndev); - strlcpy(info->driver, "ibm_emac", sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); - snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %s", - dev->cell_index, dev->ofdev->dev.of_node->full_name); - info->regdump_len = emac_ethtool_get_regs_len(ndev); + strscpy(info->driver, "ibm_emac", sizeof(info->driver)); + strscpy(info->version, DRV_VERSION, sizeof(info->version)); + snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %pOF", + dev->cell_index, dev->ofdev->dev.of_node); } static const struct ethtool_ops emac_ethtool_ops = { - .get_settings = emac_ethtool_get_settings, - .set_settings = emac_ethtool_set_settings, .get_drvinfo = 
emac_ethtool_get_drvinfo, .get_regs_len = emac_ethtool_get_regs_len, @@ -2235,6 +2286,8 @@ static const struct ethtool_ops emac_ethtool_ops = { .get_ethtool_stats = emac_ethtool_get_ethtool_stats, .get_link = ethtool_op_get_link, + .get_link_ksettings = emac_ethtool_get_link_ksettings, + .set_link_ksettings = emac_ethtool_set_link_ksettings, }; static int emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) @@ -2250,7 +2303,7 @@ static int emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) switch (cmd) { case SIOCGMIIPHY: data->phy_id = dev->phy.address; - /* Fall through */ + fallthrough; case SIOCGMIIREG: data->val_out = emac_mdio_read(ndev, dev->phy.address, data->reg_num); @@ -2312,40 +2365,24 @@ static int emac_check_deps(struct emac_instance *dev, if (deps[i].ofdev == NULL) continue; if (deps[i].drvdata == NULL) - deps[i].drvdata = dev_get_drvdata(&deps[i].ofdev->dev); + deps[i].drvdata = platform_get_drvdata(deps[i].ofdev); if (deps[i].drvdata != NULL) there++; } - return there == EMAC_DEP_COUNT; + if (there != EMAC_DEP_COUNT) + return -EPROBE_DEFER; + return 0; } static void emac_put_deps(struct emac_instance *dev) { - if (dev->mal_dev) - of_dev_put(dev->mal_dev); - if (dev->zmii_dev) - of_dev_put(dev->zmii_dev); - if (dev->rgmii_dev) - of_dev_put(dev->rgmii_dev); - if (dev->mdio_dev) - of_dev_put(dev->mdio_dev); - if (dev->tah_dev) - of_dev_put(dev->tah_dev); + platform_device_put(dev->mal_dev); + platform_device_put(dev->zmii_dev); + platform_device_put(dev->rgmii_dev); + platform_device_put(dev->mdio_dev); + platform_device_put(dev->tah_dev); } -static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action, - void *data) -{ - /* We are only intereted in device addition */ - if (action == BUS_NOTIFY_BOUND_DRIVER) - wake_up_all(&emac_probe_wait); - return 0; -} - -static struct notifier_block emac_of_bus_notifier = { - .notifier_call = emac_of_bus_notify -}; - static int emac_wait_deps(struct emac_instance *dev) { 
struct emac_depentry deps[EMAC_DEP_COUNT]; @@ -2362,45 +2399,238 @@ static int emac_wait_deps(struct emac_instance *dev) deps[EMAC_DEP_MDIO_IDX].phandle = dev->mdio_ph; if (dev->blist && dev->blist > emac_boot_list) deps[EMAC_DEP_PREV_IDX].phandle = 0xffffffffu; - bus_register_notifier(&platform_bus_type, &emac_of_bus_notifier); - wait_event_timeout(emac_probe_wait, - emac_check_deps(dev, deps), - EMAC_PROBE_DEP_TIMEOUT); - bus_unregister_notifier(&platform_bus_type, &emac_of_bus_notifier); - err = emac_check_deps(dev, deps) ? 0 : -ENODEV; + err = emac_check_deps(dev, deps); for (i = 0; i < EMAC_DEP_COUNT; i++) { - if (deps[i].node) - of_node_put(deps[i].node); - if (err && deps[i].ofdev) - of_dev_put(deps[i].ofdev); + of_node_put(deps[i].node); + if (err) + platform_device_put(deps[i].ofdev); } - if (err == 0) { + if (!err) { dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; dev->zmii_dev = deps[EMAC_DEP_ZMII_IDX].ofdev; dev->rgmii_dev = deps[EMAC_DEP_RGMII_IDX].ofdev; dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev; dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev; } - if (deps[EMAC_DEP_PREV_IDX].ofdev) - of_dev_put(deps[EMAC_DEP_PREV_IDX].ofdev); + platform_device_put(deps[EMAC_DEP_PREV_IDX].ofdev); return err; } static int emac_read_uint_prop(struct device_node *np, const char *name, u32 *val, int fatal) { - int len; - const u32 *prop = of_get_property(np, name, &len); - if (prop == NULL || len < sizeof(u32)) { + int err; + + err = of_property_read_u32(np, name, val); + if (err) { if (fatal) - printk(KERN_ERR "%s: missing %s property\n", - np->full_name, name); + pr_err("%pOF: missing %s property", np, name); + return err; + } + return 0; +} + +static void emac_adjust_link(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct phy_device *phy = ndev->phydev; + + dev->phy.autoneg = phy->autoneg; + dev->phy.speed = phy->speed; + dev->phy.duplex = phy->duplex; + dev->phy.pause = phy->pause; + dev->phy.asym_pause = phy->asym_pause; + 
ethtool_convert_link_mode_to_legacy_u32(&dev->phy.advertising, + phy->advertising); +} + +static int emac_mii_bus_read(struct mii_bus *bus, int addr, int regnum) +{ + int ret = emac_mdio_read(bus->priv, addr, regnum); + /* This is a workaround for powered down ports/phys. + * In the wild, this was seen on the Cisco Meraki MX60(W). + * This hardware disables ports as part of the handoff + * procedure. Accessing the ports will lead to errors + * (-ETIMEDOUT, -EREMOTEIO) that do more harm than good. + */ + return ret < 0 ? 0xffff : ret; +} + +static int emac_mii_bus_write(struct mii_bus *bus, int addr, + int regnum, u16 val) +{ + emac_mdio_write(bus->priv, addr, regnum, val); + return 0; +} + +static int emac_mii_bus_reset(struct mii_bus *bus) +{ + struct emac_instance *dev = netdev_priv(bus->priv); + + return emac_reset(dev); +} + +static int emac_mdio_phy_start_aneg(struct mii_phy *phy, + struct phy_device *phy_dev) +{ + phy_dev->autoneg = phy->autoneg; + phy_dev->speed = phy->speed; + phy_dev->duplex = phy->duplex; + ethtool_convert_legacy_u32_to_link_mode(phy_dev->advertising, + phy->advertising); + return phy_start_aneg(phy_dev); +} + +static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise) +{ + struct net_device *ndev = phy->dev; + + phy->autoneg = AUTONEG_ENABLE; + phy->advertising = advertise; + return emac_mdio_phy_start_aneg(phy, ndev->phydev); +} + +static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd) +{ + struct net_device *ndev = phy->dev; + + phy->autoneg = AUTONEG_DISABLE; + phy->speed = speed; + phy->duplex = fd; + return emac_mdio_phy_start_aneg(phy, ndev->phydev); +} + +static int emac_mdio_poll_link(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + int res; + + res = phy_read_status(ndev->phydev); + if (res) { + dev_err(&dev->ofdev->dev, "link update failed (%d).", res); + return ethtool_op_get_link(ndev); + } + + return ndev->phydev->link; +} + +static 
int emac_mdio_read_link(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct phy_device *phy_dev = ndev->phydev; + int res; + + res = phy_read_status(phy_dev); + if (res) + return res; + + phy->speed = phy_dev->speed; + phy->duplex = phy_dev->duplex; + phy->pause = phy_dev->pause; + phy->asym_pause = phy_dev->asym_pause; + return 0; +} + +static int emac_mdio_init_phy(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + + phy_start(ndev->phydev); + return phy_init_hw(ndev->phydev); +} + +static const struct mii_phy_ops emac_dt_mdio_phy_ops = { + .init = emac_mdio_init_phy, + .setup_aneg = emac_mdio_setup_aneg, + .setup_forced = emac_mdio_setup_forced, + .poll_link = emac_mdio_poll_link, + .read_link = emac_mdio_read_link, +}; + +static int emac_dt_mdio_probe(struct emac_instance *dev) +{ + struct device_node *mii_np; + struct mii_bus *bus; + int res; + + mii_np = of_get_available_child_by_name(dev->ofdev->dev.of_node, "mdio"); + if (!mii_np) { + dev_err(&dev->ofdev->dev, "no mdio definition found."); + return -ENODEV; + } + + bus = devm_mdiobus_alloc(&dev->ofdev->dev); + if (!bus) { + res = -ENOMEM; + goto put_node; + } + + bus->priv = dev->ndev; + bus->parent = dev->ndev->dev.parent; + bus->name = "emac_mdio"; + bus->read = &emac_mii_bus_read; + bus->write = &emac_mii_bus_write; + bus->reset = &emac_mii_bus_reset; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev->ofdev->name); + res = devm_of_mdiobus_register(&dev->ofdev->dev, bus, mii_np); + if (res) { + dev_err(&dev->ofdev->dev, "cannot register MDIO bus %s (%d)", + bus->name, res); + } + + put_node: + of_node_put(mii_np); + return res; +} + +static int emac_dt_phy_connect(struct emac_instance *dev, + struct device_node *phy_handle) +{ + struct phy_device *phy_dev; + + dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def), + GFP_KERNEL); + if (!dev->phy.def) + return -ENOMEM; + + phy_dev = of_phy_connect(dev->ndev, phy_handle, &emac_adjust_link, 0, + dev->phy_mode); + if 
(!phy_dev) { + dev_err(&dev->ofdev->dev, "failed to connect to PHY.\n"); return -ENODEV; } - *val = *prop; + + dev->phy.def->phy_id = phy_dev->drv->phy_id; + dev->phy.def->phy_id_mask = phy_dev->drv->phy_id_mask; + dev->phy.def->name = phy_dev->drv->name; + dev->phy.def->ops = &emac_dt_mdio_phy_ops; + ethtool_convert_link_mode_to_legacy_u32(&dev->phy.features, + phy_dev->supported); + dev->phy.address = phy_dev->mdio.addr; + dev->phy.mode = phy_dev->interface; return 0; } +static int emac_dt_phy_probe(struct emac_instance *dev) +{ + struct device_node *np = dev->ofdev->dev.of_node; + struct device_node *phy_handle; + int res = 1; + + phy_handle = of_parse_phandle(np, "phy-handle", 0); + + if (phy_handle) { + res = emac_dt_mdio_probe(dev); + if (!res) { + res = emac_dt_phy_connect(dev, phy_handle); + } + } + + of_node_put(phy_handle); + return res; +} + static int emac_init_phy(struct emac_instance *dev) { struct device_node *np = dev->ofdev->dev.of_node; @@ -2411,15 +2641,12 @@ static int emac_init_phy(struct emac_instance *dev) dev->phy.dev = ndev; dev->phy.mode = dev->phy_mode; - /* PHY-less configuration. - * XXX I probably should move these settings to the dev tree - */ - if (dev->phy_address == 0xffffffff && dev->phy_map == 0xffffffff) { + /* PHY-less configuration. */ + if ((dev->phy_address == 0xffffffff && dev->phy_map == 0xffffffff) || + of_phy_is_fixed_link(np)) { emac_reset(dev); - /* PHY-less configuration. - * XXX I probably should move these settings to the dev tree - */ + /* PHY-less configuration. 
*/ dev->phy.address = -1; dev->phy.features = SUPPORTED_MII; if (emac_phy_supports_gige(dev->phy_mode)) @@ -2428,6 +2655,19 @@ static int emac_init_phy(struct emac_instance *dev) dev->phy.features |= SUPPORTED_100baseT_Full; dev->phy.pause = 1; + if (of_phy_is_fixed_link(np)) { + int res = emac_dt_mdio_probe(dev); + + if (res) + return res; + + res = of_phy_register_fixed_link(np); + ndev->phydev = of_phy_find_device(np); + if (res || !ndev->phydev) + return res ? res : -EINVAL; + emac_adjust_link(dev->ndev); + put_device(&ndev->phydev->mdio.dev); + } return 0; } @@ -2471,6 +2711,29 @@ static int emac_init_phy(struct emac_instance *dev) emac_configure(dev); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) { + int res = emac_dt_phy_probe(dev); + + switch (res) { + case 1: + /* No phy-handle property configured. + * Continue with the existing phy probe + * and setup code. + */ + break; + + case 0: + mutex_unlock(&emac_phy_map_lock); + goto init_phy; + + default: + mutex_unlock(&emac_phy_map_lock); + dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n", + res); + return res; + } + } + if (dev->phy_address != 0xffffffff) phy_map = ~(1 << dev->phy_address); @@ -2494,10 +2757,11 @@ static int emac_init_phy(struct emac_instance *dev) #endif mutex_unlock(&emac_phy_map_lock); if (i == 0x20) { - printk(KERN_WARNING "%s: can't find PHY!\n", np->full_name); + printk(KERN_WARNING "%pOF: can't find PHY!\n", np); return -ENXIO; } + init_phy: /* Init PHY */ if (dev->phy.def->ops->init) dev->phy.def->ops->init(&dev->phy); @@ -2540,7 +2804,7 @@ static int emac_init_phy(struct emac_instance *dev) static int emac_init_config(struct emac_instance *dev) { struct device_node *np = dev->ofdev->dev.of_node; - const void *p; + int err; /* Read config from device-tree */ if (emac_read_uint_prop(np, "mal-device", &dev->mal_ph, 1)) @@ -2552,7 +2816,7 @@ static int emac_init_config(struct emac_instance *dev) if (emac_read_uint_prop(np, "cell-index", &dev->cell_index, 1)) return -ENXIO; 
if (emac_read_uint_prop(np, "max-frame-size", &dev->max_mtu, 0)) - dev->max_mtu = 1500; + dev->max_mtu = ETH_DATA_LEN; if (emac_read_uint_prop(np, "rx-fifo-size", &dev->rx_fifo_size, 0)) dev->rx_fifo_size = 2048; if (emac_read_uint_prop(np, "tx-fifo-size", &dev->tx_fifo_size, 0)) @@ -2589,9 +2853,9 @@ static int emac_init_config(struct emac_instance *dev) dev->mal_burst_size = 256; /* PHY mode needs some decoding */ - dev->phy_mode = of_get_phy_mode(np); - if (dev->phy_mode < 0) - dev->phy_mode = PHY_MODE_NA; + err = of_get_phy_mode(np, &dev->phy_mode); + if (err) + dev->phy_mode = PHY_INTERFACE_MODE_NA; /* Check EMAC version */ if (of_device_is_compatible(np, "ibm,emac4sync")) { @@ -2619,8 +2883,8 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL dev->features |= EMAC_FTR_NO_FLOW_CONTROL_40x; #else - printk(KERN_ERR "%s: Flow control not disabled!\n", - np->full_name); + printk(KERN_ERR "%pOF: Flow control not disabled!\n", + np); return -ENXIO; #endif } @@ -2628,9 +2892,9 @@ static int emac_init_config(struct emac_instance *dev) } /* Fixup some feature bits based on the device tree */ - if (of_get_property(np, "has-inverted-stacr-oc", NULL)) + if (of_property_read_bool(np, "has-inverted-stacr-oc")) dev->features |= EMAC_FTR_STACR_OC_INVERT; - if (of_get_property(np, "has-new-stacr-staopc", NULL)) + if (of_property_read_bool(np, "has-new-stacr-staopc")) dev->features |= EMAC_FTR_HAS_NEW_STACR; /* CAB lacks the appropriate properties */ @@ -2643,8 +2907,7 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_TAH dev->features |= EMAC_FTR_HAS_TAH; #else - printk(KERN_ERR "%s: TAH support not enabled !\n", - np->full_name); + printk(KERN_ERR "%pOF: TAH support not enabled !\n", np); return -ENXIO; #endif } @@ -2653,8 +2916,7 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_ZMII dev->features |= EMAC_FTR_HAS_ZMII; #else - printk(KERN_ERR "%s: ZMII support not enabled 
!\n", - np->full_name); + printk(KERN_ERR "%pOF: ZMII support not enabled !\n", np); return -ENXIO; #endif } @@ -2663,20 +2925,19 @@ static int emac_init_config(struct emac_instance *dev) #ifdef CONFIG_IBM_EMAC_RGMII dev->features |= EMAC_FTR_HAS_RGMII; #else - printk(KERN_ERR "%s: RGMII support not enabled !\n", - np->full_name); + printk(KERN_ERR "%pOF: RGMII support not enabled !\n", np); return -ENXIO; #endif } /* Read MAC-address */ - p = of_get_property(np, "local-mac-address", NULL); - if (p == NULL) { - printk(KERN_ERR "%s: Can't find local-mac-address property\n", - np->full_name); - return -ENXIO; + err = of_get_ethdev_address(np, dev->ndev); + if (err == -EPROBE_DEFER) + return err; + if (err) { + dev_warn(&dev->ofdev->dev, "Can't get valid mac-address. Generating random."); + eth_hw_addr_random(dev->ndev); } - memcpy(dev->ndev->dev_addr, p, 6); /* IAHT and GAHT filter parameterization */ if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { @@ -2687,6 +2948,10 @@ static int emac_init_config(struct emac_instance *dev) dev->xaht_width_shift = EMAC4_XAHT_WIDTH_SHIFT; } + /* This should never happen */ + if (WARN_ON(EMAC_XAHT_REGS(dev) > EMAC_XAHT_MAX_REGS)) + return -ENXIO; + DBG(dev, "features : 0x%08x / 0x%08x\n", dev->features, EMAC_FTRS_POSSIBLE); DBG(dev, "tx_fifo_size : %d (%d gige)\n", dev->tx_fifo_size, dev->tx_fifo_size_gige); DBG(dev, "rx_fifo_size : %d (%d gige)\n", dev->rx_fifo_size, dev->rx_fifo_size_gige); @@ -2701,12 +2966,11 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit, - .ndo_change_mtu = eth_change_mtu, }; static const struct net_device_ops emac_gige_netdev_ops = { @@ -2714,10 
+2978,10 @@ static const struct net_device_ops emac_gige_netdev_ops = { .ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit_sg, .ndo_change_mtu = emac_change_mtu, }; @@ -2734,7 +2998,7 @@ static int emac_probe(struct platform_device *ofdev) * property here for now, but new flat device trees should set a * status property to "disabled" instead. */ - if (of_get_property(np, "unused", NULL) || !of_device_is_available(np)) + if (of_property_read_bool(np, "unused") || !of_device_is_available(np)) return -ENODEV; /* Find ourselves in the bootlist if we are there */ @@ -2744,7 +3008,7 @@ static int emac_probe(struct platform_device *ofdev) /* Allocate our net_device structure */ err = -ENOMEM; - ndev = alloc_etherdev(sizeof(struct emac_instance)); + ndev = devm_alloc_etherdev(&ofdev->dev, sizeof(struct emac_instance)); if (!ndev) goto err_gone; @@ -2755,53 +3019,48 @@ static int emac_probe(struct platform_device *ofdev) SET_NETDEV_DEV(ndev, &ofdev->dev); /* Initialize some embedded data structures */ - mutex_init(&dev->mdio_lock); - mutex_init(&dev->link_lock); + err = devm_mutex_init(&ofdev->dev, &dev->mdio_lock); + if (err) + goto err_gone; + + err = devm_mutex_init(&ofdev->dev, &dev->link_lock); + if (err) + goto err_gone; + spin_lock_init(&dev->lock); INIT_WORK(&dev->reset_work, emac_reset_work); /* Init various config data based on device-tree */ err = emac_init_config(dev); - if (err != 0) - goto err_free; + if (err) + goto err_gone; - /* Get interrupts. 
EMAC irq is mandatory, WOL irq is optional */ - dev->emac_irq = irq_of_parse_and_map(np, 0); - dev->wol_irq = irq_of_parse_and_map(np, 1); - if (dev->emac_irq == NO_IRQ) { - printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name); - goto err_free; + /* Setup error IRQ handler */ + dev->emac_irq = platform_get_irq(ofdev, 0); + err = devm_request_irq(&ofdev->dev, dev->emac_irq, emac_irq, 0, "EMAC", + dev); + if (err) { + dev_err_probe(&ofdev->dev, err, "failed to request IRQ %d", + dev->emac_irq); + goto err_gone; } + ndev->irq = dev->emac_irq; - /* Map EMAC regs */ - if (of_address_to_resource(np, 0, &dev->rsrc_regs)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); - goto err_irq_unmap; - } - // TODO : request_mem_region - dev->emacp = ioremap(dev->rsrc_regs.start, - resource_size(&dev->rsrc_regs)); - if (dev->emacp == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); - err = -ENOMEM; - goto err_irq_unmap; + dev->emacp = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->emacp)) { + dev_err(&ofdev->dev, "can't map device registers"); + err = PTR_ERR(dev->emacp); + goto err_gone; } /* Wait for dependent devices */ err = emac_wait_deps(dev); - if (err) { - printk(KERN_ERR - "%s: Timeout waiting for dependent devices\n", - np->full_name); - /* display more info about what's missing ? 
*/ - goto err_reg_unmap; - } - dev->mal = dev_get_drvdata(&dev->mal_dev->dev); + if (err) + goto err_gone; + dev->mal = platform_get_drvdata(dev->mal_dev); if (dev->mdio_dev != NULL) - dev->mdio_instance = dev_get_drvdata(&dev->mdio_dev->dev); + dev->mdio_instance = platform_get_drvdata(dev->mdio_dev); /* Register with MAL */ dev->commac.ops = &emac_commac_ops; @@ -2810,8 +3069,8 @@ static int emac_probe(struct platform_device *ofdev) dev->commac.rx_chan_mask = MAL_CHAN_MASK(dev->mal_rx_chan); err = mal_register_commac(dev->mal, &dev->commac); if (err) { - printk(KERN_ERR "%s: failed to register with mal %s!\n", - np->full_name, dev->mal_dev->dev.of_node->full_name); + printk(KERN_ERR "%pOF: failed to register with mal %pOF!\n", + np, dev->mal_dev->dev.of_node); goto err_rel_deps; } dev->rx_skb_size = emac_rx_skb_size(ndev->mtu); @@ -2877,14 +3136,18 @@ static int emac_probe(struct platform_device *ofdev) dev->commac.ops = &emac_commac_sg_ops; } else ndev->netdev_ops = &emac_netdev_ops; - SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops); + ndev->ethtool_ops = &emac_ethtool_ops; + + /* MTU range: 46 - 1500 or whatever is in OF */ + ndev->min_mtu = EMAC_MIN_MTU; + ndev->max_mtu = dev->max_mtu; netif_carrier_off(ndev); - err = register_netdev(ndev); + err = devm_register_netdev(&ofdev->dev, ndev); if (err) { - printk(KERN_ERR "%s: failed to register net device (%d)!\n", - np->full_name, err); + printk(KERN_ERR "%pOF: failed to register net device (%d)!\n", + np, err); goto err_detach_tah; } @@ -2892,24 +3155,18 @@ static int emac_probe(struct platform_device *ofdev) * fully initialized */ wmb(); - dev_set_drvdata(&ofdev->dev, dev); + platform_set_drvdata(ofdev, dev); - /* There's a new kid in town ! 
Let's tell everybody */ - wake_up_all(&emac_probe_wait); + printk(KERN_INFO "%s: EMAC-%d %pOF, MAC %pM\n", + ndev->name, dev->cell_index, np, ndev->dev_addr); - - printk(KERN_INFO "%s: EMAC-%d %s, MAC %pM\n", - ndev->name, dev->cell_index, np->full_name, ndev->dev_addr); - - if (dev->phy_mode == PHY_MODE_SGMII) + if (dev->phy_mode == PHY_INTERFACE_MODE_SGMII) printk(KERN_NOTICE "%s: in SGMII mode\n", ndev->name); if (dev->phy.address >= 0) printk("%s: found %s PHY (0x%02x)\n", ndev->name, dev->phy.def->name, dev->phy.address); - emac_dbg_register(dev); - /* Life is good */ return 0; @@ -2928,37 +3185,18 @@ static int emac_probe(struct platform_device *ofdev) mal_unregister_commac(dev->mal, &dev->commac); err_rel_deps: emac_put_deps(dev); - err_reg_unmap: - iounmap(dev->emacp); - err_irq_unmap: - if (dev->wol_irq != NO_IRQ) - irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) - irq_dispose_mapping(dev->emac_irq); - err_free: - free_netdev(ndev); err_gone: - /* if we were on the bootlist, remove us as we won't show up and - * wake up all waiters to notify them in case they were waiting - * on us - */ - if (blist) { + if (blist) *blist = NULL; - wake_up_all(&emac_probe_wait); - } return err; } -static int emac_remove(struct platform_device *ofdev) +static void emac_remove(struct platform_device *ofdev) { - struct emac_instance *dev = dev_get_drvdata(&ofdev->dev); + struct emac_instance *dev = platform_get_drvdata(ofdev); DBG(dev, "remove" NL); - dev_set_drvdata(&ofdev->dev, NULL); - - unregister_netdev(dev->ndev); - cancel_work_sync(&dev->reset_work); if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) @@ -2973,22 +3211,10 @@ static int emac_remove(struct platform_device *ofdev) mal_unregister_commac(dev->mal, &dev->commac); emac_put_deps(dev); - - emac_dbg_unregister(dev); - iounmap(dev->emacp); - - if (dev->wol_irq != NO_IRQ) - irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) - irq_dispose_mapping(dev->emac_irq); - - free_netdev(dev->ndev); 
- - return 0; } /* XXX Features in here should be replaced by properties... */ -static struct of_device_id emac_match[] = +static const struct of_device_id emac_match[] = { { .type = "network", @@ -3009,7 +3235,6 @@ MODULE_DEVICE_TABLE(of, emac_match); static struct platform_driver emac_driver = { .driver = { .name = "emac", - .owner = THIS_MODULE, .of_match_table = emac_match, }, .probe = emac_probe, @@ -3019,21 +3244,20 @@ static struct platform_driver emac_driver = { static void __init emac_make_bootlist(void) { struct device_node *np = NULL; - int j, max, i = 0, k; + int j, max, i = 0; int cell_indices[EMAC_BOOT_LIST_SIZE]; /* Collect EMACs */ while((np = of_find_all_nodes(np)) != NULL) { - const u32 *idx; + u32 idx; if (of_match_node(emac_match, np) == NULL) continue; - if (of_get_property(np, "unused", NULL)) + if (of_property_read_bool(np, "unused")) continue; - idx = of_get_property(np, "cell-index", NULL); - if (idx == NULL) + if (of_property_read_u32(np, "cell-index", &idx)) continue; - cell_indices[i] = *idx; + cell_indices[i] = idx; emac_boot_list[i++] = of_node_get(np); if (i >= EMAC_BOOT_LIST_SIZE) { of_node_put(np); @@ -3046,12 +3270,8 @@ static void __init emac_make_bootlist(void) for (i = 0; max > 1 && (i < (max - 1)); i++) for (j = i; j < max; j++) { if (cell_indices[i] > cell_indices[j]) { - np = emac_boot_list[i]; - emac_boot_list[i] = emac_boot_list[j]; - emac_boot_list[j] = np; - k = cell_indices[i]; - cell_indices[i] = cell_indices[j]; - cell_indices[j] = k; + swap(emac_boot_list[i], emac_boot_list[j]); + swap(cell_indices[i], cell_indices[j]); } } } @@ -3062,9 +3282,6 @@ static int __init emac_init(void) printk(KERN_INFO DRV_DESC ", version " DRV_VERSION "\n"); - /* Init debug stuff */ - emac_init_debug(); - /* Build EMAC boot list */ emac_make_bootlist(); @@ -3109,12 +3326,10 @@ static void __exit emac_exit(void) rgmii_exit(); zmii_exit(); mal_exit(); - emac_fini_debug(); /* Destroy EMAC boot list */ for (i = 0; i < EMAC_BOOT_LIST_SIZE; 
i++) - if (emac_boot_list[i]) - of_node_put(emac_boot_list[i]); + of_node_put(emac_boot_list[i]); } module_init(emac_init); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 70074792bdef..89fa1683ec3c 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/core.h * @@ -15,25 +16,17 @@ * Armin Kuster <akuster@mvista.com> * Johnnie Peters <jpeters@mvista.com> * Copyright 2000, 2001 MontaVista Softare Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #ifndef __IBM_NEWEMAC_CORE_H #define __IBM_NEWEMAC_CORE_H #include <linux/module.h> -#include <linux/init.h> #include <linux/list.h> #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/dma-mapping.h> #include <linux/spinlock.h> -#include <linux/of_platform.h> #include <linux/slab.h> #include <asm/io.h> @@ -69,22 +62,18 @@ static inline int emac_rx_size(int mtu) return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD); } -#define EMAC_DMA_ALIGN(x) ALIGN((x), dma_get_cache_alignment()) - -#define EMAC_RX_SKB_HEADROOM \ - EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM) - /* Size of RX skb for the given MTU */ static inline int emac_rx_skb_size(int mtu) { int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu)); - return EMAC_DMA_ALIGN(size + 2) + EMAC_RX_SKB_HEADROOM; + + return SKB_DATA_ALIGN(size + NET_IP_ALIGN) + NET_SKB_PAD; } /* RX DMA sync size */ static inline int emac_rx_sync_size(int mtu) { - return EMAC_DMA_ALIGN(emac_rx_size(mtu) + 2); + return SKB_DATA_ALIGN(emac_rx_size(mtu) + NET_IP_ALIGN); } /* Driver statistcs is split into two parts to make it more cache friendly: @@ 
-168,7 +157,6 @@ struct emac_error_stats { struct emac_instance { struct net_device *ndev; - struct resource rsrc_regs; struct emac_regs __iomem *emacp; struct platform_device *ofdev; struct device_node **blist; /* bootlist entry */ @@ -182,7 +170,7 @@ struct emac_instance { struct mal_commac commac; /* PHY infos */ - u32 phy_mode; + phy_interface_t phy_mode; u32 phy_map; u32 phy_address; u32 phy_feat_exc; @@ -262,7 +250,6 @@ struct emac_instance { /* Stats */ struct emac_error_stats estats; - struct net_device_stats nstats; struct emac_stats stats; /* Misc @@ -389,6 +376,9 @@ static inline int emac_has_feature(struct emac_instance *dev, #define EMAC4SYNC_XAHT_SLOTS_SHIFT 8 #define EMAC4SYNC_XAHT_WIDTH_SHIFT 5 +/* The largest span between slots and widths above is 3 */ +#define EMAC_XAHT_MAX_REGS (1 << 3) + #define EMAC_XAHT_SLOTS(dev) (1 << (dev)->xaht_slots_shift) #define EMAC_XAHT_WIDTH(dev) (1 << (dev)->xaht_width_shift) #define EMAC_XAHT_REGS(dev) (1 << ((dev)->xaht_slots_shift - \ @@ -406,7 +396,7 @@ static inline int emac_has_feature(struct emac_instance *dev, ((u32)(1 << (EMAC_XAHT_WIDTH(dev) - 1)) >> \ ((slot) & (u32)(EMAC_XAHT_WIDTH(dev) - 1))) -static inline u32 *emac_xaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_xaht_base(struct emac_instance *dev) { struct emac_regs __iomem *p = dev->emacp; int offset; @@ -419,10 +409,10 @@ static inline u32 *emac_xaht_base(struct emac_instance *dev) else offset = offsetof(struct emac_regs, u0.emac4.iaht1); - return (u32 *)((ptrdiff_t)p + offset); + return (u32 __iomem *)((__force ptrdiff_t)p + offset); } -static inline u32 *emac_gaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_gaht_base(struct emac_instance *dev) { /* GAHT registers always come after an identical number of * IAHT registers. 
@@ -461,11 +451,8 @@ struct emac_ethtool_regs_subhdr { u32 index; }; -#define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ - (dev)->rsrc_regs.start + 1) -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ - (dev)->rsrc_regs.start + 1) +#define EMAC_ETHTOOL_REGS_VER 3 +#define EMAC4_ETHTOOL_REGS_VER 4 +#define EMAC4SYNC_ETHTOOL_REGS_VER 5 #endif /* __IBM_NEWEMAC_CORE_H */ diff --git a/drivers/net/ethernet/ibm/emac/debug.c b/drivers/net/ethernet/ibm/emac/debug.c deleted file mode 100644 index a559f326bf63..000000000000 --- a/drivers/net/ethernet/ibm/emac/debug.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * drivers/net/ethernet/ibm/emac/debug.c - * - * Driver for PowerPC 4xx on-chip ethernet controller, debug print routines. - * - * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * - * Based on the arch/ppc version of the driver: - * - * Copyright (c) 2004, 2005 Zultys Technologies - * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netdevice.h> -#include <linux/sysrq.h> -#include <asm/io.h> - -#include "core.h" - -static DEFINE_SPINLOCK(emac_dbg_lock); - -static void emac_desc_dump(struct emac_instance *p) -{ - int i; - printk("** EMAC %s TX BDs **\n" - " tx_cnt = %d tx_slot = %d ack_slot = %d\n", - p->ofdev->dev.of_node->full_name, - p->tx_cnt, p->tx_slot, p->ack_slot); - for (i = 0; i < NUM_TX_BUFF / 2; ++i) - printk - ("bd[%2d] 0x%08x %c 0x%04x %4u - bd[%2d] 0x%08x %c 0x%04x %4u\n", - i, p->tx_desc[i].data_ptr, p->tx_skb[i] ? 
'V' : ' ', - p->tx_desc[i].ctrl, p->tx_desc[i].data_len, - NUM_TX_BUFF / 2 + i, - p->tx_desc[NUM_TX_BUFF / 2 + i].data_ptr, - p->tx_skb[NUM_TX_BUFF / 2 + i] ? 'V' : ' ', - p->tx_desc[NUM_TX_BUFF / 2 + i].ctrl, - p->tx_desc[NUM_TX_BUFF / 2 + i].data_len); - - printk("** EMAC %s RX BDs **\n" - " rx_slot = %d flags = 0x%lx rx_skb_size = %d rx_sync_size = %d\n" - " rx_sg_skb = 0x%p\n", - p->ofdev->dev.of_node->full_name, - p->rx_slot, p->commac.flags, p->rx_skb_size, - p->rx_sync_size, p->rx_sg_skb); - for (i = 0; i < NUM_RX_BUFF / 2; ++i) - printk - ("bd[%2d] 0x%08x %c 0x%04x %4u - bd[%2d] 0x%08x %c 0x%04x %4u\n", - i, p->rx_desc[i].data_ptr, p->rx_skb[i] ? 'V' : ' ', - p->rx_desc[i].ctrl, p->rx_desc[i].data_len, - NUM_RX_BUFF / 2 + i, - p->rx_desc[NUM_RX_BUFF / 2 + i].data_ptr, - p->rx_skb[NUM_RX_BUFF / 2 + i] ? 'V' : ' ', - p->rx_desc[NUM_RX_BUFF / 2 + i].ctrl, - p->rx_desc[NUM_RX_BUFF / 2 + i].data_len); -} - -static void emac_mac_dump(struct emac_instance *dev) -{ - struct emac_regs __iomem *p = dev->emacp; - const int xaht_regs = EMAC_XAHT_REGS(dev); - u32 *gaht_base = emac_gaht_base(dev); - u32 *iaht_base = emac_iaht_base(dev); - int emac4sync = emac_has_feature(dev, EMAC_FTR_EMAC4SYNC); - int n; - - printk("** EMAC %s registers **\n" - "MR0 = 0x%08x MR1 = 0x%08x TMR0 = 0x%08x TMR1 = 0x%08x\n" - "RMR = 0x%08x ISR = 0x%08x ISER = 0x%08x\n" - "IAR = %04x%08x VTPID = 0x%04x VTCI = 0x%04x\n", - dev->ofdev->dev.of_node->full_name, - in_be32(&p->mr0), in_be32(&p->mr1), - in_be32(&p->tmr0), in_be32(&p->tmr1), - in_be32(&p->rmr), in_be32(&p->isr), in_be32(&p->iser), - in_be32(&p->iahr), in_be32(&p->ialr), in_be32(&p->vtpid), - in_be32(&p->vtci) - ); - - if (emac4sync) - printk("MAR = %04x%08x MMAR = %04x%08x\n", - in_be32(&p->u0.emac4sync.mahr), - in_be32(&p->u0.emac4sync.malr), - in_be32(&p->u0.emac4sync.mmahr), - in_be32(&p->u0.emac4sync.mmalr) - ); - - for (n = 0; n < xaht_regs; n++) - printk("IAHT%02d = 0x%08x\n", n + 1, in_be32(iaht_base + n)); - - for (n = 0; n < 
xaht_regs; n++) - printk("GAHT%02d = 0x%08x\n", n + 1, in_be32(gaht_base + n)); - - printk("LSA = %04x%08x IPGVR = 0x%04x\n" - "STACR = 0x%08x TRTR = 0x%08x RWMR = 0x%08x\n" - "OCTX = 0x%08x OCRX = 0x%08x\n", - in_be32(&p->lsah), in_be32(&p->lsal), in_be32(&p->ipgvr), - in_be32(&p->stacr), in_be32(&p->trtr), in_be32(&p->rwmr), - in_be32(&p->octx), in_be32(&p->ocrx) - ); - - if (!emac4sync) { - printk("IPCR = 0x%08x\n", - in_be32(&p->u1.emac4.ipcr) - ); - } else { - printk("REVID = 0x%08x TPC = 0x%08x\n", - in_be32(&p->u1.emac4sync.revid), - in_be32(&p->u1.emac4sync.tpc) - ); - } - - emac_desc_dump(dev); -} - -static void emac_mal_dump(struct mal_instance *mal) -{ - int i; - - printk("** MAL %s Registers **\n" - "CFG = 0x%08x ESR = 0x%08x IER = 0x%08x\n" - "TX|CASR = 0x%08x CARR = 0x%08x EOBISR = 0x%08x DEIR = 0x%08x\n" - "RX|CASR = 0x%08x CARR = 0x%08x EOBISR = 0x%08x DEIR = 0x%08x\n", - mal->ofdev->dev.of_node->full_name, - get_mal_dcrn(mal, MAL_CFG), get_mal_dcrn(mal, MAL_ESR), - get_mal_dcrn(mal, MAL_IER), - get_mal_dcrn(mal, MAL_TXCASR), get_mal_dcrn(mal, MAL_TXCARR), - get_mal_dcrn(mal, MAL_TXEOBISR), get_mal_dcrn(mal, MAL_TXDEIR), - get_mal_dcrn(mal, MAL_RXCASR), get_mal_dcrn(mal, MAL_RXCARR), - get_mal_dcrn(mal, MAL_RXEOBISR), get_mal_dcrn(mal, MAL_RXDEIR) - ); - - printk("TX|"); - for (i = 0; i < mal->num_tx_chans; ++i) { - if (i && !(i % 4)) - printk("\n "); - printk("CTP%d = 0x%08x ", i, get_mal_dcrn(mal, MAL_TXCTPR(i))); - } - printk("\nRX|"); - for (i = 0; i < mal->num_rx_chans; ++i) { - if (i && !(i % 4)) - printk("\n "); - printk("CTP%d = 0x%08x ", i, get_mal_dcrn(mal, MAL_RXCTPR(i))); - } - printk("\n "); - for (i = 0; i < mal->num_rx_chans; ++i) { - u32 r = get_mal_dcrn(mal, MAL_RCBS(i)); - if (i && !(i % 3)) - printk("\n "); - printk("RCBS%d = 0x%08x (%d) ", i, r, r * 16); - } - printk("\n"); -} - -static struct emac_instance *__emacs[4]; -static struct mal_instance *__mals[1]; - -void emac_dbg_register(struct emac_instance *dev) -{ - unsigned long 
flags; - int i; - - spin_lock_irqsave(&emac_dbg_lock, flags); - for (i = 0; i < ARRAY_SIZE(__emacs); i++) - if (__emacs[i] == NULL) { - __emacs[i] = dev; - break; - } - spin_unlock_irqrestore(&emac_dbg_lock, flags); -} - -void emac_dbg_unregister(struct emac_instance *dev) -{ - unsigned long flags; - int i; - - spin_lock_irqsave(&emac_dbg_lock, flags); - for (i = 0; i < ARRAY_SIZE(__emacs); i++) - if (__emacs[i] == dev) { - __emacs[i] = NULL; - break; - } - spin_unlock_irqrestore(&emac_dbg_lock, flags); -} - -void mal_dbg_register(struct mal_instance *mal) -{ - unsigned long flags; - int i; - - spin_lock_irqsave(&emac_dbg_lock, flags); - for (i = 0; i < ARRAY_SIZE(__mals); i++) - if (__mals[i] == NULL) { - __mals[i] = mal; - break; - } - spin_unlock_irqrestore(&emac_dbg_lock, flags); -} - -void mal_dbg_unregister(struct mal_instance *mal) -{ - unsigned long flags; - int i; - - spin_lock_irqsave(&emac_dbg_lock, flags); - for (i = 0; i < ARRAY_SIZE(__mals); i++) - if (__mals[i] == mal) { - __mals[i] = NULL; - break; - } - spin_unlock_irqrestore(&emac_dbg_lock, flags); -} - -void emac_dbg_dump_all(void) -{ - unsigned int i; - unsigned long flags; - - spin_lock_irqsave(&emac_dbg_lock, flags); - - for (i = 0; i < ARRAY_SIZE(__mals); ++i) - if (__mals[i]) - emac_mal_dump(__mals[i]); - - for (i = 0; i < ARRAY_SIZE(__emacs); ++i) - if (__emacs[i]) - emac_mac_dump(__emacs[i]); - - spin_unlock_irqrestore(&emac_dbg_lock, flags); -} - -#if defined(CONFIG_MAGIC_SYSRQ) -static void emac_sysrq_handler(int key) -{ - emac_dbg_dump_all(); -} - -static struct sysrq_key_op emac_sysrq_op = { - .handler = emac_sysrq_handler, - .help_msg = "emac(c)", - .action_msg = "Show EMAC(s) status", -}; - -int __init emac_init_debug(void) -{ - return register_sysrq_key('c', &emac_sysrq_op); -} - -void __exit emac_fini_debug(void) -{ - unregister_sysrq_key('c', &emac_sysrq_op); -} - -#else -int __init emac_init_debug(void) -{ - return 0; -} -void __exit emac_fini_debug(void) -{ -} -#endif /* 
CONFIG_MAGIC_SYSRQ */ diff --git a/drivers/net/ethernet/ibm/emac/debug.h b/drivers/net/ethernet/ibm/emac/debug.h index 59a92d5870b5..c09a46a329d9 100644 --- a/drivers/net/ethernet/ibm/emac/debug.h +++ b/drivers/net/ethernet/ibm/emac/debug.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/debug.h * @@ -10,12 +11,6 @@ * * Copyright (c) 2004, 2005 Zultys Technologies * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #ifndef __IBM_NEWEMAC_DEBUG_H #define __IBM_NEWEMAC_DEBUG_H @@ -25,36 +20,13 @@ #include "core.h" #if defined(CONFIG_IBM_EMAC_DEBUG) - -struct emac_instance; -struct mal_instance; - -extern void emac_dbg_register(struct emac_instance *dev); -extern void emac_dbg_unregister(struct emac_instance *dev); -extern void mal_dbg_register(struct mal_instance *mal); -extern void mal_dbg_unregister(struct mal_instance *mal); -extern int emac_init_debug(void) __init; -extern void emac_fini_debug(void) __exit; -extern void emac_dbg_dump_all(void); - # define DBG_LEVEL 1 - #else - -# define emac_dbg_register(x) do { } while(0) -# define emac_dbg_unregister(x) do { } while(0) -# define mal_dbg_register(x) do { } while(0) -# define mal_dbg_unregister(x) do { } while(0) -# define emac_init_debug() do { } while(0) -# define emac_fini_debug() do { } while(0) -# define emac_dbg_dump_all() do { } while(0) - # define DBG_LEVEL 0 - #endif #define EMAC_DBG(d, name, fmt, arg...) \ - printk(KERN_DEBUG #name "%s: " fmt, d->ofdev->dev.of_node->full_name, ## arg) + printk(KERN_DEBUG #name "%pOF: " fmt, d->ofdev->dev.of_node, ## arg) #if DBG_LEVEL > 0 # define DBG(d,f,x...) 
EMAC_DBG(d, emac, f, ##x) diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h index 5afcc27ceebb..09d3ac374b2d 100644 --- a/drivers/net/ethernet/ibm/emac/emac.h +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/emac.h * @@ -15,12 +16,6 @@ * Matt Porter <mporter@kernel.crashing.org> * Armin Kuster <akuster@mvista.com> * Copyright 2002-2004 MontaVista Software Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #ifndef __IBM_NEWEMAC_H #define __IBM_NEWEMAC_H @@ -82,7 +77,7 @@ struct emac_regs { struct { u32 rsvd1; u32 revid; - u32 rsvd2[2]; + u32 rsvd2[2]; u32 iaht1; /* Reset, R */ u32 iaht2; /* Reset, R */ u32 iaht3; /* Reset, R */ @@ -104,19 +99,6 @@ struct emac_regs { } u1; }; -/* - * PHY mode settings (EMAC <-> ZMII/RGMII bridge <-> PHY) - */ -#define PHY_MODE_NA PHY_INTERFACE_MODE_NA -#define PHY_MODE_MII PHY_INTERFACE_MODE_MII -#define PHY_MODE_RMII PHY_INTERFACE_MODE_RMII -#define PHY_MODE_SMII PHY_INTERFACE_MODE_SMII -#define PHY_MODE_RGMII PHY_INTERFACE_MODE_RGMII -#define PHY_MODE_TBI PHY_INTERFACE_MODE_TBI -#define PHY_MODE_GMII PHY_INTERFACE_MODE_GMII -#define PHY_MODE_RTBI PHY_INTERFACE_MODE_RTBI -#define PHY_MODE_SGMII PHY_INTERFACE_MODE_SGMII - /* EMACx_MR0 */ #define EMAC_MR0_RXI 0x80000000 #define EMAC_MR0_TXI 0x40000000 @@ -151,9 +133,11 @@ struct emac_regs { #define EMAC4_MR1_RFS_2K 0x00100000 #define EMAC4_MR1_RFS_4K 0x00180000 +#define EMAC4_MR1_RFS_8K 0x00200000 #define EMAC4_MR1_RFS_16K 0x00280000 #define EMAC4_MR1_TFS_2K 0x00020000 #define EMAC4_MR1_TFS_4K 0x00030000 +#define EMAC4_MR1_TFS_8K 0x00040000 #define EMAC4_MR1_TFS_16K 0x00050000 #define EMAC4_MR1_TR 0x00008000 #define EMAC4_MR1_MWSW_001 
0x00001000 diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 856ea66c9223..7d70056e9008 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/ethernet/ibm/emac/mal.c * @@ -17,16 +18,13 @@ * * Armin Kuster <akuster@mvista.com> * Copyright 2002 MontaVista Softare Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include <linux/delay.h> #include <linux/slab.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> #include "core.h" #include <asm/dcr-regs.h> @@ -263,7 +261,9 @@ static inline void mal_schedule_poll(struct mal_instance *mal) { if (likely(napi_schedule_prep(&mal->napi))) { MAL_DBG2(mal, "schedule_poll" NL); + spin_lock(&mal->lock); mal_disable_eob_irq(mal); + spin_unlock(&mal->lock); __napi_schedule(&mal->napi); } else MAL_DBG2(mal, "already in poll" NL); @@ -399,7 +399,7 @@ static int mal_poll(struct napi_struct *napi, int budget) unsigned long flags; MAL_DBG2(mal, "poll(%d)" NL, budget); - again: + /* Process TX skbs */ list_for_each(l, &mal->poll_list) { struct mal_commac *mc = @@ -418,20 +418,20 @@ static int mal_poll(struct napi_struct *napi, int budget) int n; if (unlikely(test_bit(MAL_COMMAC_POLL_DISABLED, &mc->flags))) continue; - n = mc->ops->poll_rx(mc->dev, budget); + n = mc->ops->poll_rx(mc->dev, budget - received); if (n) { received += n; - budget -= n; - if (budget <= 0) - goto more_work; // XXX What if this is the last one ? 
+ if (received >= budget) + return budget; } } - /* We need to disable IRQs to protect from RXDE IRQ here */ - spin_lock_irqsave(&mal->lock, flags); - __napi_complete(napi); - mal_enable_eob_irq(mal); - spin_unlock_irqrestore(&mal->lock, flags); + if (napi_complete_done(napi, received)) { + /* We need to disable IRQs to protect from RXDE IRQ here */ + spin_lock_irqsave(&mal->lock, flags); + mal_enable_eob_irq(mal); + spin_unlock_irqrestore(&mal->lock, flags); + } /* Check for "rotting" packet(s) */ list_for_each(l, &mal->poll_list) { @@ -442,15 +442,12 @@ static int mal_poll(struct napi_struct *napi, int budget) if (unlikely(mc->ops->peek_rx(mc->dev) || test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) { MAL_DBG2(mal, "rotting packet" NL); - if (napi_reschedule(napi)) - mal_disable_eob_irq(mal); - else - MAL_DBG2(mal, "already in poll list" NL); - - if (budget > 0) - goto again; - else + if (!napi_schedule(napi)) goto more_work; + + spin_lock_irqsave(&mal->lock, flags); + mal_disable_eob_irq(mal); + spin_unlock_irqrestore(&mal->lock, flags); } mc->ops->poll_tx(mc->dev); } @@ -527,7 +524,8 @@ static int mal_probe(struct platform_device *ofdev) unsigned long irqflags; irq_handler_t hdlr_serr, hdlr_txde, hdlr_rxde; - mal = kzalloc(sizeof(struct mal_instance), GFP_KERNEL); + mal = devm_kzalloc(&ofdev->dev, sizeof(struct mal_instance), + GFP_KERNEL); if (!mal) return -ENOMEM; @@ -542,8 +540,7 @@ static int mal_probe(struct platform_device *ofdev) printk(KERN_ERR "mal%d: can't find MAL num-tx-chans property!\n", index); - err = -ENODEV; - goto fail; + return -ENODEV; } mal->num_tx_chans = prop[0]; @@ -552,8 +549,7 @@ static int mal_probe(struct platform_device *ofdev) printk(KERN_ERR "mal%d: can't find MAL num-rx-chans property!\n", index); - err = -ENODEV; - goto fail; + return -ENODEV; } mal->num_rx_chans = prop[0]; @@ -561,15 +557,13 @@ static int mal_probe(struct platform_device *ofdev) if (dcr_base == 0) { printk(KERN_ERR "mal%d: can't find DCR resource!\n", index); - 
err = -ENODEV; - goto fail; + return -ENODEV; } mal->dcr_host = dcr_map(ofdev->dev.of_node, dcr_base, 0x100); if (!DCR_MAP_OK(mal->dcr_host)) { printk(KERN_ERR "mal%d: failed to map DCRs !\n", index); - err = -ENODEV; - goto fail; + return -ENODEV; } if (of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal-405ez")) { @@ -578,41 +572,25 @@ static int mal_probe(struct platform_device *ofdev) mal->features |= (MAL_FTR_CLEAR_ICINTSTAT | MAL_FTR_COMMON_ERR_INT); #else - printk(KERN_ERR "%s: Support for 405EZ not enabled!\n", - ofdev->dev.of_node->full_name); - err = -ENODEV; - goto fail; -#endif - } - - mal->txeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - mal->rxeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 1); - mal->serr_irq = irq_of_parse_and_map(ofdev->dev.of_node, 2); - - if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { - mal->txde_irq = mal->rxde_irq = mal->serr_irq; - } else { - mal->txde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 3); - mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4); - } - - if (mal->txeob_irq == NO_IRQ || mal->rxeob_irq == NO_IRQ || - mal->serr_irq == NO_IRQ || mal->txde_irq == NO_IRQ || - mal->rxde_irq == NO_IRQ) { - printk(KERN_ERR - "mal%d: failed to map interrupts !\n", index); + printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n", + ofdev->dev.of_node); err = -ENODEV; goto fail_unmap; +#endif } INIT_LIST_HEAD(&mal->poll_list); INIT_LIST_HEAD(&mal->list); spin_lock_init(&mal->lock); - init_dummy_netdev(&mal->dummy_dev); + mal->dummy_dev = alloc_netdev_dummy(0); + if (!mal->dummy_dev) { + err = -ENOMEM; + goto fail_unmap; + } - netif_napi_add(&mal->dummy_dev, &mal->napi, mal_poll, - CONFIG_IBM_EMAC_POLL_WEIGHT); + netif_napi_add_weight(mal->dummy_dev, &mal->napi, mal_poll, + CONFIG_IBM_EMAC_POLL_WEIGHT); /* Load power-on reset defaults */ mal_reset(mal); @@ -638,10 +616,10 @@ static int mal_probe(struct platform_device *ofdev) (NUM_TX_BUFF * mal->num_tx_chans + NUM_RX_BUFF * mal->num_rx_chans); 
mal->bd_virt = dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (mal->bd_virt == NULL) { err = -ENOMEM; - goto fail_unmap; + goto fail_dummy; } for (i = 0; i < mal->num_tx_chans; ++i) @@ -654,73 +632,71 @@ static int mal_probe(struct platform_device *ofdev) sizeof(struct mal_descriptor) * mal_rx_bd_offset(mal, i)); + mal->txeob_irq = platform_get_irq(ofdev, 0); + mal->rxeob_irq = platform_get_irq(ofdev, 1); + mal->serr_irq = platform_get_irq(ofdev, 2); + if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { + mal->txde_irq = mal->rxde_irq = mal->serr_irq; irqflags = IRQF_SHARED; hdlr_serr = hdlr_txde = hdlr_rxde = mal_int; } else { + mal->txde_irq = platform_get_irq(ofdev, 3); + mal->rxde_irq = platform_get_irq(ofdev, 4); irqflags = 0; hdlr_serr = mal_serr; hdlr_txde = mal_txde; hdlr_rxde = mal_rxde; } - err = request_irq(mal->serr_irq, hdlr_serr, irqflags, "MAL SERR", mal); + err = devm_request_irq(&ofdev->dev, mal->serr_irq, hdlr_serr, irqflags, + "MAL SERR", mal); if (err) goto fail2; - err = request_irq(mal->txde_irq, hdlr_txde, irqflags, "MAL TX DE", mal); + err = devm_request_irq(&ofdev->dev, mal->txde_irq, hdlr_txde, irqflags, + "MAL TX DE", mal); if (err) - goto fail3; - err = request_irq(mal->txeob_irq, mal_txeob, 0, "MAL TX EOB", mal); + goto fail2; + err = devm_request_irq(&ofdev->dev, mal->txeob_irq, mal_txeob, 0, + "MAL TX EOB", mal); if (err) - goto fail4; - err = request_irq(mal->rxde_irq, hdlr_rxde, irqflags, "MAL RX DE", mal); + goto fail2; + err = devm_request_irq(&ofdev->dev, mal->rxde_irq, hdlr_rxde, irqflags, + "MAL RX DE", mal); if (err) - goto fail5; - err = request_irq(mal->rxeob_irq, mal_rxeob, 0, "MAL RX EOB", mal); + goto fail2; + err = devm_request_irq(&ofdev->dev, mal->rxeob_irq, mal_rxeob, 0, + "MAL RX EOB", mal); if (err) - goto fail6; + goto fail2; /* Enable all MAL SERR interrupt sources */ - if (mal->version == 2) - set_mal_dcrn(mal, MAL_IER, MAL2_IER_EVENTS); - else - 
set_mal_dcrn(mal, MAL_IER, MAL1_IER_EVENTS); + set_mal_dcrn(mal, MAL_IER, MAL_IER_EVENTS); /* Enable EOB interrupt */ mal_enable_eob_irq(mal); printk(KERN_INFO - "MAL v%d %s, %d TX channels, %d RX channels\n", - mal->version, ofdev->dev.of_node->full_name, + "MAL v%d %pOF, %d TX channels, %d RX channels\n", + mal->version, ofdev->dev.of_node, mal->num_tx_chans, mal->num_rx_chans); /* Advertise this instance to the rest of the world */ wmb(); platform_set_drvdata(ofdev, mal); - mal_dbg_register(mal); - return 0; - fail6: - free_irq(mal->rxde_irq, mal); - fail5: - free_irq(mal->txeob_irq, mal); - fail4: - free_irq(mal->txde_irq, mal); - fail3: - free_irq(mal->serr_irq, mal); fail2: dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma); + fail_dummy: + free_netdev(mal->dummy_dev); fail_unmap: dcr_unmap(mal->dcr_host, 0x100); - fail: - kfree(mal); - return err; } -static int mal_remove(struct platform_device *ofdev) +static void mal_remove(struct platform_device *ofdev) { struct mal_instance *mal = platform_get_drvdata(ofdev); @@ -735,27 +711,20 @@ static int mal_remove(struct platform_device *ofdev) "mal%d: commac list is not empty on remove!\n", mal->index); - free_irq(mal->serr_irq, mal); - free_irq(mal->txde_irq, mal); - free_irq(mal->txeob_irq, mal); - free_irq(mal->rxde_irq, mal); - free_irq(mal->rxeob_irq, mal); - mal_reset(mal); - mal_dbg_unregister(mal); + free_netdev(mal->dummy_dev); + + dcr_unmap(mal->dcr_host, 0x100); dma_free_coherent(&ofdev->dev, sizeof(struct mal_descriptor) * - (NUM_TX_BUFF * mal->num_tx_chans + - NUM_RX_BUFF * mal->num_rx_chans), mal->bd_virt, - mal->bd_dma); - kfree(mal); - - return 0; + (NUM_TX_BUFF * mal->num_tx_chans + + NUM_RX_BUFF * mal->num_rx_chans), + mal->bd_virt, mal->bd_dma); } -static struct of_device_id mal_platform_match[] = +static const struct of_device_id mal_platform_match[] = { { .compatible = "ibm,mcmal", @@ -778,7 +747,6 @@ static struct of_device_id mal_platform_match[] = static struct 
platform_driver mal_of_driver = { .driver = { .name = "mcmal", - .owner = THIS_MODULE, .of_match_table = mal_platform_match, }, .probe = mal_probe, diff --git a/drivers/net/ethernet/ibm/emac/mal.h b/drivers/net/ethernet/ibm/emac/mal.h index e431a32e3d69..e0ddc41186a2 100644 --- a/drivers/net/ethernet/ibm/emac/mal.h +++ b/drivers/net/ethernet/ibm/emac/mal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/mal.h * @@ -14,12 +15,6 @@ * Based on original work by * Armin Kuster <akuster@mvista.com> * Copyright 2002 MontaVista Softare Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #ifndef __IBM_NEWEMAC_MAL_H #define __IBM_NEWEMAC_MAL_H @@ -95,24 +90,20 @@ #define MAL_IER 0x02 +/* MAL IER bits */ #define MAL_IER_DE 0x00000010 #define MAL_IER_OTE 0x00000004 #define MAL_IER_OE 0x00000002 #define MAL_IER_PE 0x00000001 -/* MAL V1 IER bits */ -#define MAL1_IER_NWE 0x00000008 -#define MAL1_IER_SOC_EVENTS MAL1_IER_NWE -#define MAL1_IER_EVENTS (MAL1_IER_SOC_EVENTS | MAL_IER_DE | \ - MAL_IER_OTE | MAL_IER_OE | MAL_IER_PE) -/* MAL V2 IER bits */ -#define MAL2_IER_PT 0x00000080 -#define MAL2_IER_PRE 0x00000040 -#define MAL2_IER_PWE 0x00000020 -#define MAL2_IER_SOC_EVENTS (MAL2_IER_PT | MAL2_IER_PRE | MAL2_IER_PWE) -#define MAL2_IER_EVENTS (MAL2_IER_SOC_EVENTS | MAL_IER_DE | \ - MAL_IER_OTE | MAL_IER_OE | MAL_IER_PE) +/* PLB read/write/timeout errors */ +#define MAL_IER_PTE 0x00000080 +#define MAL_IER_PRE 0x00000040 +#define MAL_IER_PWE 0x00000020 +#define MAL_IER_SOC_EVENTS (MAL_IER_PTE | MAL_IER_PRE | MAL_IER_PWE) +#define MAL_IER_EVENTS (MAL_IER_SOC_EVENTS | MAL_IER_DE | \ + MAL_IER_OTE | MAL_IER_OE | MAL_IER_PE) #define MAL_TXCASR 0x04 #define MAL_TXCARR 0x05 @@ -140,7 +131,7 @@ static inline int 
mal_rx_size(int len) static inline int mal_tx_chunks(int len) { - return (len + MAL_MAX_TX_SIZE - 1) / MAL_MAX_TX_SIZE; + return DIV_ROUND_UP(len, MAL_MAX_TX_SIZE); } #define MAL_CHAN_MASK(n) (0x80000000 >> (n)) @@ -214,7 +205,7 @@ struct mal_instance { int index; spinlock_t lock; - struct net_device dummy_dev; + struct net_device *dummy_dev; unsigned int features; }; diff --git a/drivers/net/ethernet/ibm/emac/phy.c b/drivers/net/ethernet/ibm/emac/phy.c index d3b9d103353e..1e798cc9b6b8 100644 --- a/drivers/net/ethernet/ibm/emac/phy.c +++ b/drivers/net/ethernet/ibm/emac/phy.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * drivers/net/ethernet/ibm/emac/phy.c * @@ -96,7 +97,7 @@ int emac_mii_reset_gpcs(struct mii_phy *phy) if ((val & BMCR_ISOLATE) && limit > 0) gpcs_phy_write(phy, MII_BMCR, val & ~BMCR_ISOLATE); - if (limit > 0 && phy->mode == PHY_MODE_SGMII) { + if (limit > 0 && phy->mode == PHY_INTERFACE_MODE_SGMII) { /* Configure GPCS interface to recommended setting for SGMII */ gpcs_phy_write(phy, 0x04, 0x8120); /* AsymPause, FDX */ gpcs_phy_write(phy, 0x07, 0x2801); /* msg_pg, toggle */ @@ -276,7 +277,7 @@ static int genmii_read_link(struct mii_phy *phy) } /* Generic implementation for most 10/100/1000 PHYs */ -static struct mii_phy_ops generic_phy_ops = { +static const struct mii_phy_ops generic_phy_ops = { .setup_aneg = genmii_setup_aneg, .setup_forced = genmii_setup_forced, .poll_link = genmii_poll_link, @@ -313,16 +314,16 @@ static int cis8201_init(struct mii_phy *phy) epcr &= ~EPCR_MODE_MASK; switch (phy->mode) { - case PHY_MODE_TBI: + case PHY_INTERFACE_MODE_TBI: epcr |= EPCR_TBI_MODE; break; - case PHY_MODE_RTBI: + case PHY_INTERFACE_MODE_RTBI: epcr |= EPCR_RTBI_MODE; break; - case PHY_MODE_GMII: + case PHY_INTERFACE_MODE_GMII: epcr |= EPCR_GMII_MODE; break; - case PHY_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII: default: epcr |= EPCR_RGMII_MODE; } @@ -340,7 +341,7 @@ static int cis8201_init(struct mii_phy *phy) return 0; } -static struct 
mii_phy_ops cis8201_phy_ops = { +static const struct mii_phy_ops cis8201_phy_ops = { .init = cis8201_init, .setup_aneg = genmii_setup_aneg, .setup_forced = genmii_setup_forced, @@ -420,7 +421,7 @@ static int et1011c_init(struct mii_phy *phy) return 0; } -static struct mii_phy_ops et1011c_phy_ops = { +static const struct mii_phy_ops et1011c_phy_ops = { .init = et1011c_init, .setup_aneg = genmii_setup_aneg, .setup_forced = genmii_setup_forced, @@ -439,7 +440,7 @@ static struct mii_phy_def et1011c_phy_def = { -static struct mii_phy_ops m88e1111_phy_ops = { +static const struct mii_phy_ops m88e1111_phy_ops = { .init = m88e1111_init, .setup_aneg = genmii_setup_aneg, .setup_forced = genmii_setup_forced, @@ -455,7 +456,7 @@ static struct mii_phy_def m88e1111_phy_def = { .ops = &m88e1111_phy_ops, }; -static struct mii_phy_ops m88e1112_phy_ops = { +static const struct mii_phy_ops m88e1112_phy_ops = { .init = m88e1112_init, .setup_aneg = genmii_setup_aneg, .setup_forced = genmii_setup_forced, @@ -470,12 +471,38 @@ static struct mii_phy_def m88e1112_phy_def = { .ops = &m88e1112_phy_ops, }; +static int ar8035_init(struct mii_phy *phy) +{ + phy_write(phy, 0x1d, 0x5); /* Address debug register 5 */ + phy_write(phy, 0x1e, 0x2d47); /* Value copied from u-boot */ + phy_write(phy, 0x1d, 0xb); /* Address hib ctrl */ + phy_write(phy, 0x1e, 0xbc20); /* Value copied from u-boot */ + + return 0; +} + +static const struct mii_phy_ops ar8035_phy_ops = { + .init = ar8035_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link, +}; + +static struct mii_phy_def ar8035_phy_def = { + .phy_id = 0x004dd070, + .phy_id_mask = 0xfffffff0, + .name = "Atheros 8035 Gigabit Ethernet", + .ops = &ar8035_phy_ops, +}; + static struct mii_phy_def *mii_phy_table[] = { &et1011c_phy_def, &cis8201_phy_def, &bcm5248_phy_def, &m88e1111_phy_def, &m88e1112_phy_def, + &ar8035_phy_def, &genmii_phy_def, NULL }; diff --git 
a/drivers/net/ethernet/ibm/emac/phy.h b/drivers/net/ethernet/ibm/emac/phy.h index d7e41ec37467..2184e8373ee5 100644 --- a/drivers/net/ethernet/ibm/emac/phy.h +++ b/drivers/net/ethernet/ibm/emac/phy.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/phy.h * @@ -13,11 +14,6 @@ * * Minor additions by Eugene Surovegin <ebs@ebshome.net>, 2004 * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * * This file basically duplicates sungem_phy.{c,h} with different PHYs * supported. I'm looking into merging that in a single mii layer more * flexible than mii.c diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c index c47e23d6eeaa..b544dd8633b7 100644 --- a/drivers/net/ethernet/ibm/emac/rgmii.c +++ b/drivers/net/ethernet/ibm/emac/rgmii.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/ethernet/ibm/emac/rgmii.c * @@ -14,16 +15,13 @@ * Based on original work by * Matt Porter <mporter@kernel.crashing.org> * Copyright 2004 MontaVista Software, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * */ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/ethtool.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> #include <asm/io.h> #include "emac.h" @@ -44,49 +42,35 @@ /* RGMIIx_SSR */ #define RGMII_SSR_MASK(idx) (0x7 << ((idx) * 8)) +#define RGMII_SSR_10(idx) (0x1 << ((idx) * 8)) #define RGMII_SSR_100(idx) (0x2 << ((idx) * 8)) #define RGMII_SSR_1000(idx) (0x4 << ((idx) * 8)) /* RGMII bridge supports only GMII/TBI and RGMII/RTBI PHYs */ static inline int rgmii_valid_mode(int phy_mode) { - return phy_mode == PHY_MODE_GMII || - phy_mode == PHY_MODE_MII || - phy_mode == PHY_MODE_RGMII || - phy_mode == PHY_MODE_TBI || - phy_mode == PHY_MODE_RTBI; -} - -static inline const char *rgmii_mode_name(int mode) -{ - switch (mode) { - case PHY_MODE_RGMII: - return "RGMII"; - case PHY_MODE_TBI: - return "TBI"; - case PHY_MODE_GMII: - return "GMII"; - case PHY_MODE_MII: - return "MII"; - case PHY_MODE_RTBI: - return "RTBI"; - default: - BUG(); - } + return phy_interface_mode_is_rgmii(phy_mode) || + phy_mode == PHY_INTERFACE_MODE_GMII || + phy_mode == PHY_INTERFACE_MODE_MII || + phy_mode == PHY_INTERFACE_MODE_TBI || + phy_mode == PHY_INTERFACE_MODE_RTBI; } static inline u32 rgmii_mode_mask(int mode, int input) { switch (mode) { - case PHY_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: return RGMII_FER_RGMII(input); - case PHY_MODE_TBI: + case PHY_INTERFACE_MODE_TBI: return RGMII_FER_TBI(input); - case PHY_MODE_GMII: + case PHY_INTERFACE_MODE_GMII: return RGMII_FER_GMII(input); - case PHY_MODE_MII: + case PHY_INTERFACE_MODE_MII: return RGMII_FER_MII(input); - case PHY_MODE_RTBI: + case PHY_INTERFACE_MODE_RTBI: return RGMII_FER_RTBI(input); default: BUG(); @@ -102,8 +86,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode) /* Check if we need to attach to a RGMII */ if (input < 0 || 
!rgmii_valid_mode(mode)) { - printk(KERN_ERR "%s: unsupported settings !\n", - ofdev->dev.of_node->full_name); + printk(KERN_ERR "%pOF: unsupported settings !\n", + ofdev->dev.of_node); return -ENODEV; } @@ -112,8 +96,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode) /* Enable this input */ out_be32(&p->fer, in_be32(&p->fer) | rgmii_mode_mask(mode, input)); - printk(KERN_NOTICE "%s: input %d in %s mode\n", - ofdev->dev.of_node->full_name, input, rgmii_mode_name(mode)); + printk(KERN_NOTICE "%pOF: input %d in %s mode\n", + ofdev->dev.of_node, input, phy_modes(mode)); ++dev->users; @@ -138,6 +122,8 @@ void rgmii_set_speed(struct platform_device *ofdev, int input, int speed) ssr |= RGMII_SSR_1000(input); else if (speed == SPEED_100) ssr |= RGMII_SSR_100(input); + else if (speed == SPEED_10) + ssr |= RGMII_SSR_10(input); out_be32(&p->ssr, ssr); @@ -230,37 +216,28 @@ void *rgmii_dump_regs(struct platform_device *ofdev, void *buf) static int rgmii_probe(struct platform_device *ofdev) { - struct device_node *np = ofdev->dev.of_node; struct rgmii_instance *dev; - struct resource regs; - int rc; + int err; - rc = -ENOMEM; - dev = kzalloc(sizeof(struct rgmii_instance), GFP_KERNEL); - if (dev == NULL) - goto err_gone; + dev = devm_kzalloc(&ofdev->dev, sizeof(struct rgmii_instance), + GFP_KERNEL); + if (!dev) + return -ENOMEM; - mutex_init(&dev->lock); - dev->ofdev = ofdev; + err = devm_mutex_init(&ofdev->dev, &dev->lock); + if (err) + return err; - rc = -ENXIO; - if (of_address_to_resource(np, 0, &regs)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); - goto err_free; - } + dev->ofdev = ofdev; - rc = -ENOMEM; - dev->base = (struct rgmii_regs __iomem *)ioremap(regs.start, - sizeof(struct rgmii_regs)); - if (dev->base == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); - goto err_free; + dev->base = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->base)) { + dev_err(&ofdev->dev, "can't
map device registers"); + return PTR_ERR(dev->base); } /* Check for RGMII flags */ - if (of_get_property(ofdev->dev.of_node, "has-mdio", NULL)) + if (of_property_read_bool(ofdev->dev.of_node, "has-mdio")) dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO; /* CAB lacks the right properties, fix this up */ @@ -274,34 +251,17 @@ static int rgmii_probe(struct platform_device *ofdev) out_be32(&dev->base->fer, 0); printk(KERN_INFO - "RGMII %s initialized with%s MDIO support\n", - ofdev->dev.of_node->full_name, + "RGMII %pOF initialized with%s MDIO support\n", + ofdev->dev.of_node, (dev->flags & EMAC_RGMII_FLAG_HAS_MDIO) ? "" : "out"); wmb(); platform_set_drvdata(ofdev, dev); return 0; - - err_free: - kfree(dev); - err_gone: - return rc; -} - -static int rgmii_remove(struct platform_device *ofdev) -{ - struct rgmii_instance *dev = platform_get_drvdata(ofdev); - - WARN_ON(dev->users != 0); - - iounmap(dev->base); - kfree(dev); - - return 0; } -static struct of_device_id rgmii_match[] = +static const struct of_device_id rgmii_match[] = { { .compatible = "ibm,rgmii", @@ -315,11 +275,9 @@ static struct of_device_id rgmii_match[] = static struct platform_driver rgmii_driver = { .driver = { .name = "emac-rgmii", - .owner = THIS_MODULE, .of_match_table = rgmii_match, }, .probe = rgmii_probe, - .remove = rgmii_remove, }; int __init rgmii_init(void) diff --git a/drivers/net/ethernet/ibm/emac/rgmii.h b/drivers/net/ethernet/ibm/emac/rgmii.h index 668bceeff4a2..8e4e36eed172 100644 --- a/drivers/net/ethernet/ibm/emac/rgmii.h +++ b/drivers/net/ethernet/ibm/emac/rgmii.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/rgmii.h * @@ -16,11 +17,6 @@ * * Copyright (c) 2004, 2005 Zultys Technologies. 
* Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ #ifndef __IBM_NEWEMAC_RGMII_H @@ -56,15 +52,15 @@ struct rgmii_instance { #ifdef CONFIG_IBM_EMAC_RGMII -extern int rgmii_init(void); -extern void rgmii_exit(void); -extern int rgmii_attach(struct platform_device *ofdev, int input, int mode); -extern void rgmii_detach(struct platform_device *ofdev, int input); -extern void rgmii_get_mdio(struct platform_device *ofdev, int input); -extern void rgmii_put_mdio(struct platform_device *ofdev, int input); -extern void rgmii_set_speed(struct platform_device *ofdev, int input, int speed); -extern int rgmii_get_regs_len(struct platform_device *ofdev); -extern void *rgmii_dump_regs(struct platform_device *ofdev, void *buf); +int rgmii_init(void); +void rgmii_exit(void); +int rgmii_attach(struct platform_device *ofdev, int input, int mode); +void rgmii_detach(struct platform_device *ofdev, int input); +void rgmii_get_mdio(struct platform_device *ofdev, int input); +void rgmii_put_mdio(struct platform_device *ofdev, int input); +void rgmii_set_speed(struct platform_device *ofdev, int input, int speed); +int rgmii_get_regs_len(struct platform_device *ofdev); +void *rgmii_dump_regs(struct platform_device *ofdev, void *buf); #else diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c index c231a4a32c4d..09f6373ed2f9 100644 --- a/drivers/net/ethernet/ibm/emac/tah.c +++ b/drivers/net/ethernet/ibm/emac/tah.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/ethernet/ibm/emac/tah.c * @@ -12,12 +13,10 @@ * Matt Porter <mporter@kernel.crashing.org> * * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net> - * - * This program is free software; you 
can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ +#include <linux/mod_devicetable.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> #include <asm/io.h> #include "emac.h" @@ -57,8 +56,7 @@ void tah_reset(struct platform_device *ofdev) --n; if (unlikely(!n)) - printk(KERN_ERR "%s: reset timeout\n", - ofdev->dev.of_node->full_name); + printk(KERN_ERR "%pOF: reset timeout\n", ofdev->dev.of_node); /* 10KB TAH TX FIFO accommodates the max MTU of 9000 */ out_be32(&p->mr, @@ -89,33 +87,24 @@ void *tah_dump_regs(struct platform_device *ofdev, void *buf) static int tah_probe(struct platform_device *ofdev) { - struct device_node *np = ofdev->dev.of_node; struct tah_instance *dev; - struct resource regs; - int rc; + int err; - rc = -ENOMEM; - dev = kzalloc(sizeof(struct tah_instance), GFP_KERNEL); - if (dev == NULL) - goto err_gone; + dev = devm_kzalloc(&ofdev->dev, sizeof(struct tah_instance), + GFP_KERNEL); + if (!dev) + return -ENOMEM; - mutex_init(&dev->lock); - dev->ofdev = ofdev; + err = devm_mutex_init(&ofdev->dev, &dev->lock); + if (err) + return err; - rc = -ENXIO; - if (of_address_to_resource(np, 0, &regs)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); - goto err_free; - } + dev->ofdev = ofdev; - rc = -ENOMEM; - dev->base = (struct tah_regs __iomem *)ioremap(regs.start, - sizeof(struct tah_regs)); - if (dev->base == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); - goto err_free; + dev->base = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->base)) { + dev_err(&ofdev->dev, "can't map device registers"); + return PTR_ERR(dev->base); } platform_set_drvdata(ofdev, dev); @@ -123,31 +112,13 @@ static int tah_probe(struct platform_device *ofdev) /* Initialize TAH and enable IPv4 checksum verification, no TSO yet */ 
tah_reset(ofdev); - printk(KERN_INFO - "TAH %s initialized\n", ofdev->dev.of_node->full_name); + printk(KERN_INFO "TAH %pOF initialized\n", ofdev->dev.of_node); wmb(); return 0; - - err_free: - kfree(dev); - err_gone: - return rc; -} - -static int tah_remove(struct platform_device *ofdev) -{ - struct tah_instance *dev = platform_get_drvdata(ofdev); - - WARN_ON(dev->users != 0); - - iounmap(dev->base); - kfree(dev); - - return 0; } -static struct of_device_id tah_match[] = +static const struct of_device_id tah_match[] = { { .compatible = "ibm,tah", @@ -162,11 +133,9 @@ static struct of_device_id tah_match[] = static struct platform_driver tah_driver = { .driver = { .name = "emac-tah", - .owner = THIS_MODULE, .of_match_table = tah_match, }, .probe = tah_probe, - .remove = tah_remove, }; int __init tah_init(void) diff --git a/drivers/net/ethernet/ibm/emac/tah.h b/drivers/net/ethernet/ibm/emac/tah.h index 350b7096a041..86c2b6b9d460 100644 --- a/drivers/net/ethernet/ibm/emac/tah.h +++ b/drivers/net/ethernet/ibm/emac/tah.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/tah.h * @@ -12,11 +13,6 @@ * Matt Porter <mporter@kernel.crashing.org> * * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
*/ #ifndef __IBM_NEWEMAC_TAH_H @@ -72,13 +68,13 @@ struct tah_instance { #ifdef CONFIG_IBM_EMAC_TAH -extern int tah_init(void); -extern void tah_exit(void); -extern int tah_attach(struct platform_device *ofdev, int channel); -extern void tah_detach(struct platform_device *ofdev, int channel); -extern void tah_reset(struct platform_device *ofdev); -extern int tah_get_regs_len(struct platform_device *ofdev); -extern void *tah_dump_regs(struct platform_device *ofdev, void *buf); +int tah_init(void); +void tah_exit(void); +int tah_attach(struct platform_device *ofdev, int channel); +void tah_detach(struct platform_device *ofdev, int channel); +void tah_reset(struct platform_device *ofdev); +int tah_get_regs_len(struct platform_device *ofdev); +void *tah_dump_regs(struct platform_device *ofdev, void *buf); #else diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c index 4cdf286f7ee3..69ca6065de1c 100644 --- a/drivers/net/ethernet/ibm/emac/zmii.c +++ b/drivers/net/ethernet/ibm/emac/zmii.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/ethernet/ibm/emac/zmii.c * @@ -14,16 +15,13 @@ * Based on original work by * Armin Kuster <akuster@mvista.com> * Copyright 2001 MontaVista Softare Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- * */ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/ethtool.h> +#include <linux/mod_devicetable.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> #include <asm/io.h> #include "emac.h" @@ -48,20 +46,20 @@ */ static inline int zmii_valid_mode(int mode) { - return mode == PHY_MODE_MII || - mode == PHY_MODE_RMII || - mode == PHY_MODE_SMII || - mode == PHY_MODE_NA; + return mode == PHY_INTERFACE_MODE_MII || + mode == PHY_INTERFACE_MODE_RMII || + mode == PHY_INTERFACE_MODE_SMII || + mode == PHY_INTERFACE_MODE_NA; } static inline const char *zmii_mode_name(int mode) { switch (mode) { - case PHY_MODE_MII: + case PHY_INTERFACE_MODE_MII: return "MII"; - case PHY_MODE_RMII: + case PHY_INTERFACE_MODE_RMII: return "RMII"; - case PHY_MODE_SMII: + case PHY_INTERFACE_MODE_SMII: return "SMII"; default: BUG(); @@ -71,18 +69,19 @@ static inline const char *zmii_mode_name(int mode) static inline u32 zmii_mode_mask(int mode, int input) { switch (mode) { - case PHY_MODE_MII: + case PHY_INTERFACE_MODE_MII: return ZMII_FER_MII(input); - case PHY_MODE_RMII: + case PHY_INTERFACE_MODE_RMII: return ZMII_FER_RMII(input); - case PHY_MODE_SMII: + case PHY_INTERFACE_MODE_SMII: return ZMII_FER_SMII(input); default: return 0; } } -int zmii_attach(struct platform_device *ofdev, int input, int *mode) +int zmii_attach(struct platform_device *ofdev, int input, + phy_interface_t *mode) { struct zmii_instance *dev = platform_get_drvdata(ofdev); struct zmii_regs __iomem *p = dev->base; @@ -105,30 +104,30 @@ int zmii_attach(struct platform_device *ofdev, int input, int *mode) * Please, always specify PHY mode in your board port to avoid * any surprises. 
*/ - if (dev->mode == PHY_MODE_NA) { - if (*mode == PHY_MODE_NA) { + if (dev->mode == PHY_INTERFACE_MODE_NA) { + if (*mode == PHY_INTERFACE_MODE_NA) { u32 r = dev->fer_save; ZMII_DBG(dev, "autodetecting mode, FER = 0x%08x" NL, r); if (r & (ZMII_FER_MII(0) | ZMII_FER_MII(1))) - dev->mode = PHY_MODE_MII; + dev->mode = PHY_INTERFACE_MODE_MII; else if (r & (ZMII_FER_RMII(0) | ZMII_FER_RMII(1))) - dev->mode = PHY_MODE_RMII; + dev->mode = PHY_INTERFACE_MODE_RMII; else - dev->mode = PHY_MODE_SMII; - } else + dev->mode = PHY_INTERFACE_MODE_SMII; + } else { dev->mode = *mode; - - printk(KERN_NOTICE "%s: bridge in %s mode\n", - ofdev->dev.of_node->full_name, + } + printk(KERN_NOTICE "%pOF: bridge in %s mode\n", + ofdev->dev.of_node, zmii_mode_name(dev->mode)); } else { /* All inputs must use the same mode */ - if (*mode != PHY_MODE_NA && *mode != dev->mode) { + if (*mode != PHY_INTERFACE_MODE_NA && *mode != dev->mode) { printk(KERN_ERR - "%s: invalid mode %d specified for input %d\n", - ofdev->dev.of_node->full_name, *mode, input); + "%pOF: invalid mode %d specified for input %d\n", + ofdev->dev.of_node, *mode, input); mutex_unlock(&dev->lock); return -EINVAL; } @@ -233,34 +232,25 @@ void *zmii_dump_regs(struct platform_device *ofdev, void *buf) static int zmii_probe(struct platform_device *ofdev) { - struct device_node *np = ofdev->dev.of_node; struct zmii_instance *dev; - struct resource regs; - int rc; + int err; - rc = -ENOMEM; - dev = kzalloc(sizeof(struct zmii_instance), GFP_KERNEL); - if (dev == NULL) - goto err_gone; + dev = devm_kzalloc(&ofdev->dev, sizeof(struct zmii_instance), + GFP_KERNEL); + if (!dev) + return -ENOMEM; - mutex_init(&dev->lock); - dev->ofdev = ofdev; - dev->mode = PHY_MODE_NA; + err = devm_mutex_init(&ofdev->dev, &dev->lock); + if (err) + return err; - rc = -ENXIO; - if (of_address_to_resource(np, 0, &regs)) { - printk(KERN_ERR "%s: Can't get registers address\n", - np->full_name); - goto err_free; - } + dev->ofdev = ofdev; + dev->mode = 
PHY_INTERFACE_MODE_NA; - rc = -ENOMEM; - dev->base = (struct zmii_regs __iomem *)ioremap(regs.start, - sizeof(struct zmii_regs)); - if (dev->base == NULL) { - printk(KERN_ERR "%s: Can't map device registers!\n", - np->full_name); - goto err_free; + dev->base = devm_platform_ioremap_resource(ofdev, 0); + if (IS_ERR(dev->base)) { + dev_err(&ofdev->dev, "can't map device registers"); + return PTR_ERR(dev->base); } /* We may need FER value for autodetection later */ @@ -269,32 +259,14 @@ static int zmii_probe(struct platform_device *ofdev) /* Disable all inputs by default */ out_be32(&dev->base->fer, 0); - printk(KERN_INFO - "ZMII %s initialized\n", ofdev->dev.of_node->full_name); + printk(KERN_INFO "ZMII %pOF initialized\n", ofdev->dev.of_node); wmb(); platform_set_drvdata(ofdev, dev); return 0; - - err_free: - kfree(dev); - err_gone: - return rc; -} - -static int zmii_remove(struct platform_device *ofdev) -{ - struct zmii_instance *dev = platform_get_drvdata(ofdev); - - WARN_ON(dev->users != 0); - - iounmap(dev->base); - kfree(dev); - - return 0; } -static struct of_device_id zmii_match[] = +static const struct of_device_id zmii_match[] = { { .compatible = "ibm,zmii", @@ -309,11 +281,9 @@ static struct of_device_id zmii_match[] = static struct platform_driver zmii_driver = { .driver = { .name = "emac-zmii", - .owner = THIS_MODULE, .of_match_table = zmii_match, }, .probe = zmii_probe, - .remove = zmii_remove, }; int __init zmii_init(void) diff --git a/drivers/net/ethernet/ibm/emac/zmii.h b/drivers/net/ethernet/ibm/emac/zmii.h index 455bfb085493..65daedc78594 100644 --- a/drivers/net/ethernet/ibm/emac/zmii.h +++ b/drivers/net/ethernet/ibm/emac/zmii.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * drivers/net/ethernet/ibm/emac/zmii.h * @@ -14,12 +15,6 @@ * Based on original work by * Armin Kuster <akuster@mvista.com> * Copyright 2001 MontaVista Softare Inc. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #ifndef __IBM_NEWEMAC_ZMII_H #define __IBM_NEWEMAC_ZMII_H @@ -53,15 +48,16 @@ struct zmii_instance { #ifdef CONFIG_IBM_EMAC_ZMII -extern int zmii_init(void); -extern void zmii_exit(void); -extern int zmii_attach(struct platform_device *ofdev, int input, int *mode); -extern void zmii_detach(struct platform_device *ofdev, int input); -extern void zmii_get_mdio(struct platform_device *ofdev, int input); -extern void zmii_put_mdio(struct platform_device *ofdev, int input); -extern void zmii_set_speed(struct platform_device *ofdev, int input, int speed); -extern int zmii_get_regs_len(struct platform_device *ocpdev); -extern void *zmii_dump_regs(struct platform_device *ofdev, void *buf); +int zmii_init(void); +void zmii_exit(void); +int zmii_attach(struct platform_device *ofdev, int input, + phy_interface_t *mode); +void zmii_detach(struct platform_device *ofdev, int input); +void zmii_get_mdio(struct platform_device *ofdev, int input); +void zmii_put_mdio(struct platform_device *ofdev, int input); +void zmii_set_speed(struct platform_device *ofdev, int input, int speed); +int zmii_get_regs_len(struct platform_device *ocpdev); +void *zmii_dump_regs(struct platform_device *ofdev, void *buf); #else # define zmii_init() 0 diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 70fd55968844..6f0821f1e798 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IBM Power Virtual Ethernet Device Driver * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; 
either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2003, 2010 * * Authors: Dave Larson <larson1@us.ibm.com> @@ -25,7 +12,6 @@ */ #include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/dma-mapping.h> @@ -47,11 +33,12 @@ #include <asm/vio.h> #include <asm/iommu.h> #include <asm/firmware.h> +#include <net/tcp.h> +#include <net/ip6_checksum.h> #include "ibmveth.h" static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; @@ -59,7 +46,7 @@ static struct kobj_type ktype_veth_pool; static const char ibmveth_driver_name[] = "ibmveth"; static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver"; -#define ibmveth_driver_version "1.04" +#define ibmveth_driver_version "1.06" MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>"); MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver"); @@ -80,6 +67,11 @@ static unsigned int rx_flush __read_mostly = 0; module_param(rx_flush, uint, 0644); MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use"); +static bool old_large_send __read_mostly; +module_param(old_large_send, bool, 0444); +MODULE_PARM_DESC(old_large_send, + "Use old large send method on firmware that supports the new method"); + struct ibmveth_stat { char 
name[ETH_GSTRING_LEN]; int offset; @@ -88,7 +80,7 @@ struct ibmveth_stat { #define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat) #define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off)) -struct ibmveth_stat ibmveth_stats[] = { +static struct ibmveth_stat ibmveth_stats[] = { { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) }, { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) }, { "replenish_add_buff_failure", @@ -101,12 +93,15 @@ struct ibmveth_stat ibmveth_stats[] = { { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) }, { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) }, { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) }, + { "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) }, + { "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) }, + { "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) } }; /* simple methods of getting data from the current rxq entry */ static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter) { - return adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off; + return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off); } static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter) @@ -130,9 +125,14 @@ static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter) return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK; } +static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT; +} + static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter) { - return adapter->rx_queue.queue_addr[adapter->rx_queue.index].length; + return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length); } static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter) @@ -140,6 +140,13 @@ static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter) return 
ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD; } +static unsigned int ibmveth_real_max_tx_queues(void) +{ + unsigned int n_cpu = num_online_cpus(); + + return min(n_cpu, IBMVETH_MAX_QUEUES); +} + /* setup the initial settings for a buffer pool */ static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, u32 pool_index, u32 pool_size, @@ -157,12 +164,12 @@ static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool) { int i; - pool->free_map = kmalloc(sizeof(u16) * pool->size, GFP_KERNEL); + pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL); if (!pool->free_map) return -1; - pool->dma_addr = kmalloc(sizeof(dma_addr_t) * pool->size, GFP_KERNEL); + pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL); if (!pool->dma_addr) { kfree(pool->free_map); pool->free_map = NULL; @@ -180,8 +187,6 @@ static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool) return -1; } - memset(pool->dma_addr, 0, sizeof(dma_addr_t) * pool->size); - for (i = 0; i < pool->size; ++i) pool->free_map[i] = i; @@ -197,7 +202,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length) unsigned long offset; for (offset = 0; offset < length; offset += SMP_CACHE_BYTES) - asm("dcbfl %0,%1" :: "b" (addr), "r" (offset)); + asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset)); } /* replenish the buffers for a pool. 
note that we don't need to @@ -206,91 +211,184 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length) static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struct ibmveth_buff_pool *pool) { - u32 i; - u32 count = pool->size - atomic_read(&pool->available); - u32 buffers_added = 0; - struct sk_buff *skb; - unsigned int free_index, index; - u64 correlator; + union ibmveth_buf_desc descs[IBMVETH_MAX_RX_PER_HCALL] = {0}; + u32 remaining = pool->size - atomic_read(&pool->available); + u64 correlators[IBMVETH_MAX_RX_PER_HCALL] = {0}; unsigned long lpar_rc; + u32 buffers_added = 0; + u32 i, filled, batch; + struct vio_dev *vdev; dma_addr_t dma_addr; + struct device *dev; + u32 index; + + vdev = adapter->vdev; + dev = &vdev->dev; mb(); - for (i = 0; i < count; ++i) { - union ibmveth_buf_desc desc; + batch = adapter->rx_buffers_per_hcall; + + while (remaining > 0) { + unsigned int free_index = pool->consumer_index; + + /* Fill a batch of descriptors */ + for (filled = 0; filled < min(remaining, batch); filled++) { + index = pool->free_map[free_index]; + if (WARN_ON(index == IBM_VETH_INVALID_MAP)) { + adapter->replenish_add_buff_failure++; + netdev_info(adapter->netdev, + "Invalid map index %u, reset\n", + index); + schedule_work(&adapter->work); + break; + } - skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); + if (!pool->skbuff[index]) { + struct sk_buff *skb = NULL; - if (!skb) { - netdev_dbg(adapter->netdev, - "replenish: unable to allocate skb\n"); - adapter->replenish_no_mem++; - break; + skb = netdev_alloc_skb(adapter->netdev, + pool->buff_size); + if (!skb) { + adapter->replenish_no_mem++; + adapter->replenish_add_buff_failure++; + break; + } + + dma_addr = dma_map_single(dev, skb->data, + pool->buff_size, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + break; + } + + pool->dma_addr[index] = dma_addr; + pool->skbuff[index] = skb; + } 
else { + /* re-use case */ + dma_addr = pool->dma_addr[index]; + } + + if (rx_flush) { + unsigned int len; + + len = adapter->netdev->mtu + IBMVETH_BUFF_OH; + len = min(pool->buff_size, len); + ibmveth_flush_buffer(pool->skbuff[index]->data, + len); + } + + descs[filled].fields.flags_len = IBMVETH_BUF_VALID | + pool->buff_size; + descs[filled].fields.address = dma_addr; + + correlators[filled] = ((u64)pool->index << 32) | index; + *(u64 *)pool->skbuff[index]->data = correlators[filled]; + + free_index++; + if (free_index >= pool->size) + free_index = 0; } - free_index = pool->consumer_index; - pool->consumer_index++; - if (pool->consumer_index >= pool->size) - pool->consumer_index = 0; - index = pool->free_map[free_index]; + if (!filled) + break; + + /* single buffer case*/ + if (filled == 1) + lpar_rc = h_add_logical_lan_buffer(vdev->unit_address, + descs[0].desc); + else + /* Multi-buffer hcall */ + lpar_rc = h_add_logical_lan_buffers(vdev->unit_address, + descs[0].desc, + descs[1].desc, + descs[2].desc, + descs[3].desc, + descs[4].desc, + descs[5].desc, + descs[6].desc, + descs[7].desc); + if (lpar_rc != H_SUCCESS) { + dev_warn_ratelimited(dev, + "RX h_add_logical_lan failed: filled=%u, rc=%lu, batch=%u\n", + filled, lpar_rc, batch); + goto hcall_failure; + } - BUG_ON(index == IBM_VETH_INVALID_MAP); - BUG_ON(pool->skbuff[index] != NULL); + /* Only update pool state after hcall succeeds */ + for (i = 0; i < filled; i++) { + free_index = pool->consumer_index; + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; - dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, - pool->buff_size, DMA_FROM_DEVICE); + pool->consumer_index++; + if (pool->consumer_index >= pool->size) + pool->consumer_index = 0; + } - if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) - goto failure; + buffers_added += filled; + adapter->replenish_add_buff_success += filled; + remaining -= filled; - pool->free_map[free_index] = IBM_VETH_INVALID_MAP; - pool->dma_addr[index] = dma_addr; - 
pool->skbuff[index] = skb; + memset(&descs, 0, sizeof(descs)); + memset(&correlators, 0, sizeof(correlators)); + continue; - correlator = ((u64)pool->index << 32) | index; - *(u64 *)skb->data = correlator; +hcall_failure: + for (i = 0; i < filled; i++) { + index = correlators[i] & 0xffffffffUL; + dma_addr = pool->dma_addr[index]; - desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size; - desc.fields.address = dma_addr; + if (pool->skbuff[index]) { + if (dma_addr && + !dma_mapping_error(dev, dma_addr)) + dma_unmap_single(dev, dma_addr, + pool->buff_size, + DMA_FROM_DEVICE); - if (rx_flush) { - unsigned int len = min(pool->buff_size, - adapter->netdev->mtu + - IBMVETH_BUFF_OH); - ibmveth_flush_buffer(skb->data, len); + dev_kfree_skb_any(pool->skbuff[index]); + pool->skbuff[index] = NULL; + } } - lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, - desc.desc); + adapter->replenish_add_buff_failure += filled; - if (lpar_rc != H_SUCCESS) { - goto failure; - } else { - buffers_added++; - adapter->replenish_add_buff_success++; + /* + * If multi rx buffers hcall is no longer supported by FW + * e.g. 
in the case of Live Parttion Migration + */ + if (batch > 1 && lpar_rc == H_FUNCTION) { + /* + * Instead of retry submit single buffer individually + * here just set the max rx buffer per hcall to 1 + * buffers will be respleshed next time + * when ibmveth_replenish_buffer_pool() is called again + * with single-buffer case + */ + netdev_info(adapter->netdev, + "RX Multi buffers not supported by FW, rc=%lu\n", + lpar_rc); + adapter->rx_buffers_per_hcall = 1; + netdev_info(adapter->netdev, + "Next rx replesh will fall back to single-buffer hcall\n"); } + break; } mb(); atomic_add(buffers_added, &(pool->available)); - return; +} -failure: - pool->free_map[free_index] = index; - pool->skbuff[index] = NULL; - if (pool->consumer_index == 0) - pool->consumer_index = pool->size - 1; - else - pool->consumer_index--; - if (!dma_mapping_error(&adapter->vdev->dev, dma_addr)) - dma_unmap_single(&adapter->vdev->dev, - pool->dma_addr[index], pool->buff_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; +/* + * The final 8 bytes of the buffer list is a counter of frames dropped + * because there was not a buffer in the buffer list capable of holding + * the frame. 
+ */ +static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter) +{ + __be64 *p = adapter->buffer_list_addr + 4096 - 8; - mb(); - atomic_add(buffers_added, &(pool->available)); + adapter->rx_no_buffer = be64_to_cpup(p); } /* replenish routine */ @@ -308,8 +406,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) ibmveth_replenish_buffer_pool(adapter, pool); } - adapter->rx_no_buffer = *(u64 *)(((char*)adapter->buffer_list_addr) + - 4096 - 8); + ibmveth_update_rx_no_buffer(adapter); } /* empty and free ana buffer pool - also used to do cleanup in error paths */ @@ -346,28 +443,52 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, } } -/* remove a buffer from a pool */ -static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, - u64 correlator) +/** + * ibmveth_remove_buffer_from_pool - remove a buffer from a pool + * @adapter: adapter instance + * @correlator: identifies pool and index + * @reuse: whether to reuse buffer + * + * Return: + * * %0 - success + * * %-EINVAL - correlator maps to pool or index out of range + * * %-EFAULT - pool and index map to null skb + */ +static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, + u64 correlator, bool reuse) { unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; unsigned int free_index; struct sk_buff *skb; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return -EINVAL; + } skb = adapter->rx_buff_pool[pool].skbuff[index]; + if (WARN_ON(!skb)) { + schedule_work(&adapter->work); + return -EFAULT; + } - BUG_ON(skb == NULL); - - adapter->rx_buff_pool[pool].skbuff[index] = NULL; + /* if we are going to reuse the buffer then keep the pointers around + * but mark index as available. 
replenish will see the skb pointer and + * assume it is to be recycled. + */ + if (!reuse) { + /* remove the skb pointer to mark free. actual freeing is done + * by upper level networking after gro_recieve + */ + adapter->rx_buff_pool[pool].skbuff[index] = NULL; - dma_unmap_single(&adapter->vdev->dev, - adapter->rx_buff_pool[pool].dma_addr[index], - adapter->rx_buff_pool[pool].buff_size, - DMA_FROM_DEVICE); + dma_unmap_single(&adapter->vdev->dev, + adapter->rx_buff_pool[pool].dma_addr[index], + adapter->rx_buff_pool[pool].buff_size, + DMA_FROM_DEVICE); + } free_index = adapter->rx_buff_pool[pool].producer_index; adapter->rx_buff_pool[pool].producer_index++; @@ -379,6 +500,8 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, mb(); atomic_dec(&(adapter->rx_buff_pool[pool].available)); + + return 0; } /* get the current buffer on the rx queue */ @@ -388,112 +511,76 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return NULL; + } return adapter->rx_buff_pool[pool].skbuff[index]; } -/* recycle the current buffer on the rx queue */ -static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) +/** + * ibmveth_rxq_harvest_buffer - Harvest buffer from pool + * + * @adapter: pointer to adapter + * @reuse: whether to reuse buffer + * + * Context: called from ibmveth_poll + * + * Return: + * * %0 - success + * * other - non-zero return from ibmveth_remove_buffer_from_pool + */ +static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, + bool reuse) { - u32 q_index = adapter->rx_queue.index; - u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator; - unsigned int 
pool = correlator >> 32; - unsigned int index = correlator & 0xffffffffUL; - union ibmveth_buf_desc desc; - unsigned long lpar_rc; - int ret = 1; - - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); - - if (!adapter->rx_buff_pool[pool].active) { - ibmveth_rxq_harvest_buffer(adapter); - ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]); - goto out; - } - - desc.fields.flags_len = IBMVETH_BUF_VALID | - adapter->rx_buff_pool[pool].buff_size; - desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index]; - - lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); + u64 cor; + int rc; - if (lpar_rc != H_SUCCESS) { - netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed " - "during recycle rc=%ld", lpar_rc); - ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); - ret = 0; - } + cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; + rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + if (unlikely(rc)) + return rc; if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; adapter->rx_queue.toggle = !adapter->rx_queue.toggle; } -out: - return ret; + return 0; } -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) +static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx) { - ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); - - if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { - adapter->rx_queue.index = 0; - adapter->rx_queue.toggle = !adapter->rx_queue.toggle; - } + dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx], + adapter->tx_ltb_size, DMA_TO_DEVICE); + kfree(adapter->tx_ltb_ptr[idx]); + adapter->tx_ltb_ptr[idx] = NULL; } -static void ibmveth_cleanup(struct ibmveth_adapter *adapter) +static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter 
*adapter, int idx) { - int i; - struct device *dev = &adapter->vdev->dev; - - if (adapter->buffer_list_addr != NULL) { - if (!dma_mapping_error(dev, adapter->buffer_list_dma)) { - dma_unmap_single(dev, adapter->buffer_list_dma, 4096, - DMA_BIDIRECTIONAL); - adapter->buffer_list_dma = DMA_ERROR_CODE; - } - free_page((unsigned long)adapter->buffer_list_addr); - adapter->buffer_list_addr = NULL; - } - - if (adapter->filter_list_addr != NULL) { - if (!dma_mapping_error(dev, adapter->filter_list_dma)) { - dma_unmap_single(dev, adapter->filter_list_dma, 4096, - DMA_BIDIRECTIONAL); - adapter->filter_list_dma = DMA_ERROR_CODE; - } - free_page((unsigned long)adapter->filter_list_addr); - adapter->filter_list_addr = NULL; + adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size, + GFP_KERNEL); + if (!adapter->tx_ltb_ptr[idx]) { + netdev_err(adapter->netdev, + "unable to allocate tx long term buffer\n"); + return -ENOMEM; } - - if (adapter->rx_queue.queue_addr != NULL) { - dma_free_coherent(dev, adapter->rx_queue.queue_len, - adapter->rx_queue.queue_addr, - adapter->rx_queue.queue_dma); - adapter->rx_queue.queue_addr = NULL; + adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev, + adapter->tx_ltb_ptr[idx], + adapter->tx_ltb_size, + DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) { + netdev_err(adapter->netdev, + "unable to DMA map tx long term buffer\n"); + kfree(adapter->tx_ltb_ptr[idx]); + adapter->tx_ltb_ptr[idx] = NULL; + return -ENOMEM; } - for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) - if (adapter->rx_buff_pool[i].active) - ibmveth_free_buffer_pool(adapter, - &adapter->rx_buff_pool[i]); - - if (adapter->bounce_buffer != NULL) { - if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->bounce_buffer_dma, - adapter->netdev->mtu + IBMVETH_BUFF_OH, - DMA_BIDIRECTIONAL); - adapter->bounce_buffer_dma = DMA_ERROR_CODE; - } - kfree(adapter->bounce_buffer); - 
adapter->bounce_buffer = NULL; - } + return 0; } static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, @@ -526,7 +613,7 @@ retry: static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); - u64 mac_address = 0; + u64 mac_address; int rxq_entries = 1; unsigned long lpar_rc; int rc; @@ -541,14 +628,17 @@ static int ibmveth_open(struct net_device *netdev) for(i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) rxq_entries += adapter->rx_buff_pool[i].size; + rc = -ENOMEM; adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL); - adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL); + if (!adapter->buffer_list_addr) { + netdev_err(netdev, "unable to allocate list pages\n"); + goto out; + } - if (!adapter->buffer_list_addr || !adapter->filter_list_addr) { - netdev_err(netdev, "unable to allocate filter or buffer list " - "pages\n"); - rc = -ENOMEM; - goto err_out; + adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL); + if (!adapter->filter_list_addr) { + netdev_err(netdev, "unable to allocate filter pages\n"); + goto out_free_buffer_list; } dev = &adapter->vdev->dev; @@ -558,30 +648,33 @@ static int ibmveth_open(struct net_device *netdev) adapter->rx_queue.queue_addr = dma_alloc_coherent(dev, adapter->rx_queue.queue_len, &adapter->rx_queue.queue_dma, GFP_KERNEL); - if (!adapter->rx_queue.queue_addr) { - rc = -ENOMEM; - goto err_out; - } + if (!adapter->rx_queue.queue_addr) + goto out_free_filter_list; adapter->buffer_list_dma = dma_map_single(dev, adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, adapter->buffer_list_dma)) { + netdev_err(netdev, "unable to map buffer list pages\n"); + goto out_free_queue_mem; + } + adapter->filter_list_dma = dma_map_single(dev, adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, adapter->filter_list_dma)) { + netdev_err(netdev, "unable to map filter list pages\n"); + goto 
out_unmap_buffer_list; + } - if ((dma_mapping_error(dev, adapter->buffer_list_dma)) || - (dma_mapping_error(dev, adapter->filter_list_dma))) { - netdev_err(netdev, "unable to map filter or buffer list " - "pages\n"); - rc = -ENOMEM; - goto err_out; + for (i = 0; i < netdev->real_num_tx_queues; i++) { + if (ibmveth_allocate_tx_ltb(adapter, i)) + goto out_free_tx_ltb; } adapter->rx_queue.index = 0; adapter->rx_queue.num_slots = rxq_entries; adapter->rx_queue.toggle = 1; - memcpy(&mac_address, netdev->dev_addr, netdev->addr_len); - mac_address = mac_address >> 16; + mac_address = ether_addr_to_u64(netdev->dev_addr); rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | adapter->rx_queue.queue_len; @@ -605,7 +698,7 @@ static int ibmveth_open(struct net_device *netdev) rxq_desc.desc, mac_address); rc = -ENONET; - goto err_out; + goto out_unmap_filter_list; } for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { @@ -615,7 +708,7 @@ static int ibmveth_open(struct net_device *netdev) netdev_err(netdev, "unable to alloc pool\n"); adapter->rx_buff_pool[i].active = 0; rc = -ENOMEM; - goto err_out; + goto out_free_buffer_pools; } } @@ -629,37 +722,47 @@ static int ibmveth_open(struct net_device *netdev) lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY)); - goto err_out; + goto out_free_buffer_pools; } - adapter->bounce_buffer = - kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL); - if (!adapter->bounce_buffer) { - rc = -ENOMEM; - goto err_out_free_irq; - } - adapter->bounce_buffer_dma = - dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, - netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) { - netdev_err(netdev, "unable to map bounce buffer\n"); - rc = -ENOMEM; - goto err_out_free_irq; - } + rc = -ENOMEM; netdev_dbg(netdev, "initial replenish cycle\n"); ibmveth_interrupt(netdev->irq, netdev); - netif_start_queue(netdev); + netif_tx_start_all_queues(netdev); 
netdev_dbg(netdev, "open complete\n"); return 0; -err_out_free_irq: - free_irq(netdev->irq, netdev); -err_out: - ibmveth_cleanup(adapter); +out_free_buffer_pools: + while (--i >= 0) { + if (adapter->rx_buff_pool[i].active) + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); + } +out_unmap_filter_list: + dma_unmap_single(dev, adapter->filter_list_dma, 4096, + DMA_BIDIRECTIONAL); + +out_free_tx_ltb: + while (--i >= 0) { + ibmveth_free_tx_ltb(adapter, i); + } + +out_unmap_buffer_list: + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, + DMA_BIDIRECTIONAL); +out_free_queue_mem: + dma_free_coherent(dev, adapter->rx_queue.queue_len, + adapter->rx_queue.queue_addr, + adapter->rx_queue.queue_dma); +out_free_filter_list: + free_page((unsigned long)adapter->filter_list_addr); +out_free_buffer_list: + free_page((unsigned long)adapter->buffer_list_addr); +out: napi_disable(&adapter->napi); return rc; } @@ -667,14 +770,15 @@ err_out: static int ibmveth_close(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); + struct device *dev = &adapter->vdev->dev; long lpar_rc; + int i; netdev_dbg(netdev, "close starting\n"); napi_disable(&adapter->napi); - if (!adapter->pool_config) - netif_stop_queue(netdev); + netif_tx_stop_all_queues(netdev); h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); @@ -689,38 +793,97 @@ static int ibmveth_close(struct net_device *netdev) free_irq(netdev->irq, netdev); - adapter->rx_no_buffer = *(u64 *)(((char *)adapter->buffer_list_addr) + - 4096 - 8); + ibmveth_update_rx_no_buffer(adapter); + + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, + DMA_BIDIRECTIONAL); + free_page((unsigned long)adapter->buffer_list_addr); + + dma_unmap_single(dev, adapter->filter_list_dma, 4096, + DMA_BIDIRECTIONAL); + free_page((unsigned long)adapter->filter_list_addr); + + dma_free_coherent(dev, adapter->rx_queue.queue_len, + adapter->rx_queue.queue_addr, + adapter->rx_queue.queue_dma); + + for (i = 0; i < 
IBMVETH_NUM_BUFF_POOLS; i++) + if (adapter->rx_buff_pool[i].active) + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); - ibmveth_cleanup(adapter); + for (i = 0; i < netdev->real_num_tx_queues; i++) + ibmveth_free_tx_ltb(adapter, i); netdev_dbg(netdev, "close complete\n"); return 0; } -static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +/** + * ibmveth_reset - Handle scheduled reset work + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: This routine acquires rtnl_mutex and disables its NAPI through + * ibmveth_close. It can't be called directly in a context that has + * already acquired rtnl_mutex or disabled its NAPI, or directly from + * a poll routine. + * + * Return: void + */ +static void ibmveth_reset(struct work_struct *w) +{ + struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work); + struct net_device *netdev = adapter->netdev; + + netdev_dbg(netdev, "reset starting\n"); + + rtnl_lock(); + + dev_close(adapter->netdev); + dev_open(adapter->netdev, NULL); + + rtnl_unlock(); + + netdev_dbg(netdev, "reset complete\n"); +} + +static int ibmveth_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { - cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | - SUPPORTED_FIBRE); - cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | - ADVERTISED_FIBRE); - ethtool_cmd_speed_set(cmd, SPEED_1000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_FIBRE; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_ENABLE; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 1; + struct ibmveth_adapter *adapter = netdev_priv(dev); + + return ethtool_virtdev_set_link_ksettings(dev, cmd, + &adapter->speed, + &adapter->duplex); +} + +static int ibmveth_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + + cmd->base.speed = 
adapter->speed; + cmd->base.duplex = adapter->duplex; + cmd->base.port = PORT_OTHER; + return 0; } +static void ibmveth_init_link_settings(struct net_device *dev) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + + adapter->speed = SPEED_1000; + adapter->duplex = DUPLEX_FULL; +} + static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver)); - strlcpy(info->version, ibmveth_driver_version, sizeof(info->version)); + strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver)); + strscpy(info->version, ibmveth_driver_version, sizeof(info->version)); } static netdev_features_t ibmveth_fix_features(struct net_device *dev, @@ -736,7 +899,7 @@ static netdev_features_t ibmveth_fix_features(struct net_device *dev, */ if (!(features & NETIF_F_RXCSUM)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } @@ -752,9 +915,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) if (netif_running(dev)) { restart = 1; - adapter->pool_config = 1; ibmveth_close(dev); - adapter->pool_config = 0; } set_attr = 0; @@ -772,8 +933,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr); - if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) && - !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) && + if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) { ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr, set_attr, &ret_attr); @@ -827,21 +987,90 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) return rc1 ? 
rc1 : rc2; } +static int ibmveth_set_tso(struct net_device *dev, u32 data) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + unsigned long set_attr, clr_attr, ret_attr; + long ret1, ret2; + int rc1 = 0, rc2 = 0; + int restart = 0; + + if (netif_running(dev)) { + restart = 1; + ibmveth_close(dev); + } + + set_attr = 0; + clr_attr = 0; + + if (data) + set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED; + else + clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED; + + ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr); + + if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) && + !old_large_send) { + ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr, + set_attr, &ret_attr); + + if (ret2 != H_SUCCESS) { + netdev_err(dev, "unable to change tso settings. %d rc=%ld\n", + data, ret2); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr, clr_attr, &ret_attr); + + if (data == 1) + dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + rc1 = -EIO; + + } else { + adapter->fw_large_send_support = data; + adapter->large_send = data; + } + } else { + /* Older firmware version of large send offload does not + * support tcp6/ipv6 + */ + if (data == 1) { + dev->features &= ~NETIF_F_TSO6; + netdev_info(dev, "TSO feature requires all partitions to have updated driver"); + } + adapter->large_send = data; + } + + if (restart) + rc2 = ibmveth_open(dev); + + return rc1 ? 
rc1 : rc2; +} + static int ibmveth_set_features(struct net_device *dev, netdev_features_t features) { struct ibmveth_adapter *adapter = netdev_priv(dev); int rx_csum = !!(features & NETIF_F_RXCSUM); - int rc; + int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6)); + int rc1 = 0, rc2 = 0; - if (rx_csum == adapter->rx_csum) - return 0; + if (rx_csum != adapter->rx_csum) { + rc1 = ibmveth_set_csum_offload(dev, rx_csum); + if (rc1 && !adapter->rx_csum) + dev->features = + features & ~(NETIF_F_CSUM_MASK | + NETIF_F_RXCSUM); + } - rc = ibmveth_set_csum_offload(dev, rx_csum); - if (rc && !adapter->rx_csum) - dev->features = features & ~(NETIF_F_ALL_CSUM | NETIF_F_RXCSUM); + if (large_send != adapter->large_send) { + rc2 = ibmveth_set_tso(dev, large_send); + if (rc2 && !adapter->large_send) + dev->features = + features & ~(NETIF_F_TSO | NETIF_F_TSO6); + } - return rc; + return rc1 ? rc1 : rc2; } static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -875,13 +1104,79 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev, data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset); } +static void ibmveth_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + channels->max_tx = ibmveth_real_max_tx_queues(); + channels->tx_count = netdev->real_num_tx_queues; + + channels->max_rx = netdev->real_num_rx_queues; + channels->rx_count = netdev->real_num_rx_queues; +} + +static int ibmveth_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned int old = netdev->real_num_tx_queues, + goal = channels->tx_count; + int rc, i; + + /* If ndo_open has not been called yet then don't allocate, just set + * desired netdev_queue's and return + */ + if (!(netdev->flags & IFF_UP)) + return netif_set_real_num_tx_queues(netdev, goal); + + /* We have IBMVETH_MAX_QUEUES netdev_queue's allocated + * but we may need to alloc/free the 
ltb's. + */ + netif_tx_stop_all_queues(netdev); + + /* Allocate any queue that we need */ + for (i = old; i < goal; i++) { + if (adapter->tx_ltb_ptr[i]) + continue; + + rc = ibmveth_allocate_tx_ltb(adapter, i); + if (!rc) + continue; + + /* if something goes wrong, free everything we just allocated */ + netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n", + old); + goal = old; + old = i; + break; + } + rc = netif_set_real_num_tx_queues(netdev, goal); + if (rc) { + netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n", + old); + goal = old; + old = i; + } + /* Free any that are no longer needed */ + for (i = old; i > goal; i--) { + if (adapter->tx_ltb_ptr[i - 1]) + ibmveth_free_tx_ltb(adapter, i - 1); + } + + netif_tx_wake_all_queues(netdev); + + return rc; +} + static const struct ethtool_ops netdev_ethtool_ops = { - .get_drvinfo = netdev_get_drvinfo, - .get_settings = netdev_get_settings, - .get_link = ethtool_op_get_link, - .get_strings = ibmveth_get_strings, - .get_sset_count = ibmveth_get_sset_count, - .get_ethtool_stats = ibmveth_get_ethtool_stats, + .get_drvinfo = netdev_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = ibmveth_get_strings, + .get_sset_count = ibmveth_get_sset_count, + .get_ethtool_stats = ibmveth_get_ethtool_stats, + .get_link_ksettings = ibmveth_get_link_ksettings, + .set_link_ksettings = ibmveth_set_link_ksettings, + .get_channels = ibmveth_get_channels, + .set_channels = ibmveth_set_channels }; static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -889,10 +1184,8 @@ static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } -#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1)) - static int ibmveth_send(struct ibmveth_adapter *adapter, - union ibmveth_buf_desc *descs) + unsigned long desc, unsigned long mss) { unsigned long correlator; unsigned int retry_count; @@ -905,11 +1198,9 @@ static int 
ibmveth_send(struct ibmveth_adapter *adapter, retry_count = 1024; correlator = 0; do { - ret = h_send_logical_lan(adapter->vdev->unit_address, - descs[0].desc, descs[1].desc, - descs[2].desc, descs[3].desc, - descs[4].desc, descs[5].desc, - correlator, &correlator); + ret = h_send_logical_lan(adapter->vdev->unit_address, desc, + correlator, &correlator, mss, + adapter->fw_large_send_support); } while ((ret == H_BUSY) && (retry_count--)); if (ret != H_SUCCESS && ret != H_DROPPED) { @@ -921,25 +1212,34 @@ static int ibmveth_send(struct ibmveth_adapter *adapter, return 0; } +static int ibmveth_is_packet_unsupported(struct sk_buff *skb, + struct net_device *netdev) +{ + struct ethhdr *ether_header; + int ret = 0; + + ether_header = eth_hdr(skb); + + if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) { + netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n"); + netdev->stats.tx_dropped++; + ret = -EOPNOTSUPP; + } + + return ret; +} + static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); - unsigned int desc_flags; - union ibmveth_buf_desc descs[6]; - int last, i; - int force_bounce = 0; - dma_addr_t dma_addr; + unsigned int desc_flags, total_bytes; + union ibmveth_buf_desc desc; + int i, queue_num = skb_get_queue_mapping(skb); + unsigned long mss = 0; - /* - * veth handles a maximum of 6 segments including the header, so - * we have to linearize the skb if there are more than this. 
- */ - if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) { - netdev->stats.tx_dropped++; + if (ibmveth_is_packet_unsupported(skb, netdev)) goto out; - } - /* veth can't checksum offload UDP */ if (skb->ip_summed == CHECKSUM_PARTIAL && ((skb->protocol == htons(ETH_P_IP) && @@ -964,59 +1264,57 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, /* Need to zero out the checksum */ buf[0] = 0; buf[1] = 0; - } - -retry_bounce: - memset(descs, 0, sizeof(descs)); - - /* - * If a linear packet is below the rx threshold then - * copy it into the static bounce buffer. This avoids the - * cost of a TCE insert and remove. - */ - if (force_bounce || (!skb_is_nonlinear(skb) && - (skb->len < tx_copybreak))) { - skb_copy_from_linear_data(skb, adapter->bounce_buffer, - skb->len); - descs[0].fields.flags_len = desc_flags | skb->len; - descs[0].fields.address = adapter->bounce_buffer_dma; + if (skb_is_gso(skb) && adapter->fw_large_send_support) + desc_flags |= IBMVETH_BUF_LRG_SND; + } - if (ibmveth_send(adapter, descs)) { - adapter->tx_send_failed++; - netdev->stats.tx_dropped++; - } else { - netdev->stats.tx_packets++; - netdev->stats.tx_bytes += skb->len; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) { + if (adapter->fw_large_send_support) { + mss = (unsigned long)skb_shinfo(skb)->gso_size; + adapter->tx_large_packets++; + } else if (!skb_is_gso_v6(skb)) { + /* Put -1 in the IP checksum to tell phyp it + * is a largesend packet. Put the mss in + * the TCP checksum. 
+ */ + ip_hdr(skb)->check = 0xffff; + tcp_hdr(skb)->check = + cpu_to_be16(skb_shinfo(skb)->gso_size); + adapter->tx_large_packets++; } + } + /* Copy header into mapped buffer */ + if (unlikely(skb->len > adapter->tx_ltb_size)) { + netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n", + skb->len, adapter->tx_ltb_size); + netdev->stats.tx_dropped++; goto out; } - - /* Map the header */ - dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, - skb_headlen(skb), DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) - goto map_failed; - - descs[0].fields.flags_len = desc_flags | skb_headlen(skb); - descs[0].fields.address = dma_addr; - - /* Map the frags */ + memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb)); + total_bytes = skb_headlen(skb); + /* Copy frags into mapped buffers */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0, - skb_frag_size(frag), DMA_TO_DEVICE); - - if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) - goto map_failed_frags; + memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes, + skb_frag_address_safe(frag), skb_frag_size(frag)); + total_bytes += skb_frag_size(frag); + } - descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag); - descs[i+1].fields.address = dma_addr; + if (unlikely(total_bytes != skb->len)) { + netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n", + skb->len, total_bytes); + netdev->stats.tx_dropped++; + goto out; } + desc.fields.flags_len = desc_flags | skb->len; + desc.fields.address = adapter->tx_ltb_dma[queue_num]; + /* finish writing to long_term_buff before VIOS accessing it */ + dma_wmb(); - if (ibmveth_send(adapter, descs)) { + if (ibmveth_send(adapter, desc.desc, mss)) { adapter->tx_send_failed++; netdev->stats.tx_dropped++; } else { @@ -1024,34 +1322,127 @@ retry_bounce: netdev->stats.tx_bytes += skb->len; } - 
dma_unmap_single(&adapter->vdev->dev, - descs[0].fields.address, - descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, - DMA_TO_DEVICE); - - for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) - dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, - descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, - DMA_TO_DEVICE); - out: - dev_kfree_skb(skb); + dev_consume_skb_any(skb); return NETDEV_TX_OK; -map_failed_frags: - last = i+1; - for (i = 0; i < last; i++) - dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, - descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, - DMA_TO_DEVICE); - -map_failed: - if (!firmware_has_feature(FW_FEATURE_CMO)) - netdev_err(netdev, "tx: unable to map xmit buffer\n"); - adapter->tx_map_failed++; - skb_linearize(skb); - force_bounce = 1; - goto retry_bounce; + +} + +static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) +{ + struct tcphdr *tcph; + int offset = 0; + int hdr_len; + + /* only TCP packets will be aggregated */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol == IPPROTO_TCP) { + offset = iph->ihl * 4; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } else { + return; + } + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data; + + if (iph6->nexthdr == IPPROTO_TCP) { + offset = sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + } else { + return; + } + } else { + return; + } + /* if mss is not set through Large Packet bit/mss in rx buffer, + * expect that the mss will be written to the tcp header checksum. 
+ */ + tcph = (struct tcphdr *)(skb->data + offset); + if (lrg_pkt) { + skb_shinfo(skb)->gso_size = mss; + } else if (offset) { + skb_shinfo(skb)->gso_size = ntohs(tcph->check); + tcph->check = 0; + } + + if (skb_shinfo(skb)->gso_size) { + hdr_len = offset + tcph->doff * 4; + skb_shinfo(skb)->gso_segs = + DIV_ROUND_UP(skb->len - hdr_len, + skb_shinfo(skb)->gso_size); + } +} + +static void ibmveth_rx_csum_helper(struct sk_buff *skb, + struct ibmveth_adapter *adapter) +{ + struct iphdr *iph = NULL; + struct ipv6hdr *iph6 = NULL; + __be16 skb_proto = 0; + u16 iphlen = 0; + u16 iph_proto = 0; + u16 tcphdrlen = 0; + + skb_proto = be16_to_cpu(skb->protocol); + + if (skb_proto == ETH_P_IP) { + iph = (struct iphdr *)skb->data; + + /* If the IP checksum is not offloaded and if the packet + * is large send, the checksum must be rebuilt. + */ + if (iph->check == 0xffff) { + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, + iph->ihl); + } + + iphlen = iph->ihl * 4; + iph_proto = iph->protocol; + } else if (skb_proto == ETH_P_IPV6) { + iph6 = (struct ipv6hdr *)skb->data; + iphlen = sizeof(struct ipv6hdr); + iph_proto = iph6->nexthdr; + } + + /* When CSO is enabled the TCP checksum may have be set to NULL by + * the sender given that we zeroed out TCP checksum field in + * transmit path (refer ibmveth_start_xmit routine). In this case set + * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will + * then be recalculated by the destination NIC (CSO must be enabled + * on the destination NIC). + * + * In an OVS environment, when a flow is not cached, specifically for a + * new TCP connection, the first packet information is passed up to + * the user space for finding a flow. During this process, OVS computes + * checksum on the first packet when CHECKSUM_PARTIAL flag is set. + * + * So, re-compute TCP pseudo header checksum. 
+ */ + + if (iph_proto == IPPROTO_TCP) { + struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen); + + if (tcph->check == 0x0000) { + /* Recompute TCP pseudo header checksum */ + tcphdrlen = skb->len - iphlen; + if (skb_proto == ETH_P_IP) + tcph->check = + ~csum_tcpudp_magic(iph->saddr, + iph->daddr, tcphdrlen, iph_proto, 0); + else if (skb_proto == ETH_P_IPV6) + tcph->check = + ~csum_ipv6_magic(&iph6->saddr, + &iph6->daddr, tcphdrlen, iph_proto, 0); + /* Setup SKB fields for checksum offload */ + skb_partial_csum_set(skb, iphlen, + offsetof(struct tcphdr, check)); + skb_reset_network_header(skb); + } + } } static int ibmveth_poll(struct napi_struct *napi, int budget) @@ -1061,9 +1452,10 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) struct net_device *netdev = adapter->netdev; int frames_processed = 0; unsigned long lpar_rc; + u16 mss = 0; restart_poll: - do { + while (frames_processed < budget) { if (!ibmveth_rxq_pending_buffer(adapter)) break; @@ -1072,14 +1464,30 @@ restart_poll: wmb(); /* suggested by larson1 */ adapter->rx_invalid_buffer++; netdev_dbg(netdev, "recycling invalid buffer\n"); - ibmveth_rxq_recycle_buffer(adapter); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; } else { struct sk_buff *skb, *new_skb; int length = ibmveth_rxq_frame_length(adapter); int offset = ibmveth_rxq_frame_offset(adapter); int csum_good = ibmveth_rxq_csum_good(adapter); + int lrg_pkt = ibmveth_rxq_large_packet(adapter); + __sum16 iph_check = 0; skb = ibmveth_rxq_get_buffer(adapter); + if (unlikely(!skb)) + break; + + /* if the large packet bit is set in the rx queue + * descriptor, the mss will be written by PHYP eight + * bytes from the start of the rx buffer, which is + * skb->data at this stage + */ + if (lrg_pkt) { + __be64 *rxmss = (__be64 *)(skb->data + 8); + + mss = (u16)be64_to_cpu(*rxmss); + } new_skb = NULL; if (length < rx_copybreak) @@ -1092,49 +1500,70 @@ restart_poll: if (rx_flush) ibmveth_flush_buffer(skb->data, 
length + offset); - if (!ibmveth_rxq_recycle_buffer(adapter)) - kfree_skb(skb); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; skb = new_skb; } else { - ibmveth_rxq_harvest_buffer(adapter); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false))) + break; skb_reserve(skb, offset); } skb_put(skb, length); skb->protocol = eth_type_trans(skb, netdev); - if (csum_good) + /* PHYP without PLSO support places a -1 in the ip + * checksum for large send frames. + */ + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + iph_check = iph->check; + } + + if ((length > netdev->mtu + ETH_HLEN) || + lrg_pkt || iph_check == 0xffff) { + ibmveth_rx_mss_helper(skb, mss, lrg_pkt); + adapter->rx_large_packets++; + } + + if (csum_good) { skb->ip_summed = CHECKSUM_UNNECESSARY; + ibmveth_rx_csum_helper(skb, adapter); + } - netif_receive_skb(skb); /* send it up */ + napi_gro_receive(napi, skb); /* send it up */ netdev->stats.rx_packets++; netdev->stats.rx_bytes += length; frames_processed++; } - } while (frames_processed < budget); + } ibmveth_replenish_task(adapter); - if (frames_processed < budget) { - /* We think we are done - reenable interrupts, - * then check once more to make sure we are done. - */ - lpar_rc = h_vio_signal(adapter->vdev->unit_address, - VIO_IRQ_ENABLE); + if (frames_processed == budget) + goto out; - BUG_ON(lpar_rc != H_SUCCESS); + if (!napi_complete_done(napi, frames_processed)) + goto out; - napi_complete(napi); + /* We think we are done - reenable interrupts, + * then check once more to make sure we are done. 
+ */ + lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE); + if (WARN_ON(lpar_rc != H_SUCCESS)) { + schedule_work(&adapter->work); + goto out; + } - if (ibmveth_rxq_pending_buffer(adapter) && - napi_reschedule(napi)) { - lpar_rc = h_vio_signal(adapter->vdev->unit_address, - VIO_IRQ_DISABLE); - goto restart_poll; - } + if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + goto restart_poll; } +out: return frames_processed; } @@ -1147,7 +1576,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) if (napi_schedule_prep(&adapter->napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - BUG_ON(lpar_rc != H_SUCCESS); + WARN_ON(lpar_rc != H_SUCCESS); __napi_schedule(&adapter->napi); } return IRQ_HANDLED; @@ -1184,8 +1613,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) /* add the addresses to the filter table */ netdev_for_each_mc_addr(ha, netdev) { /* add the multicast address to the filter table */ - unsigned long mcast_addr = 0; - memcpy(((char *)&mcast_addr)+2, ha->addr, 6); + u64 mcast_addr; + mcast_addr = ether_addr_to_u64(ha->addr); lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, IbmVethMcastAddFilter, mcast_addr); @@ -1215,11 +1644,8 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) int i, rc; int need_restart = 0; - if (new_mtu < IBMVETH_MIN_MTU) - return -EINVAL; - for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) - if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) + if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) break; if (i == IBMVETH_NUM_BUFF_POOLS) @@ -1229,17 +1655,15 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) only the buffer pools necessary to hold the new MTU */ if (netif_running(adapter->netdev)) { need_restart = 1; - adapter->pool_config = 1; ibmveth_close(adapter->netdev); - adapter->pool_config = 0; } /* Look for an active 
buffer pool that can hold the new MTU */ for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { adapter->rx_buff_pool[i].active = 1; - if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { - dev->mtu = new_mtu; + if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) { + WRITE_ONCE(dev->mtu, new_mtu); vio_cmo_set_dev_desired(viodev, ibmveth_get_desired_dma (viodev)); @@ -1276,18 +1700,23 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) { struct net_device *netdev = dev_get_drvdata(&vdev->dev); struct ibmveth_adapter *adapter; + struct iommu_table *tbl; unsigned long ret; int i; int rxqentries = 1; + tbl = get_iommu_table_base(&vdev->dev); + /* netdev inits at probe time along with the structures we need below*/ if (netdev == NULL) - return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT); + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl); adapter = netdev_priv(netdev); ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; - ret += IOMMU_PAGE_ALIGN(netdev->mtu); + ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl); + /* add size of mapped tx buffers */ + ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { /* add the size of the active receive buffers */ @@ -1295,26 +1724,49 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) ret += adapter->rx_buff_pool[i].size * IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. 
- buff_size); + buff_size, tbl); rxqentries += adapter->rx_buff_pool[i].size; } /* add the size of the receive queue entries */ - ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry)); + ret += IOMMU_PAGE_ALIGN( + rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl); return ret; } +static int ibmveth_set_mac_addr(struct net_device *dev, void *p) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + struct sockaddr *addr = p; + u64 mac_address; + int rc; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mac_address = ether_addr_to_u64(addr->sa_data); + rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address); + if (rc) { + netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc); + return rc; + } + + eth_hw_addr_set(dev, addr->sa_data); + + return 0; +} + static const struct net_device_ops ibmveth_netdev_ops = { .ndo_open = ibmveth_open, .ndo_stop = ibmveth_close, .ndo_start_xmit = ibmveth_start_xmit, .ndo_set_rx_mode = ibmveth_set_multicast_list, - .ndo_do_ioctl = ibmveth_ioctl, + .ndo_eth_ioctl = ibmveth_ioctl, .ndo_change_mtu = ibmveth_change_mtu, .ndo_fix_features = ibmveth_fix_features, .ndo_set_features = ibmveth_set_features, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ibmveth_set_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ibmveth_poll_controller, #endif @@ -1326,7 +1778,9 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) struct net_device *netdev; struct ibmveth_adapter *adapter; unsigned char *mac_addr_p; - unsigned int *mcastFilterSize_p; + __be32 *mcastFilterSize_p; + long ret; + unsigned long ret_attr; dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n", dev->unit_address); @@ -1346,16 +1800,16 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return -EINVAL; } - mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev, 
- VETH_MCAST_FILTER_SIZE, NULL); + mcastFilterSize_p = (__be32 *)vio_get_attribute(dev, + VETH_MCAST_FILTER_SIZE, + NULL); if (!mcastFilterSize_p) { dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE " "attribute\n"); return -EINVAL; } - netdev = alloc_etherdev(sizeof(struct ibmveth_adapter)); - + netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1); if (!netdev) return -ENOMEM; @@ -1364,23 +1818,62 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->vdev = dev; adapter->netdev = netdev; - adapter->mcastFilterSize = *mcastFilterSize_p; - adapter->pool_config = 0; + INIT_WORK(&adapter->work, ibmveth_reset); + adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); + ibmveth_init_link_settings(netdev); - netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); - - adapter->mac_addr = 0; - memcpy(&adapter->mac_addr, mac_addr_p, 6); + netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16); netdev->irq = dev->irq; netdev->netdev_ops = &ibmveth_netdev_ops; netdev->ethtool_ops = &netdev_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + netdev->hw_features = NETIF_F_SG; + if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM; + } + netdev->features |= netdev->hw_features; - memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len); + ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr); + + /* If running older firmware, TSO should not be enabled by default */ + if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) && + !old_large_send) { + netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + netdev->features |= netdev->hw_features; + } else { + netdev->hw_features |= NETIF_F_TSO; + } + + adapter->is_active_trunk = false; + if (ret == H_SUCCESS && (ret_attr & 
IBMVETH_ILLAN_ACTIVE_TRUNK)) { + adapter->is_active_trunk = true; + netdev->hw_features |= NETIF_F_FRAGLIST; + netdev->features |= NETIF_F_FRAGLIST; + } + + if (ret == H_SUCCESS && + (ret_attr & IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT)) { + adapter->rx_buffers_per_hcall = IBMVETH_MAX_RX_PER_HCALL; + netdev_dbg(netdev, + "RX Multi-buffer hcall supported by FW, batch set to %u\n", + adapter->rx_buffers_per_hcall); + } else { + adapter->rx_buffers_per_hcall = 1; + netdev_dbg(netdev, + "RX Single-buffer hcall mode, batch set to %u\n", + adapter->rx_buffers_per_hcall); + } + + netdev->min_mtu = IBMVETH_MIN_MTU; + netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; + + eth_hw_addr_set(netdev, mac_addr_p); + + if (firmware_has_feature(FW_FEATURE_CMO)) + memcpy(pool_count, pool_count_cmo, sizeof(pool_count)); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; @@ -1395,12 +1888,19 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) kobject_uevent(kobj, KOBJ_ADD); } - netdev_dbg(netdev, "adapter @ 0x%p\n", adapter); - - adapter->buffer_list_dma = DMA_ERROR_CODE; - adapter->filter_list_dma = DMA_ERROR_CODE; - adapter->rx_queue.queue_dma = DMA_ERROR_CODE; + rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(), + IBMVETH_DEFAULT_QUEUES)); + if (rc) { + netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n", + rc); + free_netdev(netdev); + return rc; + } + adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE); + for (i = 0; i < IBMVETH_MAX_QUEUES; i++) + adapter->tx_ltb_ptr[i] = NULL; + netdev_dbg(netdev, "adapter @ 0x%p\n", adapter); netdev_dbg(netdev, "registering netdev...\n"); ibmveth_set_features(netdev, netdev->features); @@ -1418,12 +1918,14 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return 0; } -static int ibmveth_remove(struct vio_dev *dev) +static void ibmveth_remove(struct vio_dev *dev) { struct net_device *netdev = 
dev_get_drvdata(&dev->dev); struct ibmveth_adapter *adapter = netdev_priv(netdev); int i; + cancel_work_sync(&adapter->work); + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) kobject_put(&adapter->rx_buff_pool[i].kobj); @@ -1431,8 +1933,6 @@ static int ibmveth_remove(struct vio_dev *dev) free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static struct attribute veth_active_attr; @@ -1455,36 +1955,59 @@ static ssize_t veth_pool_show(struct kobject *kobj, return 0; } +/** + * veth_pool_store - sysfs store handler for pool attributes + * @kobj: kobject embedded in pool + * @attr: attribute being changed + * @buf: value being stored + * @count: length of @buf in bytes + * + * Stores new value in pool attribute. Verifies the range of the new value for + * size and buff_size. Verifies that at least one pool remains available to + * receive MTU-sized packets. + * + * Context: Process context. + * Takes and releases rtnl_mutex to ensure correct ordering of close + * and open calls. 
+ * Return: + * * %-EPERM - Not allowed to disabled all MTU-sized buffer pools + * * %-EINVAL - New pool size or buffer size is out of range + * * count - Return count for success + * * other - Return value from a failed ibmveth_open call + */ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct ibmveth_buff_pool *pool = container_of(kobj, struct ibmveth_buff_pool, kobj); - struct net_device *netdev = dev_get_drvdata( - container_of(kobj->parent, struct device, kobj)); + struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent)); struct ibmveth_adapter *adapter = netdev_priv(netdev); long value = simple_strtol(buf, NULL, 10); + bool change = false; + u32 newbuff_size; + u32 oldbuff_size; + int newactive; + int oldactive; + u32 newsize; + u32 oldsize; long rc; + rtnl_lock(); + + oldbuff_size = pool->buff_size; + oldactive = pool->active; + oldsize = pool->size; + + newbuff_size = oldbuff_size; + newactive = oldactive; + newsize = oldsize; + if (attr == &veth_active_attr) { - if (value && !pool->active) { - if (netif_running(netdev)) { - if (ibmveth_alloc_buffer_pool(pool)) { - netdev_err(netdev, - "unable to alloc pool\n"); - return -ENOMEM; - } - pool->active = 1; - adapter->pool_config = 1; - ibmveth_close(netdev); - adapter->pool_config = 0; - if ((rc = ibmveth_open(netdev))) - return rc; - } else { - pool->active = 1; - } - } else if (!value && pool->active) { + if (value && !oldactive) { + newactive = 1; + change = true; + } else if (!value && oldactive) { int mtu = netdev->mtu + IBMVETH_BUFF_OH; int i; /* Make sure there is a buffer pool with buffers that @@ -1500,54 +2023,60 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, if (i == IBMVETH_NUM_BUFF_POOLS) { netdev_err(netdev, "no active pool >= MTU\n"); - return -EPERM; + rc = -EPERM; + goto unlock_err; } - if (netif_running(netdev)) { - adapter->pool_config = 1; - ibmveth_close(netdev); - pool->active = 
0; - adapter->pool_config = 0; - if ((rc = ibmveth_open(netdev))) - return rc; - } - pool->active = 0; + newactive = 0; + change = true; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) { - return -EINVAL; - } else { - if (netif_running(netdev)) { - adapter->pool_config = 1; - ibmveth_close(netdev); - adapter->pool_config = 0; - pool->size = value; - if ((rc = ibmveth_open(netdev))) - return rc; - } else { - pool->size = value; - } + rc = -EINVAL; + goto unlock_err; + } + if (value != oldsize) { + newsize = value; + change = true; } } else if (attr == &veth_size_attr) { if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) { - return -EINVAL; - } else { - if (netif_running(netdev)) { - adapter->pool_config = 1; - ibmveth_close(netdev); - adapter->pool_config = 0; - pool->buff_size = value; - if ((rc = ibmveth_open(netdev))) - return rc; - } else { - pool->buff_size = value; + rc = -EINVAL; + goto unlock_err; + } + if (value != oldbuff_size) { + newbuff_size = value; + change = true; + } + } + + if (change) { + if (netif_running(netdev)) + ibmveth_close(netdev); + + pool->active = newactive; + pool->buff_size = newbuff_size; + pool->size = newsize; + + if (netif_running(netdev)) { + rc = ibmveth_open(netdev); + if (rc) { + pool->active = oldactive; + pool->buff_size = oldbuff_size; + pool->size = oldsize; + goto unlock_err; } } } + rtnl_unlock(); /* kick the interrupt handler to allocate/deallocate pools */ ibmveth_interrupt(netdev->irq, netdev); return count; + +unlock_err: + rtnl_unlock(); + return rc; } @@ -1566,6 +2095,7 @@ static struct attribute *veth_pool_attrs[] = { &veth_size_attr, NULL, }; +ATTRIBUTE_GROUPS(veth_pool); static const struct sysfs_ops veth_pool_ops = { .show = veth_pool_show, @@ -1575,7 +2105,7 @@ static const struct sysfs_ops veth_pool_ops = { static struct kobj_type ktype_veth_pool = { .release = NULL, .sysfs_ops = &veth_pool_ops, - .default_attrs = veth_pool_attrs, + .default_groups = 
veth_pool_groups, }; static int ibmveth_resume(struct device *dev) @@ -1585,13 +2115,13 @@ static int ibmveth_resume(struct device *dev) return 0; } -static struct vio_device_id ibmveth_device_table[] = { +static const struct vio_device_id ibmveth_device_table[] = { { "network", "IBM,l-lan"}, { "", "" } }; MODULE_DEVICE_TABLE(vio, ibmveth_device_table); -static struct dev_pm_ops ibmveth_pm_ops = { +static const struct dev_pm_ops ibmveth_pm_ops = { .resume = ibmveth_resume }; @@ -1619,3 +2149,132 @@ static void __exit ibmveth_module_exit(void) module_init(ibmveth_module_init); module_exit(ibmveth_module_exit); + +#ifdef CONFIG_IBMVETH_KUNIT_TEST +#include <kunit/test.h> + +/** + * ibmveth_reset_kunit - reset routine for running in KUnit environment + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: Called in the KUnit environment. Does nothing. + * + * Return: void + */ +static void ibmveth_reset_kunit(struct work_struct *w) +{ + netdev_dbg(NULL, "reset_kunit starting\n"); + netdev_dbg(NULL, "reset_kunit complete\n"); +} + +/** + * ibmveth_remove_buffer_from_pool_test - unit test for some of + * ibmveth_remove_buffer_from_pool + * @test: pointer to kunit structure + * + * Tests the error returns from ibmveth_remove_buffer_from_pool. + * ibmveth_remove_buffer_from_pool also calls WARN_ON, so dmesg should be + * checked to see that these warnings happened. 
+ * + * Return: void + */ +static void ibmveth_remove_buffer_from_pool_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + u64 correlator; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + correlator = ((u64)IBMVETH_NUM_BUFF_POOLS << 32) | 0; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = ((u64)0 << 32) | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = (u64)0 | 0; + pool->skbuff[0] = NULL; + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + flush_work(&adapter->work); +} + +/** + * ibmveth_rxq_get_buffer_test - unit test for ibmveth_rxq_get_buffer + * @test: pointer to kunit structure + * + * Tests ibmveth_rxq_get_buffer. ibmveth_rxq_get_buffer also calls WARN_ON for + * the NULL returns, so dmesg should be checked to see that these warnings + * happened. 
+ * + * Return: void + */ +static void ibmveth_rxq_get_buffer_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct sk_buff *skb = kunit_kzalloc(test, sizeof(*skb), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + adapter->rx_queue.queue_len = 1; + adapter->rx_queue.index = 0; + adapter->rx_queue.queue_addr = kunit_kzalloc(test, sizeof(struct ibmveth_rx_q_entry), + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter->rx_queue.queue_addr); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + adapter->rx_queue.queue_addr[0].correlator = (u64)IBMVETH_NUM_BUFF_POOLS << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + pool->skbuff[0] = skb; + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, skb, ibmveth_rxq_get_buffer(adapter)); + + flush_work(&adapter->work); +} + +static struct kunit_case ibmveth_test_cases[] = { + KUNIT_CASE(ibmveth_remove_buffer_from_pool_test), + KUNIT_CASE(ibmveth_rxq_get_buffer_test), + {} +}; + +static struct kunit_suite ibmveth_test_suite = { + .name = "ibmveth-kunit-test", + .test_cases = ibmveth_test_cases, +}; + +kunit_test_suite(ibmveth_test_suite); +#endif diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 43a794fab9ff..068f99df133e 
100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -1,20 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * IBM Power Virtual Ethernet Device Driver * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2003, 2010 * * Authors: Dave Larson <larson1@us.ibm.com> @@ -41,6 +28,9 @@ #define IbmVethMcastRemoveFilter 0x2UL #define IbmVethMcastClearFilterTable 0x3UL +#define IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT 0x0000000000040000UL +#define IBMVETH_ILLAN_LRG_SR_ENABLED 0x0000000000010000UL +#define IBMVETH_ILLAN_LRG_SND_SUPPORT 0x0000000000008000UL #define IBMVETH_ILLAN_PADDED_PKT_CSUM 0x0000000000002000UL #define IBMVETH_ILLAN_TRUNK_PRI_MASK 0x0000000000000F00UL #define IBMVETH_ILLAN_IPV6_TCP_CSUM 0x0000000000000004UL @@ -57,16 +47,41 @@ #define h_add_logical_lan_buffer(ua, buf) \ plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf) +static inline long h_add_logical_lan_buffers(unsigned long unit_address, + unsigned long desc1, + unsigned long desc2, + unsigned long desc3, + unsigned long desc4, + unsigned long desc5, + unsigned long desc6, + unsigned long desc7, + unsigned long desc8) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + return plpar_hcall9(H_ADD_LOGICAL_LAN_BUFFERS, + retbuf, unit_address, + desc1, desc2, 
desc3, desc4, + desc5, desc6, desc7, desc8); +} + +/* FW allows us to send 6 descriptors but we only use one so mark + * the other 5 as unused (0) + */ static inline long h_send_logical_lan(unsigned long unit_address, - unsigned long desc1, unsigned long desc2, unsigned long desc3, - unsigned long desc4, unsigned long desc5, unsigned long desc6, - unsigned long corellator_in, unsigned long *corellator_out) + unsigned long desc, unsigned long corellator_in, + unsigned long *corellator_out, unsigned long mss, + unsigned long large_send_support) { long rc; unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, desc1, - desc2, desc3, desc4, desc5, desc6, corellator_in); + if (large_send_support) + rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, + desc, 0, 0, 0, 0, 0, corellator_in, mss); + else + rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, + desc, 0, 0, 0, 0, 0, corellator_in); *corellator_out = retbuf[0]; @@ -102,10 +117,15 @@ static inline long h_illan_attributes(unsigned long unit_address, #define IBMVETH_BUFF_LIST_SIZE 4096 #define IBMVETH_FILT_LIST_SIZE 4096 #define IBMVETH_MAX_BUF_SIZE (1024 * 128) +#define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64) +#define IBMVETH_MAX_QUEUES 16U +#define IBMVETH_DEFAULT_QUEUES 8U +#define IBMVETH_MAX_RX_PER_HCALL 8U static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; static int pool_count[] = { 256, 512, 256, 256, 256 }; -static int pool_active[] = { 1, 1, 0, 0, 0}; +static int pool_count_cmo[] = { 256, 512, 256, 256, 64 }; +static int pool_active[] = { 1, 1, 0, 0, 1}; #define IBM_VETH_INVALID_MAP ((u16)0xffff) @@ -134,44 +154,65 @@ struct ibmveth_rx_q { }; struct ibmveth_adapter { - struct vio_dev *vdev; - struct net_device *netdev; - struct napi_struct napi; - struct net_device_stats stats; - unsigned int mcastFilterSize; - unsigned long mac_addr; - void * buffer_list_addr; - void * filter_list_addr; - dma_addr_t buffer_list_dma; 
- dma_addr_t filter_list_dma; - struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; - struct ibmveth_rx_q rx_queue; - int pool_config; - int rx_csum; - void *bounce_buffer; - dma_addr_t bounce_buffer_dma; - - u64 fw_ipv6_csum_support; - u64 fw_ipv4_csum_support; - /* adapter specific stats */ - u64 replenish_task_cycles; - u64 replenish_no_mem; - u64 replenish_add_buff_failure; - u64 replenish_add_buff_success; - u64 rx_invalid_buffer; - u64 rx_no_buffer; - u64 tx_map_failed; - u64 tx_send_failed; + struct vio_dev *vdev; + struct net_device *netdev; + struct napi_struct napi; + struct work_struct work; + unsigned int mcastFilterSize; + void *buffer_list_addr; + void *filter_list_addr; + void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; + unsigned int tx_ltb_size; + dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; + dma_addr_t buffer_list_dma; + dma_addr_t filter_list_dma; + struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; + struct ibmveth_rx_q rx_queue; + int rx_csum; + int large_send; + bool is_active_trunk; + unsigned int rx_buffers_per_hcall; + + u64 fw_ipv6_csum_support; + u64 fw_ipv4_csum_support; + u64 fw_large_send_support; + /* adapter specific stats */ + u64 replenish_task_cycles; + u64 replenish_no_mem; + u64 replenish_add_buff_failure; + u64 replenish_add_buff_success; + u64 rx_invalid_buffer; + u64 rx_no_buffer; + u64 tx_map_failed; + u64 tx_send_failed; + u64 tx_large_packets; + u64 rx_large_packets; + /* Ethtool settings */ + u8 duplex; + u32 speed; }; +/* + * We pass struct ibmveth_buf_desc_fields to the hypervisor in registers, + * so we don't need to byteswap the two elements. However since we use + * a union (ibmveth_buf_desc) to convert from the struct to a u64 we + * do end up with endian specific ordering of the elements and that + * needs correcting. 
+ */ struct ibmveth_buf_desc_fields { +#ifdef __BIG_ENDIAN u32 flags_len; + u32 address; +#else + u32 address; + u32 flags_len; +#endif #define IBMVETH_BUF_VALID 0x80000000 #define IBMVETH_BUF_TOGGLE 0x40000000 +#define IBMVETH_BUF_LRG_SND 0x04000000 #define IBMVETH_BUF_NO_CSUM 0x02000000 #define IBMVETH_BUF_CSUM_GOOD 0x01000000 #define IBMVETH_BUF_LEN_MASK 0x00FFFFFF - u32 address; }; union ibmveth_buf_desc { @@ -180,15 +221,17 @@ union ibmveth_buf_desc { }; struct ibmveth_rx_q_entry { - u32 flags_off; + __be32 flags_off; #define IBMVETH_RXQ_TOGGLE 0x80000000 #define IBMVETH_RXQ_TOGGLE_SHIFT 31 #define IBMVETH_RXQ_VALID 0x40000000 +#define IBMVETH_RXQ_LRG_PKT 0x04000000 #define IBMVETH_RXQ_NO_CSUM 0x02000000 #define IBMVETH_RXQ_CSUM_GOOD 0x01000000 #define IBMVETH_RXQ_OFF_MASK 0x0000FFFF - u32 length; + __be32 length; + /* correlator is only used by the OS, no need to byte swap */ u64 correlator; }; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c new file mode 100644 index 000000000000..3808148c1fc7 --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -0,0 +1,6808 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/**************************************************************************/ +/* */ +/* IBM System i and System p Virtual NIC Device Driver */ +/* Copyright (C) 2014 IBM Corp. */ +/* Santiago Leon (santi_leon@yahoo.com) */ +/* Thomas Falcon (tlfalcon@linux.vnet.ibm.com) */ +/* John Allen (jallen@linux.vnet.ibm.com) */ +/* */ +/* */ +/* This module contains the implementation of a virtual ethernet device */ +/* for use with IBM i/p Series LPAR Linux. It utilizes the logical LAN */ +/* option of the RS/6000 Platform Architecture to interface with virtual */ +/* ethernet NICs that are presented to the partition by the hypervisor. */ +/* */ +/* Messages are passed between the VNIC driver and the VNIC server using */ +/* Command/Response Queues (CRQs) and sub CRQs (sCRQs). 
CRQs are used to */ +/* issue and receive commands that initiate communication with the server */ +/* on driver initialization. Sub CRQs (sCRQs) are similar to CRQs, but */ +/* are used by the driver to notify the server that a packet is */ +/* ready for transmission or that a buffer has been added to receive a */ +/* packet. Subsequently, sCRQs are used by the server to notify the */ +/* driver that a packet transmission has been completed or that a packet */ +/* has been received and placed in a waiting buffer. */ +/* */ +/* In lieu of a more conventional "on-the-fly" DMA mapping strategy in */ +/* which skbs are DMA mapped and immediately unmapped when the transmit */ +/* or receive has been completed, the VNIC driver is required to use */ +/* "long term mapping". This entails that large, continuous DMA mapped */ +/* buffers are allocated on driver initialization and these buffers are */ +/* then continuously reused to pass skbs to and from the VNIC server. */ +/* */ +/**************************************************************************/ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/completion.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <linux/ethtool.h> +#include <linux/proc_fs.h> +#include <linux/if_arp.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kthread.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <net/net_namespace.h> +#include <asm/hvcall.h> +#include <linux/atomic.h> +#include <asm/vio.h> +#include <asm/xive.h> +#include <asm/iommu.h> +#include <linux/uaccess.h> +#include <asm/firmware.h> +#include <linux/workqueue.h> +#include 
<linux/if_vlan.h> +#include <linux/utsname.h> +#include <linux/cpu.h> + +#include "ibmvnic.h" + +static const char ibmvnic_driver_name[] = "ibmvnic"; +static const char ibmvnic_driver_string[] = "IBM System i/p Virtual NIC Driver"; + +MODULE_AUTHOR("Santiago Leon"); +MODULE_DESCRIPTION("IBM System i/p Virtual NIC Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(IBMVNIC_DRIVER_VERSION); + +static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; +static void release_sub_crqs(struct ibmvnic_adapter *, bool); +static int ibmvnic_reset_crq(struct ibmvnic_adapter *); +static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); +static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); +static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *); +static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64); +static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance); +static int enable_scrq_irq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue *); +static int disable_scrq_irq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue *); +static int pending_scrq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue *); +static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, + struct ibmvnic_sub_crq_queue *); +static int ibmvnic_poll(struct napi_struct *napi, int data); +static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter); +static inline void reinit_init_done(struct ibmvnic_adapter *adapter); +static void send_query_map(struct ibmvnic_adapter *adapter); +static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8); +static int send_request_unmap(struct ibmvnic_adapter *, u8); +static int send_login(struct ibmvnic_adapter *adapter); +static void send_query_cap(struct ibmvnic_adapter *adapter); +static int init_sub_crqs(struct ibmvnic_adapter *); +static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); +static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset); +static void 
release_crq_queue(struct ibmvnic_adapter *); +static int __ibmvnic_set_mac(struct net_device *, u8 *); +static int init_crq_queue(struct ibmvnic_adapter *adapter); +static int send_query_phys_parms(struct ibmvnic_adapter *adapter); +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq); +static void free_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb); +static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); +static void flush_reset_queue(struct ibmvnic_adapter *adapter); +static void print_subcrq_error(struct device *dev, int rc, const char *func); + +struct ibmvnic_stat { + char name[ETH_GSTRING_LEN]; + int offset; +}; + +#define IBMVNIC_STAT_OFF(stat) (offsetof(struct ibmvnic_adapter, stats) + \ + offsetof(struct ibmvnic_statistics, stat)) +#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off)))) + +static const struct ibmvnic_stat ibmvnic_stats[] = { + {"rx_packets", IBMVNIC_STAT_OFF(rx_packets)}, + {"rx_bytes", IBMVNIC_STAT_OFF(rx_bytes)}, + {"tx_packets", IBMVNIC_STAT_OFF(tx_packets)}, + {"tx_bytes", IBMVNIC_STAT_OFF(tx_bytes)}, + {"ucast_tx_packets", IBMVNIC_STAT_OFF(ucast_tx_packets)}, + {"ucast_rx_packets", IBMVNIC_STAT_OFF(ucast_rx_packets)}, + {"mcast_tx_packets", IBMVNIC_STAT_OFF(mcast_tx_packets)}, + {"mcast_rx_packets", IBMVNIC_STAT_OFF(mcast_rx_packets)}, + {"bcast_tx_packets", IBMVNIC_STAT_OFF(bcast_tx_packets)}, + {"bcast_rx_packets", IBMVNIC_STAT_OFF(bcast_rx_packets)}, + {"align_errors", IBMVNIC_STAT_OFF(align_errors)}, + {"fcs_errors", IBMVNIC_STAT_OFF(fcs_errors)}, + {"single_collision_frames", IBMVNIC_STAT_OFF(single_collision_frames)}, + {"multi_collision_frames", IBMVNIC_STAT_OFF(multi_collision_frames)}, + {"sqe_test_errors", IBMVNIC_STAT_OFF(sqe_test_errors)}, + {"deferred_tx", IBMVNIC_STAT_OFF(deferred_tx)}, + {"late_collisions", IBMVNIC_STAT_OFF(late_collisions)}, + {"excess_collisions", 
IBMVNIC_STAT_OFF(excess_collisions)}, + {"internal_mac_tx_errors", IBMVNIC_STAT_OFF(internal_mac_tx_errors)}, + {"carrier_sense", IBMVNIC_STAT_OFF(carrier_sense)}, + {"too_long_frames", IBMVNIC_STAT_OFF(too_long_frames)}, + {"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)}, +}; + +static int send_crq_init_complete(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.generic.first = IBMVNIC_CRQ_INIT_CMD; + crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE; + + return ibmvnic_send_crq(adapter, &crq); +} + +static int send_version_xchg(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.version_exchange.first = IBMVNIC_CRQ_CMD; + crq.version_exchange.cmd = VERSION_EXCHANGE; + crq.version_exchange.version = cpu_to_be16(ibmvnic_version); + + return ibmvnic_send_crq(adapter, &crq); +} + +static void ibmvnic_clean_queue_affinity(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *queue) +{ + if (!(queue && queue->irq)) + return; + + cpumask_clear(queue->affinity_mask); + + if (irq_set_affinity_and_hint(queue->irq, NULL)) + netdev_warn(adapter->netdev, + "%s: Clear affinity failed, queue addr = %p, IRQ = %d\n", + __func__, queue, queue->irq); +} + +static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_sub_crq_queue **rxqs; + struct ibmvnic_sub_crq_queue **txqs; + int num_rxqs, num_txqs; + int i; + + rxqs = adapter->rx_scrq; + txqs = adapter->tx_scrq; + num_txqs = adapter->num_active_tx_scrqs; + num_rxqs = adapter->num_active_rx_scrqs; + + netdev_dbg(adapter->netdev, "%s: Cleaning irq affinity hints", __func__); + if (txqs) { + for (i = 0; i < num_txqs; i++) + ibmvnic_clean_queue_affinity(adapter, txqs[i]); + } + if (rxqs) { + for (i = 0; i < num_rxqs; i++) + ibmvnic_clean_queue_affinity(adapter, rxqs[i]); + } +} + +static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue, + unsigned int *cpu, int 
*stragglers, + int stride) +{ + cpumask_var_t mask; + int i; + int rc = 0; + + if (!(queue && queue->irq)) + return rc; + + /* cpumask_var_t is either a pointer or array, allocation works here */ + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + /* while we have extra cpu give one extra to this irq */ + if (*stragglers) { + stride++; + (*stragglers)--; + } + /* atomic write is safer than writing bit by bit directly */ + for_each_online_cpu_wrap(i, *cpu) { + if (!stride--) { + /* For the next queue we start from the first + * unused CPU in this queue + */ + *cpu = i; + break; + } + cpumask_set_cpu(i, mask); + } + + /* set queue affinity mask */ + cpumask_copy(queue->affinity_mask, mask); + rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask); + free_cpumask_var(mask); + + return rc; +} + +/* assumes cpu read lock is held */ +static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq; + struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq; + struct ibmvnic_sub_crq_queue *queue; + int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0; + int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0; + int total_queues, stride, stragglers, i; + unsigned int num_cpu, cpu = 0; + bool is_rx_queue; + int rc = 0; + + netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__); + if (!(adapter->rx_scrq && adapter->tx_scrq)) { + netdev_warn(adapter->netdev, + "%s: Set affinity failed, queues not allocated\n", + __func__); + return; + } + + total_queues = num_rxqs + num_txqs; + num_cpu = num_online_cpus(); + /* number of cpu's assigned per irq */ + stride = max_t(int, num_cpu / total_queues, 1); + /* number of leftover cpu's */ + stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0; + + for (i = 0; i < total_queues; i++) { + is_rx_queue = false; + /* balance core load by alternating rx and tx assignments + * ex: TX0 -> RX0 -> TX1 -> RX1 etc. 
+ */ + if ((i % 2 == 1 && i_rxqs < num_rxqs) || i_txqs == num_txqs) { + queue = rxqs[i_rxqs++]; + is_rx_queue = true; + } else { + queue = txqs[i_txqs++]; + } + + rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers, + stride); + if (rc) + goto out; + + if (!queue || is_rx_queue) + continue; + + rc = __netif_set_xps_queue(adapter->netdev, + cpumask_bits(queue->affinity_mask), + i_txqs - 1, XPS_CPUS); + if (rc) + netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n", + __func__, i_txqs - 1, rc); + } + +out: + if (rc) { + netdev_warn(adapter->netdev, + "%s: Set affinity failed, queue addr = %p, IRQ = %d, rc = %d.\n", + __func__, queue, queue->irq, rc); + ibmvnic_clean_affinity(adapter); + } +} + +static int ibmvnic_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct ibmvnic_adapter *adapter; + + adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node); + ibmvnic_set_affinity(adapter); + return 0; +} + +static int ibmvnic_cpu_dead(unsigned int cpu, struct hlist_node *node) +{ + struct ibmvnic_adapter *adapter; + + adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node_dead); + ibmvnic_set_affinity(adapter); + return 0; +} + +static int ibmvnic_cpu_down_prep(unsigned int cpu, struct hlist_node *node) +{ + struct ibmvnic_adapter *adapter; + + adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node); + ibmvnic_clean_affinity(adapter); + return 0; +} + +static enum cpuhp_state ibmvnic_online; + +static int ibmvnic_cpu_notif_add(struct ibmvnic_adapter *adapter) +{ + int ret; + + ret = cpuhp_state_add_instance_nocalls(ibmvnic_online, &adapter->node); + if (ret) + return ret; + ret = cpuhp_state_add_instance_nocalls(CPUHP_IBMVNIC_DEAD, + &adapter->node_dead); + if (!ret) + return ret; + cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node); + return ret; +} + +static void ibmvnic_cpu_notif_remove(struct ibmvnic_adapter *adapter) +{ + cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node); + 
cpuhp_state_remove_instance_nocalls(CPUHP_IBMVNIC_DEAD, + &adapter->node_dead); +} + +static long h_reg_sub_crq(unsigned long unit_address, unsigned long token, + unsigned long length, unsigned long *number, + unsigned long *irq) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_REG_SUB_CRQ, retbuf, unit_address, token, length); + *number = retbuf[0]; + *irq = retbuf[1]; + + return rc; +} + +/** + * ibmvnic_wait_for_completion - Check device state and wait for completion + * @adapter: private device data + * @comp_done: completion structure to wait for + * @timeout: time to wait in milliseconds + * + * Wait for a completion signal or until the timeout limit is reached + * while checking that the device is still active. + */ +static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter, + struct completion *comp_done, + unsigned long timeout) +{ + struct net_device *netdev; + unsigned long div_timeout; + u8 retry; + + netdev = adapter->netdev; + retry = 5; + div_timeout = msecs_to_jiffies(timeout / retry); + while (true) { + if (!adapter->crq.active) { + netdev_err(netdev, "Device down!\n"); + return -ENODEV; + } + if (!retry--) + break; + if (wait_for_completion_timeout(comp_done, div_timeout)) + return 0; + } + netdev_err(netdev, "Operation timed out.\n"); + return -ETIMEDOUT; +} + +/** + * reuse_ltb() - Check if a long term buffer can be reused + * @ltb: The long term buffer to be checked + * @size: The size of the long term buffer. + * + * An LTB can be reused unless its size has changed. + * + * Return: Return true if the LTB can be reused, false otherwise. 
+ */ +static bool reuse_ltb(struct ibmvnic_long_term_buff *ltb, int size) +{ + return (ltb->buff && ltb->size == size); +} + +/** + * alloc_long_term_buff() - Allocate a long term buffer (LTB) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb: container object for the LTB + * @size: size of the LTB + * + * Allocate an LTB of the specified size and notify VIOS. + * + * If the given @ltb already has the correct size, reuse it. Otherwise if + * its non-NULL, free it. Then allocate a new one of the correct size. + * Notify the VIOS either way since we may now be working with a new VIOS. + * + * Allocating larger chunks of memory during resets, specially LPM or under + * low memory situations can cause resets to fail/timeout and for LPAR to + * lose connectivity. So hold onto the LTB even if we fail to communicate + * with the VIOS and reuse it on next open. Free LTB when adapter is closed. + * + * Return: 0 if we were able to allocate the LTB and notify the VIOS and + * a negative value otherwise. + */ +static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb, int size) +{ + struct device *dev = &adapter->vdev->dev; + u64 prev = 0; + int rc; + + if (!reuse_ltb(ltb, size)) { + dev_dbg(dev, + "LTB size changed from 0x%llx to 0x%x, reallocating\n", + ltb->size, size); + prev = ltb->size; + free_long_term_buff(adapter, ltb); + } + + if (ltb->buff) { + dev_dbg(dev, "Reusing LTB [map %d, size 0x%llx]\n", + ltb->map_id, ltb->size); + } else { + ltb->buff = dma_alloc_coherent(dev, size, <b->addr, + GFP_KERNEL); + if (!ltb->buff) { + dev_err(dev, "Couldn't alloc long term buffer\n"); + return -ENOMEM; + } + ltb->size = size; + + ltb->map_id = find_first_zero_bit(adapter->map_ids, + MAX_MAP_ID); + bitmap_set(adapter->map_ids, ltb->map_id, 1); + + dev_dbg(dev, + "Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n", + ltb->map_id, ltb->size, prev); + } + + /* Ensure ltb is zeroed - specially when reusing it. 
*/ + memset(ltb->buff, 0, ltb->size); + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); + if (rc) { + dev_err(dev, "send_request_map failed, rc = %d\n", rc); + goto out; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + dev_err(dev, "LTB map request aborted or timed out, rc = %d\n", + rc); + goto out; + } + + if (adapter->fw_done_rc) { + dev_err(dev, "Couldn't map LTB, rc = %d\n", + adapter->fw_done_rc); + rc = -EIO; + goto out; + } + rc = 0; +out: + /* don't free LTB on communication error - see function header */ + mutex_unlock(&adapter->fw_lock); + return rc; +} + +static void free_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb) +{ + struct device *dev = &adapter->vdev->dev; + + if (!ltb->buff) + return; + + /* VIOS automatically unmaps the long term buffer at remote + * end for the following resets: + * FAILOVER, MOBILITY, TIMEOUT. + */ + if (adapter->reset_reason != VNIC_RESET_FAILOVER && + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_TIMEOUT) + send_request_unmap(adapter, ltb->map_id); + + dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); + + ltb->buff = NULL; + /* mark this map_id free */ + bitmap_clear(adapter->map_ids, ltb->map_id, 1); + ltb->map_id = 0; +} + +/** + * free_ltb_set - free the given set of long term buffers (LTBS) + * @adapter: The ibmvnic adapter containing this ltb set + * @ltb_set: The ltb_set to be freed + * + * Free the set of LTBs in the given set. 
+ */
+
+static void free_ltb_set(struct ibmvnic_adapter *adapter,
+			 struct ibmvnic_ltb_set *ltb_set)
+{
+	int i;
+
+	/* release every LTB first, then the array that holds them */
+	for (i = 0; i < ltb_set->num_ltbs; i++)
+		free_long_term_buff(adapter, &ltb_set->ltbs[i]);
+
+	kfree(ltb_set->ltbs);
+	ltb_set->ltbs = NULL;
+	ltb_set->num_ltbs = 0;
+}
+
+/**
+ * alloc_ltb_set() - Allocate a set of long term buffers (LTBs)
+ *
+ * @adapter: ibmvnic adapter associated to the LTB
+ * @ltb_set: container object for the set of LTBs
+ * @num_buffs: Number of buffers in the LTB
+ * @buff_size: Size of each buffer in the LTB
+ *
+ * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size
+ * each. We currently cap the size of each LTB to IBMVNIC_ONE_LTB_SIZE. If
+ * the new set of LTBs has fewer LTBs than the old set, free the excess LTBs.
+ * If the new set needs more than the old set, allocate the remaining ones.
+ * Try and reuse as many LTBs as possible and avoid reallocation.
+ *
+ * Any changes to this allocation strategy must be reflected in
+ * map_rxpool_buf_to_ltb() and map_txpool_buf_to_ltb().
+ */ +static int alloc_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set, int num_buffs, + int buff_size) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ltb_set old_set; + struct ibmvnic_ltb_set new_set; + int rem_size; + int tot_size; /* size of all ltbs */ + int ltb_size; /* size of one ltb */ + int nltbs; + int rc; + int n; + int i; + + dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs, + buff_size); + + ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size); + tot_size = num_buffs * buff_size; + + if (ltb_size > tot_size) + ltb_size = tot_size; + + nltbs = tot_size / ltb_size; + if (tot_size % ltb_size) + nltbs++; + + old_set = *ltb_set; + + if (old_set.num_ltbs == nltbs) { + new_set = old_set; + } else { + int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff); + + new_set.ltbs = kzalloc(tmp, GFP_KERNEL); + if (!new_set.ltbs) + return -ENOMEM; + + new_set.num_ltbs = nltbs; + + /* Free any excess ltbs in old set */ + for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++) + free_long_term_buff(adapter, &old_set.ltbs[i]); + + /* Copy remaining ltbs to new set. All LTBs except the + * last one are of the same size. alloc_long_term_buff() + * will realloc if the size changes. + */ + n = min(old_set.num_ltbs, new_set.num_ltbs); + for (i = 0; i < n; i++) + new_set.ltbs[i] = old_set.ltbs[i]; + + /* Any additional ltbs in new set will have NULL ltbs for + * now and will be allocated in alloc_long_term_buff(). + */ + + /* We no longer need the old_set so free it. Note that we + * may have reused some ltbs from old set and freed excess + * ltbs above. So we only need to free the container now + * not the LTBs themselves. (i.e. dont free_ltb_set()!) + */ + kfree(old_set.ltbs); + old_set.ltbs = NULL; + old_set.num_ltbs = 0; + + /* Install the new set. If allocations fail below, we will + * retry later and know what size LTBs we need. 
+ */ + *ltb_set = new_set; + } + + i = 0; + rem_size = tot_size; + while (rem_size) { + if (ltb_size > rem_size) + ltb_size = rem_size; + + rem_size -= ltb_size; + + rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size); + if (rc) + goto out; + i++; + } + + WARN_ON(i != new_set.num_ltbs); + + return 0; +out: + /* We may have allocated one/more LTBs before failing and we + * want to try and reuse on next reset. So don't free ltb set. + */ + return rc; +} + +/** + * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB. + * @rxpool: The receive buffer pool containing buffer + * @bufidx: Index of buffer in rxpool + * @ltbp: (Output) pointer to the long term buffer containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [rxpool, bufidx] to an LTB in the + * pool and its corresponding offset. Assume for now that each LTB is of + * different size but could possibly be optimized based on the allocation + * strategy in alloc_ltb_set(). + */ +static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON(bufidx >= rxpool->size); + + for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) { + ltb = &rxpool->ltb_set.ltbs[i]; + nbufs = ltb->size / rxpool->buff_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * rxpool->buff_size; +} + +/** + * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB. + * @txpool: The transmit buffer pool containing buffer + * @bufidx: Index of buffer in txpool + * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [txpool, bufidx] to an LTB in the + * pool and its corresponding offset. 
+ */ +static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON_ONCE(bufidx >= txpool->num_buffers); + + for (i = 0; i < txpool->ltb_set.num_ltbs; i++) { + ltb = &txpool->ltb_set.ltbs[i]; + nbufs = ltb->size / txpool->buf_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * txpool->buf_size; +} + +static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_active_rx_pools; i++) + adapter->rx_pool[i].active = 0; +} + +static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter) +{ + if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) { + netdev_info(adapter->netdev, + "set max ind descs from %u to safe limit %u\n", + adapter->cur_max_ind_descs, + IBMVNIC_SAFE_IND_DESC); + adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC; + } +} + +static void replenish_rx_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_rx_pool *pool) +{ + int count = pool->size - atomic_read(&pool->available); + u64 handle = adapter->rx_scrq[pool->index]->handle; + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_long_term_buff *ltb; + union sub_crq *sub_crq; + int buffers_added = 0; + unsigned long lpar_rc; + struct sk_buff *skb; + unsigned int offset; + dma_addr_t dma_addr; + unsigned char *dst; + int shift = 0; + int bufidx; + int i; + + if (!pool->active) + return; + + rx_scrq = adapter->rx_scrq[pool->index]; + ind_bufp = &rx_scrq->ind_buf; + + /* netdev_skb_alloc() could have failed after we saved a few skbs + * in the indir_buf and we would not have sent them to VIOS yet. + * To account for them, start the loop at ind_bufp->index rather + * than 0. 
If we pushed all the skbs to VIOS, ind_bufp->index will + * be 0. + */ + for (i = ind_bufp->index; i < count; ++i) { + bufidx = pool->free_map[pool->next_free]; + + /* We maybe reusing the skb from earlier resets. Allocate + * only if necessary. But since the LTB may have changed + * during reset (see init_rx_pools()), update LTB below + * even if reusing skb. + */ + skb = pool->rx_buff[bufidx].skb; + if (!skb) { + skb = netdev_alloc_skb(adapter->netdev, + pool->buff_size); + if (!skb) { + dev_err(dev, "Couldn't replenish rx buff\n"); + adapter->replenish_no_mem++; + break; + } + } + + pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP; + pool->next_free = (pool->next_free + 1) % pool->size; + + /* Copy the skb to the long term mapped DMA buffer */ + map_rxpool_buf_to_ltb(pool, bufidx, <b, &offset); + dst = ltb->buff + offset; + memset(dst, 0, pool->buff_size); + dma_addr = ltb->addr + offset; + + /* add the skb to an rx_buff in the pool */ + pool->rx_buff[bufidx].data = dst; + pool->rx_buff[bufidx].dma = dma_addr; + pool->rx_buff[bufidx].skb = skb; + pool->rx_buff[bufidx].pool_index = pool->index; + pool->rx_buff[bufidx].size = pool->buff_size; + + /* queue the rx_buff for the next send_subcrq_indirect */ + sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; + memset(sub_crq, 0, sizeof(*sub_crq)); + sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; + sub_crq->rx_add.correlator = + cpu_to_be64((u64)&pool->rx_buff[bufidx]); + sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); + sub_crq->rx_add.map_id = ltb->map_id; + + /* The length field of the sCRQ is defined to be 24 bits so the + * buffer size needs to be left shifted by a byte before it is + * converted to big endian to prevent the last byte from being + * truncated. 
+ */ +#ifdef __LITTLE_ENDIAN__ + shift = 8; +#endif + sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); + + /* if send_subcrq_indirect queue is full, flush to VIOS */ + if (ind_bufp->index == adapter->cur_max_ind_descs || + i == count - 1) { + lpar_rc = + send_subcrq_indirect(adapter, handle, + (u64)ind_bufp->indir_dma, + (u64)ind_bufp->index); + if (lpar_rc != H_SUCCESS) + goto failure; + buffers_added += ind_bufp->index; + adapter->replenish_add_buff_success += ind_bufp->index; + ind_bufp->index = 0; + } + } + atomic_add(buffers_added, &pool->available); + return; + +failure: + if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) + dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); + + /* Detect platform limit H_PARAMETER */ + if (lpar_rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ + for (i = ind_bufp->index - 1; i >= 0; --i) { + struct ibmvnic_rx_buff *rx_buff; + + pool->next_free = pool->next_free == 0 ? + pool->size - 1 : pool->next_free - 1; + sub_crq = &ind_bufp->indir_arr[i]; + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(sub_crq->rx_add.correlator); + bufidx = (int)(rx_buff - pool->rx_buff); + pool->free_map[pool->next_free] = bufidx; + dev_kfree_skb_any(pool->rx_buff[bufidx].skb); + pool->rx_buff[bufidx].skb = NULL; + } + adapter->replenish_add_buff_failure += ind_bufp->index; + atomic_add(buffers_added, &pool->available); + ind_bufp->index = 0; + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable buffer pool replenishment and report carrier off if + * queue is closed or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset. 
+ */ + deactivate_rx_pools(adapter); + netif_carrier_off(adapter->netdev); + } +} + +static void replenish_pools(struct ibmvnic_adapter *adapter) +{ + int i; + + adapter->replenish_task_cycles++; + for (i = 0; i < adapter->num_active_rx_pools; i++) { + if (adapter->rx_pool[i].active) + replenish_rx_pool(adapter, &adapter->rx_pool[i]); + } + + netdev_dbg(adapter->netdev, "Replenished %d pools\n", i); +} + +static void release_stats_buffers(struct ibmvnic_adapter *adapter) +{ + kfree(adapter->tx_stats_buffers); + kfree(adapter->rx_stats_buffers); + adapter->tx_stats_buffers = NULL; + adapter->rx_stats_buffers = NULL; +} + +static int init_stats_buffers(struct ibmvnic_adapter *adapter) +{ + adapter->tx_stats_buffers = + kcalloc(IBMVNIC_MAX_QUEUES, + sizeof(struct ibmvnic_tx_queue_stats), + GFP_KERNEL); + if (!adapter->tx_stats_buffers) + return -ENOMEM; + + adapter->rx_stats_buffers = + kcalloc(IBMVNIC_MAX_QUEUES, + sizeof(struct ibmvnic_rx_queue_stats), + GFP_KERNEL); + if (!adapter->rx_stats_buffers) + return -ENOMEM; + + return 0; +} + +static void release_stats_token(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + + if (!adapter->stats_token) + return; + + dma_unmap_single(dev, adapter->stats_token, + sizeof(struct ibmvnic_statistics), + DMA_FROM_DEVICE); + adapter->stats_token = 0; +} + +static int init_stats_token(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + dma_addr_t stok; + int rc; + + stok = dma_map_single(dev, &adapter->stats, + sizeof(struct ibmvnic_statistics), + DMA_FROM_DEVICE); + rc = dma_mapping_error(dev, stok); + if (rc) { + dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc); + return rc; + } + + adapter->stats_token = stok; + netdev_dbg(adapter->netdev, "Stats token initialized (%llx)\n", stok); + return 0; +} + +/** + * release_rx_pools() - Release any rx pools attached to @adapter. 
+ * @adapter: ibmvnic adapter
+ *
+ * Safe to call this multiple times - even if no pools are attached.
+ *
+ * For each of the num_active_rx_pools pools this frees the free map, the
+ * pool's LTB set, any skbs still attached to its rx buffers and the rx
+ * buffer array itself. Finally the pool array is freed and the bookkeeping
+ * fields (rx_pool, num_active_rx_pools, prev_rx_pool_size) are reset.
+ */
+static void release_rx_pools(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_rx_pool *rx_pool;
+	int i, j;
+
+	if (!adapter->rx_pool)	/* nothing attached */
+		return;
+
+	for (i = 0; i < adapter->num_active_rx_pools; i++) {
+		rx_pool = &adapter->rx_pool[i];
+
+		netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
+
+		kfree(rx_pool->free_map);
+
+		free_ltb_set(adapter, &rx_pool->ltb_set);
+
+		if (!rx_pool->rx_buff)	/* no buffer array, so no skbs to drop */
+			continue;
+
+		for (j = 0; j < rx_pool->size; j++) {
+			if (rx_pool->rx_buff[j].skb) {
+				dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
+				rx_pool->rx_buff[j].skb = NULL;
+			}
+		}
+
+		kfree(rx_pool->rx_buff);
+	}
+
+	kfree(adapter->rx_pool);
+	adapter->rx_pool = NULL;	/* makes a repeated call return early */
+	adapter->num_active_rx_pools = 0;
+	adapter->prev_rx_pool_size = 0;
+}
+
+/**
+ * reuse_rx_pools() - Check if the existing rx pools can be reused.
+ * @adapter: ibmvnic adapter
+ *
+ * Check if the existing rx pools in the adapter can be reused. The
+ * pools can be reused if the pool parameters (number of pools,
+ * number of buffers in the pool and size of each buffer) have not
+ * changed.
+ *
+ * NOTE: This assumes that all pools have the same number of buffers
+ * which is the case currently. If that changes, we must fix this.
+ *
+ * Return: true if the rx pools can be reused, false otherwise.
+ */ +static bool reuse_rx_pools(struct ibmvnic_adapter *adapter) +{ + u64 old_num_pools, new_num_pools; + u64 old_pool_size, new_pool_size; + u64 old_buff_size, new_buff_size; + + if (!adapter->rx_pool) + return false; + + old_num_pools = adapter->num_active_rx_pools; + new_num_pools = adapter->req_rx_queues; + + old_pool_size = adapter->prev_rx_pool_size; + new_pool_size = adapter->req_rx_add_entries_per_subcrq; + + old_buff_size = adapter->prev_rx_buf_sz; + new_buff_size = adapter->cur_rx_buf_sz; + + if (old_buff_size != new_buff_size || + old_num_pools != new_num_pools || + old_pool_size != new_pool_size) + return false; + + return true; +} + +/** + * init_rx_pools(): Initialize the set of receiver pools in the adapter. + * @netdev: net device associated with the vnic interface + * + * Initialize the set of receiver pools in the ibmvnic adapter associated + * with the net_device @netdev. If possible, reuse the existing rx pools. + * Otherwise free any existing pools and allocate a new set of pools + * before initializing them. + * + * Return: 0 on success and negative value on error. + */ +static int init_rx_pools(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_rx_pool *rx_pool; + u64 num_pools; + u64 pool_size; /* # of buffers in one pool */ + u64 buff_size; + int i, j, rc; + + pool_size = adapter->req_rx_add_entries_per_subcrq; + num_pools = adapter->req_rx_queues; + buff_size = adapter->cur_rx_buf_sz; + + if (reuse_rx_pools(adapter)) { + dev_dbg(dev, "Reusing rx pools\n"); + goto update_ltb; + } + + /* Allocate/populate the pools. */ + release_rx_pools(adapter); + + adapter->rx_pool = kcalloc(num_pools, + sizeof(struct ibmvnic_rx_pool), + GFP_KERNEL); + if (!adapter->rx_pool) { + dev_err(dev, "Failed to allocate rx pools\n"); + return -ENOMEM; + } + + /* Set num_active_rx_pools early. 
If we fail below after partial + * allocation, release_rx_pools() will know how many to look for. + */ + adapter->num_active_rx_pools = num_pools; + + for (i = 0; i < num_pools; i++) { + rx_pool = &adapter->rx_pool[i]; + + netdev_dbg(adapter->netdev, + "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n", + i, pool_size, buff_size); + + rx_pool->size = pool_size; + rx_pool->index = i; + rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES); + + rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int), + GFP_KERNEL); + if (!rx_pool->free_map) { + dev_err(dev, "Couldn't alloc free_map %d\n", i); + rc = -ENOMEM; + goto out_release; + } + + rx_pool->rx_buff = kcalloc(rx_pool->size, + sizeof(struct ibmvnic_rx_buff), + GFP_KERNEL); + if (!rx_pool->rx_buff) { + dev_err(dev, "Couldn't alloc rx buffers\n"); + rc = -ENOMEM; + goto out_release; + } + } + + adapter->prev_rx_pool_size = pool_size; + adapter->prev_rx_buf_sz = adapter->cur_rx_buf_sz; + +update_ltb: + for (i = 0; i < num_pools; i++) { + rx_pool = &adapter->rx_pool[i]; + dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n", + i, rx_pool->size, rx_pool->buff_size); + + rc = alloc_ltb_set(adapter, &rx_pool->ltb_set, + rx_pool->size, rx_pool->buff_size); + if (rc) + goto out; + + for (j = 0; j < rx_pool->size; ++j) { + struct ibmvnic_rx_buff *rx_buff; + + rx_pool->free_map[j] = j; + + /* NOTE: Don't clear rx_buff->skb here - will leak + * memory! replenish_rx_pool() will reuse skbs or + * allocate as necessary. + */ + rx_buff = &rx_pool->rx_buff[j]; + rx_buff->dma = 0; + rx_buff->data = 0; + rx_buff->size = 0; + rx_buff->pool_index = 0; + } + + /* Mark pool "empty" so replenish_rx_pools() will + * update the LTB info for each buffer + */ + atomic_set(&rx_pool->available, 0); + rx_pool->next_alloc = 0; + rx_pool->next_free = 0; + /* replenish_rx_pool() may have called deactivate_rx_pools() + * on failover. Ensure pool is active now. 
+ */ + rx_pool->active = 1; + } + return 0; +out_release: + release_rx_pools(adapter); +out: + /* We failed to allocate one or more LTBs or map them on the VIOS. + * Hold onto the pools and any LTBs that we did allocate/map. + */ + return rc; +} + +static void release_vpd_data(struct ibmvnic_adapter *adapter) +{ + if (!adapter->vpd) + return; + + kfree(adapter->vpd->buff); + kfree(adapter->vpd); + + adapter->vpd = NULL; +} + +static void release_one_tx_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_tx_pool *tx_pool) +{ + kfree(tx_pool->tx_buff); + kfree(tx_pool->free_map); + free_ltb_set(adapter, &tx_pool->ltb_set); +} + +/** + * release_tx_pools() - Release any tx pools attached to @adapter. + * @adapter: ibmvnic adapter + * + * Safe to call this multiple times - even if no pools are attached. + */ +static void release_tx_pools(struct ibmvnic_adapter *adapter) +{ + int i; + + /* init_tx_pools() ensures that ->tx_pool and ->tso_pool are + * both NULL or both non-NULL. So we only need to check one. 
+ */ + if (!adapter->tx_pool) + return; + + for (i = 0; i < adapter->num_active_tx_pools; i++) { + release_one_tx_pool(adapter, &adapter->tx_pool[i]); + release_one_tx_pool(adapter, &adapter->tso_pool[i]); + } + + kfree(adapter->tx_pool); + adapter->tx_pool = NULL; + kfree(adapter->tso_pool); + adapter->tso_pool = NULL; + adapter->num_active_tx_pools = 0; + adapter->prev_tx_pool_size = 0; +} + +static int init_one_tx_pool(struct net_device *netdev, + struct ibmvnic_tx_pool *tx_pool, + int pool_size, int buf_size) +{ + int i; + + tx_pool->tx_buff = kcalloc(pool_size, + sizeof(struct ibmvnic_tx_buff), + GFP_KERNEL); + if (!tx_pool->tx_buff) + return -ENOMEM; + + tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL); + if (!tx_pool->free_map) { + kfree(tx_pool->tx_buff); + tx_pool->tx_buff = NULL; + return -ENOMEM; + } + + for (i = 0; i < pool_size; i++) + tx_pool->free_map[i] = i; + + tx_pool->consumer_index = 0; + tx_pool->producer_index = 0; + tx_pool->num_buffers = pool_size; + tx_pool->buf_size = buf_size; + + return 0; +} + +/** + * reuse_tx_pools() - Check if the existing tx pools can be reused. + * @adapter: ibmvnic adapter + * + * Check if the existing tx pools in the adapter can be reused. The + * pools can be reused if the pool parameters (number of pools, + * number of buffers in the pool and mtu) have not changed. + * + * NOTE: This assumes that all pools have the same number of buffers + * which is the case currently. If that changes, we must fix this. + * + * Return: true if the tx pools can be reused, false otherwise. 
+ */ +static bool reuse_tx_pools(struct ibmvnic_adapter *adapter) +{ + u64 old_num_pools, new_num_pools; + u64 old_pool_size, new_pool_size; + u64 old_mtu, new_mtu; + + if (!adapter->tx_pool) + return false; + + old_num_pools = adapter->num_active_tx_pools; + new_num_pools = adapter->num_active_tx_scrqs; + old_pool_size = adapter->prev_tx_pool_size; + new_pool_size = adapter->req_tx_entries_per_subcrq; + old_mtu = adapter->prev_mtu; + new_mtu = adapter->req_mtu; + + if (old_mtu != new_mtu || + old_num_pools != new_num_pools || + old_pool_size != new_pool_size) + return false; + + return true; +} + +/** + * init_tx_pools(): Initialize the set of transmit pools in the adapter. + * @netdev: net device associated with the vnic interface + * + * Initialize the set of transmit pools in the ibmvnic adapter associated + * with the net_device @netdev. If possible, reuse the existing tx pools. + * Otherwise free any existing pools and allocate a new set of pools + * before initializing them. + * + * Return: 0 on success and negative value on error. + */ +static int init_tx_pools(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct device *dev = &adapter->vdev->dev; + int num_pools; + u64 pool_size; /* # of buffers in pool */ + u64 buff_size; + int i, j, rc; + + num_pools = adapter->req_tx_queues; + + /* We must notify the VIOS about the LTB on all resets - but we only + * need to alloc/populate pools if either the number of buffers or + * size of each buffer in the pool has changed. + */ + if (reuse_tx_pools(adapter)) { + netdev_dbg(netdev, "Reusing tx pools\n"); + goto update_ltb; + } + + /* Allocate/populate the pools. 
*/ + release_tx_pools(adapter); + + pool_size = adapter->req_tx_entries_per_subcrq; + num_pools = adapter->num_active_tx_scrqs; + + adapter->tx_pool = kcalloc(num_pools, + sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); + if (!adapter->tx_pool) + return -ENOMEM; + + adapter->tso_pool = kcalloc(num_pools, + sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); + /* To simplify release_tx_pools() ensure that ->tx_pool and + * ->tso_pool are either both NULL or both non-NULL. + */ + if (!adapter->tso_pool) { + kfree(adapter->tx_pool); + adapter->tx_pool = NULL; + return -ENOMEM; + } + + /* Set num_active_tx_pools early. If we fail below after partial + * allocation, release_tx_pools() will know how many to look for. + */ + adapter->num_active_tx_pools = num_pools; + + buff_size = adapter->req_mtu + VLAN_HLEN; + buff_size = ALIGN(buff_size, L1_CACHE_BYTES); + + for (i = 0; i < num_pools; i++) { + dev_dbg(dev, "Init tx pool %d [%llu, %llu]\n", + i, adapter->req_tx_entries_per_subcrq, buff_size); + + rc = init_one_tx_pool(netdev, &adapter->tx_pool[i], + pool_size, buff_size); + if (rc) + goto out_release; + + rc = init_one_tx_pool(netdev, &adapter->tso_pool[i], + IBMVNIC_TSO_BUFS, + IBMVNIC_TSO_BUF_SZ); + if (rc) + goto out_release; + } + + adapter->prev_tx_pool_size = pool_size; + adapter->prev_mtu = adapter->req_mtu; + +update_ltb: + /* NOTE: All tx_pools have the same number of buffers (which is + * same as pool_size). All tso_pools have IBMVNIC_TSO_BUFS + * buffers (see calls init_one_tx_pool() for these). + * For consistency, we use tx_pool->num_buffers and + * tso_pool->num_buffers below. 
+ */ + rc = -1; + for (i = 0; i < num_pools; i++) { + struct ibmvnic_tx_pool *tso_pool; + struct ibmvnic_tx_pool *tx_pool; + + tx_pool = &adapter->tx_pool[i]; + + dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n", + i, tx_pool->num_buffers, tx_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tx_pool->ltb_set, + tx_pool->num_buffers, tx_pool->buf_size); + if (rc) + goto out; + + tx_pool->consumer_index = 0; + tx_pool->producer_index = 0; + + for (j = 0; j < tx_pool->num_buffers; j++) + tx_pool->free_map[j] = j; + + tso_pool = &adapter->tso_pool[i]; + + dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n", + i, tso_pool->num_buffers, tso_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tso_pool->ltb_set, + tso_pool->num_buffers, tso_pool->buf_size); + if (rc) + goto out; + + tso_pool->consumer_index = 0; + tso_pool->producer_index = 0; + + for (j = 0; j < tso_pool->num_buffers; j++) + tso_pool->free_map[j] = j; + } + + return 0; +out_release: + release_tx_pools(adapter); +out: + /* We failed to allocate one or more LTBs or map them on the VIOS. + * Hold onto the pools and any LTBs that we did allocate/map. 
+ */ + return rc; +} + +static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter) +{ + int i; + + if (adapter->napi_enabled) + return; + + for (i = 0; i < adapter->req_rx_queues; i++) + napi_enable(&adapter->napi[i]); + + adapter->napi_enabled = true; +} + +static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter) +{ + int i; + + if (!adapter->napi_enabled) + return; + + for (i = 0; i < adapter->req_rx_queues; i++) { + netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i); + napi_disable(&adapter->napi[i]); + } + + adapter->napi_enabled = false; +} + +static int init_napi(struct ibmvnic_adapter *adapter) +{ + int i; + + adapter->napi = kcalloc(adapter->req_rx_queues, + sizeof(struct napi_struct), GFP_KERNEL); + if (!adapter->napi) + return -ENOMEM; + + for (i = 0; i < adapter->req_rx_queues; i++) { + netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i); + netif_napi_add(adapter->netdev, &adapter->napi[i], + ibmvnic_poll); + } + + adapter->num_active_rx_napi = adapter->req_rx_queues; + return 0; +} + +static void release_napi(struct ibmvnic_adapter *adapter) +{ + int i; + + if (!adapter->napi) + return; + + for (i = 0; i < adapter->num_active_rx_napi; i++) { + netdev_dbg(adapter->netdev, "Releasing napi[%d]\n", i); + netif_napi_del(&adapter->napi[i]); + } + + kfree(adapter->napi); + adapter->napi = NULL; + adapter->num_active_rx_napi = 0; + adapter->napi_enabled = false; +} + +static const char *adapter_state_to_string(enum vnic_state state) +{ + switch (state) { + case VNIC_PROBING: + return "PROBING"; + case VNIC_PROBED: + return "PROBED"; + case VNIC_OPENING: + return "OPENING"; + case VNIC_OPEN: + return "OPEN"; + case VNIC_CLOSING: + return "CLOSING"; + case VNIC_CLOSED: + return "CLOSED"; + case VNIC_REMOVING: + return "REMOVING"; + case VNIC_REMOVED: + return "REMOVED"; + case VNIC_DOWN: + return "DOWN"; + } + return "UNKNOWN"; +} + +static int ibmvnic_login(struct net_device *netdev) +{ + unsigned long flags, timeout = msecs_to_jiffies(20000); 
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int retry_count = 0; + int retries = 10; + bool retry; + int rc; + + do { + retry = false; + if (retry_count > retries) { + netdev_warn(netdev, "Login attempts exceeded\n"); + return -EACCES; + } + + adapter->init_done_rc = 0; + reinit_completion(&adapter->init_done); + rc = send_login(adapter); + if (rc) + return rc; + + if (!wait_for_completion_timeout(&adapter->init_done, + timeout)) { + netdev_warn(netdev, "Login timed out\n"); + adapter->login_pending = false; + goto partial_reset; + } + + if (adapter->init_done_rc == ABORTED) { + netdev_warn(netdev, "Login aborted, retrying...\n"); + retry = true; + adapter->init_done_rc = 0; + retry_count++; + /* FW or device may be busy, so + * wait a bit before retrying login + */ + msleep(500); + } else if (adapter->init_done_rc == PARTIALSUCCESS) { + retry_count++; + release_sub_crqs(adapter, 1); + + retry = true; + netdev_dbg(netdev, + "Received partial success, retrying...\n"); + adapter->init_done_rc = 0; + reinit_completion(&adapter->init_done); + send_query_cap(adapter); + if (!wait_for_completion_timeout(&adapter->init_done, + timeout)) { + netdev_warn(netdev, + "Capabilities query timed out\n"); + return -ETIMEDOUT; + } + + rc = init_sub_crqs(adapter); + if (rc) { + netdev_warn(netdev, + "SCRQ initialization failed\n"); + return rc; + } + + rc = init_sub_crq_irqs(adapter); + if (rc) { + netdev_warn(netdev, + "SCRQ irq initialization failed\n"); + return rc; + } + /* Default/timeout error handling, reset and start fresh */ + } else if (adapter->init_done_rc) { + netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n", + adapter->init_done_rc); + +partial_reset: + /* adapter login failed, so free any CRQs or sub-CRQs + * and register again before attempting to login again. 
+ * If we don't do this then the VIOS may think that + * we are already logged in and reject any subsequent + * attempts + */ + netdev_warn(netdev, + "Freeing and re-registering CRQs before attempting to login again\n"); + retry = true; + adapter->init_done_rc = 0; + release_sub_crqs(adapter, true); + /* Much of this is similar logic as ibmvnic_probe(), + * we are essentially re-initializing communication + * with the server. We really should not run any + * resets/failovers here because this is already a form + * of reset and we do not want parallel resets occurring + */ + do { + reinit_init_done(adapter); + /* Clear any failovers we got in the previous + * pass since we are re-initializing the CRQ + */ + adapter->failover_pending = false; + release_crq_queue(adapter); + /* If we don't sleep here then we risk an + * unnecessary failover event from the VIOS. + * This is a known VIOS issue caused by a vnic + * device freeing and registering a CRQ too + * quickly. + */ + msleep(1500); + /* Avoid any resets, since we are currently + * resetting. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, + flags); + + rc = init_crq_queue(adapter); + if (rc) { + netdev_err(netdev, "login recovery: init CRQ failed %d\n", + rc); + return -EIO; + } + + rc = ibmvnic_reset_init(adapter, false); + if (rc) + netdev_err(netdev, "login recovery: Reset init failed %d\n", + rc); + /* IBMVNIC_CRQ_INIT will return EAGAIN if it + * fails, since ibmvnic_reset_init will free + * irq's in failure, we won't be able to receive + * new CRQs so we need to keep trying. probe() + * handles this similarly. 
+ */ + } while (rc == -EAGAIN && retry_count++ < retries); + } + } while (retry); + + __ibmvnic_set_mac(netdev, adapter->mac_addr); + + netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state)); + return 0; +} + +static void release_login_buffer(struct ibmvnic_adapter *adapter) +{ + if (!adapter->login_buf) + return; + + dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token, + adapter->login_buf_sz, DMA_TO_DEVICE); + kfree(adapter->login_buf); + adapter->login_buf = NULL; +} + +static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter) +{ + if (!adapter->login_rsp_buf) + return; + + dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token, + adapter->login_rsp_buf_sz, DMA_FROM_DEVICE); + kfree(adapter->login_rsp_buf); + adapter->login_rsp_buf = NULL; +} + +static void release_resources(struct ibmvnic_adapter *adapter) +{ + release_vpd_data(adapter); + + release_napi(adapter); + release_login_buffer(adapter); + release_login_rsp_buffer(adapter); +} + +static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) +{ + struct net_device *netdev = adapter->netdev; + unsigned long timeout = msecs_to_jiffies(20000); + union ibmvnic_crq crq; + bool resend; + int rc; + + netdev_dbg(netdev, "setting link state %d\n", link_state); + + memset(&crq, 0, sizeof(crq)); + crq.logical_link_state.first = IBMVNIC_CRQ_CMD; + crq.logical_link_state.cmd = LOGICAL_LINK_STATE; + crq.logical_link_state.link_state = link_state; + + do { + resend = false; + + reinit_completion(&adapter->init_done); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + netdev_err(netdev, "Failed to set link state\n"); + return rc; + } + + if (!wait_for_completion_timeout(&adapter->init_done, + timeout)) { + netdev_err(netdev, "timeout setting link state\n"); + return -ETIMEDOUT; + } + + if (adapter->init_done_rc == PARTIALSUCCESS) { + /* Partuial success, delay and re-send */ + mdelay(1000); + resend = true; + } else if 
(adapter->init_done_rc) { + netdev_warn(netdev, "Unable to set link state, rc=%d\n", + adapter->init_done_rc); + return adapter->init_done_rc; + } + } while (resend); + + return 0; +} + +static int set_real_num_queues(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; + + netdev_dbg(netdev, "Setting real tx/rx queues (%llx/%llx)\n", + adapter->req_tx_queues, adapter->req_rx_queues); + + rc = netif_set_real_num_tx_queues(netdev, adapter->req_tx_queues); + if (rc) { + netdev_err(netdev, "failed to set the number of tx queues\n"); + return rc; + } + + rc = netif_set_real_num_rx_queues(netdev, adapter->req_rx_queues); + if (rc) + netdev_err(netdev, "failed to set the number of rx queues\n"); + + return rc; +} + +static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + int len = 0; + int rc; + + if (adapter->vpd->buff) + len = adapter->vpd->len; + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + crq.get_vpd_size.first = IBMVNIC_CRQ_CMD; + crq.get_vpd_size.cmd = GET_VPD_SIZE; + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + mutex_unlock(&adapter->fw_lock); + return rc; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc); + mutex_unlock(&adapter->fw_lock); + return rc; + } + mutex_unlock(&adapter->fw_lock); + + if (!adapter->vpd->len) + return -ENODATA; + + if (!adapter->vpd->buff) + adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL); + else if (adapter->vpd->len != len) + adapter->vpd->buff = + krealloc(adapter->vpd->buff, + adapter->vpd->len, GFP_KERNEL); + + if (!adapter->vpd->buff) { + dev_err(dev, "Could allocate VPD buffer\n"); + return -ENOMEM; + } + + adapter->vpd->dma_addr = + dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len, + DMA_FROM_DEVICE); + if 
(dma_mapping_error(dev, adapter->vpd->dma_addr)) { + dev_err(dev, "Could not map VPD buffer\n"); + kfree(adapter->vpd->buff); + adapter->vpd->buff = NULL; + return -ENOMEM; + } + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + crq.get_vpd.first = IBMVNIC_CRQ_CMD; + crq.get_vpd.cmd = GET_VPD; + crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr); + crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + kfree(adapter->vpd->buff); + adapter->vpd->buff = NULL; + mutex_unlock(&adapter->fw_lock); + return rc; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc); + kfree(adapter->vpd->buff); + adapter->vpd->buff = NULL; + mutex_unlock(&adapter->fw_lock); + return rc; + } + + mutex_unlock(&adapter->fw_lock); + return 0; +} + +static int init_resources(struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + rc = set_real_num_queues(netdev); + if (rc) + return rc; + + adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL); + if (!adapter->vpd) + return -ENOMEM; + + /* Vital Product Data (VPD) */ + rc = ibmvnic_get_vpd(adapter); + if (rc) { + netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); + return rc; + } + + rc = init_napi(adapter); + if (rc) + return rc; + + send_query_map(adapter); + + rc = init_rx_pools(netdev); + if (rc) + return rc; + + rc = init_tx_pools(netdev); + return rc; +} + +static int __ibmvnic_open(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + enum vnic_state prev_state = adapter->state; + int i, rc; + + adapter->state = VNIC_OPENING; + replenish_pools(adapter); + ibmvnic_napi_enable(adapter); + + /* We're ready to receive frames, enable the sub-crq interrupts and + * set the logical link state to up + */ + for (i = 0; i < adapter->req_rx_queues; 
i++) { + netdev_dbg(netdev, "Enabling rx_scrq[%d] irq\n", i); + if (prev_state == VNIC_CLOSED) + enable_irq(adapter->rx_scrq[i]->irq); + enable_scrq_irq(adapter, adapter->rx_scrq[i]); + } + + for (i = 0; i < adapter->req_tx_queues; i++) { + netdev_dbg(netdev, "Enabling tx_scrq[%d] irq\n", i); + if (prev_state == VNIC_CLOSED) + enable_irq(adapter->tx_scrq[i]->irq); + enable_scrq_irq(adapter, adapter->tx_scrq[i]); + /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL + * resets, don't reset the stats because there could be batched + * skb's waiting to be sent. If we reset dql stats, we risk + * num_completed being greater than num_queued. This will cause + * a BUG_ON in dql_completed(). + */ + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); + } + + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); + if (rc) { + ibmvnic_napi_disable(adapter); + ibmvnic_disable_irqs(adapter); + return rc; + } + + adapter->tx_queues_active = true; + + /* Since queues were stopped until now, there shouldn't be any + * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we + * don't need the synchronize_rcu()? Leaving it for consistency + * with setting ->tx_queues_active = false. + */ + synchronize_rcu(); + + netif_tx_start_all_queues(netdev); + + if (prev_state == VNIC_CLOSED) { + for (i = 0; i < adapter->req_rx_queues; i++) + napi_schedule(&adapter->napi[i]); + } + + adapter->state = VNIC_OPEN; + return rc; +} + +static int ibmvnic_open(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; + + ASSERT_RTNL(); + + /* If device failover is pending or we are about to reset, just set + * device state and return. Device operation will be handled by reset + * routine. + * + * It should be safe to overwrite the adapter->state here. Since + * we hold the rtnl, either the reset has not actually started or + * the rtnl got dropped during the set_link_state() in do_reset(). 
+ * In the former case, no one else is changing the state (again we + * have the rtnl) and in the latter case, do_reset() will detect and + * honor our setting below. + */ + if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { + netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending); + adapter->state = VNIC_OPEN; + rc = 0; + goto out; + } + + if (adapter->state != VNIC_CLOSED) { + rc = ibmvnic_login(netdev); + if (rc) + goto out; + + rc = init_resources(adapter); + if (rc) { + netdev_err(netdev, "failed to initialize resources\n"); + goto out; + } + } + + rc = __ibmvnic_open(netdev); + +out: + /* If open failed and there is a pending failover or in-progress reset, + * set device state and return. Device operation will be handled by + * reset routine. See also comments above regarding rtnl. + */ + if (rc && + (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) { + adapter->state = VNIC_OPEN; + rc = 0; + } + + if (rc) { + release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); + } + + return rc; +} + +static void clean_rx_pools(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_rx_pool *rx_pool; + struct ibmvnic_rx_buff *rx_buff; + u64 rx_entries; + int rx_scrqs; + int i, j; + + if (!adapter->rx_pool) + return; + + rx_scrqs = adapter->num_active_rx_pools; + rx_entries = adapter->req_rx_add_entries_per_subcrq; + + /* Free any remaining skbs in the rx buffer pools */ + for (i = 0; i < rx_scrqs; i++) { + rx_pool = &adapter->rx_pool[i]; + if (!rx_pool || !rx_pool->rx_buff) + continue; + + netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i); + for (j = 0; j < rx_entries; j++) { + rx_buff = &rx_pool->rx_buff[j]; + if (rx_buff && rx_buff->skb) { + dev_kfree_skb_any(rx_buff->skb); + rx_buff->skb = NULL; + } + } + } +} + +static void clean_one_tx_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_tx_pool *tx_pool) +{ + 
struct ibmvnic_tx_buff *tx_buff; + u64 tx_entries; + int i; + + if (!tx_pool || !tx_pool->tx_buff) + return; + + tx_entries = tx_pool->num_buffers; + + for (i = 0; i < tx_entries; i++) { + tx_buff = &tx_pool->tx_buff[i]; + if (tx_buff && tx_buff->skb) { + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + } + } +} + +static void clean_tx_pools(struct ibmvnic_adapter *adapter) +{ + int tx_scrqs; + int i; + + if (!adapter->tx_pool || !adapter->tso_pool) + return; + + tx_scrqs = adapter->num_active_tx_pools; + + /* Free any remaining skbs in the tx buffer pools */ + for (i = 0; i < tx_scrqs; i++) { + netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i); + clean_one_tx_pool(adapter, &adapter->tx_pool[i]); + clean_one_tx_pool(adapter, &adapter->tso_pool[i]); + } +} + +static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i; + + if (adapter->tx_scrq) { + for (i = 0; i < adapter->req_tx_queues; i++) + if (adapter->tx_scrq[i]->irq) { + netdev_dbg(netdev, + "Disabling tx_scrq[%d] irq\n", i); + disable_scrq_irq(adapter, adapter->tx_scrq[i]); + disable_irq(adapter->tx_scrq[i]->irq); + } + } + + if (adapter->rx_scrq) { + for (i = 0; i < adapter->req_rx_queues; i++) { + if (adapter->rx_scrq[i]->irq) { + netdev_dbg(netdev, + "Disabling rx_scrq[%d] irq\n", i); + disable_scrq_irq(adapter, adapter->rx_scrq[i]); + disable_irq(adapter->rx_scrq[i]->irq); + } + } + } +} + +static void ibmvnic_cleanup(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + /* ensure that transmissions are stopped if called by do_reset */ + + adapter->tx_queues_active = false; + + /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active + * update so they don't restart a queue after we stop it below. 
+ */ + synchronize_rcu(); + + if (test_bit(0, &adapter->resetting)) + netif_tx_disable(netdev); + else + netif_tx_stop_all_queues(netdev); + + ibmvnic_napi_disable(adapter); + ibmvnic_disable_irqs(adapter); +} + +static int __ibmvnic_close(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + adapter->state = VNIC_CLOSING; + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); + adapter->state = VNIC_CLOSED; + return rc; +} + +static int ibmvnic_close(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; + + netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + adapter->force_reset_recovery); + + /* If device failover is pending, just set device state and return. + * Device operation will be handled by reset routine. + */ + if (adapter->failover_pending) { + adapter->state = VNIC_CLOSED; + return 0; + } + + rc = __ibmvnic_close(netdev); + ibmvnic_cleanup(netdev); + clean_rx_pools(adapter); + clean_tx_pools(adapter); + + return rc; +} + +/** + * get_hdr_lens - fills list of L2/L3/L4 hdr lens + * @hdr_field: bitfield determining needed headers + * @skb: socket buffer + * @hdr_len: array of header lengths to be filled + * + * Reads hdr_field to determine which headers are needed by firmware. + * Builds a buffer containing these headers. Saves individual header + * lengths and total buffer length to be used to build descriptors. 
+ * + * Return: total len of all headers + */ +static int get_hdr_lens(u8 hdr_field, struct sk_buff *skb, + int *hdr_len) +{ + int len = 0; + + + if ((hdr_field >> 6) & 1) { + hdr_len[0] = skb_mac_header_len(skb); + len += hdr_len[0]; + } + + if ((hdr_field >> 5) & 1) { + hdr_len[1] = skb_network_header_len(skb); + len += hdr_len[1]; + } + + if (!((hdr_field >> 4) & 1)) + return len; + + if (skb->protocol == htons(ETH_P_IP)) { + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + hdr_len[2] = tcp_hdrlen(skb); + else if (ip_hdr(skb)->protocol == IPPROTO_UDP) + hdr_len[2] = sizeof(struct udphdr); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) + hdr_len[2] = tcp_hdrlen(skb); + else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) + hdr_len[2] = sizeof(struct udphdr); + } + + return len + hdr_len[2]; +} + +/** + * create_hdr_descs - create header and header extension descriptors + * @hdr_field: bitfield determining needed headers + * @hdr_data: buffer containing header data + * @len: length of data buffer + * @hdr_len: array of individual header lengths + * @scrq_arr: descriptor array + * + * Creates header and, if needed, header extension descriptors and + * places them in a descriptor array, scrq_arr + * + * Return: Number of header descs + */ + +static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, + union sub_crq *scrq_arr) +{ + union sub_crq *hdr_desc; + int tmp_len = len; + int num_descs = 0; + u8 *data, *cur; + int tmp; + + while (tmp_len > 0) { + cur = hdr_data + len - tmp_len; + + hdr_desc = &scrq_arr[num_descs]; + if (num_descs) { + data = hdr_desc->hdr_ext.data; + tmp = tmp_len > 29 ? 29 : tmp_len; + hdr_desc->hdr_ext.first = IBMVNIC_CRQ_CMD; + hdr_desc->hdr_ext.type = IBMVNIC_HDR_EXT_DESC; + hdr_desc->hdr_ext.len = tmp; + } else { + data = hdr_desc->hdr.data; + tmp = tmp_len > 24 ? 
24 : tmp_len; + hdr_desc->hdr.first = IBMVNIC_CRQ_CMD; + hdr_desc->hdr.type = IBMVNIC_HDR_DESC; + hdr_desc->hdr.len = tmp; + hdr_desc->hdr.l2_len = (u8)hdr_len[0]; + hdr_desc->hdr.l3_len = cpu_to_be16((u16)hdr_len[1]); + hdr_desc->hdr.l4_len = (u8)hdr_len[2]; + hdr_desc->hdr.flag = hdr_field << 1; + } + memcpy(data, cur, tmp); + tmp_len -= tmp; + num_descs++; + } + + return num_descs; +} + +/** + * build_hdr_descs_arr - build a header descriptor array + * @skb: tx socket buffer + * @indir_arr: indirect array + * @num_entries: number of descriptors to be sent + * @hdr_field: bit field determining which headers will be sent + * + * This function will build a TX descriptor array with applicable + * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. + */ + +static void build_hdr_descs_arr(struct sk_buff *skb, + union sub_crq *indir_arr, + int *num_entries, u8 hdr_field) +{ + int hdr_len[3] = {0, 0, 0}; + int tot_len; + + tot_len = get_hdr_lens(hdr_field, skb, hdr_len); + *num_entries += create_hdr_descs(hdr_field, skb_mac_header(skb), + tot_len, hdr_len, indir_arr + 1); +} + +static int ibmvnic_xmit_workarounds(struct sk_buff *skb, + struct net_device *netdev) +{ + /* For some backing devices, mishandling of small packets + * can result in a loss of connection or TX stall. 
Device + * architects recommend that no packet should be smaller + * than the minimum MTU value provided to the driver, so + * pad any packets to that length + */ + if (skb->len < netdev->min_mtu) + return skb_put_padto(skb, netdev->min_mtu); + + return 0; +} + +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff; + struct ibmvnic_tx_pool *tx_pool; + union sub_crq tx_scrq_entry; + int queue_num; + int entries; + int index; + int i; + + ind_bufp = &tx_scrq->ind_buf; + entries = (u64)ind_bufp->index; + queue_num = tx_scrq->pool_index; + + for (i = entries - 1; i >= 0; --i) { + tx_scrq_entry = ind_bufp->indir_arr[i]; + if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC) + continue; + index = be32_to_cpu(tx_scrq_entry.v1.correlator); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[queue_num]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[queue_num]; + } + tx_pool->free_map[tx_pool->consumer_index] = index; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? 
+ tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_buff = &tx_pool->tx_buff[index]; + adapter->tx_stats_buffers[queue_num].batched_packets--; + adapter->tx_stats_buffers[queue_num].bytes -= + tx_buff->skb->len; + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + adapter->netdev->stats.tx_dropped++; + } + + ind_bufp->index = 0; + + if (atomic_sub_return(entries, &tx_scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + rcu_read_lock(); + + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } + + rcu_read_unlock(); + } +} + +static int send_subcrq_direct(struct ibmvnic_adapter *adapter, + u64 remote_handle, u64 *entry) +{ + unsigned int ua = adapter->vdev->unit_address; + struct device *dev = &adapter->vdev->dev; + int rc; + + /* Make sure the hypervisor sees the complete request */ + dma_wmb(); + rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua, + cpu_to_be64(remote_handle), + cpu_to_be64(entry[0]), cpu_to_be64(entry[1]), + cpu_to_be64(entry[2]), cpu_to_be64(entry[3])); + + if (rc) + print_subcrq_error(dev, rc, __func__); + + return rc; +} + +static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq, + bool indirect) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + u64 dma_addr; + u64 entries; + u64 handle; + int rc; + + ind_bufp = &tx_scrq->ind_buf; + dma_addr = (u64)ind_bufp->indir_dma; + entries = (u64)ind_bufp->index; + handle = tx_scrq->handle; + + if (!entries) + return 0; + + if (indirect) + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + else + rc = send_subcrq_direct(adapter, handle, + (u64 *)ind_bufp->indir_arr); + + if (rc) { + dev_err_ratelimited(&adapter->vdev->dev, + "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n", + rc, entries, &dma_addr, handle); + /* Detect platform limit H_PARAMETER */ + 
if (rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ + ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + } else { + ind_bufp->index = 0; + } + return rc; +} + +static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + u32 cur_max_ind_descs = adapter->cur_max_ind_descs; + int queue_num = skb_get_queue_mapping(skb); + u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff = NULL; + struct ibmvnic_sub_crq_queue *tx_scrq; + struct ibmvnic_long_term_buff *ltb; + struct ibmvnic_tx_pool *tx_pool; + unsigned int tx_send_failed = 0; + netdev_tx_t ret = NETDEV_TX_OK; + unsigned int tx_map_failed = 0; + union sub_crq indir_arr[16]; + unsigned int tx_dropped = 0; + unsigned int tx_dpackets = 0; + unsigned int tx_bpackets = 0; + unsigned int tx_bytes = 0; + dma_addr_t data_dma_addr; + struct netdev_queue *txq; + unsigned long lpar_rc; + unsigned int skblen; + union sub_crq tx_crq; + unsigned int offset; + bool use_scrq_send_direct = false; + int num_entries = 1; + unsigned char *dst; + int bufidx = 0; + u8 proto = 0; + + /* If a reset is in progress, drop the packet since + * the scrqs may get torn down. Otherwise use the + * rcu to ensure reset waits for us to complete. 
+ */ + rcu_read_lock(); + if (!adapter->tx_queues_active) { + dev_kfree_skb_any(skb); + + tx_send_failed++; + tx_dropped++; + ret = NETDEV_TX_OK; + goto out; + } + + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; + + if (ibmvnic_xmit_workarounds(skb, netdev)) { + tx_dropped++; + tx_send_failed++; + ret = NETDEV_TX_OK; + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); + if (lpar_rc != H_SUCCESS) + goto tx_err; + goto out; + } + + if (skb_is_gso(skb)) + tx_pool = &adapter->tso_pool[queue_num]; + else + tx_pool = &adapter->tx_pool[queue_num]; + + bufidx = tx_pool->free_map[tx_pool->consumer_index]; + + if (bufidx == IBMVNIC_INVALID_MAP) { + dev_kfree_skb_any(skb); + tx_send_failed++; + tx_dropped++; + ret = NETDEV_TX_OK; + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); + if (lpar_rc != H_SUCCESS) + goto tx_err; + goto out; + } + + tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; + + /* Look up the long term buffer and byte offset that back bufidx; + * both are handed back through the last two arguments. + */ + map_txpool_buf_to_ltb(tx_pool, bufidx, &ltb, &offset); + + dst = ltb->buff + offset; + memset(dst, 0, tx_pool->buf_size); + data_dma_addr = ltb->addr + offset; + + /* if we are going to send_subcrq_direct this then we need to + * update the checksum before copying the data into ltb. Essentially + * these packets force disable CSO so that we can guarantee that + * FW does not need header info and we can send direct.
Also, vnic + * server must be able to xmit standard packets without header data + */ + if (*hdrs == 0 && !skb_is_gso(skb) && + !ind_bufp->index && !netdev_xmit_more()) { + use_scrq_send_direct = true; + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + use_scrq_send_direct = false; + } + + if (skb_shinfo(skb)->nr_frags) { + int cur, i; + + /* Copy the head */ + skb_copy_from_linear_data(skb, dst, skb_headlen(skb)); + cur = skb_headlen(skb); + + /* Copy the frags */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + memcpy(dst + cur, skb_frag_address(frag), + skb_frag_size(frag)); + cur += skb_frag_size(frag); + } + } else { + skb_copy_from_linear_data(skb, dst, skb->len); + } + + tx_pool->consumer_index = + (tx_pool->consumer_index + 1) % tx_pool->num_buffers; + + tx_buff = &tx_pool->tx_buff[bufidx]; + + /* Sanity checks on our free map to make sure it points to an index + * that is not being occupied by another skb. If skb memory is + * not freed then we see congestion control kick in and halt tx. + */ + if (unlikely(tx_buff->skb)) { + dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n", + skb_is_gso(skb) ? 
"tso_pool" : "tx_pool", + queue_num, bufidx); + dev_kfree_skb_any(tx_buff->skb); + } + + tx_buff->skb = skb; + tx_buff->index = bufidx; + tx_buff->pool_index = queue_num; + skblen = skb->len; + + memset(&tx_crq, 0, sizeof(tx_crq)); + tx_crq.v1.first = IBMVNIC_CRQ_CMD; + tx_crq.v1.type = IBMVNIC_TX_DESC; + tx_crq.v1.n_crq_elem = 1; + tx_crq.v1.n_sge = 1; + tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED; + + if (skb_is_gso(skb)) + tx_crq.v1.correlator = + cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK); + else + tx_crq.v1.correlator = cpu_to_be32(bufidx); + tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id); + tx_crq.v1.sge_len = cpu_to_be32(skb->len); + tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); + + if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) { + tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT; + tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci); + } + + if (skb->protocol == htons(ETH_P_IP)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4; + proto = ip_hdr(skb)->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6; + proto = ipv6_hdr(skb)->nexthdr; + } + + if (proto == IPPROTO_TCP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP; + else if (proto == IPPROTO_UDP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD; + hdrs += 2; + } + if (skb_is_gso(skb)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_LSO; + tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + hdrs += 2; + } else if (use_scrq_send_direct) { + /* See above comment, CSO disabled with direct xmit */ + tx_crq.v1.flags1 &= ~(IBMVNIC_TX_CHKSUM_OFFLOAD); + ind_bufp->index = 1; + tx_buff->num_entries = 1; + netdev_tx_sent_queue(txq, skb->len); + ind_bufp->indir_arr[0] = tx_crq; + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, false); + if (lpar_rc != H_SUCCESS) + goto tx_err; + + tx_dpackets++; + goto early_exit; + } + + if ((*hdrs >> 7) & 1) + build_hdr_descs_arr(skb, indir_arr, 
&num_entries, *hdrs); + + tx_crq.v1.n_crq_elem = num_entries; + tx_buff->num_entries = num_entries; + /* flush buffer if current entry can not fit */ + if (num_entries + ind_bufp->index > cur_max_ind_descs) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); + if (lpar_rc != H_SUCCESS) + goto tx_flush_err; + } + + indir_arr[0] = tx_crq; + memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], + num_entries * sizeof(struct ibmvnic_generic_scrq)); + + ind_bufp->index += num_entries; + if (__netdev_tx_sent_queue(txq, skb->len, + netdev_xmit_more() && + ind_bufp->index < cur_max_ind_descs)) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); + if (lpar_rc != H_SUCCESS) + goto tx_err; + } + + tx_bpackets++; + +early_exit: + if (atomic_add_return(num_entries, &tx_scrq->used) + >= adapter->req_tx_entries_per_subcrq) { + netdev_dbg(netdev, "Stopping queue %d\n", queue_num); + netif_stop_subqueue(netdev, queue_num); + } + + tx_bytes += skblen; + txq_trans_cond_update(txq); + ret = NETDEV_TX_OK; + goto out; + +tx_flush_err: + dev_kfree_skb_any(skb); + tx_buff->skb = NULL; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_dropped++; +tx_err: + if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) + dev_err_ratelimited(dev, "tx: send failed\n"); + + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable TX and report carrier off if queue is closed + * or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset or some other action. 
+ */ + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } +out: + rcu_read_unlock(); + adapter->tx_send_failed += tx_send_failed; + adapter->tx_map_failed += tx_map_failed; + adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets; + adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets; + adapter->tx_stats_buffers[queue_num].bytes += tx_bytes; + adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped; + + return ret; +} + +static void ibmvnic_set_multi(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct netdev_hw_addr *ha; + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.request_capability.first = IBMVNIC_CRQ_CMD; + crq.request_capability.cmd = REQUEST_CAPABILITY; + + if (netdev->flags & IFF_PROMISC) { + if (!adapter->promisc_supported) + return; + } else { + if (netdev->flags & IFF_ALLMULTI) { + /* Accept all multicast */ + memset(&crq, 0, sizeof(crq)); + crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; + crq.multicast_ctrl.cmd = MULTICAST_CTRL; + crq.multicast_ctrl.flags = IBMVNIC_ENABLE_ALL; + ibmvnic_send_crq(adapter, &crq); + } else if (netdev_mc_empty(netdev)) { + /* Reject all multicast */ + memset(&crq, 0, sizeof(crq)); + crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; + crq.multicast_ctrl.cmd = MULTICAST_CTRL; + crq.multicast_ctrl.flags = IBMVNIC_DISABLE_ALL; + ibmvnic_send_crq(adapter, &crq); + } else { + /* Accept one or more multicast(s) */ + netdev_for_each_mc_addr(ha, netdev) { + memset(&crq, 0, sizeof(crq)); + crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; + crq.multicast_ctrl.cmd = MULTICAST_CTRL; + crq.multicast_ctrl.flags = IBMVNIC_ENABLE_MC; + ether_addr_copy(&crq.multicast_ctrl.mac_addr[0], + ha->addr); + ibmvnic_send_crq(adapter, &crq); + } + } + } +} + +static int __ibmvnic_set_mac(struct net_device *netdev, u8 *dev_addr) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + union ibmvnic_crq crq; + int rc; + + if 
(!is_valid_ether_addr(dev_addr)) { + rc = -EADDRNOTAVAIL; + goto err; + } + + memset(&crq, 0, sizeof(crq)); + crq.change_mac_addr.first = IBMVNIC_CRQ_CMD; + crq.change_mac_addr.cmd = CHANGE_MAC_ADDR; + ether_addr_copy(&crq.change_mac_addr.mac_addr[0], dev_addr); + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + rc = -EIO; + mutex_unlock(&adapter->fw_lock); + goto err; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + /* netdev->dev_addr is changed in handle_change_mac_rsp function */ + if (rc || adapter->fw_done_rc) { + rc = -EIO; + mutex_unlock(&adapter->fw_lock); + goto err; + } + mutex_unlock(&adapter->fw_lock); + return 0; +err: + ether_addr_copy(adapter->mac_addr, netdev->dev_addr); + return rc; +} + +static int ibmvnic_set_mac(struct net_device *netdev, void *p) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct sockaddr *addr = p; + int rc; + + rc = 0; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + ether_addr_copy(adapter->mac_addr, addr->sa_data); + if (adapter->state != VNIC_PROBED) + rc = __ibmvnic_set_mac(netdev, addr->sa_data); + + return rc; +} + +static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) +{ + switch (reason) { + case VNIC_RESET_FAILOVER: + return "FAILOVER"; + case VNIC_RESET_MOBILITY: + return "MOBILITY"; + case VNIC_RESET_FATAL: + return "FATAL"; + case VNIC_RESET_NON_FATAL: + return "NON_FATAL"; + case VNIC_RESET_TIMEOUT: + return "TIMEOUT"; + case VNIC_RESET_CHANGE_PARAM: + return "CHANGE_PARAM"; + case VNIC_RESET_PASSIVE_INIT: + return "PASSIVE_INIT"; + } + return "UNKNOWN"; +} + +/* + * Initialize the init_done completion and return code values. We + * can get a transport event just after registering the CRQ and the + * tasklet will use this to communicate the transport event. 
To ensure + * we don't miss the notification/error, initialize these _before_ + * regisering the CRQ. + */ +static inline void reinit_init_done(struct ibmvnic_adapter *adapter) +{ + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; +} + +/* + * do_reset returns zero if we are able to keep processing reset events, or + * non-zero if we hit a fatal error and must halt. + */ +static int do_reset(struct ibmvnic_adapter *adapter, + struct ibmvnic_rwi *rwi, u32 reset_state) +{ + struct net_device *netdev = adapter->netdev; + u64 old_num_rx_queues, old_num_tx_queues; + u64 old_num_rx_slots, old_num_tx_slots; + int rc; + + netdev_dbg(adapter->netdev, + "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + reset_reason_to_string(rwi->reset_reason), + adapter_state_to_string(reset_state)); + + adapter->reset_reason = rwi->reset_reason; + /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_lock(); + + /* Now that we have the rtnl lock, clear any pending failover. + * This will ensure ibmvnic_open() has either completed or will + * block until failover is complete. 
+ */ + if (rwi->reset_reason == VNIC_RESET_FAILOVER) + adapter->failover_pending = false; + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } + + netif_carrier_off(netdev); + + old_num_rx_queues = adapter->req_rx_queues; + old_num_tx_queues = adapter->req_tx_queues; + old_num_rx_slots = adapter->req_rx_add_entries_per_subcrq; + old_num_tx_slots = adapter->req_tx_entries_per_subcrq; + + ibmvnic_cleanup(netdev); + + if (reset_state == VNIC_OPEN && + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_FAILOVER) { + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = __ibmvnic_close(netdev); + if (rc) + goto out; + } else { + adapter->state = VNIC_CLOSING; + + /* Release the RTNL lock before link state change and + * re-acquire after the link state change to allow + * linkwatch_event to grab the RTNL lock and run during + * a reset. + */ + rtnl_unlock(); + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); + rtnl_lock(); + if (rc) + goto out; + + if (adapter->state == VNIC_OPEN) { + /* When we dropped rtnl, ibmvnic_open() got + * it and noticed that we are resetting and + * set the adapter state to OPEN. Update our + * new "target" state, and resume the reset + * from VNIC_CLOSING state. 
+ */ + netdev_dbg(netdev, + "Open changed state from %s, updating.\n", + adapter_state_to_string(reset_state)); + reset_state = VNIC_OPEN; + adapter->state = VNIC_CLOSING; + } + + if (adapter->state != VNIC_CLOSING) { + /* If someone else changed the adapter state + * when we dropped the rtnl, fail the reset + */ + rc = -EAGAIN; + goto out; + } + adapter->state = VNIC_CLOSED; + } + } + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + release_resources(adapter); + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); + } + + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) { + /* remove the closed state so when we call open it appears + * we are coming from the probed state. + */ + adapter->state = VNIC_PROBED; + + reinit_init_done(adapter); + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_crq_queue(adapter); + } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + rc = ibmvnic_reenable_crq_queue(adapter); + release_sub_crqs(adapter, 1); + } else { + rc = ibmvnic_reset_crq(adapter); + if (rc == H_CLOSED || rc == H_SUCCESS) { + rc = vio_enable_interrupts(adapter->vdev); + if (rc) + netdev_err(adapter->netdev, + "Reset failed to enable interrupts. rc=%d\n", + rc); + } + } + + if (rc) { + netdev_err(adapter->netdev, + "Reset couldn't initialize crq. rc=%d\n", rc); + goto out; + } + + rc = ibmvnic_reset_init(adapter, true); + if (rc) + goto out; + + /* If the adapter was in PROBE or DOWN state prior to the reset, + * exit here. 
+ */ + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) { + rc = 0; + goto out; + } + + rc = ibmvnic_login(netdev); + if (rc) + goto out; + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_resources(adapter); + if (rc) + goto out; + } else if (adapter->req_rx_queues != old_num_rx_queues || + adapter->req_tx_queues != old_num_tx_queues || + adapter->req_rx_add_entries_per_subcrq != + old_num_rx_slots || + adapter->req_tx_entries_per_subcrq != + old_num_tx_slots || + !adapter->rx_pool || + !adapter->tso_pool || + !adapter->tx_pool) { + release_napi(adapter); + release_vpd_data(adapter); + + rc = init_resources(adapter); + if (rc) + goto out; + + } else { + rc = init_tx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init tx pools failed (%d)\n", + rc); + goto out; + } + + rc = init_rx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init rx pools failed (%d)\n", + rc); + goto out; + } + } + ibmvnic_disable_irqs(adapter); + } + adapter->state = VNIC_CLOSED; + + if (reset_state == VNIC_CLOSED) { + rc = 0; + goto out; + } + + rc = __ibmvnic_open(netdev); + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } + + /* refresh device's multicast list */ + ibmvnic_set_multi(netdev); + + if (adapter->reset_reason == VNIC_RESET_FAILOVER || + adapter->reset_reason == VNIC_RESET_MOBILITY) + __netdev_notify_peers(netdev); + + rc = 0; + +out: + /* restore the adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_unlock(); + + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); + return rc; +} + +static int do_hard_reset(struct ibmvnic_adapter *adapter, + struct ibmvnic_rwi *rwi, u32 reset_state) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + netdev_dbg(adapter->netdev, "Hard 
resetting driver (%s)\n", + reset_reason_to_string(rwi->reset_reason)); + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } + + netif_carrier_off(netdev); + adapter->reset_reason = rwi->reset_reason; + + ibmvnic_cleanup(netdev); + release_resources(adapter); + release_sub_crqs(adapter, 0); + release_crq_queue(adapter); + + /* remove the closed state so when we call open it appears + * we are coming from the probed state. + */ + adapter->state = VNIC_PROBED; + + reinit_init_done(adapter); + + rc = init_crq_queue(adapter); + if (rc) { + netdev_err(adapter->netdev, + "Couldn't initialize crq. rc=%d\n", rc); + goto out; + } + + rc = ibmvnic_reset_init(adapter, false); + if (rc) + goto out; + + /* If the adapter was in PROBE or DOWN state prior to the reset, + * exit here. + */ + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) + goto out; + + rc = ibmvnic_login(netdev); + if (rc) + goto out; + + rc = init_resources(adapter); + if (rc) + goto out; + + ibmvnic_disable_irqs(adapter); + adapter->state = VNIC_CLOSED; + + if (reset_state == VNIC_CLOSED) + goto out; + + rc = __ibmvnic_open(netdev); + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } + + __netdev_notify_peers(netdev); +out: + /* restore adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); + return rc; +} + +static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_rwi *rwi; + unsigned long flags; + + spin_lock_irqsave(&adapter->rwi_lock, flags); + + if (!list_empty(&adapter->rwi_list)) { + rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi, + list); + list_del(&rwi->list); + } else { + rwi = NULL; + } + + 
spin_unlock_irqrestore(&adapter->rwi_lock, flags); + return rwi; +} + +/** + * do_passive_init - complete probing when partner device is detected. + * @adapter: ibmvnic_adapter struct + * + * If the ibmvnic device does not have a partner device to communicate with at boot + * and that partner device comes online at a later time, this function is called + * to complete the initialization process of ibmvnic device. + * Caller is expected to hold rtnl_lock(). + * + * Returns non-zero if sub-CRQs are not initialized properly leaving the device + * in the down state. + * Returns 0 upon success and the device is in PROBED state. + */ + +static int do_passive_init(struct ibmvnic_adapter *adapter) +{ + unsigned long timeout = msecs_to_jiffies(30000); + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + int rc; + + netdev_dbg(netdev, "Partner device found, probing.\n"); + + adapter->state = VNIC_PROBING; + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; + adapter->crq.active = true; + + rc = send_crq_init_complete(adapter); + if (rc) + goto out; + + rc = send_version_xchg(adapter); + if (rc) + netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc); + + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { + dev_err(dev, "Initialization sequence timed out\n"); + rc = -ETIMEDOUT; + goto out; + } + + rc = init_sub_crqs(adapter); + if (rc) { + dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc); + goto out; + } + + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, "Failed to initialize sub crq irqs\n, rc=%d", rc); + goto init_failed; + } + + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + + adapter->state = VNIC_PROBED; + netdev_dbg(netdev, "Probed successfully. 
Waiting for signal from partner device.\n"); + + return 0; + +init_failed: + release_sub_crqs(adapter, 1); +out: + adapter->state = VNIC_DOWN; + return rc; +} + +static void __ibmvnic_reset(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter; + unsigned int timeout = 5000; + struct ibmvnic_rwi *tmprwi; + bool saved_state = false; + struct ibmvnic_rwi *rwi; + unsigned long flags; + struct device *dev; + bool need_reset; + int num_fails = 0; + u32 reset_state; + int rc = 0; + + adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); + dev = &adapter->vdev->dev; + + /* Wait for ibmvnic_probe() to complete. If probe is taking too long + * or if another reset is in progress, defer work for now. If probe + * eventually fails it will flush and terminate our work. + * + * Three possibilities here: + * 1. Adpater being removed - just return + * 2. Timed out on probe or another reset in progress - delay the work + * 3. Completed probe - perform any resets in queue + */ + if (adapter->state == VNIC_PROBING && + !wait_for_completion_timeout(&adapter->probe_done, timeout)) { + dev_err(dev, "Reset thread timed out on probe"); + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + return; + } + + /* adapter is done with probe (i.e state is never VNIC_PROBING now) */ + if (adapter->state == VNIC_REMOVING) + return; + + /* ->rwi_list is stable now (no one else is removing entries) */ + + /* ibmvnic_probe() may have purged the reset queue after we were + * scheduled to process a reset so there maybe no resets to process. + * Before setting the ->resetting bit though, we have to make sure + * that there is infact a reset to process. 
Otherwise we may race + * with ibmvnic_open() and end up leaving the vnic down: + * + * __ibmvnic_reset() ibmvnic_open() + * ----------------- -------------- + * + * set ->resetting bit + * find ->resetting bit is set + * set ->state to IBMVNIC_OPEN (i.e + * assume reset will open device) + * return + * find reset queue empty + * return + * + * Neither performed vnic login/open and vnic stays down + * + * If we hold the lock and conditionally set the bit, either we + * or ibmvnic_open() will complete the open. + */ + need_reset = false; + spin_lock(&adapter->rwi_lock); + if (!list_empty(&adapter->rwi_list)) { + if (test_and_set_bit_lock(0, &adapter->resetting)) { + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + } else { + need_reset = true; + } + } + spin_unlock(&adapter->rwi_lock); + + if (!need_reset) + return; + + rwi = get_next_rwi(adapter); + while (rwi) { + spin_lock_irqsave(&adapter->state_lock, flags); + + if (adapter->state == VNIC_REMOVING || + adapter->state == VNIC_REMOVED) { + spin_unlock_irqrestore(&adapter->state_lock, flags); + kfree(rwi); + rc = EBUSY; + break; + } + + if (!saved_state) { + reset_state = adapter->state; + saved_state = true; + } + spin_unlock_irqrestore(&adapter->state_lock, flags); + + if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) { + rtnl_lock(); + rc = do_passive_init(adapter); + rtnl_unlock(); + if (!rc) + netif_carrier_on(adapter->netdev); + } else if (adapter->force_reset_recovery) { + /* Since we are doing a hard reset now, clear the + * failover_pending flag so we don't ignore any + * future MOBILITY or other resets. 
+ */ + adapter->failover_pending = false; + + /* Transport event occurred during previous reset */ + if (adapter->wait_for_reset) { + /* Previous was CHANGE_PARAM; caller locked */ + adapter->force_reset_recovery = false; + rc = do_hard_reset(adapter, rwi, reset_state); + } else { + rtnl_lock(); + adapter->force_reset_recovery = false; + rc = do_hard_reset(adapter, rwi, reset_state); + rtnl_unlock(); + } + if (rc) + num_fails++; + else + num_fails = 0; + + /* If auto-priority-failover is enabled we can get + * back to back failovers during resets, resulting + * in at least two failed resets (from high-priority + * backing device to low-priority one and then back) + * If resets continue to fail beyond that, give the + * adapter some time to settle down before retrying. + */ + if (num_fails >= 3) { + netdev_dbg(adapter->netdev, + "[S:%s] Hard reset failed %d times, waiting 60 secs\n", + adapter_state_to_string(adapter->state), + num_fails); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(60 * HZ); + } + } else { + rc = do_reset(adapter, rwi, reset_state); + } + tmprwi = rwi; + adapter->last_reset_time = jiffies; + + if (rc) + netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); + + rwi = get_next_rwi(adapter); + + /* + * If there are no resets queued and the previous reset failed, + * the adapter would be in an undefined state. So retry the + * previous reset as a hard reset. + * + * Else, free the previous rwi and, if there is another reset + * queued, process the new reset even if previous reset failed + * (the previous reset could have failed because of a fail + * over for instance, so process the fail over). 
+ */ + if (!rwi && rc) + rwi = tmprwi; + else + kfree(tmprwi); + + if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || + rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) + adapter->force_reset_recovery = true; + } + + if (adapter->wait_for_reset) { + adapter->reset_done_rc = rc; + complete(&adapter->reset_done); + } + + clear_bit_unlock(0, &adapter->resetting); + + netdev_dbg(adapter->netdev, + "[S:%s FRR:%d WFR:%d] Done processing resets\n", + adapter_state_to_string(adapter->state), + adapter->force_reset_recovery, + adapter->wait_for_reset); +} + +static void __ibmvnic_delayed_reset(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter; + + adapter = container_of(work, struct ibmvnic_adapter, + ibmvnic_delayed_reset.work); + __ibmvnic_reset(&adapter->ibmvnic_reset); +} + +static void flush_reset_queue(struct ibmvnic_adapter *adapter) +{ + struct list_head *entry, *tmp_entry; + + if (!list_empty(&adapter->rwi_list)) { + list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) { + list_del(entry); + kfree(list_entry(entry, struct ibmvnic_rwi, list)); + } + } +} + +static int ibmvnic_reset(struct ibmvnic_adapter *adapter, + enum ibmvnic_reset_reason reason) +{ + struct net_device *netdev = adapter->netdev; + struct ibmvnic_rwi *rwi, *tmp; + unsigned long flags; + int ret; + + spin_lock_irqsave(&adapter->rwi_lock, flags); + + /* If failover is pending don't schedule any other reset. + * Instead let the failover complete. If there is already a + * a failover reset scheduled, we will detect and drop the + * duplicate reset when walking the ->rwi_list below. 
+ */ + if (adapter->state == VNIC_REMOVING || + adapter->state == VNIC_REMOVED || + (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) { + ret = EBUSY; + netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n"); + goto err; + } + + list_for_each_entry(tmp, &adapter->rwi_list, list) { + if (tmp->reset_reason == reason) { + netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", + reset_reason_to_string(reason)); + ret = EBUSY; + goto err; + } + } + + rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); + if (!rwi) { + ret = ENOMEM; + goto err; + } + /* if we just received a transport event, + * flush reset queue and process this reset + */ + if (adapter->force_reset_recovery) + flush_reset_queue(adapter); + + rwi->reset_reason = reason; + list_add_tail(&rwi->list, &adapter->rwi_list); + netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n", + reset_reason_to_string(reason)); + queue_work(system_long_wq, &adapter->ibmvnic_reset); + + ret = 0; +err: + /* ibmvnic_close() below can block, so drop the lock first */ + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + if (ret == ENOMEM) + ibmvnic_close(netdev); + + return -ret; +} + +static void ibmvnic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->req_rx_queues; i++) { + stats->rx_packets += adapter->rx_stats_buffers[i].packets; + stats->rx_bytes += adapter->rx_stats_buffers[i].bytes; + } + + for (i = 0; i < adapter->req_tx_queues; i++) { + stats->tx_packets += adapter->tx_stats_buffers[i].batched_packets; + stats->tx_packets += adapter->tx_stats_buffers[i].direct_packets; + stats->tx_bytes += adapter->tx_stats_buffers[i].bytes; + stats->tx_dropped += adapter->tx_stats_buffers[i].dropped_packets; + } +} + +static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + + if (test_bit(0, 
&adapter->resetting)) { + netdev_err(adapter->netdev, + "Adapter is resetting, skip timeout reset\n"); + return; + } + /* No queuing up reset until at least 5 seconds (default watchdog val) + * after last reset + */ + if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) { + netdev_dbg(dev, "Not yet time to tx timeout.\n"); + return; + } + ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); +} + +static void remove_buff_from_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_rx_buff *rx_buff) +{ + struct ibmvnic_rx_pool *pool = &adapter->rx_pool[rx_buff->pool_index]; + + rx_buff->skb = NULL; + + pool->free_map[pool->next_alloc] = (int)(rx_buff - pool->rx_buff); + pool->next_alloc = (pool->next_alloc + 1) % pool->size; + + atomic_dec(&pool->available); +} + +static int ibmvnic_poll(struct napi_struct *napi, int budget) +{ + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_adapter *adapter; + struct net_device *netdev; + int frames_processed; + int scrq_num; + + netdev = napi->dev; + adapter = netdev_priv(netdev); + scrq_num = (int)(napi - adapter->napi); + frames_processed = 0; + rx_scrq = adapter->rx_scrq[scrq_num]; + +restart_poll: + while (frames_processed < budget) { + struct sk_buff *skb; + struct ibmvnic_rx_buff *rx_buff; + union sub_crq *next; + u32 length; + u16 offset; + u8 flags = 0; + + if (unlikely(test_bit(0, &adapter->resetting) && + adapter->reset_reason != VNIC_RESET_NON_FATAL)) { + enable_scrq_irq(adapter, rx_scrq); + napi_complete_done(napi, frames_processed); + return frames_processed; + } + + if (!pending_scrq(adapter, rx_scrq)) + break; + next = ibmvnic_next_scrq(adapter, rx_scrq); + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(next->rx_comp.correlator); + /* do error checking */ + if (next->rx_comp.rc) { + netdev_dbg(netdev, "rx buffer returned with rc %x\n", + be16_to_cpu(next->rx_comp.rc)); + /* free the entry */ + next->rx_comp.first = 0; + dev_kfree_skb_any(rx_buff->skb); + remove_buff_from_pool(adapter, 
rx_buff); + continue; + } else if (!rx_buff->skb) { + /* free the entry */ + next->rx_comp.first = 0; + remove_buff_from_pool(adapter, rx_buff); + continue; + } + + length = be32_to_cpu(next->rx_comp.len); + offset = be16_to_cpu(next->rx_comp.off_frame_data); + flags = next->rx_comp.flags; + skb = rx_buff->skb; + /* load long_term_buff before copying to skb */ + dma_rmb(); + skb_copy_to_linear_data(skb, rx_buff->data + offset, + length); + + /* VLAN Header has been stripped by the system firmware and + * needs to be inserted by the driver + */ + if (adapter->rx_vlan_header_insertion && + (flags & IBMVNIC_VLAN_STRIPPED)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + ntohs(next->rx_comp.vlan_tci)); + + /* free the entry */ + next->rx_comp.first = 0; + remove_buff_from_pool(adapter, rx_buff); + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, netdev); + skb_record_rx_queue(skb, scrq_num); + + if (flags & IBMVNIC_IP_CHKSUM_GOOD && + flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + length = skb->len; + napi_gro_receive(napi, skb); /* send it up */ + adapter->rx_stats_buffers[scrq_num].packets++; + adapter->rx_stats_buffers[scrq_num].bytes += length; + frames_processed++; + } + + if (adapter->state != VNIC_CLOSING && + (atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2)) + replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); + if (frames_processed < budget) { + if (napi_complete_done(napi, frames_processed)) { + enable_scrq_irq(adapter, rx_scrq); + if (pending_scrq(adapter, rx_scrq)) { + if (napi_schedule(napi)) { + disable_scrq_irq(adapter, rx_scrq); + goto restart_poll; + } + } + } + } + return frames_processed; +} + +static int wait_for_reset(struct ibmvnic_adapter *adapter) +{ + int rc, ret; + + adapter->fallback.mtu = adapter->req_mtu; + adapter->fallback.rx_queues = adapter->req_rx_queues; + adapter->fallback.tx_queues = adapter->req_tx_queues; + 
adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq; + adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq; + + reinit_completion(&adapter->reset_done); + adapter->wait_for_reset = true; + rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + + if (rc) { + ret = rc; + goto out; + } + rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done, 60000); + if (rc) { + ret = -ENODEV; + goto out; + } + + ret = 0; + if (adapter->reset_done_rc) { + ret = -EIO; + adapter->desired.mtu = adapter->fallback.mtu; + adapter->desired.rx_queues = adapter->fallback.rx_queues; + adapter->desired.tx_queues = adapter->fallback.tx_queues; + adapter->desired.rx_entries = adapter->fallback.rx_entries; + adapter->desired.tx_entries = adapter->fallback.tx_entries; + + reinit_completion(&adapter->reset_done); + adapter->wait_for_reset = true; + rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + if (rc) { + ret = rc; + goto out; + } + rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done, + 60000); + if (rc) { + ret = -ENODEV; + goto out; + } + } +out: + adapter->wait_for_reset = false; + + return ret; +} + +static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.mtu = new_mtu + ETH_HLEN; + + return wait_for_reset(adapter); +} + +static netdev_features_t ibmvnic_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* Some backing hardware adapters can not + * handle packets with a MSS less than 224 + * or with only one segment. 
+ */ + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_size < 224 || + skb_shinfo(skb)->gso_segs == 1) + features &= ~NETIF_F_GSO_MASK; + } + + return features; +} + +static const struct net_device_ops ibmvnic_netdev_ops = { + .ndo_open = ibmvnic_open, + .ndo_stop = ibmvnic_close, + .ndo_start_xmit = ibmvnic_xmit, + .ndo_set_rx_mode = ibmvnic_set_multi, + .ndo_set_mac_address = ibmvnic_set_mac, + .ndo_validate_addr = eth_validate_addr, + .ndo_get_stats64 = ibmvnic_get_stats64, + .ndo_tx_timeout = ibmvnic_tx_timeout, + .ndo_change_mtu = ibmvnic_change_mtu, + .ndo_features_check = ibmvnic_features_check, +}; + +/* ethtool functions */ + +static int ibmvnic_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; + + rc = send_query_phys_parms(adapter); + if (rc) { + adapter->speed = SPEED_UNKNOWN; + adapter->duplex = DUPLEX_UNKNOWN; + } + cmd->base.speed = adapter->speed; + cmd->base.duplex = adapter->duplex; + cmd->base.port = PORT_FIBRE; + cmd->base.phy_address = 0; + cmd->base.autoneg = AUTONEG_ENABLE; + + return 0; +} + +static void ibmvnic_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); + strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); + strscpy(info->fw_version, adapter->fw_version, + sizeof(info->fw_version)); +} + +static u32 ibmvnic_get_msglevel(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + return adapter->msg_enable; +} + +static void ibmvnic_set_msglevel(struct net_device *netdev, u32 data) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->msg_enable = data; +} + +static u32 ibmvnic_get_link(struct net_device *netdev) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + /* Don't need to send a query because 
we request a logical link up at + * init and then we wait for link state indications + */ + return adapter->logical_link_state; +} + +static void ibmvnic_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; + ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = adapter->req_rx_add_entries_per_subcrq; + ring->tx_pending = adapter->req_tx_entries_per_subcrq; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int ibmvnic_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || + ring->tx_pending > adapter->max_tx_entries_per_subcrq) { + netdev_err(netdev, "Invalid request.\n"); + netdev_err(netdev, "Max tx buffers = %llu\n", + adapter->max_rx_add_entries_per_subcrq); + netdev_err(netdev, "Max rx buffers = %llu\n", + adapter->max_tx_entries_per_subcrq); + return -EINVAL; + } + + adapter->desired.rx_entries = ring->rx_pending; + adapter->desired.tx_entries = ring->tx_pending; + + return wait_for_reset(adapter); +} + +static void ibmvnic_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + channels->max_rx = adapter->max_rx_queues; + channels->max_tx = adapter->max_tx_queues; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = adapter->req_rx_queues; + channels->tx_count = adapter->req_tx_queues; + channels->other_count = 0; + channels->combined_count = 0; +} + +static int 
ibmvnic_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.rx_queues = channels->rx_count; + adapter->desired.tx_queues = channels->tx_count; + + return wait_for_reset(adapter); +} + +static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) + ethtool_puts(&data, ibmvnic_stats[i].name); + + for (i = 0; i < adapter->req_tx_queues; i++) { + ethtool_sprintf(&data, "tx%d_batched_packets", i); + ethtool_sprintf(&data, "tx%d_direct_packets", i); + ethtool_sprintf(&data, "tx%d_bytes", i); + ethtool_sprintf(&data, "tx%d_dropped_packets", i); + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + ethtool_sprintf(&data, "rx%d_packets", i); + ethtool_sprintf(&data, "rx%d_bytes", i); + ethtool_sprintf(&data, "rx%d_interrupts", i); + } +} + +static int ibmvnic_get_sset_count(struct net_device *dev, int sset) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ibmvnic_stats) + + adapter->req_tx_queues * NUM_TX_STATS + + adapter->req_rx_queues * NUM_RX_STATS; + default: + return -EOPNOTSUPP; + } +} + +static void ibmvnic_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct ibmvnic_adapter *adapter = netdev_priv(dev); + union ibmvnic_crq crq; + int i, j; + int rc; + + memset(&crq, 0, sizeof(crq)); + crq.request_statistics.first = IBMVNIC_CRQ_CMD; + crq.request_statistics.cmd = REQUEST_STATISTICS; + crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token); + crq.request_statistics.len = + cpu_to_be32(sizeof(struct ibmvnic_statistics)); + + /* Wait for data to be written */ + reinit_completion(&adapter->stats_done); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) + return; + rc 
= ibmvnic_wait_for_completion(adapter, &adapter->stats_done, 10000); + if (rc) + return; + + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) + data[i] = be64_to_cpu(IBMVNIC_GET_STAT + (adapter, ibmvnic_stats[i].offset)); + + for (j = 0; j < adapter->req_tx_queues; j++) { + data[i] = adapter->tx_stats_buffers[j].batched_packets; + i++; + data[i] = adapter->tx_stats_buffers[j].direct_packets; + i++; + data[i] = adapter->tx_stats_buffers[j].bytes; + i++; + data[i] = adapter->tx_stats_buffers[j].dropped_packets; + i++; + } + + for (j = 0; j < adapter->req_rx_queues; j++) { + data[i] = adapter->rx_stats_buffers[j].packets; + i++; + data[i] = adapter->rx_stats_buffers[j].bytes; + i++; + data[i] = adapter->rx_stats_buffers[j].interrupts; + i++; + } +} + +static const struct ethtool_ops ibmvnic_ethtool_ops = { + .get_drvinfo = ibmvnic_get_drvinfo, + .get_msglevel = ibmvnic_get_msglevel, + .set_msglevel = ibmvnic_set_msglevel, + .get_link = ibmvnic_get_link, + .get_ringparam = ibmvnic_get_ringparam, + .set_ringparam = ibmvnic_set_ringparam, + .get_channels = ibmvnic_get_channels, + .set_channels = ibmvnic_set_channels, + .get_strings = ibmvnic_get_strings, + .get_sset_count = ibmvnic_get_sset_count, + .get_ethtool_stats = ibmvnic_get_ethtool_stats, + .get_link_ksettings = ibmvnic_get_link_ksettings, +}; + +/* Routines for managing CRQs/sCRQs */ + +static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + int rc; + + if (!scrq) { + netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); + return -EINVAL; + } + + if (scrq->irq) { + free_irq(scrq->irq, scrq); + irq_dispose_mapping(scrq->irq); + scrq->irq = 0; + } + + if (scrq->msgs) { + memset(scrq->msgs, 0, 4 * PAGE_SIZE); + atomic_set(&scrq->used, 0); + scrq->cur = 0; + scrq->ind_buf.index = 0; + } else { + netdev_dbg(adapter->netdev, "Invalid scrq reset\n"); + return -EINVAL; + } + + rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, + 4 * PAGE_SIZE, 
&scrq->crq_num, &scrq->hw_irq); + return rc; +} + +/* Reset every requested tx and rx sub-CRQ through reset_one_sub_crq_queue(). + * Returns -EINVAL when either queue array is missing, the first non-zero rc + * reported for a queue, or 0 when every queue (possibly none) was reset. + */ +static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) +{ + /* rc starts at 0 so the final return is defined even if no queue is visited */ + int i, rc = 0; + + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + + ibmvnic_clean_affinity(adapter); + + for (i = 0; i < adapter->req_tx_queues; i++) { + netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); + rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); + if (rc) + return rc; + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + netdev_dbg(adapter->netdev, "Re-setting rx_scrq[%d]\n", i); + rc = reset_one_sub_crq_queue(adapter, adapter->rx_scrq[i]); + if (rc) + return rc; + } + + return rc; +} + +static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq, + bool do_h_free) +{ + struct device *dev = &adapter->vdev->dev; + long rc; + + netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n"); + + if (do_h_free) { + /* Close the sub-crqs */ + do { + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, + adapter->vdev->unit_address, + scrq->crq_num); + } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + if (rc) { + netdev_err(adapter->netdev, + "Failed to release sub-CRQ %16lx, rc = %ld\n", + scrq->crq_num, rc); + } + } + + dma_free_coherent(dev, + IBMVNIC_IND_MAX_ARR_SZ, + scrq->ind_buf.indir_arr, + scrq->ind_buf.indir_dma); + + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, + DMA_BIDIRECTIONAL); + free_pages((unsigned long)scrq->msgs, 2); + free_cpumask_var(scrq->affinity_mask); + kfree(scrq); +} + +static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter + *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_sub_crq_queue *scrq; + int rc; + + scrq = kzalloc(sizeof(*scrq), GFP_KERNEL); + if (!scrq) + return NULL; + + scrq->msgs = + (union sub_crq *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 2); + if (!scrq->msgs) { + dev_warn(dev, "Couldn't allocate crq queue messages page\n"); + goto zero_page_failed; + } + if
(!zalloc_cpumask_var(&scrq->affinity_mask, GFP_KERNEL)) + goto cpumask_alloc_failed; + + scrq->msg_token = dma_map_single(dev, scrq->msgs, 4 * PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, scrq->msg_token)) { + dev_warn(dev, "Couldn't map crq queue messages page\n"); + goto map_failed; + } + + rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, + 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); + + if (rc == H_RESOURCE) + rc = ibmvnic_reset_crq(adapter); + + if (rc == H_CLOSED) { + dev_warn(dev, "Partner adapter not ready, waiting.\n"); + } else if (rc) { + dev_warn(dev, "Error %d registering sub-crq\n", rc); + goto reg_failed; + } + + scrq->adapter = adapter; + scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); + scrq->ind_buf.index = 0; + + scrq->ind_buf.indir_arr = + dma_alloc_coherent(dev, + IBMVNIC_IND_MAX_ARR_SZ, + &scrq->ind_buf.indir_dma, + GFP_KERNEL); + + if (!scrq->ind_buf.indir_arr) + goto indir_failed; + + spin_lock_init(&scrq->lock); + + netdev_dbg(adapter->netdev, + "sub-crq initialized, num %lx, hw_irq=%lx, irq=%x\n", + scrq->crq_num, scrq->hw_irq, scrq->irq); + + return scrq; + +indir_failed: + do { + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, + adapter->vdev->unit_address, + scrq->crq_num); + } while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc)); +reg_failed: + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, + DMA_BIDIRECTIONAL); +map_failed: + free_cpumask_var(scrq->affinity_mask); +cpumask_alloc_failed: + free_pages((unsigned long)scrq->msgs, 2); +zero_page_failed: + kfree(scrq); + + return NULL; +} + +static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) +{ + int i; + + ibmvnic_clean_affinity(adapter); + if (adapter->tx_scrq) { + for (i = 0; i < adapter->num_active_tx_scrqs; i++) { + if (!adapter->tx_scrq[i]) + continue; + + netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n", + i); + ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]); + if (adapter->tx_scrq[i]->irq) { + 
free_irq(adapter->tx_scrq[i]->irq, + adapter->tx_scrq[i]); + irq_dispose_mapping(adapter->tx_scrq[i]->irq); + adapter->tx_scrq[i]->irq = 0; + } + + release_sub_crq_queue(adapter, adapter->tx_scrq[i], + do_h_free); + } + + kfree(adapter->tx_scrq); + adapter->tx_scrq = NULL; + adapter->num_active_tx_scrqs = 0; + } + + /* Clean any remaining outstanding SKBs + * we freed the irq so we won't be hearing + * from them + */ + clean_tx_pools(adapter); + + if (adapter->rx_scrq) { + for (i = 0; i < adapter->num_active_rx_scrqs; i++) { + if (!adapter->rx_scrq[i]) + continue; + + netdev_dbg(adapter->netdev, "Releasing rx_scrq[%d]\n", + i); + if (adapter->rx_scrq[i]->irq) { + free_irq(adapter->rx_scrq[i]->irq, + adapter->rx_scrq[i]); + irq_dispose_mapping(adapter->rx_scrq[i]->irq); + adapter->rx_scrq[i]->irq = 0; + } + + release_sub_crq_queue(adapter, adapter->rx_scrq[i], + do_h_free); + } + + kfree(adapter->rx_scrq); + adapter->rx_scrq = NULL; + adapter->num_active_rx_scrqs = 0; + } +} + +static int disable_scrq_irq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + struct device *dev = &adapter->vdev->dev; + unsigned long rc; + + rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, + H_DISABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); + if (rc) + dev_err(dev, "Couldn't disable scrq irq 0x%lx. rc=%ld\n", + scrq->hw_irq, rc); + return rc; +} + +/* We can not use the IRQ chip EOI handler because that has the + * unintended effect of changing the interrupt priority. + */ +static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq) +{ + u64 val = 0xff000000 | scrq->hw_irq; + unsigned long rc; + + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); +} + +/* Due to a firmware bug, the hypervisor can send an interrupt to a + * transmit or receive queue just prior to a partition migration. + * Force an EOI after migration. 
+ */ +static void ibmvnic_clear_pending_interrupt(struct device *dev, + struct ibmvnic_sub_crq_queue *scrq) +{ + if (!xive_enabled()) + ibmvnic_xics_eoi(dev, scrq); +} + +static int enable_scrq_irq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + struct device *dev = &adapter->vdev->dev; + unsigned long rc; + + if (scrq->hw_irq > 0x100000000ULL) { + dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq); + return 1; + } + + if (test_bit(0, &adapter->resetting) && + adapter->reset_reason == VNIC_RESET_MOBILITY) { + ibmvnic_clear_pending_interrupt(dev, scrq); + } + + rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, + H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); + if (rc) + dev_err(dev, "Couldn't enable scrq irq 0x%lx. rc=%ld\n", + scrq->hw_irq, rc); + return rc; +} + +static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + struct device *dev = &adapter->vdev->dev; + int num_packets = 0, total_bytes = 0; + struct ibmvnic_tx_pool *tx_pool; + struct ibmvnic_tx_buff *txbuff; + struct netdev_queue *txq; + union sub_crq *next; + int index, i; + +restart_loop: + while (pending_scrq(adapter, scrq)) { + unsigned int pool = scrq->pool_index; + int num_entries = 0; + next = ibmvnic_next_scrq(adapter, scrq); + for (i = 0; i < next->tx_comp.num_comps; i++) { + index = be32_to_cpu(next->tx_comp.correlators[i]); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[pool]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[pool]; + } + + txbuff = &tx_pool->tx_buff[index]; + num_packets++; + num_entries += txbuff->num_entries; + if (txbuff->skb) { + total_bytes += txbuff->skb->len; + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } + txbuff->skb = NULL; + } else { + netdev_warn(adapter->netdev, + "TX completion received with NULL socket 
buffer\n"); + } + tx_pool->free_map[tx_pool->producer_index] = index; + tx_pool->producer_index = + (tx_pool->producer_index + 1) % + tx_pool->num_buffers; + } + /* remove tx_comp scrq*/ + next->tx_comp.first = 0; + + + if (atomic_sub_return(num_entries, &scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, + scrq->pool_index)) { + rcu_read_lock(); + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + rcu_read_unlock(); + } + } + + enable_scrq_irq(adapter, scrq); + + if (pending_scrq(adapter, scrq)) { + disable_scrq_irq(adapter, scrq); + goto restart_loop; + } + + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + + return 0; +} + +static irqreturn_t ibmvnic_interrupt_tx(int irq, void *instance) +{ + struct ibmvnic_sub_crq_queue *scrq = instance; + struct ibmvnic_adapter *adapter = scrq->adapter; + + disable_scrq_irq(adapter, scrq); + ibmvnic_complete_tx(adapter, scrq); + + return IRQ_HANDLED; +} + +static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance) +{ + struct ibmvnic_sub_crq_queue *scrq = instance; + struct ibmvnic_adapter *adapter = scrq->adapter; + + /* When booting a kdump kernel we can hit pending interrupts + * prior to completing driver initialization. 
+ */ + if (unlikely(adapter->state != VNIC_OPEN)) + return IRQ_NONE; + + adapter->rx_stats_buffers[scrq->scrq_num].interrupts++; + + if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) { + disable_scrq_irq(adapter, scrq); + __napi_schedule(&adapter->napi[scrq->scrq_num]); + } + + return IRQ_HANDLED; +} + +static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_sub_crq_queue *scrq; + int i = 0, j = 0; + int rc = 0; + + for (i = 0; i < adapter->req_tx_queues; i++) { + netdev_dbg(adapter->netdev, "Initializing tx_scrq[%d] irq\n", + i); + scrq = adapter->tx_scrq[i]; + scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); + + if (!scrq->irq) { + rc = -EINVAL; + dev_err(dev, "Error mapping irq\n"); + goto req_tx_irq_failed; + } + + snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-tx%d", + adapter->vdev->unit_address, i); + rc = request_irq(scrq->irq, ibmvnic_interrupt_tx, + 0, scrq->name, scrq); + + if (rc) { + dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n", + scrq->irq, rc); + irq_dispose_mapping(scrq->irq); + goto req_tx_irq_failed; + } + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + netdev_dbg(adapter->netdev, "Initializing rx_scrq[%d] irq\n", + i); + scrq = adapter->rx_scrq[i]; + scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); + if (!scrq->irq) { + rc = -EINVAL; + dev_err(dev, "Error mapping irq\n"); + goto req_rx_irq_failed; + } + snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-rx%d", + adapter->vdev->unit_address, i); + rc = request_irq(scrq->irq, ibmvnic_interrupt_rx, + 0, scrq->name, scrq); + if (rc) { + dev_err(dev, "Couldn't register rx irq 0x%x. 
rc=%d\n", + scrq->irq, rc); + irq_dispose_mapping(scrq->irq); + goto req_rx_irq_failed; + } + } + + cpus_read_lock(); + ibmvnic_set_affinity(adapter); + cpus_read_unlock(); + + return rc; + +req_rx_irq_failed: + for (j = 0; j < i; j++) { + free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]); + irq_dispose_mapping(adapter->rx_scrq[j]->irq); + } + i = adapter->req_tx_queues; +req_tx_irq_failed: + for (j = 0; j < i; j++) { + free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); + irq_dispose_mapping(adapter->tx_scrq[j]->irq); + } + release_sub_crqs(adapter, 1); + return rc; +} + +static int init_sub_crqs(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_sub_crq_queue **allqueues; + int registered_queues = 0; + int total_queues; + int more = 0; + int i; + + total_queues = adapter->req_tx_queues + adapter->req_rx_queues; + + allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL); + if (!allqueues) + return -ENOMEM; + + for (i = 0; i < total_queues; i++) { + allqueues[i] = init_sub_crq_queue(adapter); + if (!allqueues[i]) { + dev_warn(dev, "Couldn't allocate all sub-crqs\n"); + break; + } + registered_queues++; + } + + /* Make sure we were able to register the minimum number of queues */ + if (registered_queues < + adapter->min_tx_queues + adapter->min_rx_queues) { + dev_err(dev, "Fatal: Couldn't init min number of sub-crqs\n"); + goto tx_failed; + } + + /* Distribute the failed allocated queues*/ + for (i = 0; i < total_queues - registered_queues + more ; i++) { + netdev_dbg(adapter->netdev, "Reducing number of queues\n"); + switch (i % 3) { + case 0: + if (adapter->req_rx_queues > adapter->min_rx_queues) + adapter->req_rx_queues--; + else + more++; + break; + case 1: + if (adapter->req_tx_queues > adapter->min_tx_queues) + adapter->req_tx_queues--; + else + more++; + break; + } + } + + adapter->tx_scrq = kcalloc(adapter->req_tx_queues, + sizeof(*adapter->tx_scrq), GFP_KERNEL); + if (!adapter->tx_scrq) + 
goto tx_failed; + + for (i = 0; i < adapter->req_tx_queues; i++) { + adapter->tx_scrq[i] = allqueues[i]; + adapter->tx_scrq[i]->pool_index = i; + adapter->num_active_tx_scrqs++; + } + + adapter->rx_scrq = kcalloc(adapter->req_rx_queues, + sizeof(*adapter->rx_scrq), GFP_KERNEL); + if (!adapter->rx_scrq) + goto rx_failed; + + for (i = 0; i < adapter->req_rx_queues; i++) { + adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues]; + adapter->rx_scrq[i]->scrq_num = i; + adapter->num_active_rx_scrqs++; + } + + kfree(allqueues); + return 0; + +rx_failed: + kfree(adapter->tx_scrq); + adapter->tx_scrq = NULL; +tx_failed: + for (i = 0; i < registered_queues; i++) + release_sub_crq_queue(adapter, allqueues[i], 1); + kfree(allqueues); + return -ENOMEM; +} + +static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) +{ + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). 
+ */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; + + if (!retry) { + /* Sub-CRQ entries are 32 byte long */ + int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + + atomic_set(&adapter->running_cap_crqs, cap_reqs); + + if (adapter->min_tx_entries_per_subcrq > entries_page || + adapter->min_rx_add_entries_per_subcrq > entries_page) { + dev_err(dev, "Fatal, invalid entries per sub-crq\n"); + return; + } + + if (adapter->desired.mtu) + adapter->req_mtu = adapter->desired.mtu; + else + adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; + + if (!adapter->desired.tx_entries) + adapter->desired.tx_entries = + adapter->max_tx_entries_per_subcrq; + if (!adapter->desired.rx_entries) + adapter->desired.rx_entries = + adapter->max_rx_add_entries_per_subcrq; + + max_entries = IBMVNIC_LTB_SET_SIZE / + (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) { + adapter->desired.tx_entries = max_entries; + } + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) { + adapter->desired.rx_entries = max_entries; + } + + if (adapter->desired.tx_entries) + adapter->req_tx_entries_per_subcrq = + adapter->desired.tx_entries; + else + adapter->req_tx_entries_per_subcrq = + adapter->max_tx_entries_per_subcrq; + + if (adapter->desired.rx_entries) + adapter->req_rx_add_entries_per_subcrq = + adapter->desired.rx_entries; + else + adapter->req_rx_add_entries_per_subcrq = + adapter->max_rx_add_entries_per_subcrq; + + if (adapter->desired.tx_queues) + adapter->req_tx_queues = + adapter->desired.tx_queues; + else + adapter->req_tx_queues = + adapter->opt_tx_comp_sub_queues; + + if (adapter->desired.rx_queues) + adapter->req_rx_queues = + adapter->desired.rx_queues; + else + adapter->req_rx_queues = + adapter->opt_rx_comp_queues; + + adapter->req_rx_add_queues = 
adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); + } + memset(&crq, 0, sizeof(crq)); + crq.request_capability.first = IBMVNIC_CRQ_CMD; + crq.request_capability.cmd = REQUEST_CAPABILITY; + + crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); + crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); + crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); + crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = + cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); + crq.request_capability.number = + cpu_to_be64(adapter->req_tx_entries_per_subcrq); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = + cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); + crq.request_capability.number = + cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + crq.request_capability.capability = cpu_to_be16(REQ_MTU); + crq.request_capability.number = cpu_to_be64(adapter->req_mtu); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + + if (adapter->netdev->flags & IFF_PROMISC) { + if (adapter->promisc_supported) { + crq.request_capability.capability = + cpu_to_be16(PROMISC_REQUESTED); + crq.request_capability.number = cpu_to_be64(1); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + } + } else { + crq.request_capability.capability = + cpu_to_be16(PROMISC_REQUESTED); + crq.request_capability.number = cpu_to_be64(0); + cap_reqs--; + ibmvnic_send_crq(adapter, &crq); + } + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. 
+ */ + WARN_ON(cap_reqs != 0); +} + +static int pending_scrq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + union sub_crq *entry = &scrq->msgs[scrq->cur]; + int rc; + + rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP); + + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + + return rc; +} + +static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *scrq) +{ + union sub_crq *entry; + unsigned long flags; + + spin_lock_irqsave(&scrq->lock, flags); + entry = &scrq->msgs[scrq->cur]; + if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) { + if (++scrq->cur == scrq->size) + scrq->cur = 0; + } else { + entry = NULL; + } + spin_unlock_irqrestore(&scrq->lock, flags); + + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + + return entry; +} + +static union ibmvnic_crq *ibmvnic_next_crq(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_crq_queue *queue = &adapter->crq; + union ibmvnic_crq *crq; + + crq = &queue->msgs[queue->cur]; + if (crq->generic.first & IBMVNIC_CRQ_CMD_RSP) { + if (++queue->cur == queue->size) + queue->cur = 0; + } else { + crq = NULL; + } + + return crq; +} + +static void print_subcrq_error(struct device *dev, int rc, const char *func) +{ + switch (rc) { + case H_PARAMETER: + dev_warn_ratelimited(dev, + "%s failed: Send request is malformed or adapter failover pending. (rc=%d)\n", + func, rc); + break; + case H_CLOSED: + dev_warn_ratelimited(dev, + "%s failed: Backing queue closed. Adapter is down or failover pending. 
(rc=%d)\n", + func, rc); + break; + default: + dev_err_ratelimited(dev, "%s failed: (rc=%d)\n", func, rc); + break; + } +} + +static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, + u64 remote_handle, u64 ioba, u64 num_entries) +{ + unsigned int ua = adapter->vdev->unit_address; + struct device *dev = &adapter->vdev->dev; + int rc; + + /* Make sure the hypervisor sees the complete request */ + dma_wmb(); + rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua, + cpu_to_be64(remote_handle), + ioba, num_entries); + + if (rc) + print_subcrq_error(dev, rc, __func__); + + return rc; +} + +static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, + union ibmvnic_crq *crq) +{ + unsigned int ua = adapter->vdev->unit_address; + struct device *dev = &adapter->vdev->dev; + u64 *u64_crq = (u64 *)crq; + int rc; + + netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n", + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); + + if (!adapter->crq.active && + crq->generic.first != IBMVNIC_CRQ_INIT_CMD) { + dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n"); + return -EINVAL; + } + + /* Make sure the hypervisor sees the complete request */ + dma_wmb(); + + rc = plpar_hcall_norets(H_SEND_CRQ, ua, + cpu_to_be64(u64_crq[0]), + cpu_to_be64(u64_crq[1])); + + if (rc) { + if (rc == H_CLOSED) { + dev_warn(dev, "CRQ Queue closed\n"); + /* do not reset, report the fail, wait for passive init from server */ + } + + dev_warn(dev, "Send error (rc=%d)\n", rc); + } + + return rc; +} + +static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + int retries = 100; + int rc; + + memset(&crq, 0, sizeof(crq)); + crq.generic.first = IBMVNIC_CRQ_INIT_CMD; + crq.generic.cmd = IBMVNIC_CRQ_INIT; + netdev_dbg(adapter->netdev, "Sending CRQ init\n"); + + do { + rc = ibmvnic_send_crq(adapter, &crq); + if (rc != H_CLOSED) + 
break; + retries--; + msleep(50); + + } while (retries > 0); + + if (rc) { + dev_err(dev, "Failed to send init request, rc = %d\n", rc); + return rc; + } + + return 0; +} + +struct vnic_login_client_data { + u8 type; + __be16 len; + char name[]; +} __packed; + +static int vnic_client_data_len(struct ibmvnic_adapter *adapter) +{ + int len; + + /* Calculate the amount of buffer space needed for the + * vnic client data in the login buffer. There are four entries, + * OS name, LPAR name, device name, and a null last entry. + */ + len = 4 * sizeof(struct vnic_login_client_data); + len += 6; /* "Linux" plus NULL */ + len += strlen(utsname()->nodename) + 1; + len += strlen(adapter->netdev->name) + 1; + + return len; +} + +static void vnic_add_client_data(struct ibmvnic_adapter *adapter, + struct vnic_login_client_data *vlcd) +{ + const char *os_name = "Linux"; + int len; + + /* Type 1 - LPAR OS */ + vlcd->type = 1; + len = strlen(os_name) + 1; + vlcd->len = cpu_to_be16(len); + strscpy(vlcd->name, os_name, len); + vlcd = (struct vnic_login_client_data *)(vlcd->name + len); + + /* Type 2 - LPAR name */ + vlcd->type = 2; + len = strlen(utsname()->nodename) + 1; + vlcd->len = cpu_to_be16(len); + strscpy(vlcd->name, utsname()->nodename, len); + vlcd = (struct vnic_login_client_data *)(vlcd->name + len); + + /* Type 3 - device name */ + vlcd->type = 3; + len = strlen(adapter->netdev->name) + 1; + vlcd->len = cpu_to_be16(len); + strscpy(vlcd->name, adapter->netdev->name, len); +} + +static void ibmvnic_print_hex_dump(struct net_device *dev, void *buf, + size_t len) +{ + unsigned char hex_str[16 * 3]; + + for (size_t i = 0; i < len; i += 16) { + hex_dump_to_buffer((unsigned char *)buf + i, len - i, 16, 8, + hex_str, sizeof(hex_str), false); + netdev_dbg(dev, "%s\n", hex_str); + } +} + +static int send_login(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_login_rsp_buffer *login_rsp_buffer; + struct ibmvnic_login_buffer *login_buffer; + struct device *dev = 
&adapter->vdev->dev; + struct vnic_login_client_data *vlcd; + dma_addr_t rsp_buffer_token; + dma_addr_t buffer_token; + size_t rsp_buffer_size; + union ibmvnic_crq crq; + int client_data_len; + size_t buffer_size; + __be64 *tx_list_p; + __be64 *rx_list_p; + int rc; + int i; + + if (!adapter->tx_scrq || !adapter->rx_scrq) { + netdev_err(adapter->netdev, + "RX or TX queues are not allocated, device login failed\n"); + return -ENOMEM; + } + + release_login_buffer(adapter); + release_login_rsp_buffer(adapter); + + client_data_len = vnic_client_data_len(adapter); + + buffer_size = + sizeof(struct ibmvnic_login_buffer) + + sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) + + client_data_len; + + login_buffer = kzalloc(buffer_size, GFP_ATOMIC); + if (!login_buffer) + goto buf_alloc_failed; + + buffer_token = dma_map_single(dev, login_buffer, buffer_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, buffer_token)) { + dev_err(dev, "Couldn't map login buffer\n"); + goto buf_map_failed; + } + + rsp_buffer_size = sizeof(struct ibmvnic_login_rsp_buffer) + + sizeof(u64) * adapter->req_tx_queues + + sizeof(u64) * adapter->req_rx_queues + + sizeof(u64) * adapter->req_rx_queues + + sizeof(u8) * IBMVNIC_TX_DESC_VERSIONS; + + login_rsp_buffer = kmalloc(rsp_buffer_size, GFP_ATOMIC); + if (!login_rsp_buffer) + goto buf_rsp_alloc_failed; + + rsp_buffer_token = dma_map_single(dev, login_rsp_buffer, + rsp_buffer_size, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, rsp_buffer_token)) { + dev_err(dev, "Couldn't map login rsp buffer\n"); + goto buf_rsp_map_failed; + } + + adapter->login_buf = login_buffer; + adapter->login_buf_token = buffer_token; + adapter->login_buf_sz = buffer_size; + adapter->login_rsp_buf = login_rsp_buffer; + adapter->login_rsp_buf_token = rsp_buffer_token; + adapter->login_rsp_buf_sz = rsp_buffer_size; + + login_buffer->len = cpu_to_be32(buffer_size); + login_buffer->version = cpu_to_be32(INITIAL_VERSION_LB); + login_buffer->num_txcomp_subcrqs = 
cpu_to_be32(adapter->req_tx_queues); + login_buffer->off_txcomp_subcrqs = + cpu_to_be32(sizeof(struct ibmvnic_login_buffer)); + login_buffer->num_rxcomp_subcrqs = cpu_to_be32(adapter->req_rx_queues); + login_buffer->off_rxcomp_subcrqs = + cpu_to_be32(sizeof(struct ibmvnic_login_buffer) + + sizeof(u64) * adapter->req_tx_queues); + login_buffer->login_rsp_ioba = cpu_to_be32(rsp_buffer_token); + login_buffer->login_rsp_len = cpu_to_be32(rsp_buffer_size); + + tx_list_p = (__be64 *)((char *)login_buffer + + sizeof(struct ibmvnic_login_buffer)); + rx_list_p = (__be64 *)((char *)login_buffer + + sizeof(struct ibmvnic_login_buffer) + + sizeof(u64) * adapter->req_tx_queues); + + for (i = 0; i < adapter->req_tx_queues; i++) { + if (adapter->tx_scrq[i]) { + tx_list_p[i] = + cpu_to_be64(adapter->tx_scrq[i]->crq_num); + } + } + + for (i = 0; i < adapter->req_rx_queues; i++) { + if (adapter->rx_scrq[i]) { + rx_list_p[i] = + cpu_to_be64(adapter->rx_scrq[i]->crq_num); + } + } + + /* Insert vNIC login client data */ + vlcd = (struct vnic_login_client_data *) + ((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues)); + login_buffer->client_data_offset = + cpu_to_be32((char *)vlcd - (char *)login_buffer); + login_buffer->client_data_len = cpu_to_be32(client_data_len); + + vnic_add_client_data(adapter, vlcd); + + netdev_dbg(adapter->netdev, "Login Buffer:\n"); + ibmvnic_print_hex_dump(adapter->netdev, adapter->login_buf, + adapter->login_buf_sz); + + memset(&crq, 0, sizeof(crq)); + crq.login.first = IBMVNIC_CRQ_CMD; + crq.login.cmd = LOGIN; + crq.login.ioba = cpu_to_be32(buffer_token); + crq.login.len = cpu_to_be32(buffer_size); + + adapter->login_pending = true; + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + adapter->login_pending = false; + netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); + goto buf_send_failed; + } + + return 0; + +buf_send_failed: + dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size, + DMA_FROM_DEVICE); +buf_rsp_map_failed: + 
kfree(login_rsp_buffer); + adapter->login_rsp_buf = NULL; +buf_rsp_alloc_failed: + dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE); +buf_map_failed: + kfree(login_buffer); + adapter->login_buf = NULL; +buf_alloc_failed: + return -ENOMEM; +} + +/* Build a REQUEST_MAP CRQ carrying map_id, the I/O address (stored through cpu_to_be32(), so only 32 bits are sent) and the length, then send it. Returns the result of ibmvnic_send_crq(). */ static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, + u32 len, u8 map_id) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.request_map.first = IBMVNIC_CRQ_CMD; + crq.request_map.cmd = REQUEST_MAP; + crq.request_map.map_id = map_id; + crq.request_map.ioba = cpu_to_be32(addr); + crq.request_map.len = cpu_to_be32(len); + return ibmvnic_send_crq(adapter, &crq); +} + +/* Build a REQUEST_UNMAP CRQ for map_id and send it. Returns the result of ibmvnic_send_crq(). */ static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.request_unmap.first = IBMVNIC_CRQ_CMD; + crq.request_unmap.cmd = REQUEST_UNMAP; + crq.request_unmap.map_id = map_id; + return ibmvnic_send_crq(adapter, &crq); +} + +/* Send a QUERY_MAP CRQ with no payload; the send result is not checked here. */ static void send_query_map(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.query_map.first = IBMVNIC_CRQ_CMD; + crq.query_map.cmd = QUERY_MAP; + ibmvnic_send_crq(adapter, &crq); +} + +/* Send a series of CRQs requesting various capabilities of the VNIC server */ +static void send_query_cap(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + int cap_reqs; + + /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count + * upfront. When the tasklet receives a response to all of these, it + * can send out the next protocol message (REQUEST_CAPABILITY).
+ */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); + + memset(&crq, 0, sizeof(crq)); + crq.query_capability.first = IBMVNIC_CRQ_CMD; + crq.query_capability.cmd = QUERY_CAPABILITY; + + crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MIN_MTU); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_MTU); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); + ibmvnic_send_crq(adapter, &crq); + 
cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = + cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); + + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. 
+ */ + WARN_ON(cap_reqs != 0); +} + +static void send_query_ip_offload(struct ibmvnic_adapter *adapter) +{ + int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + + adapter->ip_offload_tok = + dma_map_single(dev, + &adapter->ip_offload_buf, + buf_sz, + DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_tok)) { + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(dev, "Couldn't map offload buffer\n"); + return; + } + + memset(&crq, 0, sizeof(crq)); + crq.query_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; + crq.query_ip_offload.len = cpu_to_be32(buf_sz); + crq.query_ip_offload.ioba = + cpu_to_be32(adapter->ip_offload_tok); + + ibmvnic_send_crq(adapter, &crq); +} + +static void send_control_ip_offload(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + struct device *dev = &adapter->vdev->dev; + netdev_features_t old_hw_features = 0; + union ibmvnic_crq crq; + + adapter->ip_offload_ctrl_tok = + dma_map_single(dev, + ctrl_buf, + sizeof(adapter->ip_offload_ctrl), + DMA_TO_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { + dev_err(dev, "Couldn't map ip offload control buffer\n"); + return; + } + + ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB); + ctrl_buf->ipv4_chksum = buf->ipv4_chksum; + ctrl_buf->ipv6_chksum = buf->ipv6_chksum; + ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum; + ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum; + ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum; + ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum; + ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4; + ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6; + + /* large_rx disabled for now, additional features needed */ + 
ctrl_buf->large_rx_ipv4 = 0; + ctrl_buf->large_rx_ipv6 = 0; + + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + + if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; + + if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; + + if ((adapter->netdev->features & + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) + adapter->netdev->hw_features |= NETIF_F_RXCSUM; + + if (buf->large_tx_ipv4) + adapter->netdev->hw_features |= NETIF_F_TSO; + if (buf->large_tx_ipv6) + adapter->netdev->hw_features |= NETIF_F_TSO6; + + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } + + memset(&crq, 0, sizeof(crq)); + crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; + crq.control_ip_offload.len = + cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); + ibmvnic_send_crq(adapter, &crq); +} + +static void handle_vpd_size_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + + if (crq->get_vpd_size_rsp.rc.code) { + dev_err(dev, "Error retrieving VPD size, rc=%x\n", + crq->get_vpd_size_rsp.rc.code); + complete(&adapter->fw_done); + return; + } + + adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len); + 
complete(&adapter->fw_done); +} + +static void handle_vpd_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + unsigned char *substr = NULL; + u8 fw_level_len = 0; + + memset(adapter->fw_version, 0, 32); + + dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len, + DMA_FROM_DEVICE); + + if (crq->get_vpd_rsp.rc.code) { + dev_err(dev, "Error retrieving VPD from device, rc=%x\n", + crq->get_vpd_rsp.rc.code); + goto complete; + } + + /* get the position of the firmware version info + * located after the ASCII 'RM' substring in the buffer + */ + substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len); + if (!substr) { + dev_info(dev, "Warning - No FW level has been provided in the VPD buffer by the VIOS Server\n"); + goto complete; + } + + /* get length of firmware level ASCII substring */ + if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) { + fw_level_len = *(substr + 2); + } else { + dev_info(dev, "Length of FW substr extrapolated VDP buff\n"); + goto complete; + } + + /* copy firmware version string from vpd into adapter */ + if ((substr + 3 + fw_level_len) < + (adapter->vpd->buff + adapter->vpd->len)) { + strscpy(adapter->fw_version, substr + 3, + sizeof(adapter->fw_version)); + } else { + dev_info(dev, "FW substr extrapolated VPD buff\n"); + } + +complete: + if (adapter->fw_version[0] == '\0') + strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version)); + complete(&adapter->fw_done); +} + +static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + + dma_unmap_single(dev, adapter->ip_offload_tok, + sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE); + + netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); + ibmvnic_print_hex_dump(adapter->netdev, buf, + sizeof(adapter->ip_offload_buf)); + + netdev_dbg(adapter->netdev, 
"ipv4_chksum = %d\n", buf->ipv4_chksum); + netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); + netdev_dbg(adapter->netdev, "tcp_ipv4_chksum = %d\n", + buf->tcp_ipv4_chksum); + netdev_dbg(adapter->netdev, "tcp_ipv6_chksum = %d\n", + buf->tcp_ipv6_chksum); + netdev_dbg(adapter->netdev, "udp_ipv4_chksum = %d\n", + buf->udp_ipv4_chksum); + netdev_dbg(adapter->netdev, "udp_ipv6_chksum = %d\n", + buf->udp_ipv6_chksum); + netdev_dbg(adapter->netdev, "large_tx_ipv4 = %d\n", + buf->large_tx_ipv4); + netdev_dbg(adapter->netdev, "large_tx_ipv6 = %d\n", + buf->large_tx_ipv6); + netdev_dbg(adapter->netdev, "large_rx_ipv4 = %d\n", + buf->large_rx_ipv4); + netdev_dbg(adapter->netdev, "large_rx_ipv6 = %d\n", + buf->large_rx_ipv6); + netdev_dbg(adapter->netdev, "max_ipv4_hdr_sz = %d\n", + buf->max_ipv4_header_size); + netdev_dbg(adapter->netdev, "max_ipv6_hdr_sz = %d\n", + buf->max_ipv6_header_size); + netdev_dbg(adapter->netdev, "max_tcp_hdr_size = %d\n", + buf->max_tcp_header_size); + netdev_dbg(adapter->netdev, "max_udp_hdr_size = %d\n", + buf->max_udp_header_size); + netdev_dbg(adapter->netdev, "max_large_tx_size = %d\n", + buf->max_large_tx_size); + netdev_dbg(adapter->netdev, "max_large_rx_size = %d\n", + buf->max_large_rx_size); + netdev_dbg(adapter->netdev, "ipv6_ext_hdr = %d\n", + buf->ipv6_extension_header); + netdev_dbg(adapter->netdev, "tcp_pseudosum_req = %d\n", + buf->tcp_pseudosum_req); + netdev_dbg(adapter->netdev, "num_ipv6_ext_hd = %d\n", + buf->num_ipv6_ext_headers); + netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n", + buf->off_ipv6_ext_headers); + + send_control_ip_offload(adapter); +} + +static const char *ibmvnic_fw_err_cause(u16 cause) +{ + switch (cause) { + case ADAPTER_PROBLEM: + return "adapter problem"; + case BUS_PROBLEM: + return "bus problem"; + case FW_PROBLEM: + return "firmware problem"; + case DD_PROBLEM: + return "device driver problem"; + case EEH_RECOVERY: + return "EEH recovery"; + case FW_UPDATED: + return "firmware 
updated"; + case LOW_MEMORY: + return "low Memory"; + default: + return "unknown"; + } +} + +static void handle_error_indication(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + u16 cause; + + cause = be16_to_cpu(crq->error_indication.error_cause); + + dev_warn_ratelimited(dev, + "Firmware reports %serror, cause: %s. Starting recovery...\n", + crq->error_indication.flags + & IBMVNIC_FATAL_ERROR ? "FATAL " : "", + ibmvnic_fw_err_cause(cause)); + + if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR) + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + else + ibmvnic_reset(adapter, VNIC_RESET_NON_FATAL); +} + +static int handle_change_mac_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + long rc; + + rc = crq->change_mac_addr_rsp.rc.code; + if (rc) { + dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc); + goto out; + } + /* crq->change_mac_addr.mac_addr is the requested one + * crq->change_mac_addr_rsp.mac_addr is the returned valid one. 
+ */ + eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]); + ether_addr_copy(adapter->mac_addr, + &crq->change_mac_addr_rsp.mac_addr[0]); +out: + complete(&adapter->fw_done); + return rc; +} + +static void handle_request_cap_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + u64 *req_value; + char *name; + + atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); + switch (be16_to_cpu(crq->request_capability_rsp.capability)) { + case REQ_TX_QUEUES: + req_value = &adapter->req_tx_queues; + name = "tx"; + break; + case REQ_RX_QUEUES: + req_value = &adapter->req_rx_queues; + name = "rx"; + break; + case REQ_RX_ADD_QUEUES: + req_value = &adapter->req_rx_add_queues; + name = "rx_add"; + break; + case REQ_TX_ENTRIES_PER_SUBCRQ: + req_value = &adapter->req_tx_entries_per_subcrq; + name = "tx_entries_per_subcrq"; + break; + case REQ_RX_ADD_ENTRIES_PER_SUBCRQ: + req_value = &adapter->req_rx_add_entries_per_subcrq; + name = "rx_add_entries_per_subcrq"; + break; + case REQ_MTU: + req_value = &adapter->req_mtu; + name = "mtu"; + break; + case PROMISC_REQUESTED: + req_value = &adapter->promisc; + name = "promisc"; + break; + default: + dev_err(dev, "Got invalid cap request rsp %d\n", + crq->request_capability.capability); + return; + } + + switch (crq->request_capability_rsp.rc.code) { + case SUCCESS: + break; + case PARTIALSUCCESS: + dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", + *req_value, + (long)be64_to_cpu(crq->request_capability_rsp.number), + name); + + if (be16_to_cpu(crq->request_capability_rsp.capability) == + REQ_MTU) { + pr_err("mtu of %llu is not supported. 
Reverting.\n", + *req_value); + *req_value = adapter->fallback.mtu; + } else { + *req_value = + be64_to_cpu(crq->request_capability_rsp.number); + } + + send_request_cap(adapter, 1); + return; + default: + dev_err(dev, "Error %d in request cap rsp\n", + crq->request_capability_rsp.rc.code); + return; + } + + /* Done receiving requested capabilities, query IP offload support */ + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_query_ip_offload(adapter); +} + +static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + struct net_device *netdev = adapter->netdev; + struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; + struct ibmvnic_login_buffer *login = adapter->login_buf; + u64 *tx_handle_array; + u64 *rx_handle_array; + int num_tx_pools; + int num_rx_pools; + u64 *size_array; + u32 rsp_len; + int i; + + /* CHECK: Test/set of login_pending does not need to be atomic + * because only ibmvnic_tasklet tests/clears this. + */ + if (!adapter->login_pending) { + netdev_warn(netdev, "Ignoring unexpected login response\n"); + return 0; + } + adapter->login_pending = false; + + /* If the number of queues requested can't be allocated by the + * server, the login response will return with code 1. We will need + * to resend the login buffer with fewer queues requested. 
+ */ + if (login_rsp_crq->generic.rc.code) { + adapter->init_done_rc = login_rsp_crq->generic.rc.code; + complete(&adapter->init_done); + return 0; + } + + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + + netdev->mtu = adapter->req_mtu - ETH_HLEN; + + netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); + ibmvnic_print_hex_dump(netdev, adapter->login_rsp_buf, + adapter->login_rsp_buf_sz); + + /* Sanity checks */ + if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs || + (be32_to_cpu(login->num_rxcomp_subcrqs) * + adapter->req_rx_add_queues != + be32_to_cpu(login_rsp->num_rxadd_subcrqs))) { + dev_err(dev, "FATAL: Inconsistent login and login rsp\n"); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + return -EIO; + } + + rsp_len = be32_to_cpu(login_rsp->len); + if (be32_to_cpu(login->login_rsp_len) < rsp_len || + rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) || + rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) || + rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) { + /* This can happen if a login request times out and there are + * 2 outstanding login requests sent, the LOGIN_RSP crq + * could have been for the older login request. So we are + * parsing the newer response buffer which may be incomplete + */ + dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n"); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + return -EIO; + } + + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + /* variable buffer sizes are not supported, so just read the + * first entry. 
+ */ + adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]); + + num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + + tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs)); + + for (i = 0; i < num_tx_pools; i++) + adapter->tx_scrq[i]->handle = tx_handle_array[i]; + + for (i = 0; i < num_rx_pools; i++) + adapter->rx_scrq[i]->handle = rx_handle_array[i]; + + adapter->num_active_tx_scrqs = num_tx_pools; + adapter->num_active_rx_scrqs = num_rx_pools; + release_login_rsp_buffer(adapter); + release_login_buffer(adapter); + complete(&adapter->init_done); + + return 0; +} + +static void handle_request_unmap_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + long rc; + + rc = crq->request_unmap_rsp.rc.code; + if (rc) + dev_err(dev, "Error %ld in REQUEST_UNMAP_RSP\n", rc); +} + +static void handle_query_map_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + long rc; + + rc = crq->query_map_rsp.rc.code; + if (rc) { + dev_err(dev, "Error %ld in QUERY_MAP_RSP\n", rc); + return; + } + netdev_dbg(netdev, "page_size = %d\ntot_pages = %u\nfree_pages = %u\n", + crq->query_map_rsp.page_size, + __be32_to_cpu(crq->query_map_rsp.tot_pages), + __be32_to_cpu(crq->query_map_rsp.free_pages)); +} + +static void handle_query_cap_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + long rc; + + atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(netdev, "Outstanding queries: %d\n", + atomic_read(&adapter->running_cap_crqs)); + rc = 
crq->query_capability.rc.code; + if (rc) { + dev_err(dev, "Error %ld in QUERY_CAP_RSP\n", rc); + goto out; + } + + switch (be16_to_cpu(crq->query_capability.capability)) { + case MIN_TX_QUEUES: + adapter->min_tx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_tx_queues = %lld\n", + adapter->min_tx_queues); + break; + case MIN_RX_QUEUES: + adapter->min_rx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_rx_queues = %lld\n", + adapter->min_rx_queues); + break; + case MIN_RX_ADD_QUEUES: + adapter->min_rx_add_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_rx_add_queues = %lld\n", + adapter->min_rx_add_queues); + break; + case MAX_TX_QUEUES: + adapter->max_tx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_tx_queues = %lld\n", + adapter->max_tx_queues); + break; + case MAX_RX_QUEUES: + adapter->max_rx_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_rx_queues = %lld\n", + adapter->max_rx_queues); + break; + case MAX_RX_ADD_QUEUES: + adapter->max_rx_add_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_rx_add_queues = %lld\n", + adapter->max_rx_add_queues); + break; + case MIN_TX_ENTRIES_PER_SUBCRQ: + adapter->min_tx_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n", + adapter->min_tx_entries_per_subcrq); + break; + case MIN_RX_ADD_ENTRIES_PER_SUBCRQ: + adapter->min_rx_add_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n", + adapter->min_rx_add_entries_per_subcrq); + break; + case MAX_TX_ENTRIES_PER_SUBCRQ: + adapter->max_tx_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n", + adapter->max_tx_entries_per_subcrq); + break; + case MAX_RX_ADD_ENTRIES_PER_SUBCRQ: + 
adapter->max_rx_add_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n", + adapter->max_rx_add_entries_per_subcrq); + break; + case TCP_IP_OFFLOAD: + adapter->tcp_ip_offload = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "tcp_ip_offload = %lld\n", + adapter->tcp_ip_offload); + break; + case PROMISC_SUPPORTED: + adapter->promisc_supported = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "promisc_supported = %lld\n", + adapter->promisc_supported); + break; + case MIN_MTU: + adapter->min_mtu = be64_to_cpu(crq->query_capability.number); + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu); + break; + case MAX_MTU: + adapter->max_mtu = be64_to_cpu(crq->query_capability.number); + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu); + break; + case MAX_MULTICAST_FILTERS: + adapter->max_multicast_filters = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_multicast_filters = %lld\n", + adapter->max_multicast_filters); + break; + case VLAN_HEADER_INSERTION: + adapter->vlan_header_insertion = + be64_to_cpu(crq->query_capability.number); + if (adapter->vlan_header_insertion) + netdev->features |= NETIF_F_HW_VLAN_STAG_TX; + netdev_dbg(netdev, "vlan_header_insertion = %lld\n", + adapter->vlan_header_insertion); + break; + case RX_VLAN_HEADER_INSERTION: + adapter->rx_vlan_header_insertion = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "rx_vlan_header_insertion = %lld\n", + adapter->rx_vlan_header_insertion); + break; + case MAX_TX_SG_ENTRIES: + adapter->max_tx_sg_entries = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "max_tx_sg_entries = %lld\n", + adapter->max_tx_sg_entries); + break; + case RX_SG_SUPPORTED: + adapter->rx_sg_supported = + be64_to_cpu(crq->query_capability.number); + 
netdev_dbg(netdev, "rx_sg_supported = %lld\n", + adapter->rx_sg_supported); + break; + case OPT_TX_COMP_SUB_QUEUES: + adapter->opt_tx_comp_sub_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n", + adapter->opt_tx_comp_sub_queues); + break; + case OPT_RX_COMP_QUEUES: + adapter->opt_rx_comp_queues = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n", + adapter->opt_rx_comp_queues); + break; + case OPT_RX_BUFADD_Q_PER_RX_COMP_Q: + adapter->opt_rx_bufadd_q_per_rx_comp_q = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n", + adapter->opt_rx_bufadd_q_per_rx_comp_q); + break; + case OPT_TX_ENTRIES_PER_SUBCRQ: + adapter->opt_tx_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n", + adapter->opt_tx_entries_per_subcrq); + break; + case OPT_RXBA_ENTRIES_PER_SUBCRQ: + adapter->opt_rxba_entries_per_subcrq = + be64_to_cpu(crq->query_capability.number); + netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n", + adapter->opt_rxba_entries_per_subcrq); + break; + case TX_RX_DESC_REQ: + adapter->tx_rx_desc_req = crq->query_capability.number; + netdev_dbg(netdev, "tx_rx_desc_req = %llx\n", + adapter->tx_rx_desc_req); + break; + + default: + netdev_err(netdev, "Got invalid cap rsp %d\n", + crq->query_capability.capability); + } + +out: + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_request_cap(adapter, 0); +} + +static int send_query_phys_parms(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + int rc; + + memset(&crq, 0, sizeof(crq)); + crq.query_phys_parms.first = IBMVNIC_CRQ_CMD; + crq.query_phys_parms.cmd = QUERY_PHYS_PARMS; + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + reinit_completion(&adapter->fw_done); + + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + mutex_unlock(&adapter->fw_lock); + return rc; + 
} + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + mutex_unlock(&adapter->fw_lock); + return rc; + } + + mutex_unlock(&adapter->fw_lock); + return adapter->fw_done_rc ? -EIO : 0; +} + +static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int rc; + __be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed); + + rc = crq->query_phys_parms_rsp.rc.code; + if (rc) { + netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc); + return rc; + } + switch (rspeed) { + case IBMVNIC_10MBPS: + adapter->speed = SPEED_10; + break; + case IBMVNIC_100MBPS: + adapter->speed = SPEED_100; + break; + case IBMVNIC_1GBPS: + adapter->speed = SPEED_1000; + break; + case IBMVNIC_10GBPS: + adapter->speed = SPEED_10000; + break; + case IBMVNIC_25GBPS: + adapter->speed = SPEED_25000; + break; + case IBMVNIC_40GBPS: + adapter->speed = SPEED_40000; + break; + case IBMVNIC_50GBPS: + adapter->speed = SPEED_50000; + break; + case IBMVNIC_100GBPS: + adapter->speed = SPEED_100000; + break; + case IBMVNIC_200GBPS: + adapter->speed = SPEED_200000; + break; + default: + if (netif_carrier_ok(netdev)) + netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed); + adapter->speed = SPEED_UNKNOWN; + } + if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX) + adapter->duplex = DUPLEX_FULL; + else if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_HALF_DUPLEX) + adapter->duplex = DUPLEX_HALF; + else + adapter->duplex = DUPLEX_UNKNOWN; + + return rc; +} + +static void ibmvnic_handle_crq(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_generic_crq *gen_crq = &crq->generic; + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + u64 *u64_crq = (u64 *)crq; + long rc; + + netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n", + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); + switch 
(gen_crq->first) { + case IBMVNIC_CRQ_INIT_RSP: + switch (gen_crq->cmd) { + case IBMVNIC_CRQ_INIT: + dev_info(dev, "Partner initialized\n"); + adapter->from_passive_init = true; + /* Discard any stale login responses from prev reset. + * CHECK: should we clear even on INIT_COMPLETE? + */ + adapter->login_pending = false; + + if (adapter->state == VNIC_DOWN) + rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT); + else + rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + if (rc && rc != -EBUSY) { + /* We were unable to schedule the failover + * reset either because the adapter was still + * probing (eg: during kexec) or we could not + * allocate memory. Clear the failover_pending + * flag since no one else will. We ignore + * EBUSY because it means either FAILOVER reset + * is already scheduled or the adapter is + * being removed. + */ + netdev_err(netdev, + "Error %ld scheduling failover reset\n", + rc); + adapter->failover_pending = false; + } + + if (!completion_done(&adapter->init_done)) { + if (!adapter->init_done_rc) + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + + break; + case IBMVNIC_CRQ_INIT_COMPLETE: + dev_info(dev, "Partner initialization complete\n"); + adapter->crq.active = true; + send_version_xchg(adapter); + break; + default: + dev_err(dev, "Unknown crq cmd: %d\n", gen_crq->cmd); + } + return; + case IBMVNIC_CRQ_XPORT_EVENT: + netif_carrier_off(netdev); + adapter->crq.active = false; + /* terminate any thread waiting for a response + * from the device + */ + if (!completion_done(&adapter->fw_done)) { + adapter->fw_done_rc = -EIO; + complete(&adapter->fw_done); + } + + /* if we got here during crq-init, retry crq-init */ + if (!completion_done(&adapter->init_done)) { + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + + if (!completion_done(&adapter->stats_done)) + complete(&adapter->stats_done); + if (test_bit(0, &adapter->resetting)) + adapter->force_reset_recovery = true; + if (gen_crq->cmd == 
IBMVNIC_PARTITION_MIGRATED) { + dev_info(dev, "Migrated, re-enabling adapter\n"); + ibmvnic_reset(adapter, VNIC_RESET_MOBILITY); + } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { + dev_info(dev, "Backing device failover detected\n"); + adapter->failover_pending = true; + } else { + /* The adapter lost the connection */ + dev_err(dev, "Virtual Adapter failed (rc=%d)\n", + gen_crq->cmd); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); + } + return; + case IBMVNIC_CRQ_CMD_RSP: + break; + default: + dev_err(dev, "Got an invalid msg type 0x%02x\n", + gen_crq->first); + return; + } + + switch (gen_crq->cmd) { + case VERSION_EXCHANGE_RSP: + rc = crq->version_exchange_rsp.rc.code; + if (rc) { + dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc); + break; + } + ibmvnic_version = + be16_to_cpu(crq->version_exchange_rsp.version); + dev_info(dev, "Partner protocol version is %d\n", + ibmvnic_version); + send_query_cap(adapter); + break; + case QUERY_CAPABILITY_RSP: + handle_query_cap_rsp(crq, adapter); + break; + case QUERY_MAP_RSP: + handle_query_map_rsp(crq, adapter); + break; + case REQUEST_MAP_RSP: + adapter->fw_done_rc = crq->request_map_rsp.rc.code; + complete(&adapter->fw_done); + break; + case REQUEST_UNMAP_RSP: + handle_request_unmap_rsp(crq, adapter); + break; + case REQUEST_CAPABILITY_RSP: + handle_request_cap_rsp(crq, adapter); + break; + case LOGIN_RSP: + netdev_dbg(netdev, "Got Login Response\n"); + handle_login_rsp(crq, adapter); + break; + case LOGICAL_LINK_STATE_RSP: + netdev_dbg(netdev, + "Got Logical Link State Response, state: %d rc: %d\n", + crq->logical_link_state_rsp.link_state, + crq->logical_link_state_rsp.rc.code); + adapter->logical_link_state = + crq->logical_link_state_rsp.link_state; + adapter->init_done_rc = crq->logical_link_state_rsp.rc.code; + complete(&adapter->init_done); + break; + case LINK_STATE_INDICATION: + netdev_dbg(netdev, "Got Logical Link State Indication\n"); + adapter->phys_link_state = + 
crq->link_state_indication.phys_link_state; + adapter->logical_link_state = + crq->link_state_indication.logical_link_state; + if (adapter->phys_link_state && adapter->logical_link_state) + netif_carrier_on(netdev); + else + netif_carrier_off(netdev); + break; + case CHANGE_MAC_ADDR_RSP: + netdev_dbg(netdev, "Got MAC address change Response\n"); + adapter->fw_done_rc = handle_change_mac_rsp(crq, adapter); + break; + case ERROR_INDICATION: + netdev_dbg(netdev, "Got Error Indication\n"); + handle_error_indication(crq, adapter); + break; + case REQUEST_STATISTICS_RSP: + netdev_dbg(netdev, "Got Statistics Response\n"); + complete(&adapter->stats_done); + break; + case QUERY_IP_OFFLOAD_RSP: + netdev_dbg(netdev, "Got Query IP offload Response\n"); + handle_query_ip_offload_rsp(adapter); + break; + case MULTICAST_CTRL_RSP: + netdev_dbg(netdev, "Got multicast control Response\n"); + break; + case CONTROL_IP_OFFLOAD_RSP: + netdev_dbg(netdev, "Got Control IP offload Response\n"); + dma_unmap_single(dev, adapter->ip_offload_ctrl_tok, + sizeof(adapter->ip_offload_ctrl), + DMA_TO_DEVICE); + complete(&adapter->init_done); + break; + case COLLECT_FW_TRACE_RSP: + netdev_dbg(netdev, "Got Collect firmware trace Response\n"); + complete(&adapter->fw_done); + break; + case GET_VPD_SIZE_RSP: + handle_vpd_size_rsp(crq, adapter); + break; + case GET_VPD_RSP: + handle_vpd_rsp(crq, adapter); + break; + case QUERY_PHYS_PARMS_RSP: + adapter->fw_done_rc = handle_query_phys_parms_rsp(crq, adapter); + complete(&adapter->fw_done); + break; + default: + netdev_err(netdev, "Got an invalid cmd type 0x%02x\n", + gen_crq->cmd); + } +} + +static irqreturn_t ibmvnic_interrupt(int irq, void *instance) +{ + struct ibmvnic_adapter *adapter = instance; + + tasklet_schedule(&adapter->tasklet); + return IRQ_HANDLED; +} + +static void ibmvnic_tasklet(struct tasklet_struct *t) +{ + struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet); + struct ibmvnic_crq_queue *queue = &adapter->crq; + union 
ibmvnic_crq *crq; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). + */ + dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; + } + + spin_unlock_irqrestore(&queue->lock, flags); +} + +static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *adapter) +{ + struct vio_dev *vdev = adapter->vdev; + int rc; + + do { + rc = plpar_hcall_norets(H_ENABLE_CRQ, vdev->unit_address); + } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + if (rc) + dev_err(&vdev->dev, "Error enabling adapter (rc=%d)\n", rc); + + return rc; +} + +static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_crq_queue *crq = &adapter->crq; + struct device *dev = &adapter->vdev->dev; + struct vio_dev *vdev = adapter->vdev; + int rc; + + /* Close the CRQ */ + do { + rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address); + } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); + + /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + + memset(crq->msgs, 0, PAGE_SIZE); + crq->cur = 0; + crq->active = false; + + /* And re-open it again */ + rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, + crq->msg_token, PAGE_SIZE); + + if (rc == H_CLOSED) + /* Adapter is good, but other end is not ready */ + dev_warn(dev, "Partner adapter not ready\n"); + else if (rc != 0) + dev_warn(dev, "Couldn't register crq (rc=%d)\n", rc); + + return rc; +} + +static void release_crq_queue(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_crq_queue *crq = &adapter->crq; + struct vio_dev *vdev = adapter->vdev; + long rc; + + if (!crq->msgs) + return; + + netdev_dbg(adapter->netdev, "Releasing CRQ\n"); + free_irq(vdev->irq, adapter); + 
tasklet_kill(&adapter->tasklet);
+	do {
+		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
+	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+
+	dma_unmap_single(&vdev->dev, crq->msg_token, PAGE_SIZE,
+			 DMA_BIDIRECTIONAL);
+	free_page((unsigned long)crq->msgs);
+	crq->msgs = NULL;
+	crq->active = false;
+}
+
+/* Allocate one zeroed page for the main CRQ, DMA-map it, register it with
+ * the hypervisor (H_REG_CRQ) and hook up the tasklet and interrupt that
+ * service it.
+ *
+ * Returns 0 if the queue already exists or was set up (including the
+ * H_CLOSED case, where the partner is simply not ready yet), -ENOMEM if the
+ * page cannot be allocated or mapped, or the failing call's return code
+ * otherwise. On any failure everything acquired here is released and
+ * crq->msgs is left NULL.
+ */
+static int init_crq_queue(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_crq_queue *crq = &adapter->crq;
+	struct device *dev = &adapter->vdev->dev;
+	struct vio_dev *vdev = adapter->vdev;
+	int rc, retrc = -ENOMEM;
+
+	if (crq->msgs)
+		return 0;
+
+	crq->msgs = (union ibmvnic_crq *)get_zeroed_page(GFP_KERNEL);
+	/* Should we allocate more than one page? */
+
+	if (!crq->msgs)
+		return -ENOMEM;
+
+	crq->size = PAGE_SIZE / sizeof(*crq->msgs);
+	crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, crq->msg_token))
+		goto map_failed;
+
+	rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
+				crq->msg_token, PAGE_SIZE);
+
+	if (rc == H_RESOURCE)
+		/* maybe kexecing and resource is busy. try a reset */
+		rc = ibmvnic_reset_crq(adapter);
+	retrc = rc;
+
+	if (rc == H_CLOSED) {
+		dev_warn(dev, "Partner adapter not ready\n");
+	} else if (rc) {
+		dev_warn(dev, "Error %d opening adapter\n", rc);
+		goto reg_crq_failed;
+	}
+
+	retrc = 0;
+
+	/* ibmvnic_interrupt() may schedule the tasklet as soon as the IRQ is
+	 * requested, and the tasklet takes crq->lock, so the lock and the
+	 * queue cursor must be ready before request_irq().
+	 */
+	crq->cur = 0;
+	spin_lock_init(&crq->lock);
+
+	tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet);
+
+	netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq);
+	snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x",
+		 adapter->vdev->unit_address);
+	rc = request_irq(vdev->irq, ibmvnic_interrupt, 0, crq->name, adapter);
+	if (rc) {
+		dev_err(dev, "Couldn't register irq 0x%x. rc=%d\n",
+			vdev->irq, rc);
+		/* retrc was cleared above; record the failure so the
+		 * caller does not see success for a torn-down queue
+		 */
+		retrc = rc;
+		goto req_irq_failed;
+	}
+
+	rc = vio_enable_interrupts(vdev);
+	if (rc) {
+		dev_err(dev, "Error %d enabling interrupts\n", rc);
+		retrc = rc;
+		goto enable_irq_failed;
+	}
+
+	/* process any CRQs that were queued before we enabled interrupts */
+	tasklet_schedule(&adapter->tasklet);
+
+	return retrc;
+
+enable_irq_failed:
+	/* the IRQ was requested successfully, so it must be given back */
+	free_irq(vdev->irq, adapter);
+req_irq_failed:
+	tasklet_kill(&adapter->tasklet);
+	/* rc is reused for the hcall below; the error to report is in retrc */
+	do {
+		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
+	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+reg_crq_failed:
+	dma_unmap_single(dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
+map_failed:
+	free_page((unsigned long)crq->msgs);
+	crq->msgs = NULL;
+	return retrc;
+}
+
+static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
+{
+	struct device *dev = &adapter->vdev->dev;
+	unsigned long timeout = msecs_to_jiffies(20000);
+	u64 old_num_rx_queues = adapter->req_rx_queues;
+	u64 old_num_tx_queues = adapter->req_tx_queues;
+	int rc;
+
+	adapter->from_passive_init = false;
+
+	rc = ibmvnic_send_crq_init(adapter);
+	if (rc) {
+		dev_err(dev, "Send crq init failed with error %d\n", rc);
+		return rc;
+	}
+
+	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+		dev_err(dev, "Initialization sequence timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	if (adapter->init_done_rc) {
+		release_crq_queue(adapter);
+		dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc);
+		return adapter->init_done_rc;
+	}
+
+	if (adapter->from_passive_init) {
+		adapter->state = VNIC_OPEN;
+		adapter->from_passive_init = false;
+		dev_err(dev, "CRQ-init failed, passive-init\n");
+		return -EINVAL;
+	}
+
+	if (reset &&
+	    test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
+	    adapter->reset_reason != VNIC_RESET_MOBILITY) {
+		if (adapter->req_rx_queues != old_num_rx_queues ||
+		    adapter->req_tx_queues != old_num_tx_queues) {
+			release_sub_crqs(adapter, 0);
+			rc = init_sub_crqs(adapter);
+		} else {
+			/* no need to reinitialize
completely, but we do
+			 * need to clean up transmits that were in flight
+			 * when we processed the reset. Failure to do so
+			 * will confound the upper layer, usually TCP, by
+			 * creating the illusion of transmits that are
+			 * awaiting completion.
+			 */
+			clean_tx_pools(adapter);
+
+			rc = reset_sub_crq_queues(adapter);
+		}
+	} else {
+		if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+			/* After an LPM, reset the max number of indirect
+			 * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT
+			 * hcall to the default max (e.g POWER8 -> POWER10)
+			 *
+			 * If the new destination platform does not support
+			 * the higher limit max (e.g. POWER10-> POWER8 LPM)
+			 * H_PARAMETER will trigger automatic fallback to the
+			 * safe minimum limit.
+			 */
+			adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
+		}
+
+		rc = init_sub_crqs(adapter);
+	}
+
+	if (rc) {
+		dev_err(dev, "Initialization of sub crqs failed\n");
+		release_crq_queue(adapter);
+		return rc;
+	}
+
+	rc = init_sub_crq_irqs(adapter);
+	if (rc) {
+		dev_err(dev, "Failed to initialize sub crq irqs\n");
+		release_crq_queue(adapter);
+	}
+
+	return rc;
+}
+
+static struct device_attribute dev_attr_failover;
+
+/* Probe a VNIC vio device: allocate the netdev, initialise the adapter
+ * state, bring up the CRQ, create the failover sysfs file and register the
+ * netdev.
+ *
+ * Returns 0 on success or an error code on failure.
+ */
+static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+{
+	struct ibmvnic_adapter *adapter;
+	struct net_device *netdev;
+	unsigned char *mac_addr_p;
+	unsigned long flags;
+	bool init_success;
+	int rc;
+
+	dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n",
+		dev->unit_address);
+
+	mac_addr_p = (unsigned char *)vio_get_attribute(dev,
+							VETH_MAC_ADDR, NULL);
+	if (!mac_addr_p) {
+		dev_err(&dev->dev,
+			"(%s:%3.3d) ERROR: Can't find MAC_ADDR attribute\n",
+			__FILE__, __LINE__);
+		/* Fail the probe: returning 0 would bind the driver with no
+		 * drvdata set, and ibmvnic_remove()/ibmvnic_resume() use
+		 * drvdata without a NULL check.
+		 */
+		return -EINVAL;
+	}
+
+	netdev = alloc_etherdev_mq(sizeof(struct ibmvnic_adapter),
+				   IBMVNIC_MAX_QUEUES);
+	if (!netdev)
+		return -ENOMEM;
+
+	adapter = netdev_priv(netdev);
+	adapter->state = VNIC_PROBING;
+	dev_set_drvdata(&dev->dev, netdev);
+	adapter->vdev = dev;
+	adapter->netdev = netdev;
+	adapter->login_pending = false;
+	memset(&adapter->map_ids, 0, sizeof(adapter->map_ids));
+	/* map_ids start at 1, so ensure map_id 0 is always "in-use" */
+	bitmap_set(adapter->map_ids, 0, 1);
+
+	ether_addr_copy(adapter->mac_addr, mac_addr_p);
+	eth_hw_addr_set(netdev, adapter->mac_addr);
+	netdev->irq = dev->irq;
+	netdev->netdev_ops = &ibmvnic_netdev_ops;
+	netdev->ethtool_ops = &ibmvnic_ethtool_ops;
+	SET_NETDEV_DEV(netdev, &dev->dev);
+
+	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+	INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
+			  __ibmvnic_delayed_reset);
+	INIT_LIST_HEAD(&adapter->rwi_list);
+	spin_lock_init(&adapter->rwi_lock);
+	spin_lock_init(&adapter->state_lock);
+	mutex_init(&adapter->fw_lock);
+	init_completion(&adapter->probe_done);
+	init_completion(&adapter->init_done);
+	init_completion(&adapter->fw_done);
+	init_completion(&adapter->reset_done);
+	init_completion(&adapter->stats_done);
+	clear_bit(0, &adapter->resetting);
+	adapter->prev_rx_buf_sz = 0;
+	adapter->prev_mtu = 0;
+
+	init_success = false;
+	do {
+		reinit_init_done(adapter);
+
+		/* clear any failovers we got in the previous pass
+		 * since we are reinitializing the CRQ
+		 */
+		adapter->failover_pending = false;
+
+		/* If we had already initialized CRQ, we may have one or
+		 * more resets queued already. Discard those and release
+		 * the CRQ before initializing the CRQ again.
+		 */
+		release_crq_queue(adapter);
+
+		/* Since we are still in PROBING state, __ibmvnic_reset()
+		 * will not access the ->rwi_list and since we released CRQ,
+		 * we won't get _new_ transport events. But there may be an
+		 * ongoing ibmvnic_reset() call. So serialize access to
+		 * rwi_list. If we win the race, ibmvnic_reset() could add
+		 * a reset after we purged but that's ok - we just may end
+		 * up with an extra reset (i.e. similar to having two or more
+		 * resets in the queue at once).
+		 * CHECK.
+ */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + rc = init_crq_queue(adapter); + if (rc) { + dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", + rc); + goto ibmvnic_init_fail; + } + + rc = ibmvnic_reset_init(adapter, false); + } while (rc == -EAGAIN); + + /* We are ignoring the error from ibmvnic_reset_init() assuming that the + * partner is not ready. CRQ is not active. When the partner becomes + * ready, we will do the passive init reset. + */ + + if (!rc) + init_success = true; + + rc = init_stats_buffers(adapter); + if (rc) + goto ibmvnic_init_fail; + + rc = init_stats_token(adapter); + if (rc) + goto ibmvnic_stats_fail; + + rc = device_create_file(&dev->dev, &dev_attr_failover); + if (rc) + goto ibmvnic_dev_file_err; + + netif_carrier_off(netdev); + + if (init_success) { + adapter->state = VNIC_PROBED; + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + } else { + adapter->state = VNIC_DOWN; + } + + adapter->wait_for_reset = false; + adapter->last_reset_time = jiffies; + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; + + rc = register_netdev(netdev); + if (rc) { + dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); + goto ibmvnic_register_fail; + } + dev_info(&dev->dev, "ibmvnic registered\n"); + + rc = ibmvnic_cpu_notif_add(adapter); + if (rc) { + netdev_err(netdev, "Registering cpu notifier failed\n"); + goto cpu_notif_add_failed; + } + + complete(&adapter->probe_done); + + return 0; + +cpu_notif_add_failed: + unregister_netdev(netdev); + +ibmvnic_register_fail: + device_remove_file(&dev->dev, &dev_attr_failover); + +ibmvnic_dev_file_err: + release_stats_token(adapter); + +ibmvnic_stats_fail: + release_stats_buffers(adapter); + +ibmvnic_init_fail: + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); + + /* cleanup worker thread after releasing CRQ so 
we don't get + * transport events (i.e new work items for the worker thread). + */ + adapter->state = VNIC_REMOVING; + complete(&adapter->probe_done); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + flush_reset_queue(adapter); + + mutex_destroy(&adapter->fw_lock); + free_netdev(netdev); + + return rc; +} + +static void ibmvnic_remove(struct vio_dev *dev) +{ + struct net_device *netdev = dev_get_drvdata(&dev->dev); + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + unsigned long flags; + + spin_lock_irqsave(&adapter->state_lock, flags); + + /* If ibmvnic_reset() is scheduling a reset, wait for it to + * finish. Then, set the state to REMOVING to prevent it from + * scheduling any more work and to have reset functions ignore + * any resets that have already been scheduled. Drop the lock + * after setting state, so __ibmvnic_reset() which is called + * from the flush_work() below, can make progress. + */ + spin_lock(&adapter->rwi_lock); + adapter->state = VNIC_REMOVING; + spin_unlock(&adapter->rwi_lock); + + spin_unlock_irqrestore(&adapter->state_lock, flags); + + ibmvnic_cpu_notif_remove(adapter); + + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + rtnl_lock(); + unregister_netdevice(netdev); + + release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); + + release_stats_token(adapter); + release_stats_buffers(adapter); + + adapter->state = VNIC_REMOVED; + + rtnl_unlock(); + mutex_destroy(&adapter->fw_lock); + device_remove_file(&dev->dev, &dev_attr_failover); + free_netdev(netdev); + dev_set_drvdata(&dev->dev, NULL); +} + +static ssize_t failover_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + unsigned long 
retbuf[PLPAR_HCALL_BUFSIZE]; + __be64 session_token; + long rc; + + if (!sysfs_streq(buf, "1")) + return -EINVAL; + + rc = plpar_hcall(H_VIOCTL, retbuf, adapter->vdev->unit_address, + H_GET_SESSION_TOKEN, 0, 0, 0); + if (rc) { + netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n", + rc); + goto last_resort; + } + + session_token = (__be64)retbuf[0]; + netdev_dbg(netdev, "Initiating client failover, session id %llx\n", + be64_to_cpu(session_token)); + rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, + H_SESSION_ERR_DETECTED, session_token, 0, 0); + if (rc) { + netdev_err(netdev, + "H_VIOCTL initiated failover failed, rc %ld\n", + rc); + goto last_resort; + } + + return count; + +last_resort: + netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); + ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + return count; +} +static DEVICE_ATTR_WO(failover); + +static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) +{ + struct net_device *netdev = dev_get_drvdata(&vdev->dev); + struct ibmvnic_adapter *adapter; + struct iommu_table *tbl; + unsigned long ret = 0; + int i; + + tbl = get_iommu_table_base(&vdev->dev); + + /* netdev inits at probe time along with the structures we need below*/ + if (!netdev) + return IOMMU_PAGE_ALIGN(IBMVNIC_IO_ENTITLEMENT_DEFAULT, tbl); + + adapter = netdev_priv(netdev); + + ret += PAGE_SIZE; /* the crq message queue */ + ret += IOMMU_PAGE_ALIGN(sizeof(struct ibmvnic_statistics), tbl); + + for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++) + ret += 4 * PAGE_SIZE; /* the scrq message queue */ + + for (i = 0; i < adapter->num_active_rx_pools; i++) + ret += adapter->rx_pool[i].size * + IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl); + + return ret; +} + +static int ibmvnic_resume(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + if (adapter->state != VNIC_OPEN) + return 0; + + 
tasklet_schedule(&adapter->tasklet); + + return 0; +} + +static const struct vio_device_id ibmvnic_device_table[] = { + {"network", "IBM,vnic"}, + {"", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmvnic_device_table); + +static const struct dev_pm_ops ibmvnic_pm_ops = { + .resume = ibmvnic_resume +}; + +static struct vio_driver ibmvnic_driver = { + .id_table = ibmvnic_device_table, + .probe = ibmvnic_probe, + .remove = ibmvnic_remove, + .get_desired_dma = ibmvnic_get_desired_dma, + .name = ibmvnic_driver_name, + .pm = &ibmvnic_pm_ops, +}; + +/* module functions */ +static int __init ibmvnic_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/ibmvnic:online", + ibmvnic_cpu_online, + ibmvnic_cpu_down_prep); + if (ret < 0) + goto out; + ibmvnic_online = ret; + ret = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "net/ibmvnic:dead", + NULL, ibmvnic_cpu_dead); + if (ret) + goto err_dead; + + ret = vio_register_driver(&ibmvnic_driver); + if (ret) + goto err_vio_register; + + pr_info("%s: %s %s\n", ibmvnic_driver_name, ibmvnic_driver_string, + IBMVNIC_DRIVER_VERSION); + + return 0; +err_vio_register: + cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD); +err_dead: + cpuhp_remove_multi_state(ibmvnic_online); +out: + return ret; +} + +static void __exit ibmvnic_module_exit(void) +{ + vio_unregister_driver(&ibmvnic_driver); + cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD); + cpuhp_remove_multi_state(ibmvnic_online); +} + +module_init(ibmvnic_module_init); +module_exit(ibmvnic_module_exit); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h new file mode 100644 index 000000000000..480dc587078f --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -0,0 +1,1074 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/**************************************************************************/ +/* */ +/* IBM System i and System p Virtual NIC Device Driver */ +/* Copyright (C) 2014 IBM Corp. 
*/ +/* Santiago Leon (santi_leon@yahoo.com) */ +/* Thomas Falcon (tlfalcon@linux.vnet.ibm.com) */ +/* John Allen (jallen@linux.vnet.ibm.com) */ +/* */ +/* */ +/* This module contains the implementation of a virtual ethernet device */ +/* for use with IBM i/pSeries LPAR Linux. It utilizes the logical LAN */ +/* option of the RS/6000 Platform Architecture to interface with virtual */ +/* ethernet NICs that are presented to the partition by the hypervisor. */ +/* */ +/**************************************************************************/ + +#define IBMVNIC_NAME "ibmvnic" +#define IBMVNIC_DRIVER_VERSION "1.0.1" +#define IBMVNIC_INVALID_MAP -1 +#define IBMVNIC_OPEN_FAILED 3 + +/* basic structures plus 100 2k buffers */ +#define IBMVNIC_IO_ENTITLEMENT_DEFAULT 610305 + +/* Initial module_parameters */ +#define IBMVNIC_RX_WEIGHT 16 +/* when changing this, update IBMVNIC_IO_ENTITLEMENT_DEFAULT */ +#define IBMVNIC_BUFFS_PER_POOL 100 +#define IBMVNIC_MAX_QUEUES 16 +#define IBMVNIC_MAX_QUEUE_SZ 4096 +#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_SAFE_IND_DESC 16 +#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) + +#define IBMVNIC_TSO_BUF_SZ 65536 +#define IBMVNIC_TSO_BUFS 64 +#define IBMVNIC_TSO_POOL_MASK 0x80000000 + +/* A VNIC adapter has set of Rx and Tx pools (aka queues). Each Rx/Tx pool + * has a set of buffers. The size of each buffer is determined by the MTU. + * + * Each Rx/Tx pool is also associated with a DMA region that is shared + * with the "hardware" (VIOS) and used to send/receive packets. The DMA + * region is also referred to as a Long Term Buffer or LTB. + * + * The size of the DMA region required for an Rx/Tx pool depends on the + * number and size (MTU) of the buffers in the pool. At the max levels + * of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus + * some padding. + * + * But the size of a single DMA region is limited by MAX_PAGE_ORDER in the + * kernel (about 16MB currently). 
To support, say, 4K jumbo frames, we + * use a set of LTBs (struct ltb_set) per pool. + * + * IBMVNIC_ONE_LTB_MAX - max size of each LTB supported by kernel + * IBMVNIC_ONE_LTB_SIZE - current max size of each LTB in an ltb_set + * (must be <= IBMVNIC_ONE_LTB_MAX) + * IBMVNIC_LTB_SET_SIZE - current size of all LTBs in an ltb_set + * + * Each VNIC can have up to 16 Rx, 16 Tx and 16 TSO pools. The TSO pools + * are of fixed length (IBMVNIC_TSO_BUF_SZ * IBMVNIC_TSO_BUFS) of 4MB. + * + * The Rx and Tx pools can have up to 4096 buffers. The max size of these + * buffers is about 9588 (for jumbo frames, including IBMVNIC_BUFFER_HLEN). + * So, setting the IBMVNIC_LTB_SET_SIZE for a pool to 4096 * 9588 ~= 38MB. + * + * There is a trade-off in setting IBMVNIC_ONE_LTB_SIZE. If it is large, + * the allocation of the LTB can fail when the system is low on memory. If + * it is too small, we would need several mappings for each of the Rx/ + * Tx/TSO pools but there is a limit of 255 mappings per vnic in the + * VNIC protocol. + * + * So setting IBMVNIC_ONE_LTB_SIZE to 8MB. With IBMVNIC_LTB_SET_SIZE set + * to 38MB, we will need 5 LTBs per Rx and Tx pool and 1 LTB per TSO + * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160 + * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC.
+ */ +#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << MAX_PAGE_ORDER) * PAGE_SIZE)) +#define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX) +#define IBMVNIC_LTB_SET_SIZE (38 << 20) + +#define IBMVNIC_BUFFER_HLEN 500 +#define IBMVNIC_RESET_DELAY 100 + +struct ibmvnic_login_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_LB 1 + __be32 num_txcomp_subcrqs; + __be32 off_txcomp_subcrqs; + __be32 num_rxcomp_subcrqs; + __be32 off_rxcomp_subcrqs; + __be32 login_rsp_ioba; + __be32 login_rsp_len; + __be32 client_data_offset; + __be32 client_data_len; +} __packed __aligned(8); + +struct ibmvnic_login_rsp_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_LRB 1 + __be32 num_txsubm_subcrqs; + __be32 off_txsubm_subcrqs; + __be32 num_rxadd_subcrqs; + __be32 off_rxadd_subcrqs; + __be32 off_rxadd_buff_size; + __be32 num_supp_tx_desc; + __be32 off_supp_tx_desc; +} __packed __aligned(8); + +struct ibmvnic_query_ip_offload_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_IOB 1 + u8 ipv4_chksum; + u8 ipv6_chksum; + u8 tcp_ipv4_chksum; + u8 tcp_ipv6_chksum; + u8 udp_ipv4_chksum; + u8 udp_ipv6_chksum; + u8 large_tx_ipv4; + u8 large_tx_ipv6; + u8 large_rx_ipv4; + u8 large_rx_ipv6; + u8 reserved1[14]; + __be16 max_ipv4_header_size; + __be16 max_ipv6_header_size; + __be16 max_tcp_header_size; + __be16 max_udp_header_size; + __be32 max_large_tx_size; + __be32 max_large_rx_size; + u8 reserved2[16]; + u8 ipv6_extension_header; +#define IPV6_EH_NOT_SUPPORTED 0x00 +#define IPV6_EH_SUPPORTED_LIM 0x01 +#define IPV6_EH_SUPPORTED 0xFF + u8 tcp_pseudosum_req; +#define TCP_PS_NOT_REQUIRED 0x00 +#define TCP_PS_REQUIRED 0x01 + u8 reserved3[30]; + __be16 num_ipv6_ext_headers; + __be32 off_ipv6_ext_headers; + u8 reserved4[154]; +} __packed __aligned(8); + +struct ibmvnic_control_ip_offload_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_IOB 1 + u8 ipv4_chksum; + u8 ipv6_chksum; + u8 tcp_ipv4_chksum; + u8 tcp_ipv6_chksum; + u8 
udp_ipv4_chksum; + u8 udp_ipv6_chksum; + u8 large_tx_ipv4; + u8 large_tx_ipv6; + u8 bad_packet_rx; + u8 large_rx_ipv4; + u8 large_rx_ipv6; + u8 reserved4[111]; +} __packed __aligned(8); + +struct ibmvnic_fw_component { + u8 name[48]; + __be32 trace_buff_size; + u8 correlator; + u8 trace_level; + u8 parent_correlator; + u8 error_check_level; + u8 trace_on; + u8 reserved[7]; + u8 description[192]; +} __packed __aligned(8); + +struct ibmvnic_fw_trace_entry { + __be32 trace_id; + u8 num_valid_data; + u8 reserved[3]; + __be64 pmc_registers; + __be64 timebase; + __be64 trace_data[5]; +} __packed __aligned(8); + +struct ibmvnic_statistics { + __be32 version; + __be32 promiscuous; + __be64 rx_packets; + __be64 rx_bytes; + __be64 tx_packets; + __be64 tx_bytes; + __be64 ucast_tx_packets; + __be64 ucast_rx_packets; + __be64 mcast_tx_packets; + __be64 mcast_rx_packets; + __be64 bcast_tx_packets; + __be64 bcast_rx_packets; + __be64 align_errors; + __be64 fcs_errors; + __be64 single_collision_frames; + __be64 multi_collision_frames; + __be64 sqe_test_errors; + __be64 deferred_tx; + __be64 late_collisions; + __be64 excess_collisions; + __be64 internal_mac_tx_errors; + __be64 carrier_sense; + __be64 too_long_frames; + __be64 internal_mac_rx_errors; + u8 reserved[72]; +} __packed __aligned(8); + +struct ibmvnic_tx_queue_stats { + u64 batched_packets; + u64 direct_packets; + u64 bytes; + u64 dropped_packets; +}; + +#define NUM_TX_STATS \ + (sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64)) + +struct ibmvnic_rx_queue_stats { + u64 packets; + u64 bytes; + u64 interrupts; +}; + +#define NUM_RX_STATS \ + (sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64)) + +struct ibmvnic_acl_buffer { + __be32 len; + __be32 version; +#define INITIAL_VERSION_IOB 1 + u8 mac_acls_restrict; + u8 vlan_acls_restrict; + u8 reserved1[22]; + __be32 num_mac_addrs; + __be32 offset_mac_addrs; + __be32 num_vlan_ids; + __be32 offset_vlan_ids; + u8 reserved2[80]; +} __packed __aligned(8); + +/* descriptors have 
been changed, how should this be defined? 1? 4? */ + +#define IBMVNIC_TX_DESC_VERSIONS 3 + +/* is this still needed? */ +struct ibmvnic_tx_comp_desc { + u8 first; + u8 num_comps; + __be16 rcs[5]; + __be32 correlators[5]; +} __packed __aligned(8); + +/* some flags that included in v0 descriptor, which is gone + * only used for IBMVNIC_TCP_CHKSUM and IBMVNIC_UDP_CHKSUM + * and only in some offload_flags variable that doesn't seem + * to be used anywhere, can probably be removed? + */ + +#define IBMVNIC_TCP_CHKSUM 0x20 +#define IBMVNIC_UDP_CHKSUM 0x08 + +struct ibmvnic_tx_desc { + u8 first; + u8 type; + +#define IBMVNIC_TX_DESC 0x10 + u8 n_crq_elem; + u8 n_sge; + u8 flags1; +#define IBMVNIC_TX_COMP_NEEDED 0x80 +#define IBMVNIC_TX_CHKSUM_OFFLOAD 0x40 +#define IBMVNIC_TX_LSO 0x20 +#define IBMVNIC_TX_PROT_TCP 0x10 +#define IBMVNIC_TX_PROT_UDP 0x08 +#define IBMVNIC_TX_PROT_IPV4 0x04 +#define IBMVNIC_TX_PROT_IPV6 0x02 +#define IBMVNIC_TX_VLAN_PRESENT 0x01 + u8 flags2; +#define IBMVNIC_TX_VLAN_INSERT 0x80 + __be16 mss; + u8 reserved[4]; + __be32 correlator; + __be16 vlan_id; + __be16 dma_reg; + __be32 sge_len; + __be64 ioba; +} __packed __aligned(8); + +struct ibmvnic_hdr_desc { + u8 first; + u8 type; +#define IBMVNIC_HDR_DESC 0x11 + u8 len; + u8 l2_len; + __be16 l3_len; + u8 l4_len; + u8 flag; + u8 data[24]; +} __packed __aligned(8); + +struct ibmvnic_hdr_ext_desc { + u8 first; + u8 type; +#define IBMVNIC_HDR_EXT_DESC 0x12 + u8 len; + u8 data[29]; +} __packed __aligned(8); + +struct ibmvnic_sge_desc { + u8 first; + u8 type; +#define IBMVNIC_SGE_DESC 0x30 + __be16 sge1_dma_reg; + __be32 sge1_len; + __be64 sge1_ioba; + __be16 reserved; + __be16 sge2_dma_reg; + __be32 sge2_len; + __be64 sge2_ioba; +} __packed __aligned(8); + +struct ibmvnic_rx_comp_desc { + u8 first; + u8 flags; +#define IBMVNIC_IP_CHKSUM_GOOD 0x80 +#define IBMVNIC_TCP_UDP_CHKSUM_GOOD 0x40 +#define IBMVNIC_END_FRAME 0x20 +#define IBMVNIC_EXACT_MC 0x10 +#define IBMVNIC_VLAN_STRIPPED 0x08 + __be16 
off_frame_data; + __be32 len; + __be64 correlator; + __be16 vlan_tci; + __be16 rc; + u8 reserved[12]; +} __packed __aligned(8); + +struct ibmvnic_generic_scrq { + u8 first; + u8 reserved[31]; +} __packed __aligned(8); + +struct ibmvnic_rx_buff_add_desc { + u8 first; + u8 reserved[7]; + __be64 correlator; + __be32 ioba; + u8 map_id; + __be32 len:24; + u8 reserved2[8]; +} __packed __aligned(8); + +struct ibmvnic_rc { + u8 code; /* one of enum ibmvnic_rc_codes */ + u8 detailed_data[3]; +} __packed __aligned(4); + +struct ibmvnic_generic_crq { + u8 first; + u8 cmd; + u8 params[10]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_version_exchange { + u8 first; + u8 cmd; + __be16 version; +#define IBMVNIC_INITIAL_VERSION 1 + u8 reserved[8]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_capability { + u8 first; + u8 cmd; + __be16 capability; /* one of ibmvnic_capabilities */ + __be64 number; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_login { + u8 first; + u8 cmd; + u8 reserved[6]; + __be32 ioba; + __be32 len; +} __packed __aligned(8); + +struct ibmvnic_phys_parms { + u8 first; + u8 cmd; + u8 flags1; +#define IBMVNIC_EXTERNAL_LOOPBACK 0x80 +#define IBMVNIC_INTERNAL_LOOPBACK 0x40 +#define IBMVNIC_PROMISC 0x20 +#define IBMVNIC_PHYS_LINK_ACTIVE 0x10 +#define IBMVNIC_AUTONEG_DUPLEX 0x08 +#define IBMVNIC_FULL_DUPLEX 0x04 +#define IBMVNIC_HALF_DUPLEX 0x02 +#define IBMVNIC_CAN_CHG_PHYS_PARMS 0x01 + u8 flags2; +#define IBMVNIC_LOGICAL_LNK_ACTIVE 0x80 + __be32 speed; +#define IBMVNIC_AUTONEG 0x80000000 +#define IBMVNIC_10MBPS 0x40000000 +#define IBMVNIC_100MBPS 0x20000000 +#define IBMVNIC_1GBPS 0x10000000 +#define IBMVNIC_10GBPS 0x08000000 +#define IBMVNIC_40GBPS 0x04000000 +#define IBMVNIC_100GBPS 0x02000000 +#define IBMVNIC_25GBPS 0x01000000 +#define IBMVNIC_50GBPS 0x00800000 +#define IBMVNIC_200GBPS 0x00400000 + __be32 mtu; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +struct ibmvnic_logical_link_state { + 
u8 first; + u8 cmd; + u8 link_state; +#define IBMVNIC_LOGICAL_LNK_DN 0x00 +#define IBMVNIC_LOGICAL_LNK_UP 0x01 +#define IBMVNIC_LOGICAL_LNK_QUERY 0xff + u8 reserved[9]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* CRQ layout: first/cmd header, two reserved bytes, then len followed by ioba, then rc. Note len precedes ioba here, unlike struct ibmvnic_control_ip_offload. */ struct ibmvnic_query_ip_offload { + u8 first; + u8 cmd; + u8 reserved[2]; + __be32 len; + __be32 ioba; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* CRQ layout with the same members as struct ibmvnic_query_ip_offload, but ioba precedes len. */ struct ibmvnic_control_ip_offload { + u8 first; + u8 cmd; + u8 reserved[2]; + __be32 ioba; + __be32 len; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* CRQ layout: a flags byte (IBMVNIC_PHYSICAL_PORT is the only bit defined beside it) and an ioba/len pair; members total 16 bytes, no rc. */ struct ibmvnic_request_statistics { + u8 first; + u8 cmd; + u8 flags; +#define IBMVNIC_PHYSICAL_PORT 0x80 + u8 reserved1; + __be32 ioba; + __be32 len; + u8 reserved[4]; +} __packed __aligned(8); + +/* CRQ layout: flags byte (IBMVNIC_FATAL_ERROR defined beside it), error_id, detail_error_sz and a 16-bit error_cause; members total 16 bytes. NOTE(review): error_cause presumably holds enum ibmvnic_error_cause values - confirm. */ struct ibmvnic_error_indication { + u8 first; + u8 cmd; + u8 flags; +#define IBMVNIC_FATAL_ERROR 0x80 + u8 reserved1; + __be32 error_id; + __be32 detail_error_sz; + __be16 error_cause; + u8 reserved2[2]; +} __packed __aligned(8); + +/* CRQ layout with one byte each for phys_link_state and logical_link_state; members total 16 bytes, no rc. */ struct ibmvnic_link_state_indication { + u8 first; + u8 cmd; + u8 reserved1[2]; + u8 phys_link_state; + u8 logical_link_state; + u8 reserved2[10]; +} __packed __aligned(8); + +/* CRQ layout carrying a 6-byte mac_addr and an rc. */ struct ibmvnic_change_mac_addr { + u8 first; + u8 cmd; + u8 mac_addr[6]; + u8 reserved[4]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* CRQ layout: 6-byte mac_addr, a flags byte with four distinct bits defined beside it (0x80, 0x40, 0x20, 0x10), and an rc. */ struct ibmvnic_multicast_ctrl { + u8 first; + u8 cmd; + u8 mac_addr[6]; + u8 flags; +#define IBMVNIC_ENABLE_MC 0x80 +#define IBMVNIC_DISABLE_MC 0x40 +#define IBMVNIC_ENABLE_ALL 0x20 +#define IBMVNIC_DISABLE_ALL 0x10 + u8 reserved1; + __be16 reserved2; /* was num_enabled_mc_addr; */ + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* Header-only CRQ layout: first, cmd and 14 reserved bytes (16 bytes total). */ struct ibmvnic_get_vpd_size { + u8 first; + u8 cmd; + u8 reserved[14]; +} __packed __aligned(8); + +/* CRQ layout carrying a 64-bit big-endian len and an rc. */ struct ibmvnic_get_vpd_size_rsp { + u8 first; + u8 cmd; + u8 reserved[2]; + __be64 len; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* CRQ layout with an ioba/len pair; members total 1 + 1 + 2 + 4 + 4 + 4 = 16 bytes, no rc. */ struct ibmvnic_get_vpd { + u8 first; + u8 cmd; + u8 reserved1[2]; + __be32 ioba; + __be32 len; + u8 reserved[4]; +} __packed 
__aligned(8); + +/* CRQ layout: header, 10 reserved bytes and an rc. */ struct ibmvnic_get_vpd_rsp { + u8 first; + u8 cmd; + u8 reserved[10]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* CRQ layout with a 16-bit change_type; IBMVNIC_MAC_ACL (0) and IBMVNIC_VLAN_ACL (1) are defined beside it. Members total 16 bytes. */ struct ibmvnic_acl_change_indication { + u8 first; + u8 cmd; + __be16 change_type; +#define IBMVNIC_MAC_ACL 0 +#define IBMVNIC_VLAN_ACL 1 + u8 reserved[12]; +} __packed __aligned(8); + +/* CRQ layout with an ioba/len pair; member-for-member identical to struct ibmvnic_tune. */ struct ibmvnic_acl_query { + u8 first; + u8 cmd; + u8 reserved1[2]; + __be32 ioba; + __be32 len; + u8 reserved2[4]; +} __packed __aligned(8); + +/* CRQ layout with an ioba/len pair; member-for-member identical to struct ibmvnic_acl_query. */ struct ibmvnic_tune { + u8 first; + u8 cmd; + u8 reserved1[2]; + __be32 ioba; + __be32 len; + u8 reserved2[4]; +} __packed __aligned(8); + +/* CRQ layout: a one-byte map_id plus an ioba/len pair; no rc. */ struct ibmvnic_request_map { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + __be32 ioba; + __be32 len; + u8 reserved2[4]; +} __packed __aligned(8); + +/* CRQ layout: map_id and rc; same members as struct ibmvnic_request_unmap_rsp. */ struct ibmvnic_request_map_rsp { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + u8 reserved2[8]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* CRQ layout carrying only a map_id; the remaining 12 bytes are reserved, no rc. */ struct ibmvnic_request_unmap { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + u8 reserved2[12]; +} __packed __aligned(8); + +/* CRQ layout: map_id and rc; same members as struct ibmvnic_request_map_rsp. */ struct ibmvnic_request_unmap_rsp { + u8 first; + u8 cmd; + u8 reserved1; + u8 map_id; + u8 reserved2[8]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* Header-only CRQ layout: first, cmd and 14 reserved bytes (16 bytes total). */ struct ibmvnic_query_map { + u8 first; + u8 cmd; + u8 reserved[14]; +} __packed __aligned(8); + +/* CRQ layout: a page_size byte, big-endian 32-bit tot_pages and free_pages, and an rc. */ struct ibmvnic_query_map_rsp { + u8 first; + u8 cmd; + u8 reserved; + u8 page_size; + __be32 tot_pages; + __be32 free_pages; + struct ibmvnic_rc rc; +} __packed __aligned(8); + +/* Overlay of the CRQ message layouts. Many request/response member pairs share one struct type, and login_rsp, request_statistics_rsp, acl_query_rsp and tune_rsp all use struct ibmvnic_generic_crq. */ union ibmvnic_crq { + struct ibmvnic_generic_crq generic; + struct ibmvnic_version_exchange version_exchange; + struct ibmvnic_version_exchange version_exchange_rsp; + struct ibmvnic_capability query_capability; + struct ibmvnic_capability query_capability_rsp; + struct ibmvnic_capability request_capability; + struct ibmvnic_capability request_capability_rsp; + struct ibmvnic_login login; + struct ibmvnic_generic_crq login_rsp; + struct ibmvnic_phys_parms query_phys_parms; + struct ibmvnic_phys_parms 
query_phys_parms_rsp; + struct ibmvnic_phys_parms query_phys_capabilities; + struct ibmvnic_phys_parms query_phys_capabilities_rsp; + struct ibmvnic_phys_parms set_phys_parms; + struct ibmvnic_phys_parms set_phys_parms_rsp; + struct ibmvnic_logical_link_state logical_link_state; + struct ibmvnic_logical_link_state logical_link_state_rsp; + struct ibmvnic_query_ip_offload query_ip_offload; + struct ibmvnic_query_ip_offload query_ip_offload_rsp; + struct ibmvnic_control_ip_offload control_ip_offload; + struct ibmvnic_control_ip_offload control_ip_offload_rsp; + struct ibmvnic_request_statistics request_statistics; + struct ibmvnic_generic_crq request_statistics_rsp; + struct ibmvnic_error_indication error_indication; + struct ibmvnic_link_state_indication link_state_indication; + struct ibmvnic_change_mac_addr change_mac_addr; + struct ibmvnic_change_mac_addr change_mac_addr_rsp; + struct ibmvnic_multicast_ctrl multicast_ctrl; + struct ibmvnic_multicast_ctrl multicast_ctrl_rsp; + struct ibmvnic_get_vpd_size get_vpd_size; + struct ibmvnic_get_vpd_size_rsp get_vpd_size_rsp; + struct ibmvnic_get_vpd get_vpd; + struct ibmvnic_get_vpd_rsp get_vpd_rsp; + struct ibmvnic_acl_change_indication acl_change_indication; + struct ibmvnic_acl_query acl_query; + struct ibmvnic_generic_crq acl_query_rsp; + struct ibmvnic_tune tune; + struct ibmvnic_generic_crq tune_rsp; + struct ibmvnic_request_map request_map; + struct ibmvnic_request_map_rsp request_map_rsp; + struct ibmvnic_request_unmap request_unmap; + struct ibmvnic_request_unmap_rsp request_unmap_rsp; + struct ibmvnic_query_map query_map; + struct ibmvnic_query_map_rsp query_map_rsp; +}; + +/* Return-code values numbered 0 (SUCCESS) through 10 (UNSUPPORTEDOPTION). NOTE(review): presumably compared against the code carried in struct ibmvnic_rc - confirm. */ enum ibmvnic_rc_codes { + SUCCESS = 0, + PARTIALSUCCESS = 1, + PERMISSION = 2, + NOMEMORY = 3, + PARAMETER = 4, + UNKNOWNCOMMAND = 5, + ABORTED = 6, + INVALIDSTATE = 7, + INVALIDIOBA = 8, + INVALIDLENGTH = 9, + UNSUPPORTEDOPTION = 10, +}; + +/* Capability identifiers 1..33. The enumerator names match, in order, the fields listed under "partner capabilities" in struct ibmvnic_adapter (prev_mtu there has no enumerator). */ enum ibmvnic_capabilities { + MIN_TX_QUEUES = 1, + MIN_RX_QUEUES = 2, + MIN_RX_ADD_QUEUES = 3, + 
MAX_TX_QUEUES = 4, + MAX_RX_QUEUES = 5, + MAX_RX_ADD_QUEUES = 6, + REQ_TX_QUEUES = 7, + REQ_RX_QUEUES = 8, + REQ_RX_ADD_QUEUES = 9, + MIN_TX_ENTRIES_PER_SUBCRQ = 10, + MIN_RX_ADD_ENTRIES_PER_SUBCRQ = 11, + MAX_TX_ENTRIES_PER_SUBCRQ = 12, + MAX_RX_ADD_ENTRIES_PER_SUBCRQ = 13, + REQ_TX_ENTRIES_PER_SUBCRQ = 14, + REQ_RX_ADD_ENTRIES_PER_SUBCRQ = 15, + TCP_IP_OFFLOAD = 16, + PROMISC_REQUESTED = 17, + PROMISC_SUPPORTED = 18, + MIN_MTU = 19, + MAX_MTU = 20, + REQ_MTU = 21, + MAX_MULTICAST_FILTERS = 22, + VLAN_HEADER_INSERTION = 23, + RX_VLAN_HEADER_INSERTION = 24, + MAX_TX_SG_ENTRIES = 25, + RX_SG_SUPPORTED = 26, + RX_SG_REQUESTED = 27, + OPT_TX_COMP_SUB_QUEUES = 28, + OPT_RX_COMP_QUEUES = 29, + OPT_RX_BUFADD_Q_PER_RX_COMP_Q = 30, + OPT_TX_ENTRIES_PER_SUBCRQ = 31, + OPT_RXBA_ENTRIES_PER_SUBCRQ = 32, + TX_RX_DESC_REQ = 33, +}; + +/* Error cause codes 0..6. NOTE(review): presumably the values carried in ibmvnic_error_indication.error_cause - confirm. */ enum ibmvnic_error_cause { + ADAPTER_PROBLEM = 0, + BUS_PROBLEM = 1, + FW_PROBLEM = 2, + DD_PROBLEM = 3, + EEH_RECOVERY = 4, + FW_UPDATED = 5, + LOW_MEMORY = 6, +}; + +/* Command opcodes. Every *_RSP value equals its request opcode with bit 0x80 set. ERROR_INDICATION, LINK_STATE_INDICATION and ACL_CHANGE_INDICATION have no *_RSP counterpart. COLLECT_FW_TRACE and VLAN_CTRL have opcodes here but no dedicated member in union ibmvnic_crq. */ enum ibmvnic_commands { + VERSION_EXCHANGE = 0x01, + VERSION_EXCHANGE_RSP = 0x81, + QUERY_CAPABILITY = 0x02, + QUERY_CAPABILITY_RSP = 0x82, + REQUEST_CAPABILITY = 0x03, + REQUEST_CAPABILITY_RSP = 0x83, + LOGIN = 0x04, + LOGIN_RSP = 0x84, + QUERY_PHYS_PARMS = 0x05, + QUERY_PHYS_PARMS_RSP = 0x85, + QUERY_PHYS_CAPABILITIES = 0x06, + QUERY_PHYS_CAPABILITIES_RSP = 0x86, + SET_PHYS_PARMS = 0x07, + SET_PHYS_PARMS_RSP = 0x87, + ERROR_INDICATION = 0x08, + LOGICAL_LINK_STATE = 0x0C, + LOGICAL_LINK_STATE_RSP = 0x8C, + REQUEST_STATISTICS = 0x0D, + REQUEST_STATISTICS_RSP = 0x8D, + COLLECT_FW_TRACE = 0x11, + COLLECT_FW_TRACE_RSP = 0x91, + LINK_STATE_INDICATION = 0x12, + CHANGE_MAC_ADDR = 0x13, + CHANGE_MAC_ADDR_RSP = 0x93, + MULTICAST_CTRL = 0x14, + MULTICAST_CTRL_RSP = 0x94, + GET_VPD_SIZE = 0x15, + GET_VPD_SIZE_RSP = 0x95, + GET_VPD = 0x16, + GET_VPD_RSP = 0x96, + TUNE = 0x17, + TUNE_RSP = 0x97, + QUERY_IP_OFFLOAD = 0x18, + QUERY_IP_OFFLOAD_RSP = 0x98, + CONTROL_IP_OFFLOAD = 0x19, + 
CONTROL_IP_OFFLOAD_RSP = 0x99, + ACL_CHANGE_INDICATION = 0x1A, + ACL_QUERY = 0x1B, + ACL_QUERY_RSP = 0x9B, + QUERY_MAP = 0x1D, + QUERY_MAP_RSP = 0x9D, + REQUEST_MAP = 0x1E, + REQUEST_MAP_RSP = 0x9E, + REQUEST_UNMAP = 0x1F, + REQUEST_UNMAP_RSP = 0x9F, + VLAN_CTRL = 0x20, + VLAN_CTRL_RSP = 0xA0, +}; + +/* CRQ type values. The enumerators alias in pairs: IBMVNIC_CRQ_CMD == IBMVNIC_CRQ_CMD_RSP == 0x80 and IBMVNIC_CRQ_INIT_CMD == IBMVNIC_CRQ_INIT_RSP == 0xC0, so the value alone does not tell a command from a response. */ enum ibmvnic_crq_type { + IBMVNIC_CRQ_CMD = 0x80, + IBMVNIC_CRQ_CMD_RSP = 0x80, + IBMVNIC_CRQ_INIT_CMD = 0xC0, + IBMVNIC_CRQ_INIT_RSP = 0xC0, + IBMVNIC_CRQ_XPORT_EVENT = 0xFF, +}; + +/* NOTE(review): the tag is spelled "ibmvfc" while every enumerator carries the IBMVNIC_ prefix - looks like a naming leftover; confirm nothing refers to the tag before renaming it. */ enum ibmvfc_crq_format { + IBMVNIC_CRQ_INIT = 0x01, + IBMVNIC_CRQ_INIT_COMPLETE = 0x02, + IBMVNIC_PARTITION_MIGRATED = 0x06, + IBMVNIC_DEVICE_FAILOVER = 0x08, +}; + +/* State for one CRQ: pointer to the union ibmvnic_crq message array, size and cur, the dma_addr_t token for the array, a lock, an active flag and a 32-byte name. */ struct ibmvnic_crq_queue { + union ibmvnic_crq *msgs; + int size, cur; + dma_addr_t msg_token; + /* Used for serialization of msgs, cur */ + spinlock_t lock; + bool active; + char name[32]; +}; + +/* Overlay of the sub-CRQ descriptor layouts: generic, tx completion, tx (v1), header, extended header, sge, rx completion and rx buffer add. */ union sub_crq { + struct ibmvnic_generic_scrq generic; + struct ibmvnic_tx_comp_desc tx_comp; + struct ibmvnic_tx_desc v1; + struct ibmvnic_hdr_desc hdr; + struct ibmvnic_hdr_ext_desc hdr_ext; + struct ibmvnic_sge_desc sge; + struct ibmvnic_rx_comp_desc rx_comp; + struct ibmvnic_rx_buff_add_desc rx_add; +}; + +/* An array of union sub_crq entries together with its dma_addr_t and an index. NOTE(review): index presumably counts entries currently queued in indir_arr - confirm. */ struct ibmvnic_ind_xmit_queue { + union sub_crq *indir_arr; + dma_addr_t indir_dma; + int index; +}; + +/* State for one sub-CRQ, cacheline aligned: message array with size/cur and DMA token, irq numbers, a lock, a pointer to a struct ibmvnic_adapter, an embedded struct ibmvnic_ind_xmit_queue, and a cpumask for affinity. */ struct ibmvnic_sub_crq_queue { + union sub_crq *msgs; + int size, cur; + dma_addr_t msg_token; + unsigned long crq_num; + unsigned long hw_irq; + unsigned int irq; + unsigned int pool_index; + int scrq_num; + /* Used for serialization of msgs, cur */ + spinlock_t lock; + struct sk_buff *rx_skb_top; + struct ibmvnic_adapter *adapter; + struct ibmvnic_ind_xmit_queue ind_buf; + atomic_t used; + char name[32]; + u64 handle; + cpumask_var_t affinity_mask; +} ____cacheline_aligned; + +/* One buffer described by a CPU pointer, a DMA address, a 64-bit size and a one-byte map_id. */ struct ibmvnic_long_term_buff { + unsigned char *buff; + dma_addr_t addr; + u64 size; + u8 map_id; +}; + +/* Pointer to struct ibmvnic_long_term_buff entries plus a count. NOTE(review): ltbs presumably holds num_ltbs entries - confirm. */ struct ibmvnic_ltb_set { + int num_ltbs; + struct ibmvnic_long_term_buff *ltbs; +}; + +/* Per-buffer tx record: the sk_buff pointer plus index, pool_index and num_entries. */ struct ibmvnic_tx_buff { + struct sk_buff *skb; + int 
index; + int pool_index; + int num_entries; +}; + +/* Tx pool, cacheline aligned: tx_buff array, an int free_map with consumer/producer indices, an embedded ibmvnic_ltb_set, and num_buffers/buf_size. */ struct ibmvnic_tx_pool { + struct ibmvnic_tx_buff *tx_buff; + int *free_map; + int consumer_index; + int producer_index; + struct ibmvnic_ltb_set ltb_set; + int num_buffers; + int buf_size; +} ____cacheline_aligned; + +/* Per-buffer rx record: sk_buff pointer, DMA address, data pointer, size and pool_index. */ struct ibmvnic_rx_buff { + struct sk_buff *skb; + dma_addr_t dma; + unsigned char *data; + int size; + int pool_index; +}; + +/* Rx pool, cacheline aligned: rx_buff array, buffer count and size, an atomic available counter, an int free_map with next_free/next_alloc, an active flag and an embedded ibmvnic_ltb_set. */ struct ibmvnic_rx_pool { + struct ibmvnic_rx_buff *rx_buff; + int size; /* # of buffers in the pool */ + int index; + int buff_size; + atomic_t available; + int *free_map; + int next_free; + int next_alloc; + int active; + struct ibmvnic_ltb_set ltb_set; +} ____cacheline_aligned; + +/* A buffer pointer, its DMA address and a 64-bit length. */ struct ibmvnic_vpd { + unsigned char *buff; + dma_addr_t dma_addr; + u64 len; +}; + +/* Adapter states. Numbering starts at 1 (VNIC_PROBING) and runs to 9 (VNIC_DOWN), so 0 is not a named state. */ enum vnic_state {VNIC_PROBING = 1, + VNIC_PROBED, + VNIC_OPENING, + VNIC_OPEN, + VNIC_CLOSING, + VNIC_CLOSED, + VNIC_REMOVING, + VNIC_REMOVED, + VNIC_DOWN}; + +/* Reset reasons. Numbering starts at 1 (VNIC_RESET_FAILOVER) and runs to 7 (VNIC_RESET_PASSIVE_INIT), so 0 is not a named reason. */ enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1, + VNIC_RESET_MOBILITY, + VNIC_RESET_FATAL, + VNIC_RESET_NON_FATAL, + VNIC_RESET_TIMEOUT, + VNIC_RESET_CHANGE_PARAM, + VNIC_RESET_PASSIVE_INIT}; + +/* A reset reason paired with a list_head. NOTE(review): presumably the element type of ibmvnic_adapter.rwi_list - confirm. */ struct ibmvnic_rwi { + enum ibmvnic_reset_reason reset_reason; + struct list_head list; +}; + +/* Five 64-bit settings: rx/tx queue counts, rx/tx entry counts and mtu. struct ibmvnic_adapter embeds two of these, named desired and fallback. */ struct ibmvnic_tunables { + u64 rx_queues; + u64 tx_queues; + u64 rx_entries; + u64 tx_entries; + u64 mtu; +}; + +/* Per-adapter driver state: vio/net device pointers, the CRQ, IP offload buffers and their DMA tokens, VPD, statistics, login buffers, sub-CRQ arrays, rx/tx pools, completions, partner capabilities, active-resource counts, and reset bookkeeping. */ struct ibmvnic_adapter { + struct vio_dev *vdev; + struct net_device *netdev; + struct ibmvnic_crq_queue crq; + u8 mac_addr[ETH_ALEN]; + struct ibmvnic_query_ip_offload_buffer ip_offload_buf; + dma_addr_t ip_offload_tok; + struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; + dma_addr_t ip_offload_ctrl_tok; + u32 msg_enable; + u32 cur_max_ind_descs; + + /* Vital Product Data (VPD) */ + struct ibmvnic_vpd *vpd; + char fw_version[32]; + + /* Statistics */ + struct ibmvnic_statistics stats; + dma_addr_t stats_token; + struct completion stats_done; + int replenish_no_mem; + int replenish_add_buff_success; + int replenish_add_buff_failure; + int 
replenish_task_cycles; + int tx_send_failed; + int tx_map_failed; + + struct ibmvnic_tx_queue_stats *tx_stats_buffers; + struct ibmvnic_rx_queue_stats *rx_stats_buffers; + + int phys_link_state; + int logical_link_state; + + u32 speed; + u8 duplex; + + /* login data */ + struct ibmvnic_login_buffer *login_buf; + dma_addr_t login_buf_token; + int login_buf_sz; + + struct ibmvnic_login_rsp_buffer *login_rsp_buf; + dma_addr_t login_rsp_buf_token; + int login_rsp_buf_sz; + + atomic_t running_cap_crqs; + + /* each of the two pointer-array fields below is individually cacheline aligned */ struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned; + struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned; + + /* rx structs */ + struct napi_struct *napi; + struct ibmvnic_rx_pool *rx_pool; + u64 promisc; + + struct ibmvnic_tx_pool *tx_pool; + struct ibmvnic_tx_pool *tso_pool; + struct completion probe_done; + struct completion init_done; + int init_done_rc; + + struct completion fw_done; + /* Used for serialization of device commands */ + struct mutex fw_lock; + int fw_done_rc; + + struct completion reset_done; + int reset_done_rc; + bool wait_for_reset; + + /* CPU hotplug instances for online & dead */ + struct hlist_node node; + struct hlist_node node_dead; + + /* partner capabilities */ + u64 min_tx_queues; + u64 min_rx_queues; + u64 min_rx_add_queues; + u64 max_tx_queues; + u64 max_rx_queues; + u64 max_rx_add_queues; + u64 req_tx_queues; + u64 req_rx_queues; + u64 req_rx_add_queues; + u64 min_tx_entries_per_subcrq; + u64 min_rx_add_entries_per_subcrq; + u64 max_tx_entries_per_subcrq; + u64 max_rx_add_entries_per_subcrq; + u64 req_tx_entries_per_subcrq; + u64 req_rx_add_entries_per_subcrq; + u64 tcp_ip_offload; + u64 promisc_requested; + u64 promisc_supported; + u64 min_mtu; + u64 max_mtu; + u64 req_mtu; + u64 prev_mtu; + u64 max_multicast_filters; + u64 vlan_header_insertion; + u64 rx_vlan_header_insertion; + u64 max_tx_sg_entries; + u64 rx_sg_supported; + u64 rx_sg_requested; + u64 opt_tx_comp_sub_queues; + u64 opt_rx_comp_queues; + u64 
opt_rx_bufadd_q_per_rx_comp_q; + u64 opt_tx_entries_per_subcrq; + u64 opt_rxba_entries_per_subcrq; + /* unlike the u64 capability fields above, this one is declared __be64 */ __be64 tx_rx_desc_req; +#define MAX_MAP_ID 255 + /* bitmap with MAX_MAP_ID bits; the map_id members of the CRQ and long-term-buffer structs are u8 */ DECLARE_BITMAP(map_ids, MAX_MAP_ID); + u32 num_active_rx_scrqs; + u32 num_active_rx_pools; + u32 num_active_rx_napi; + u32 num_active_tx_scrqs; + u32 num_active_tx_pools; + + u32 prev_rx_pool_size; + u32 prev_tx_pool_size; + u32 cur_rx_buf_sz; + u32 prev_rx_buf_sz; + + struct tasklet_struct tasklet; + enum vnic_state state; + /* Used for serialization of state field. When taking both state + * and rwi locks, take state lock first. + */ + spinlock_t state_lock; + enum ibmvnic_reset_reason reset_reason; + struct list_head rwi_list; + /* Used for serialization of rwi_list. When taking both state + * and rwi locks, take state lock first. + */ + spinlock_t rwi_lock; + struct work_struct ibmvnic_reset; + struct delayed_work ibmvnic_delayed_reset; + unsigned long resetting; + /* last device reset time */ + unsigned long last_reset_time; + + bool napi_enabled; + bool from_passive_init; + bool login_pending; + /* protected by rcu */ + bool tx_queues_active; + bool failover_pending; + bool force_reset_recovery; + + /* two embedded tunables sets */ struct ibmvnic_tunables desired; + struct ibmvnic_tunables fallback; +}; |
