Diffstat (limited to 'drivers/infiniband/hw/hns')
-rw-r--r--   drivers/infiniband/hw/hns/Makefile           |    4
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_ah.c      |    1
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_bond.c    | 1012
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_bond.h    |   95
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_device.h  |   16
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_hw_v2.c   |  141
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_hw_v2.h   |   20
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_main.c    |  185
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_pd.c      |    1
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_qp.c      |    5
-rw-r--r--   drivers/infiniband/hw/hns/hns_roce_srq.c     |    1
11 files changed, 1429 insertions(+), 52 deletions(-)
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index baf592e6f21b..d07ef02c5231 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -4,11 +4,13 @@
 #
 
 ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3pf
+ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common
 ccflags-y += -I $(src)
 
 hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
 	hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
 	hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \
-	hns_roce_debugfs.o hns_roce_hw_v2.o
+	hns_roce_debugfs.o hns_roce_hw_v2.o hns_roce_bond.o
 
 obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 307c35888b30..0c1c32d23c88 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/pci.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
 #include "hns_roce_device.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c
new file mode 100644
index 000000000000..cc85f3ce1f3e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.c
@@ -0,0 +1,1012 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#include <net/lag.h>
+#include <net/bonding.h>
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
+
+static DEFINE_XARRAY(roce_bond_xa);
+
+static struct hns_roce_dev *hns_roce_get_hrdev_by_netdev(struct net_device *net_dev)
+{
+	struct ib_device *ibdev =
+		ib_device_get_by_netdev(net_dev, RDMA_DRIVER_HNS);
+
+	if (!ibdev)
+		return NULL;
+
+	return container_of(ibdev, struct hns_roce_dev, ib_dev);
+}
+
+static struct net_device *get_upper_dev_from_ndev(struct net_device *net_dev)
+{
+	struct net_device *upper_dev;
+
+	rcu_read_lock();
+	upper_dev = netdev_master_upper_dev_get_rcu(net_dev);
+	dev_hold(upper_dev);
+	rcu_read_unlock();
+
+	return upper_dev;
+}
+
+static int get_netdev_bond_slave_id(struct net_device *net_dev,
+				    struct hns_roce_bond_group *bond_grp)
+{
+	int i;
+
+	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++)
+		if (net_dev == bond_grp->bond_func_info[i].net_dev)
+			return i;
+
+	return -ENOENT;
+}
+
+struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
+						  u8 bus_num)
+{
+	struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+	struct hns_roce_bond_group *bond_grp;
+	struct net_device *upper_dev = NULL;
+	int i;
+
+	if (!die_info)
+		return NULL;
+
+	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+		bond_grp = die_info->bgrps[i];
+		if (!bond_grp)
+			continue;
+		if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+			return bond_grp;
+		if (bond_grp->upper_dev) {
+			upper_dev = get_upper_dev_from_ndev(net_dev);
+			if (bond_grp->upper_dev == upper_dev) {
+				dev_put(upper_dev);
+				return bond_grp;
+			}
+			dev_put(upper_dev);
+		}
+	}
+
+	return NULL;
+}
+
+static int hns_roce_set_bond_netdev(struct hns_roce_bond_group *bond_grp,
+				    struct hns_roce_dev *hr_dev)
+{
+	struct net_device *active_dev;
+	struct net_device *old_dev;
+	int i, ret = 0;
+
+	if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+		rcu_read_lock();
+		active_dev =
+			bond_option_active_slave_get_rcu(netdev_priv(bond_grp->upper_dev));
+		rcu_read_unlock();
+	} else {
+		for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+			active_dev = bond_grp->bond_func_info[i].net_dev;
+			if (active_dev &&
+			    ib_get_curr_port_state(active_dev) == IB_PORT_ACTIVE)
+				break;
+		}
+	}
+
+	if (!active_dev || i == ROCE_BOND_FUNC_MAX)
+		active_dev = get_hr_netdev(hr_dev, 0);
+
+	old_dev = ib_device_get_netdev(&hr_dev->ib_dev, 1);
+	if (old_dev == active_dev)
+		goto out;
+
+	ret = ib_device_set_netdev(&hr_dev->ib_dev, active_dev, 1);
+	if (ret) {
+		dev_err(hr_dev->dev, "failed to set netdev for bond.\n");
+		goto out;
+	}
+
+	if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+		if (old_dev)
+			roce_del_all_netdev_gids(&hr_dev->ib_dev, 1, old_dev);
+		rdma_roce_rescan_port(&hr_dev->ib_dev, 1);
+	}
+out:
+	dev_put(old_dev);
+	return ret;
+}
+
+bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev)
+{
+	struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
+	struct hns_roce_bond_group *bond_grp;
+	u8 bus_num = get_hr_bus_num(hr_dev);
+
+	bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+	if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED &&
+	    bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
+		return true;
+
+	return false;
+}
+
+static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
+{
+	struct net_device *net_dev;
+	u32 active_slave_map = 0;
+	u8 active_slave_num = 0;
+	bool active;
+	u8 i;
+
+	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+		net_dev = bond_grp->bond_func_info[i].net_dev;
+		if (!net_dev || !(bond_grp->slave_map & (1U << i)))
+			continue;
+
+		active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ?
+			 net_lag_port_dev_txable(net_dev) :
+			 (ib_get_curr_port_state(net_dev) == IB_PORT_ACTIVE);
+		if (active) {
+			active_slave_num++;
+			active_slave_map |= (1U << i);
+		}
+	}
+
+	bond_grp->active_slave_num = active_slave_num;
+	bond_grp->active_slave_map = active_slave_map;
+}
+
+static int hns_roce_recover_bond(struct hns_roce_bond_group *bond_grp,
+				 struct hns_roce_dev *hr_dev)
+{
+	bond_grp->main_hr_dev = hr_dev;
+	hns_roce_bond_get_active_slave(bond_grp);
+
+	return hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
+}
+
+static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp,
+				  u8 func_idx)
+{
+	struct hnae3_handle *handle;
+
+	handle = bond_grp->bond_func_info[func_idx].handle;
+	if (handle->priv)
+		hns_roce_bond_uninit_client(bond_grp, func_idx);
+}
+
+static struct hns_roce_dev
+	*hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
+			     u8 func_idx, bool need_switch);
+
+static int switch_main_dev(struct hns_roce_bond_group *bond_grp,
+			   u8 main_func_idx)
+{
+	struct hns_roce_dev *hr_dev;
+	struct net_device *net_dev;
+	u8 i;
+
+	bond_grp->main_hr_dev = NULL;
+	hns_roce_bond_uninit_client(bond_grp, main_func_idx);
+
+	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+		net_dev = bond_grp->bond_func_info[i].net_dev;
+		if ((bond_grp->slave_map & (1U << i)) && net_dev) {
+			/* In case this slave is still being registered as
+			 * a non-bonded PF, uninit it first and then re-init
+			 * it as the main device.
+			 */
+			hns_roce_slave_uninit(bond_grp, i);
+			hr_dev = hns_roce_slave_init(bond_grp, i, false);
+			if (hr_dev) {
+				bond_grp->main_hr_dev = hr_dev;
+				break;
+			}
+		}
+	}
+
+	if (!bond_grp->main_hr_dev)
+		return -ENODEV;
+
+	return 0;
+}
+
+static struct hns_roce_dev
+	*hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
+			     u8 func_idx, bool need_switch)
+{
+	struct hns_roce_dev *hr_dev = NULL;
+	struct hnae3_handle *handle;
+	u8 main_func_idx;
+	int ret;
+
+	if (need_switch) {
+		main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
+		if (func_idx == main_func_idx) {
+			ret = switch_main_dev(bond_grp, main_func_idx);
+			if (ret == -ENODEV)
+				return NULL;
+		}
+	}
+
+	handle = bond_grp->bond_func_info[func_idx].handle;
+	if (handle) {
+		if (handle->priv)
+			return handle->priv;
+		/* Prevent this device from being initialized as a bond device */
+		if (need_switch)
+			bond_grp->bond_func_info[func_idx].net_dev = NULL;
+		hr_dev = hns_roce_bond_init_client(bond_grp, func_idx);
+		if (!hr_dev)
+			BOND_ERR_LOG("failed to init slave %u.\n", func_idx);
+	}
+
+	return hr_dev;
+}
+
+static struct hns_roce_die_info *alloc_die_info(int bus_num)
+{
+	struct hns_roce_die_info *die_info;
+	int ret;
+
+	die_info = kzalloc(sizeof(*die_info), GFP_KERNEL);
+	if (!die_info)
+		return NULL;
+
+	ret = xa_err(xa_store(&roce_bond_xa, bus_num, die_info, GFP_KERNEL));
+	if (ret) {
+		kfree(die_info);
+		return NULL;
+	}
+
+	mutex_init(&die_info->die_mutex);
+
+	return die_info;
+}
+
+static void dealloc_die_info(struct hns_roce_die_info *die_info, u8 bus_num)
+{
+	mutex_destroy(&die_info->die_mutex);
+	xa_erase(&roce_bond_xa, bus_num);
+	kfree(die_info);
+}
+
+static int alloc_bond_id(struct hns_roce_bond_group *bond_grp)
+{
+	u8 bus_num = bond_grp->bus_num;
+	struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+	int i;
+
+	if (!die_info) {
+		die_info = alloc_die_info(bus_num);
+		if (!die_info)
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+		if (die_info->bond_id_mask & BOND_ID(i))
+			continue;
+
+		die_info->bond_id_mask |= BOND_ID(i);
+		die_info->bgrps[i] = bond_grp;
+		bond_grp->bond_id = i;
+
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int remove_bond_id(int bus_num, u8 bond_id)
+{
+	struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+
+	if (bond_id >= ROCE_BOND_NUM_MAX)
+		return -EINVAL;
+
+	if (!die_info)
+		return -ENODEV;
+
+	die_info->bond_id_mask &= ~BOND_ID(bond_id);
+	die_info->bgrps[bond_id] = NULL;
+	if (!die_info->bond_id_mask)
+		dealloc_die_info(die_info, bus_num);
+
+	return 0;
+}
+
+static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
+{
+	struct hns_roce_dev *hr_dev;
+	int ret;
+	int i;
+
+	for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) {
+		if (bond_grp->slave_map & (1 << i))
+			hns_roce_slave_uninit(bond_grp, i);
+	}
+
+	mutex_lock(&bond_grp->bond_mutex);
+	bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+	mutex_unlock(&bond_grp->bond_mutex);
+	bond_grp->main_hr_dev = NULL;
+
+	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+		if (bond_grp->slave_map & (1 << i)) {
+			hr_dev = hns_roce_slave_init(bond_grp, i, false);
+			if (hr_dev) {
+				bond_grp->main_hr_dev = hr_dev;
+				break;
+			}
+		}
+	}
+
+	if (!bond_grp->main_hr_dev) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	hns_roce_bond_get_active_slave(bond_grp);
+
+	ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
+
+out:
+	if (ret) {
+		BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret);
+		hns_roce_cleanup_bond(bond_grp);
+	} else {
+		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+			   "RoCE set bond finished!\n");
+	}
+}
+
+static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp)
+{
+	u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
+	struct hns_roce_dev *hr_dev;
+	u8 i;
+
+	if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED)
+		goto out;
+
+	bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+	bond_grp->main_hr_dev = NULL;
+
+	hns_roce_slave_uninit(bond_grp, main_func_idx);
+
+	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+		hr_dev = hns_roce_slave_init(bond_grp, i, false);
+		if (hr_dev)
+			bond_grp->main_hr_dev = hr_dev;
+	}
+
+out:
+	hns_roce_cleanup_bond(bond_grp);
+}
+
+static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp)
+{
+	int ret;
+
+	hns_roce_bond_get_active_slave(bond_grp);
+
+	ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
+
+	mutex_lock(&bond_grp->bond_mutex);
+	if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE)
+		bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+	mutex_unlock(&bond_grp->bond_mutex);
+
+	if (ret)
+		ibdev_err(&bond_grp->main_hr_dev->ib_dev,
+			  "failed to change RoCE bond slave state, ret = %d.\n",
+			  ret);
+	else
+		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+			   "RoCE slave changestate finished!\n");
+}
+
+static void hns_roce_slave_change_num(struct hns_roce_bond_group *bond_grp)
+{
+	int ret;
+	u8 i;
+
+	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+		if (bond_grp->slave_map & (1U << i)) {
+			if (i == PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn))
+				continue;
+			hns_roce_slave_uninit(bond_grp, i);
+		} else {
+			hns_roce_slave_init(bond_grp, i, true);
+			if (!bond_grp->main_hr_dev) {
+				ret = -ENODEV;
+				goto out;
+			}
+			bond_grp->bond_func_info[i].net_dev = NULL;
+			bond_grp->bond_func_info[i].handle = NULL;
+		}
+	}
+
+	hns_roce_bond_get_active_slave(bond_grp);
+
+	ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
+
+out:
+	if (ret) {
+		BOND_ERR_LOG("failed to change RoCE bond slave num, ret = %d.\n", ret);
+		hns_roce_cleanup_bond(bond_grp);
+	} else {
+		mutex_lock(&bond_grp->bond_mutex);
+		if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGE_NUM)
+			bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+		mutex_unlock(&bond_grp->bond_mutex);
+		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+			   "RoCE slave change num finished!\n");
+	}
+}
+
+static void hns_roce_bond_info_update_nolock(struct hns_roce_bond_group *bond_grp,
+					     struct net_device *upper_dev)
+{
+	struct hns_roce_v2_priv *priv;
+	struct hns_roce_dev *hr_dev;
+	struct net_device *net_dev;
+	int func_idx;
+
+	bond_grp->slave_map = 0;
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
+		func_idx = get_netdev_bond_slave_id(net_dev, bond_grp);
+		if (func_idx < 0) {
+			hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+			if (!hr_dev)
+				continue;
+			func_idx = PCI_FUNC(hr_dev->pci_dev->devfn);
+			if (!bond_grp->bond_func_info[func_idx].net_dev) {
+				priv = hr_dev->priv;
+				bond_grp->bond_func_info[func_idx].net_dev =
+					net_dev;
+				bond_grp->bond_func_info[func_idx].handle =
+					priv->handle;
+			}
+			ib_device_put(&hr_dev->ib_dev);
+		}
+
+		bond_grp->slave_map |= (1 << func_idx);
+	}
+	rcu_read_unlock();
+}
+
+static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp,
+				  struct net_device *net_dev)
+{
+	struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+	bool ret = true;
+
+	if (!hr_dev) {
+		if (bond_grp &&
+		    get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+			return true;
+		else
+			return false;
+	}
+
+	if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) {
+		ret = false;
+		goto out;
+	}
+
+	if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) {
+		ret = false;
+		goto out;
+	}
+
+	if (bond_grp->bus_num != get_hr_bus_num(hr_dev))
+		ret = false;
+
+out:
+	ib_device_put(&hr_dev->ib_dev);
+	return ret;
+}
+
+static bool check_slave_support(struct hns_roce_bond_group *bond_grp,
+				struct net_device *upper_dev)
+{
+	struct net_device *net_dev;
+	u8 slave_num = 0;
+
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
+		if (is_dev_bond_supported(bond_grp, net_dev)) {
+			slave_num++;
+			continue;
+		}
+		rcu_read_unlock();
+		return false;
+	}
+	rcu_read_unlock();
+
+	return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX);
+}
+
+static void hns_roce_bond_work(struct work_struct *work)
+{
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct hns_roce_bond_group *bond_grp =
+		container_of(delayed_work, struct hns_roce_bond_group,
+			     bond_work);
+	enum hns_roce_bond_state bond_state;
+	bool bond_ready;
+
+	mutex_lock(&bond_grp->bond_mutex);
+	bond_ready = check_slave_support(bond_grp, bond_grp->upper_dev);
+	hns_roce_bond_info_update_nolock(bond_grp, bond_grp->upper_dev);
+	bond_state = bond_grp->bond_state;
+	bond_grp->bond_ready = bond_ready;
+	mutex_unlock(&bond_grp->bond_mutex);
+
+	ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+		   "bond work: bond_ready - %d, bond_state - %d.\n",
+		   bond_ready, bond_state);
+
+	if (!bond_ready) {
+		hns_roce_clear_bond(bond_grp);
+		return;
+	}
+
+	switch (bond_state) {
+	case HNS_ROCE_BOND_NOT_BONDED:
+		hns_roce_set_bond(bond_grp);
+		/* In set_bond flow, we don't need to set bond netdev here as
+		 * it has been done when bond_grp->main_hr_dev is registered.
+		 */
+		return;
+	case HNS_ROCE_BOND_SLAVE_CHANGESTATE:
+		hns_roce_slave_changestate(bond_grp);
+		break;
+	case HNS_ROCE_BOND_SLAVE_CHANGE_NUM:
+		hns_roce_slave_change_num(bond_grp);
+		break;
+	default:
+		return;
+	}
+	hns_roce_set_bond_netdev(bond_grp, bond_grp->main_hr_dev);
+}
+
+static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp,
+				     struct hns_roce_dev *hr_dev,
+				     struct net_device *upper_dev)
+{
+	bond_grp->upper_dev = upper_dev;
+	bond_grp->main_hr_dev = hr_dev;
+	bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+	bond_grp->bond_ready = false;
+}
+
+static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp)
+{
+	mutex_lock(&bond_grp->bond_mutex);
+
+	cancel_delayed_work(&bond_grp->bond_work);
+	bond_grp->upper_dev = NULL;
+	bond_grp->main_hr_dev = NULL;
+	bond_grp->bond_ready = false;
+	bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
+	bond_grp->slave_map = 0;
+	memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info));
+
+	mutex_unlock(&bond_grp->bond_mutex);
+}
+
+void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp)
+{
+	int ret;
+
+	ret = bond_grp->main_hr_dev ?
+	      hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO;
+	if (ret)
+		BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret);
+	else
+		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+			   "RoCE clear bond finished!\n");
+
+	hns_roce_detach_bond_grp(bond_grp);
+}
+
+static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp,
+				    struct net_device *net_dev)
+{
+	struct hns_roce_bond_group *bond_grp_tmp;
+
+	bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num);
+	return bond_grp_tmp == bond_grp;
+}
+
+static void lowerstate_event_setting(struct hns_roce_bond_group *bond_grp,
+				     struct netdev_notifier_changelowerstate_info *info)
+{
+	mutex_lock(&bond_grp->bond_mutex);
+
+	if (bond_grp->bond_ready &&
+	    bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED)
+		bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE;
+
+	mutex_unlock(&bond_grp->bond_mutex);
+}
+
+static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp,
+					   struct netdev_notifier_changelowerstate_info *info)
+{
+	struct net_device *net_dev =
+		netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
+
+	if (!netif_is_lag_port(net_dev))
+		return false;
+
+	if (!lowerstate_event_filter(bond_grp, net_dev))
+		return false;
+
+	lowerstate_event_setting(bond_grp, info);
+
+	return true;
+}
+
+static bool is_bond_setting_supported(struct netdev_lag_upper_info *bond_info)
+{
+	if (!bond_info)
+		return false;
+
+	if (bond_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+	    bond_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+		return false;
+
+	if (bond_info->tx_type == NETDEV_LAG_TX_TYPE_HASH &&
+	    bond_info->hash_type > NETDEV_LAG_HASH_L23)
+		return false;
+
+	return true;
+}
+
+static void upper_event_setting(struct hns_roce_bond_group *bond_grp,
+				struct netdev_notifier_changeupper_info *info)
+{
+	struct netdev_lag_upper_info *bond_upper_info = NULL;
+	bool slave_inc = info->linking;
+
+	if (slave_inc)
+		bond_upper_info = info->upper_info;
+
+	if (bond_upper_info) {
+		bond_grp->tx_type = bond_upper_info->tx_type;
+		bond_grp->hash_type = bond_upper_info->hash_type;
+	}
+}
+
+static bool check_unlinking_bond_support(struct hns_roce_bond_group *bond_grp)
+{
+	struct net_device *net_dev;
+	u8 slave_num = 0;
+
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(bond_grp->upper_dev, net_dev) {
+		if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+			slave_num++;
+	}
+	rcu_read_unlock();
+
+	return (slave_num > 1);
+}
+
+static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info,
+				       struct hns_roce_bond_group *bond_grp,
+				       struct net_device *upper_dev)
+{
+	if (!is_bond_setting_supported(bond_info))
+		return false;
+
+	return check_slave_support(bond_grp, upper_dev);
+}
+
+static enum bond_support_type
+	check_bond_support(struct hns_roce_bond_group *bond_grp,
+			   struct net_device *upper_dev,
+			   struct netdev_notifier_changeupper_info *info)
+{
+	bool bond_grp_exist = false;
+	bool support;
+
+	if (upper_dev == bond_grp->upper_dev)
+		bond_grp_exist = true;
+
+	if (!info->linking && !bond_grp_exist)
+		return BOND_NOT_SUPPORT;
+
+	if (info->linking)
+		support = check_linking_bond_support(info->upper_info, bond_grp,
+						     upper_dev);
+	else
+		support = check_unlinking_bond_support(bond_grp);
+
+	if (support)
+		return BOND_SUPPORT;
+
+	return bond_grp_exist ? BOND_EXISTING_NOT_SUPPORT : BOND_NOT_SUPPORT;
+}
+
+static bool upper_event_filter(struct netdev_notifier_changeupper_info *info,
+			       struct hns_roce_bond_group *bond_grp,
+			       struct net_device *net_dev)
+{
+	struct net_device *upper_dev = info->upper_dev;
+	struct hns_roce_bond_group *bond_grp_tmp;
+	struct hns_roce_dev *hr_dev;
+	bool ret = true;
+	u8 bus_num;
+
+	if (!info->linking ||
+	    bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
+		return bond_grp->upper_dev == upper_dev;
+
+	hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+	if (!hr_dev)
+		return false;
+
+	bus_num = get_hr_bus_num(hr_dev);
+	if (bond_grp->bus_num != bus_num) {
+		ret = false;
+		goto out;
+	}
+
+	bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bus_num);
+	if (bond_grp_tmp && bond_grp_tmp != bond_grp)
+		ret = false;
+out:
+	ib_device_put(&hr_dev->ib_dev);
+	return ret;
+}
+
+static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp,
+				      struct netdev_notifier_changeupper_info *info)
+{
+	struct net_device *net_dev =
+		netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
+	struct net_device *upper_dev = info->upper_dev;
+	enum bond_support_type support = BOND_SUPPORT;
+	struct hns_roce_dev *hr_dev;
+	int slave_id;
+
+	if (!upper_dev || !netif_is_lag_master(upper_dev))
+		return false;
+
+	if (!upper_event_filter(info, bond_grp, net_dev))
+		return false;
+
+	mutex_lock(&bond_grp->bond_mutex);
+	support = check_bond_support(bond_grp, upper_dev, info);
+	if (support == BOND_NOT_SUPPORT) {
+		mutex_unlock(&bond_grp->bond_mutex);
+		return false;
+	}
+
+	if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) {
+		hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+		if (!hr_dev) {
+			mutex_unlock(&bond_grp->bond_mutex);
+			return false;
+		}
+		hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev);
+		ib_device_put(&hr_dev->ib_dev);
+	}
+
+	/* In the case of netdev being unregistered, the roce
+	 * instance shouldn't be inited.
+	 */
+	if (net_dev->reg_state >= NETREG_UNREGISTERING) {
+		slave_id = get_netdev_bond_slave_id(net_dev, bond_grp);
+		if (slave_id >= 0) {
+			bond_grp->bond_func_info[slave_id].net_dev = NULL;
+			bond_grp->bond_func_info[slave_id].handle = NULL;
+		}
+	}
+
+	if (support == BOND_SUPPORT) {
+		bond_grp->bond_ready = true;
+		if (bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED)
+			bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGE_NUM;
+	}
+	mutex_unlock(&bond_grp->bond_mutex);
+	if (support == BOND_SUPPORT)
+		upper_event_setting(bond_grp, info);
+
+	return true;
+}
+
+static int hns_roce_bond_event(struct notifier_block *self,
+			       unsigned long event, void *ptr)
+{
+	struct hns_roce_bond_group *bond_grp =
+		container_of(self, struct hns_roce_bond_group, bond_nb);
+	bool changed = false;
+
+	if (event == NETDEV_CHANGEUPPER)
+		changed = hns_roce_bond_upper_event(bond_grp, ptr);
+	if (event == NETDEV_CHANGELOWERSTATE)
+		changed = hns_roce_bond_lowerstate_event(bond_grp, ptr);
+
+	if (changed)
+		schedule_delayed_work(&bond_grp->bond_work, HZ);
+
+	return NOTIFY_DONE;
+}
+
+int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
+	struct hns_roce_bond_group *bond_grp;
+	u8 bus_num = get_hr_bus_num(hr_dev);
+	int ret;
+	int i;
+
+	if (xa_load(&roce_bond_xa, bus_num))
+		return 0;
+
+	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+		bond_grp = kvzalloc(sizeof(*bond_grp), GFP_KERNEL);
+		if (!bond_grp) {
+			ret = -ENOMEM;
+			goto mem_err;
+		}
+
+		mutex_init(&bond_grp->bond_mutex);
+		INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_bond_work);
+
+		bond_grp->bond_ready = false;
+		bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
+		bond_grp->bus_num = bus_num;
+
+		ret = alloc_bond_id(bond_grp);
+		if (ret) {
+			dev_err(hr_dev->dev,
+				"failed to alloc bond ID, ret = %d.\n", ret);
+			goto alloc_id_err;
+		}
+
+		bond_grp->bond_nb.notifier_call = hns_roce_bond_event;
+		ret = register_netdevice_notifier(&bond_grp->bond_nb);
+		if (ret) {
+			ibdev_err(&hr_dev->ib_dev,
+				  "failed to register bond nb, ret = %d.\n", ret);
+			goto register_nb_err;
+		}
+		bgrps[i] = bond_grp;
+	}
+
+	return 0;
+
+register_nb_err:
+	remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
+alloc_id_err:
+	mutex_destroy(&bond_grp->bond_mutex);
+	kvfree(bond_grp);
+mem_err:
+	for (i--; i >= 0; i--) {
+		unregister_netdevice_notifier(&bgrps[i]->bond_nb);
+		cancel_delayed_work_sync(&bgrps[i]->bond_work);
+		remove_bond_id(bgrps[i]->bus_num, bgrps[i]->bond_id);
+		mutex_destroy(&bgrps[i]->bond_mutex);
+		kvfree(bgrps[i]);
+	}
+	return ret;
+}
+
+void hns_roce_dealloc_bond_grp(void)
+{
+	struct hns_roce_bond_group *bond_grp;
+	struct hns_roce_die_info *die_info;
+	unsigned long id;
+	int i;
+
+	xa_for_each(&roce_bond_xa, id, die_info) {
+		for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+			bond_grp = die_info->bgrps[i];
+			if (!bond_grp)
+				continue;
+			unregister_netdevice_notifier(&bond_grp->bond_nb);
+			cancel_delayed_work_sync(&bond_grp->bond_work);
+			remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
+			mutex_destroy(&bond_grp->bond_mutex);
+			kvfree(bond_grp);
+		}
+	}
+}
+
+int hns_roce_bond_init(struct hns_roce_dev *hr_dev)
+{
+	struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_bond_group *bond_grp;
+	u8 bus_num = get_hr_bus_num(hr_dev);
+	int ret;
+
+	bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+
+	if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT) {
+		ret = hns_roce_recover_bond(bond_grp, hr_dev);
+		if (ret) {
+			dev_err(hr_dev->dev,
+				"failed to recover RoCE bond, ret = %d.\n", ret);
+			return ret;
+		}
+	}
+
+	return hns_roce_set_bond_netdev(bond_grp, hr_dev);
+}
+
+void hns_roce_bond_suspend(struct hnae3_handle *handle)
+{
+	u8 bus_num = handle->pdev->bus->number;
+	struct hns_roce_bond_group *bond_grp;
+	struct hns_roce_die_info *die_info;
+	int i;
+
+	die_info = xa_load(&roce_bond_xa, bus_num);
+	if (!die_info)
+		return;
+
+	mutex_lock(&die_info->die_mutex);
+
+	/*
+	 * Avoid duplicated processing when calling this function
+	 * multiple times.
+	 */
+	if (die_info->suspend_cnt)
+		goto out;
+
+	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+		bond_grp = die_info->bgrps[i];
+		if (!bond_grp)
+			continue;
+		unregister_netdevice_notifier(&bond_grp->bond_nb);
+		cancel_delayed_work_sync(&bond_grp->bond_work);
+	}
+
+out:
+	die_info->suspend_cnt++;
+	mutex_unlock(&die_info->die_mutex);
+}
+
+void hns_roce_bond_resume(struct hnae3_handle *handle)
+{
+	u8 bus_num = handle->pdev->bus->number;
+	struct hns_roce_bond_group *bond_grp;
+	struct hns_roce_die_info *die_info;
+	int i, ret;
+
+	die_info = xa_load(&roce_bond_xa, bus_num);
+	if (!die_info)
+		return;
+
+	mutex_lock(&die_info->die_mutex);
+
+	die_info->suspend_cnt--;
+	if (die_info->suspend_cnt)
+		goto out;
+
+	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+		bond_grp = die_info->bgrps[i];
+		if (!bond_grp)
+			continue;
+		ret = register_netdevice_notifier(&bond_grp->bond_nb);
+		if (ret)
+			dev_err(&handle->pdev->dev,
+				"failed to resume bond notifier(bus_num = %u, id = %u), ret = %d.\n",
+				bus_num, bond_grp->bond_id, ret);
+	}
+
+out:
+	mutex_unlock(&die_info->die_mutex);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h
new file mode 100644
index 000000000000..98c295d78ca1
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#ifndef _HNS_ROCE_BOND_H
+#define _HNS_ROCE_BOND_H
+
+#include <linux/netdevice.h>
+#include <net/bonding.h>
+
+#define ROCE_BOND_FUNC_MAX 4
+#define ROCE_BOND_NUM_MAX 2
+
+#define BOND_ID(id) BIT(id)
+
+#define BOND_ERR_LOG(fmt, ...) \
+	pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__)
+
+enum {
+	BOND_MODE_1,
+	BOND_MODE_2_4,
+};
+
+enum hns_roce_bond_hashtype {
+	BOND_HASH_L2,
+	BOND_HASH_L34,
+	BOND_HASH_L23,
+};
+
+enum bond_support_type {
+	BOND_NOT_SUPPORT,
+	/*
+	 * bond_grp already exists, but in the current
+	 * conditions it's no longer supported
+	 */
+	BOND_EXISTING_NOT_SUPPORT,
+	BOND_SUPPORT,
+};
+
+enum hns_roce_bond_state {
+	HNS_ROCE_BOND_NOT_ATTACHED,
+	HNS_ROCE_BOND_NOT_BONDED,
+	HNS_ROCE_BOND_IS_BONDED,
+	HNS_ROCE_BOND_SLAVE_CHANGE_NUM,
+	HNS_ROCE_BOND_SLAVE_CHANGESTATE,
+};
+
+enum hns_roce_bond_cmd_type {
+	HNS_ROCE_SET_BOND,
+	HNS_ROCE_CHANGE_BOND,
+	HNS_ROCE_CLEAR_BOND,
+};
+
+struct hns_roce_func_info {
+	struct net_device *net_dev;
+	struct hnae3_handle *handle;
+};
+
+struct hns_roce_bond_group {
+	struct net_device *upper_dev;
+	struct hns_roce_dev *main_hr_dev;
+	u8 active_slave_num;
+	u32 slave_map;
+	u32 active_slave_map;
+	u8 bond_id;
+	u8 bus_num;
+	struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX];
+	bool bond_ready;
+	enum hns_roce_bond_state bond_state;
+	enum netdev_lag_tx_type tx_type;
+	enum netdev_lag_hash hash_type;
+	struct mutex bond_mutex;
+	struct notifier_block bond_nb;
+	struct delayed_work bond_work;
+};
+
+struct hns_roce_die_info {
+	u8 bond_id_mask;
+	struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
+	struct mutex die_mutex;
+	u8 suspend_cnt;
+};
+
+struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
+						  u8 bus_num);
+int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev);
+void hns_roce_dealloc_bond_grp(void);
+void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp);
+bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev);
+int hns_roce_bond_init(struct hns_roce_dev *hr_dev);
+void hns_roce_bond_suspend(struct hnae3_handle *handle);
+void hns_roce_bond_resume(struct hnae3_handle *handle);
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 06832c0ac055..318f18cf37aa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -33,6 +33,7 @@
 #ifndef _HNS_ROCE_DEVICE_H
 #define _HNS_ROCE_DEVICE_H
 
+#include <linux/pci.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/hns-abi.h>
 #include "hns_roce_debugfs.h"
@@ -153,6 +154,7 @@ enum {
 	HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
 	HNS_ROCE_CAP_FLAG_STASH = BIT(17),
 	HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
+	HNS_ROCE_CAP_FLAG_BOND = BIT(21),
 	HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22),
 };
 
@@ -177,6 +179,7 @@ enum hns_roce_instance_state {
 	HNS_ROCE_STATE_INIT,
 	HNS_ROCE_STATE_INITED,
 	HNS_ROCE_STATE_UNINIT,
+	HNS_ROCE_STATE_BOND_UNINIT,
 };
 
 enum {
@@ -1167,6 +1170,17 @@ static inline u8 get_tclass(const struct ib_global_route *grh)
 	       grh->traffic_class >> DSCP_SHIFT : grh->traffic_class;
 }
 
+static inline struct net_device *get_hr_netdev(struct hns_roce_dev *hr_dev,
+					       u8 port)
+{
+	return hr_dev->iboe.netdevs[port];
+}
+
+static inline u8 get_hr_bus_num(struct hns_roce_dev *hr_dev)
+{
+	return hr_dev->pci_dev->bus->number;
+}
+
 void hns_roce_init_uar_table(struct hns_roce_dev *dev);
 int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
 
@@ -1293,7 +1307,7 @@ void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
 void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
 int hns_roce_init(struct hns_roce_dev *hr_dev);
-void hns_roce_exit(struct hns_roce_dev *hr_dev);
+void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup);
 int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq);
 int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq);
 int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 63052c0e7613..2d6ae89e525b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -43,11 +43,13 @@
 #include <rdma/ib_umem.h>
 #include <rdma/uverbs_ioctl.h>
 
+#include "hclge_main.h"
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_cmd.h"
 #include "hns_roce_hem.h"
 #include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
 
 #define CREATE_TRACE_POINTS
 #include "hns_roce_trace.h"
@@ -1434,6 +1436,79 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 	return ret;
 }
 
+static enum hns_roce_opcode_type
+	get_bond_opcode(enum hns_roce_bond_cmd_type bond_type)
+{
+	switch (bond_type) {
+	case HNS_ROCE_SET_BOND:
+		return HNS_ROCE_OPC_SET_BOND_INFO;
+	case HNS_ROCE_CHANGE_BOND:
+		return HNS_ROCE_OPC_CHANGE_ACTIVE_PORT;
+	case HNS_ROCE_CLEAR_BOND:
+		return HNS_ROCE_OPC_CLEAR_BOND_INFO;
+	default:
+		WARN(true, "Invalid bond type %d!\n", bond_type);
+		return HNS_ROCE_OPC_SET_BOND_INFO;
+	}
+}
+
+static enum hns_roce_bond_hashtype
+	get_bond_hashtype(enum netdev_lag_hash netdev_hashtype)
+{
+	switch (netdev_hashtype) {
+	case NETDEV_LAG_HASH_L2:
+		return BOND_HASH_L2;
+	case NETDEV_LAG_HASH_L34:
+		return BOND_HASH_L34;
+	case NETDEV_LAG_HASH_L23:
+		return BOND_HASH_L23;
+	default:
+		WARN(true, "Invalid hash type %d!\n", netdev_hashtype);
+		return BOND_HASH_L2;
+	}
+}
+
+int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
+		      enum hns_roce_bond_cmd_type bond_type)
+{
+	enum hns_roce_opcode_type opcode = get_bond_opcode(bond_type);
+	struct hns_roce_bond_info *slave_info;
+	struct hns_roce_cmq_desc desc = {};
+	int ret;
+
+	slave_info = (struct hns_roce_bond_info *)desc.data;
+	hns_roce_cmq_setup_basic_desc(&desc, opcode, false);
+
+	slave_info->bond_id = cpu_to_le32(bond_grp->bond_id);
+	if (bond_type == HNS_ROCE_CLEAR_BOND)
+		goto out;
+
+	if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+		slave_info->bond_mode = cpu_to_le32(BOND_MODE_1);
+		if (bond_grp->active_slave_num != 1)
+			ibdev_warn(&bond_grp->main_hr_dev->ib_dev,
+				   "active slave cnt(%u) in Mode 1 is invalid.\n",
+				   bond_grp->active_slave_num);
+	} else {
+		slave_info->bond_mode = cpu_to_le32(BOND_MODE_2_4);
+		slave_info->hash_policy =
+			cpu_to_le32(get_bond_hashtype(bond_grp->hash_type));
+	}
+
+	slave_info->active_slave_cnt = cpu_to_le32(bond_grp->active_slave_num);
+	slave_info->active_slave_mask = cpu_to_le32(bond_grp->active_slave_map);
+	slave_info->slave_mask = cpu_to_le32(bond_grp->slave_map);
+
+out:
+	ret = hns_roce_cmq_send(bond_grp->main_hr_dev, &desc, 1);
+	if (ret)
+		ibdev_err(&bond_grp->main_hr_dev->ib_dev,
+			  "cmq bond type(%d) failed, ret = %d.\n",
+			  bond_type, ret);
+
+	return ret;
+}
+
 static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev,
 			       dma_addr_t base_addr, u8 cmd, unsigned long tag)
 {
@@ -2275,6 +2350,9 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
 	caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
 		       HNS_ROCE_CAP_FLAGS_EX_SHIFT;
 
+	if (hr_dev->is_vf)
+		caps->flags &= ~HNS_ROCE_CAP_FLAG_BOND;
+
 	caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
 	caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
 	caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
@@ -7067,7 +7145,7 @@ error_failed_kzalloc:
 }
 
 static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
-					     bool reset)
+					     bool reset, bool bond_cleanup)
 {
 	struct hns_roce_dev *hr_dev = handle->priv;
 
@@ -7079,7 +7157,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 	hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
 	hns_roce_handle_device_err(hr_dev);
 
-	hns_roce_exit(hr_dev);
+	hns_roce_exit(hr_dev, bond_cleanup);
 	kfree(hr_dev->priv);
 	ib_dealloc_device(&hr_dev->ib_dev);
 }
@@ -7130,12 +7208,51 @@ reset_chk_err:
 static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 					   bool reset)
 {
+	/* Suspend bond to avoid concurrency */
+	hns_roce_bond_suspend(handle);
+
 	if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
-		return;
+		goto out;
 
 	handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT;
 
-	__hns_roce_hw_v2_uninit_instance(handle, reset);
+	__hns_roce_hw_v2_uninit_instance(handle, reset, true);
+
+	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+
+out:
+	hns_roce_bond_resume(handle);
+}
+
+struct hns_roce_dev
+	*hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
+				   int func_idx)
+{
+	struct hnae3_handle *handle;
+	int ret;
+
+	handle = bond_grp->bond_func_info[func_idx].handle;
+	if (!handle || !handle->client)
+		return NULL;
+
+	ret = hns_roce_hw_v2_init_instance(handle);
+	if (ret)
+		return NULL;
+
+	return handle->priv;
+}
+
+void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
+				 int func_idx)
+{
+	struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;
+
+	if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
+		return;
+
+	handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
+
+	__hns_roce_hw_v2_uninit_instance(handle, false, false);
 
 	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
 }
@@ -7144,6 +7261,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 {
 	struct hns_roce_dev *hr_dev;
 
+	/* Suspend bond to avoid concurrency */
+	hns_roce_bond_suspend(handle);
+
 	if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
 		set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
 		return 0;
@@ -7174,6 +7294,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
 	if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN,
 			       &handle->rinfo.state)) {
 		handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
+		hns_roce_bond_resume(handle);
 		return 0;
 	}
 
@@ -7193,6 +7314,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
 		dev_info(dev, "reset done, RoCE client reinit finished.\n");
 	}
 
+	hns_roce_bond_resume(handle);
 	return ret;
 }
 
@@ -7204,7 +7326,7 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
 	handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
 	dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
 	msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
-	__hns_roce_hw_v2_uninit_instance(handle, false);
+	__hns_roce_hw_v2_uninit_instance(handle, false, false);
 
 	return 0;
 }
@@ -7240,6 +7362,14 @@ static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle,
 	if (linkup || !hr_dev)
 		return;
 
+	/* For bond device, the link status depends on the upper netdev,
+	 * and the upper device's link status depends on all the slaves'
+	 * netdev but not only one. So bond device cannot get a correct
+	 * link status from this path.
+	 */
+	if (hns_roce_get_bond_grp(netdev, get_hr_bus_num(hr_dev)))
+		return;
+
 	ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev);
 }
 
@@ -7264,6 +7394,7 @@ static int __init hns_roce_hw_v2_init(void)
 
 static void __exit hns_roce_hw_v2_exit(void)
 {
+	hns_roce_dealloc_bond_grp();
 	hnae3_unregister_client(&hns_roce_hw_v2_client);
 	hns_roce_cleanup_debugfs();
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index e64a04d6f85b..285fe0875fac 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -35,6 +35,7 @@
 #include <linux/bitops.h>
 
 #include "hnae3.h"
+#include "hns_roce_bond.h"
 
 #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64
@@ -228,6 +229,9 @@ enum hns_roce_opcode_type {
 	HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
 	HNS_ROCE_QUERY_RAM_ECC = 0x8513,
 	HNS_SWITCH_PARAMETER_CFG = 0x1033,
+	HNS_ROCE_OPC_SET_BOND_INFO = 0x8601,
+	HNS_ROCE_OPC_CLEAR_BOND_INFO = 0x8602,
+	HNS_ROCE_OPC_CHANGE_ACTIVE_PORT = 0x8603,
 };
 
 #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
@@ -1465,7 +1469,23 @@ struct hns_roce_sccc_clr_done {
 	__le32 rsv[5];
 };
 
+struct hns_roce_bond_info {
+	__le32 bond_id;
+	__le32 bond_mode;
+	__le32 active_slave_cnt;
+	__le32 active_slave_mask;
+	__le32 slave_mask;
+	__le32 hash_policy;
+};
+
+struct hns_roce_dev
+	*hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
+				   int func_idx);
+void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
+				 int func_idx);
 int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
+		      enum hns_roce_bond_cmd_type bond_type);
 
 static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
 				    void __iomem *dest)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index f3607fe107a7..2f4864ab7d4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -32,7 +32,6 @@
  */
 #include <linux/acpi.h>
 #include <linux/module.h>
-#include <linux/pci.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_user_verbs.h>
@@ -41,6 +40,7 @@
 #include "hns_roce_device.h"
 #include "hns_roce_hem.h"
 #include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
 
 static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
 			    const u8 *addr)
@@ -89,30 +89,75 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
 	return ret;
 }
 
-static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port,
-			   unsigned long event)
+static int hns_roce_get_port_state(struct hns_roce_dev *hr_dev, u32 port_num,
+				   enum ib_port_state *state)
 {
+	struct hns_roce_bond_group *bond_grp;
+	u8 bus_num = get_hr_bus_num(hr_dev);
+	struct net_device *net_dev;
+
+	net_dev = ib_device_get_netdev(&hr_dev->ib_dev, port_num);
+	if (!net_dev)
+		return -ENODEV;
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+		bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+		if (bond_grp) {
+			*state = ib_get_curr_port_state(bond_grp->upper_dev);
+			goto out;
+		}
+	}
+
+	*state = ib_get_curr_port_state(net_dev);
+out:
+	dev_put(net_dev);
+	return 0;
+}
+
+static int handle_en_event(struct net_device *netdev,
+			   struct hns_roce_dev *hr_dev,
+			   u32 port, unsigned long event)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
 	struct device *dev = hr_dev->dev;
-	struct net_device *netdev;
+	enum ib_port_state curr_state;
+	struct ib_event ibevent;
 	int ret = 0;
 
-	netdev = hr_dev->iboe.netdevs[port];
 	if (!netdev) {
 		dev_err(dev, "can't find netdev on port(%u)!\n", port);
 		return -ENODEV;
 	}
 
 	switch (event) {
-	case NETDEV_UP:
-	case NETDEV_CHANGE:
 	case NETDEV_REGISTER:
 	case NETDEV_CHANGEADDR:
 		ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
 		break;
+	case NETDEV_UP:
+	case NETDEV_CHANGE:
+		ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
+		if (ret)
+			return ret;
+		fallthrough;
 	case NETDEV_DOWN:
-		/*
-		 * In v1 engine, only support all ports closed together.
-		 */
+		if (!netif_is_lag_master(netdev))
+			break;
+		curr_state = ib_get_curr_port_state(netdev);
+
+		write_lock_irq(&ibdev->cache_lock);
+		if (ibdev->port_data[port].cache.last_port_state == curr_state) {
+			write_unlock_irq(&ibdev->cache_lock);
+			return 0;
+		}
+		ibdev->port_data[port].cache.last_port_state = curr_state;
+		write_unlock_irq(&ibdev->cache_lock);
+
+		ibevent.event = (curr_state == IB_PORT_DOWN) ?
+				IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
+		ibevent.device = ibdev;
+		ibevent.element.port_num = port + 1;
+		ib_dispatch_event(&ibevent);
 		break;
 	default:
 		dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
@@ -126,17 +171,25 @@ static int hns_roce_netdev_event(struct notifier_block *self,
 				 unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct hns_roce_bond_group *bond_grp;
 	struct hns_roce_ib_iboe *iboe = NULL;
 	struct hns_roce_dev *hr_dev = NULL;
+	struct net_device *upper = NULL;
 	int ret;
 	u32 port;
 
 	hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
 	iboe = &hr_dev->iboe;
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+		bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0),
+						 get_hr_bus_num(hr_dev));
+		upper = bond_grp ? bond_grp->upper_dev : NULL;
+	}
+
 	for (port = 0; port < hr_dev->caps.num_ports; port++) {
-		if (dev == iboe->netdevs[port]) {
-			ret = handle_en_event(hr_dev, port, event);
+		if ((!upper && dev == iboe->netdevs[port]) ||
+		    (upper && dev == upper)) {
+			ret = handle_en_event(dev, hr_dev, port, event);
 			if (ret)
 				return NOTIFY_DONE;
 			break;
@@ -148,12 +201,13 @@ static int hns_roce_netdev_event(struct notifier_block *self,
 
 static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
 {
+	struct net_device *net_dev;
 	int ret;
 	u8 i;
 
 	for (i = 0; i < hr_dev->caps.num_ports; i++) {
-		ret = hns_roce_set_mac(hr_dev, i,
-				       hr_dev->iboe.netdevs[i]->dev_addr);
+		net_dev = get_hr_netdev(hr_dev, i);
+		ret = hns_roce_set_mac(hr_dev, i, net_dev->dev_addr);
 		if (ret)
 			return ret;
 	}
@@ -221,9 +275,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
 			       struct ib_port_attr *props)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
-	struct device *dev = hr_dev->dev;
 	struct net_device *net_dev;
-	unsigned long flags;
 	enum ib_mtu mtu;
 	u32 port;
 	int ret;
@@ -244,26 +296,26 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
 	if (ret)
 		ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
 
-	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
-	net_dev = hr_dev->iboe.netdevs[port];
+	net_dev = ib_device_get_netdev(ib_dev, port_num);
 	if (!net_dev) {
-		spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-		dev_err(dev, "find netdev %u failed!\n", port);
+		ibdev_err(ib_dev, "find netdev %u failed!\n", port);
 		return -EINVAL;
 	}
 
 	mtu = iboe_get_mtu(net_dev->mtu);
 	props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
-	props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ?
-		       IB_PORT_ACTIVE :
-		       IB_PORT_DOWN;
+
+	dev_put(net_dev);
+
+	ret = hns_roce_get_port_state(hr_dev, port_num, &props->state);
+	if (ret) {
+		ibdev_err(ib_dev, "failed to get port state.\n");
+		return ret;
+	}
+
 	props->phys_state = props->state == IB_PORT_ACTIVE ?
 			    IB_PORT_PHYS_STATE_LINK_UP :
 			    IB_PORT_PHYS_STATE_DISABLED;
-
-	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-
 	return 0;
 }
 
@@ -617,9 +669,40 @@ static int hns_roce_get_hw_stats(struct ib_device *device,
 	return num_counters;
 }
 
-static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
+static void
+	hns_roce_unregister_bond_cleanup(struct hns_roce_dev *hr_dev,
+					 struct hns_roce_bond_group *bond_grp)
+{
+	struct net_device *net_dev;
+	int i;
+
+	/* To avoid the loss of other slave devices when main_hr_dev
+	 * is unregistered, re-initialize the remaining slaves before
+	 * the bond resources cleanup.
+	 */
+	bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+		net_dev = bond_grp->bond_func_info[i].net_dev;
+		if (net_dev && net_dev != get_hr_netdev(hr_dev, 0))
+			hns_roce_bond_init_client(bond_grp, i);
+	}
+
+	hns_roce_cleanup_bond(bond_grp);
+}
+
+static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev,
+				       bool bond_cleanup)
 {
+	struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
 	struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
+	struct hns_roce_bond_group *bond_grp;
+	u8 bus_num = get_hr_bus_num(hr_dev);
+
+	if (bond_cleanup && hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+		bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+		if (bond_grp)
+			hns_roce_unregister_bond_cleanup(hr_dev, bond_grp);
+	}
 
 	hr_dev->active = false;
 	unregister_netdevice_notifier(&iboe->nb);
@@ -708,11 +791,12 @@ static const struct ib_device_ops hns_roce_dev_restrack_ops = {
 
 static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 {
-	int ret;
 	struct hns_roce_ib_iboe *iboe = NULL;
-	struct ib_device *ib_dev = NULL;
 	struct device *dev = hr_dev->dev;
+	struct ib_device *ib_dev = NULL;
+	struct net_device *net_dev;
 	unsigned int i;
+	int ret;
 
 	iboe = &hr_dev->iboe;
 	spin_lock_init(&iboe->lock);
@@ -747,17 +831,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 	ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
 	ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
 	ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
-	for (i = 0; i < hr_dev->caps.num_ports; i++) {
-		if (!hr_dev->iboe.netdevs[i])
-			continue;
-		ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i],
-					   i + 1);
-		if (ret)
+
+	dma_set_max_seg_size(dev, SZ_2G);
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+		ret = hns_roce_alloc_bond_grp(hr_dev);
+		if (ret) {
+			dev_err(dev, "failed to alloc bond_grp for bus %u, ret = %d\n",
+				get_hr_bus_num(hr_dev), ret);
 			return ret;
+		}
+	}
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND &&
+	    hns_roce_bond_is_active(hr_dev)) {
+		ret = hns_roce_bond_init(hr_dev);
+		if (ret) {
+			dev_err(dev, "failed to init bond!\n");
+			return ret;
+		}
+		ret = ib_register_device(ib_dev, "hns_bond_%d", dev);
+	} else {
+		for (i = 0; i < hr_dev->caps.num_ports; i++) {
+			net_dev = get_hr_netdev(hr_dev, i);
+			if (!net_dev)
+				continue;
+
+			ret = ib_device_set_netdev(ib_dev, net_dev, i + 1);
+			if (ret)
+				return ret;
+		}
+		ret = ib_register_device(ib_dev, "hns_%d", dev);
 	}
-	dma_set_max_seg_size(dev, SZ_2G);
-	ret = ib_register_device(ib_dev, "hns_%d", dev);
+
 	if (ret) {
 		dev_err(dev, "ib_register_device failed!\n");
 		return ret;
@@ -1157,10 +1262,10 @@ error_failed_alloc_dfx_cnt:
 	return ret;
 }
 
-void hns_roce_exit(struct hns_roce_dev *hr_dev)
+void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
 {
 	hns_roce_unregister_debugfs(hr_dev);
-	hns_roce_unregister_device(hr_dev);
+	hns_roce_unregister_device(hr_dev, bond_cleanup);
 
 	if (hr_dev->hw->hw_exit)
 		hr_dev->hw->hw_exit(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index d35cf59d0f43..225c3e328e0e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/pci.h>
 #include "hns_roce_device.h"
 
 void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index bdd879ac12dd..d1640c5fbaab 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/pci.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_umem.h>
 #include <rdma/uverbs_ioctl.h>
@@ -1348,11 +1347,13 @@ static int check_mtu_validate(struct hns_roce_dev *hr_dev,
 			      struct hns_roce_qp *hr_qp,
 			      struct ib_qp_attr *attr, int attr_mask)
 {
+	struct net_device *net_dev;
 	enum ib_mtu active_mtu;
 	int p;
 
 	p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
-	active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
+	net_dev = get_hr_netdev(hr_dev, p);
+	active_mtu = iboe_get_mtu(net_dev->mtu);
 
 	if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
 	    attr->path_mtu > hr_dev->caps.max_mtu) ||
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 1090051f493b..8a6efb6b9c9e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -3,7 +3,6 @@
  * Copyright (c) 2018 Hisilicon Limited.
  */
 
-#include <linux/pci.h>
 #include <rdma/ib_umem.h>
 #include <rdma/uverbs_ioctl.h>
 #include "hns_roce_device.h"