Diffstat (limited to 'drivers/net')
 drivers/net/ethernet/mellanox/mlx5/core/dev.c          |  14
 drivers/net/ethernet/mellanox/mlx5/core/devlink.h      |  11
 drivers/net/ethernet/mellanox/mlx5/core/eq.c           | 220
 drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c |  42
 drivers/net/ethernet/mellanox/mlx5/core/main.c         |  17
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h     |  10
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c      | 248
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h      |   4
8 files changed, 344 insertions, 222 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 445fe30c3d0b..e7739acc926e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/mlx5_ifc_vdpa.h>
 #include <linux/mlx5/vport.h>
 #include "mlx5_core.h"
+#include "devlink.h"
 
 /* intf dev list mutex */
 static DEFINE_MUTEX(mlx5_intf_mutex);
@@ -109,17 +110,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev)
 	return true;
 }
 
-static bool is_eth_enabled(struct mlx5_core_dev *dev)
-{
-	union devlink_param_value val;
-	int err;
-
-	err = devl_param_driverinit_value_get(priv_to_devlink(dev),
-					      DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
-					      &val);
-	return err ? false : val.vbool;
-}
-
 bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
 {
 	if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
@@ -251,7 +241,7 @@ static const struct mlx5_adev_device {
 				   .is_enabled = &is_ib_enabled },
 	[MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
 					  .is_supported = &mlx5_eth_supported,
-					  .is_enabled = &is_eth_enabled },
+					  .is_enabled = &mlx5_core_is_eth_enabled },
 	[MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
 					      .is_supported = &is_eth_rep_supported },
 	[MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 5dcfb4d86d8a..defba5bd91d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -46,4 +46,15 @@ void mlx5_devlink_free(struct devlink *devlink);
 int mlx5_devlink_params_register(struct devlink *devlink);
 void mlx5_devlink_params_unregister(struct devlink *devlink);
 
+static inline bool mlx5_core_is_eth_enabled(struct mlx5_core_dev *dev)
+{
+	union devlink_param_value val;
+	int err;
+
+	err = devl_param_driverinit_value_get(priv_to_devlink(dev),
+					      DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+					      &val);
+	return err ? false : val.vbool;
+}
+
 #endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 38b32e98f3bd..eb41f0abf798 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -18,6 +18,7 @@
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
 #include "mlx5_irq.h"
+#include "pci_irq.h"
 #include "devlink.h"
 #include "en_accel/ipsec.h"
 
@@ -61,9 +62,7 @@ struct mlx5_eq_table {
 	struct mlx5_irq_table	*irq_table;
 	struct mlx5_irq		**comp_irqs;
 	struct mlx5_irq		*ctrl_irq;
-#ifdef CONFIG_RFS_ACCEL
 	struct cpu_rmap		*rmap;
-#endif
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -637,6 +636,7 @@ static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
 	mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
 	return MLX5_NUM_ASYNC_EQE;
 }
+
 static int create_async_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -803,44 +803,28 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
 }
 EXPORT_SYMBOL(mlx5_eq_update_ci);
 
-static void comp_irqs_release(struct mlx5_core_dev *dev)
+static void comp_irqs_release_pci(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 
-	if (mlx5_core_is_sf(dev))
-		mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
-	else
-		mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
-	kfree(table->comp_irqs);
+	mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
 }
 
-static int comp_irqs_request(struct mlx5_core_dev *dev)
+static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 	const struct cpumask *prev = cpu_none_mask;
 	const struct cpumask *mask;
-	int ncomp_eqs = table->num_comp_eqs;
+	int ncomp_eqs;
 	u16 *cpus;
 	int ret;
 	int cpu;
 	int i;
 
 	ncomp_eqs = table->num_comp_eqs;
-	table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
-	if (!table->comp_irqs)
-		return -ENOMEM;
-	if (mlx5_core_is_sf(dev)) {
-		ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
-		if (ret < 0)
-			goto free_irqs;
-		return ret;
-	}
-
 	cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
-	if (!cpus) {
+	if (!cpus)
 		ret = -ENOMEM;
-		goto free_irqs;
-	}
 
 	i = 0;
 	rcu_read_lock();
@@ -854,17 +838,89 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
 	}
 spread_done:
 	rcu_read_unlock();
-	ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+	ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap);
 	kfree(cpus);
-	if (ret < 0)
-		goto free_irqs;
 	return ret;
+}
+
+static void comp_irqs_release_sf(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+
+	mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+}
+
+static int comp_irqs_request_sf(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	int ncomp_eqs = table->num_comp_eqs;
+
+	return mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+}
+
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+
+	mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) :
+			       comp_irqs_release_pci(dev);
 
-free_irqs:
 	kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	int ncomp_eqs;
+	int ret;
+
+	ncomp_eqs = table->num_comp_eqs;
+	table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+	if (!table->comp_irqs)
+		return -ENOMEM;
+
+	ret = mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) :
+				     comp_irqs_request_pci(dev);
+	if (ret < 0)
+		kfree(table->comp_irqs);
 	return ret;
 }
 
+#ifdef CONFIG_RFS_ACCEL
+static int alloc_rmap(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+	/* rmap is a mapping between irq number and queue number.
+	 * Each irq can be assigned only to a single rmap.
+	 * Since SFs share IRQs, rmap mapping cannot function correctly
+	 * for irqs that are shared between different core/netdev RX rings.
+	 * Hence we don't allow netdev rmap for SFs.
+	 */
+	if (mlx5_core_is_sf(mdev))
+		return 0;
+
+	eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+	if (!eq_table->rmap)
+		return -ENOMEM;
+	return 0;
+}
+
+static void free_rmap(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+	if (eq_table->rmap) {
+		free_irq_cpu_rmap(eq_table->rmap);
+		eq_table->rmap = NULL;
+	}
+}
+#else
+static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; }
+static void free_rmap(struct mlx5_core_dev *mdev) {}
+#endif
+
 static void destroy_comp_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -880,6 +936,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
 		kfree(eq);
 	}
 	comp_irqs_release(dev);
+	free_rmap(dev);
 }
 
 static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
@@ -906,9 +963,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 	int err;
 	int i;
 
+	err = alloc_rmap(dev);
+	if (err)
+		return err;
+
 	ncomp_eqs = comp_irqs_request(dev);
-	if (ncomp_eqs < 0)
-		return ncomp_eqs;
+	if (ncomp_eqs < 0) {
+		err = ncomp_eqs;
+		goto err_irqs_req;
+	}
+
 	INIT_LIST_HEAD(&table->comp_eqs_list);
 	nent = comp_eq_depth_devlink_param_get(dev);
 
@@ -953,6 +1017,8 @@ clean_eq:
 	kfree(eq);
 clean:
 	destroy_comp_eqs(dev);
+err_irqs_req:
+	free_rmap(dev);
 	return err;
 }
 
@@ -1031,55 +1097,12 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
 	return ERR_PTR(-ENOENT);
 }
 
-static void clear_rmap(struct mlx5_core_dev *dev)
-{
-#ifdef CONFIG_RFS_ACCEL
-	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-
-	free_irq_cpu_rmap(eq_table->rmap);
-#endif
-}
-
-static int set_rmap(struct mlx5_core_dev *mdev)
-{
-	int err = 0;
-#ifdef CONFIG_RFS_ACCEL
-	struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
-	int vecidx;
-
-	eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
-	if (!eq_table->rmap) {
-		err = -ENOMEM;
-		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
-		goto err_out;
-	}
-
-	for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
-		err = irq_cpu_rmap_add(eq_table->rmap,
-				       pci_irq_vector(mdev->pdev, vecidx));
-		if (err) {
-			mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
-				      err);
-			goto err_irq_cpu_rmap_add;
-		}
-	}
-	return 0;
-
-err_irq_cpu_rmap_add:
-	clear_rmap(mdev);
-err_out:
-#endif
-	return err;
-}
-
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 
 	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
-	if (!mlx5_core_is_sf(dev))
-		clear_rmap(dev);
 	mlx5_irq_table_destroy(dev);
 	mutex_unlock(&table->lock);
 }
@@ -1090,44 +1113,47 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 #define MLX5_MAX_ASYNC_EQS 3
 #endif
 
-int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+static int get_num_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+	int max_dev_eqs;
+	int max_eqs_sf;
+	int num_eqs;
+
+	/* If ethernet is disabled we use just a single completion vector to
+	 * have the other vectors available for other drivers using mlx5_core. For
+	 * example, mlx5_vdpa
+	 */
+	if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev))
+		return 1;
+
+	max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
 		      MLX5_CAP_GEN(dev, max_num_eqs) :
 		      1 << MLX5_CAP_GEN(dev, log_max_eq);
-	int max_eqs_sf;
-	int err;
 
-	eq_table->num_comp_eqs =
-		min_t(int,
-		      mlx5_irq_table_get_num_comp(eq_table->irq_table),
-		      num_eqs - MLX5_MAX_ASYNC_EQS);
+	num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table),
+			max_dev_eqs - MLX5_MAX_ASYNC_EQS);
 	if (mlx5_core_is_sf(dev)) {
 		max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
 				   mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
-		eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
-					       max_eqs_sf);
+		num_eqs = min_t(int, num_eqs, max_eqs_sf);
 	}
 
+	return num_eqs;
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+	int err;
+
+	eq_table->num_comp_eqs = get_num_eqs(dev);
 	err = create_async_eqs(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to create async EQs\n");
 		goto err_async_eqs;
 	}
 
-	if (!mlx5_core_is_sf(dev)) {
-		/* rmap is a mapping between irq number and queue number.
-		 * each irq can be assign only to a single rmap.
-		 * since SFs share IRQs, rmap mapping cannot function correctly
-		 * for irqs that are shared for different core/netdev RX rings.
-		 * Hence we don't allow netdev rmap for SFs
-		 */
-		err = set_rmap(dev);
-		if (err)
-			goto err_rmap;
-	}
-
 	err = create_comp_eqs(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to create completion EQs\n");
@@ -1135,10 +1161,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 	}
 
 	return 0;
+
 err_comp_eqs:
-	if (!mlx5_core_is_sf(dev))
-		clear_rmap(dev);
-err_rmap:
 	destroy_async_eqs(dev);
 err_async_eqs:
 	return err;
@@ -1146,8 +1170,6 @@ err_async_eqs:
 
 void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_core_is_sf(dev))
-		clear_rmap(dev);
 	destroy_comp_eqs(dev);
 	destroy_async_eqs(dev);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
index 380a208ab137..fa467335526e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -45,30 +45,28 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
 
 /* Creating an IRQ from irq_pool */
 static struct mlx5_irq *
-irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
 {
-	cpumask_var_t auto_mask;
-	struct mlx5_irq *irq;
+	struct irq_affinity_desc auto_desc = {};
 	u32 irq_index;
 	int err;
 
-	if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
-		return ERR_PTR(-ENOMEM);
 	err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
 	if (err)
 		return ERR_PTR(err);
 	if (pool->irqs_per_cpu) {
-		if (cpumask_weight(req_mask) > 1)
+		if (cpumask_weight(&af_desc->mask) > 1)
 			/* if req_mask contain more then one CPU, set the least loadad CPU
 			 * of req_mask */
-			cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+			cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask),
+					&auto_desc.mask);
 		else
-			cpu_get(pool, cpumask_first(req_mask));
+			cpu_get(pool, cpumask_first(&af_desc->mask));
 	}
-	irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
-	free_cpumask_var(auto_mask);
-	return irq;
+	return mlx5_irq_alloc(pool, irq_index,
+			      cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc,
+			      NULL);
 }
 
 /* Looking for the IRQ with the smallest refcount that fits req_mask.
@@ -115,22 +113,22 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req
 /**
  * mlx5_irq_affinity_request - request an IRQ according to the given mask.
  * @pool: IRQ pool to request from.
- * @req_mask: cpumask requested for this IRQ.
+ * @af_desc: affinity descriptor for this IRQ.
  *
  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
  */
 struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
 {
 	struct mlx5_irq *least_loaded_irq, *new_irq;
 
 	mutex_lock(&pool->lock);
-	least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+	least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask);
 	if (least_loaded_irq &&
 	    mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
 		goto out;
 	/* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
-	new_irq = irq_pool_request_irq(pool, req_mask);
+	new_irq = irq_pool_request_irq(pool, af_desc);
 	if (IS_ERR(new_irq)) {
 		if (!least_loaded_irq) {
 			/* We failed to create an IRQ and we didn't find an IRQ */
@@ -194,32 +192,30 @@ int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
 					struct mlx5_irq **irqs)
 {
 	struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
-	cpumask_var_t req_mask;
+	struct irq_affinity_desc af_desc = {};
 	struct mlx5_irq *irq;
 	int i = 0;
 
-	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
-		return -ENOMEM;
-	cpumask_copy(req_mask, cpu_online_mask);
+	af_desc.is_managed = 1;
+	cpumask_copy(&af_desc.mask, cpu_online_mask);
 	for (i = 0; i < nirqs; i++) {
 		if (mlx5_irq_pool_is_sf_pool(pool))
-			irq = mlx5_irq_affinity_request(pool, req_mask);
+			irq = mlx5_irq_affinity_request(pool, &af_desc);
 		else
 			/* In case SF pool doesn't exists, fallback to the PF IRQs.
			 * The PF IRQs are already allocated and binded to CPU
			 * at this point. Hence, only an index is needed.
			 */
-			irq = mlx5_irq_request(dev, i, NULL);
+			irq = mlx5_irq_request(dev, i, NULL, NULL);
 		if (IS_ERR(irq))
 			break;
 		irqs[i] = irq;
-		cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+		cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), &af_desc.mask);
 		mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
 			      pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
 			      cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
 			      mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
 	}
-	free_cpumask_var(req_mask);
 	if (!i)
 		return PTR_ERR(irq);
 	return i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 597174ceadc9..f95df73d1089 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -46,9 +46,6 @@
 #include <linux/kmod.h>
 #include <linux/mlx5/mlx5_ifc.h>
 #include <linux/mlx5/vport.h>
-#ifdef CONFIG_RFS_ACCEL
-#include <linux/cpu_rmap.h>
-#endif
 #include <linux/version.h>
 #include <net/devlink.h>
 #include "mlx5_core.h"
@@ -1401,16 +1398,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
 		goto function_teardown;
 	}
 
+	err = mlx5_devlink_params_register(priv_to_devlink(dev));
+	if (err)
+		goto err_devlink_params_reg;
+
 	err = mlx5_load(dev);
 	if (err)
 		goto err_load;
 
 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
-	err = mlx5_devlink_params_register(priv_to_devlink(dev));
-	if (err)
-		goto err_devlink_params_reg;
-
 	err = mlx5_register_device(dev);
 	if (err)
 		goto err_register;
@@ -1420,11 +1417,11 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
 	return 0;
 
 err_register:
-	mlx5_devlink_params_unregister(priv_to_devlink(dev));
-err_devlink_params_reg:
 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 	mlx5_unload(dev);
 err_load:
+	mlx5_devlink_params_unregister(priv_to_devlink(dev));
+err_devlink_params_reg:
 	mlx5_cleanup_once(dev);
 function_teardown:
 	mlx5_function_teardown(dev, true);
@@ -1443,7 +1440,6 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
 
 	mutex_lock(&dev->intf_state_mutex);
 	mlx5_unregister_device(dev);
-	mlx5_devlink_params_unregister(priv_to_devlink(dev));
 
 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
 		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
@@ -1454,6 +1450,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
 
 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 	mlx5_unload(dev);
+	mlx5_devlink_params_unregister(priv_to_devlink(dev));
 	mlx5_cleanup_once(dev);
 	mlx5_function_teardown(dev, true);
 out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 23cb63fa4588..efd0c299c5c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -9,6 +9,7 @@
 #define MLX5_COMP_EQS_PER_SF 8
 
 struct mlx5_irq;
+struct cpu_rmap;
 
 int mlx5_irq_table_init(struct mlx5_core_dev *dev);
 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
@@ -25,9 +26,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
 void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
-				  struct cpumask *affinity);
+				  struct irq_affinity_desc *af_desc,
+				  struct cpu_rmap **rmap);
 int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
-			      struct mlx5_irq **irqs);
+			      struct mlx5_irq **irqs, struct cpu_rmap **rmap);
 void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
@@ -39,7 +41,7 @@ struct mlx5_irq_pool;
 int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
 					struct mlx5_irq **irqs);
 struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
-					   const struct cpumask *req_mask);
+					   struct irq_affinity_desc *af_desc);
 void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
 				    int num_irqs);
 #else
@@ -50,7 +52,7 @@ static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev,
 }
 
 static inline struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 6bde18bcd42f..e12e528c09f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -9,6 +9,7 @@
 #include "mlx5_irq.h"
 #include "pci_irq.h"
 #include "lib/sf.h"
+#include "lib/eq.h"
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
@@ -29,12 +30,11 @@ struct mlx5_irq {
 	char name[MLX5_MAX_IRQ_NAME];
 	struct mlx5_irq_pool *pool;
 	int refcount;
-	u32 index;
-	int irqn;
+	struct msi_map map;
 };
 
 struct mlx5_irq_table {
-	struct mlx5_irq_pool *pf_pool;
+	struct mlx5_irq_pool *pcif_pool;
 	struct mlx5_irq_pool *sf_ctrl_pool;
 	struct mlx5_irq_pool *sf_comp_pool;
 };
@@ -127,15 +127,26 @@ out:
 static void irq_release(struct mlx5_irq *irq)
 {
 	struct mlx5_irq_pool *pool = irq->pool;
+#ifdef CONFIG_RFS_ACCEL
+	struct cpu_rmap *rmap;
+#endif
 
-	xa_erase(&pool->irqs, irq->index);
-	/* free_irq requires that affinity_hint and rmap will be cleared
-	 * before calling it. This is why there is asymmetry with set_rmap
-	 * which should be called after alloc_irq but before request_irq.
+	xa_erase(&pool->irqs, irq->map.index);
+	/* free_irq requires that affinity_hint and rmap will be cleared before
+	 * calling it. To satisfy this requirement, we call
+	 * irq_cpu_rmap_remove() to remove the notifier
 	 */
-	irq_update_affinity_hint(irq->irqn, NULL);
+	irq_update_affinity_hint(irq->map.virq, NULL);
+#ifdef CONFIG_RFS_ACCEL
+	rmap = mlx5_eq_table_get_rmap(pool->dev);
+	if (rmap && irq->map.index)
+		irq_cpu_rmap_remove(rmap, irq->map.virq);
+#endif
+
 	free_cpumask_var(irq->mask);
-	free_irq(irq->irqn, &irq->nh);
+	free_irq(irq->map.virq, &irq->nh);
+	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
+		pci_msix_free_irq(pool->dev->pdev, irq->map);
 	kfree(irq);
 }
@@ -198,7 +209,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 		return;
 	}
 
-	if (vecidx == pool->xa_num_irqs.max) {
+	if (!vecidx) {
 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
 		return;
 	}
@@ -207,7 +218,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 }
 
 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
-				const struct cpumask *affinity)
+				struct irq_affinity_desc *af_desc,
+				struct cpu_rmap **rmap)
 {
 	struct mlx5_core_dev *dev = pool->dev;
 	char name[MLX5_MAX_IRQ_NAME];
@@ -217,7 +229,28 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
 	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
 	if (!irq)
 		return ERR_PTR(-ENOMEM);
-	irq->irqn = pci_irq_vector(dev->pdev, i);
+	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
+		/* The vector at index 0 was already allocated.
+		 * Just get the irq number. If dynamic irq is not supported
+		 * vectors have also been allocated.
+		 */
+		irq->map.virq = pci_irq_vector(dev->pdev, i);
+		irq->map.index = 0;
+	} else {
+		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
+		if (!irq->map.virq) {
+			err = irq->map.index;
+			goto err_alloc_irq;
+		}
+	}
+
+	if (i && rmap && *rmap) {
+#ifdef CONFIG_RFS_ACCEL
+		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
+		if (err)
+			goto err_irq_rmap;
+#endif
+	}
 	if (!mlx5_irq_pool_is_sf_pool(pool))
 		irq_set_name(pool, name, i);
 	else
@@ -225,7 +258,7 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
 	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
 	snprintf(irq->name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
 		 name, pci_name(dev->pdev));
-	err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
 			  &irq->nh);
 	if (err) {
 		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
@@ -236,26 +269,37 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
 		err = -ENOMEM;
 		goto err_cpumask;
 	}
-	if (affinity) {
-		cpumask_copy(irq->mask, affinity);
-		irq_set_affinity_and_hint(irq->irqn, irq->mask);
+	if (af_desc) {
+		cpumask_copy(irq->mask, &af_desc->mask);
+		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
 	}
 	irq->pool = pool;
 	irq->refcount = 1;
-	irq->index = i;
-	err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
+	irq->map.index = i;
+	err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
 	if (err) {
 		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
-			      irq->index, err);
+			      irq->map.index, err);
 		goto err_xa;
 	}
 	return irq;
 err_xa:
-	irq_update_affinity_hint(irq->irqn, NULL);
+	if (af_desc)
+		irq_update_affinity_hint(irq->map.virq, NULL);
 	free_cpumask_var(irq->mask);
 err_cpumask:
-	free_irq(irq->irqn, &irq->nh);
+	free_irq(irq->map.virq, &irq->nh);
 err_req_irq:
+#ifdef CONFIG_RFS_ACCEL
+	if (i && rmap && *rmap) {
+		free_irq_cpu_rmap(*rmap);
+		*rmap = NULL;
+	}
+err_irq_rmap:
+#endif
+	if (i && pci_msix_can_alloc_dyn(dev->pdev))
+		pci_msix_free_irq(dev->pdev, irq->map);
+err_alloc_irq:
 	kfree(irq);
 	return ERR_PTR(err);
 }
@@ -292,7 +336,7 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 
 int mlx5_irq_get_index(struct mlx5_irq *irq)
 {
-	return irq->index;
+	return irq->map.index;
 }
 
 /* irq_pool API */
@@ -300,7 +344,8 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
 /* requesting an irq from a given pool according to given index */
 static struct mlx5_irq *
 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
-			struct cpumask *affinity)
+			struct irq_affinity_desc *af_desc,
+			struct cpu_rmap **rmap)
 {
 	struct mlx5_irq *irq;
 
@@ -310,7 +355,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
 		mlx5_irq_get_locked(irq);
 		goto unlock;
 	}
-	irq = mlx5_irq_alloc(pool, vecidx, affinity);
+	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
 unlock:
 	mutex_unlock(&pool->lock);
 	return irq;
@@ -337,7 +382,7 @@ struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
 	/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
	 * the PF IRQs pool in case the SF pool doesn't exist.
	 */
-	return pool ? pool : irq_table->pf_pool;
+	return pool ? pool : irq_table->pcif_pool;
 }
 
 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
@@ -351,7 +396,7 @@ static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
 	/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
	 * the PF IRQs pool in case the SF pool doesn't exist.
	 */
-	return pool ? pool : irq_table->pf_pool;
+	return pool ? pool : irq_table->pcif_pool;
 }
 
 /**
@@ -364,7 +409,7 @@ static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
 	int i;
 
 	for (i = 0; i < nirqs; i++) {
-		synchronize_irq(irqs[i]->irqn);
+		synchronize_irq(irqs[i]->map.virq);
 		mlx5_irq_put(irqs[i]);
 	}
 }
@@ -387,26 +432,26 @@ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
 {
 	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
-	cpumask_var_t req_mask;
+	struct irq_affinity_desc af_desc;
 	struct mlx5_irq *irq;
 
-	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
-		return ERR_PTR(-ENOMEM);
-	cpumask_copy(req_mask, cpu_online_mask);
+	cpumask_copy(&af_desc.mask, cpu_online_mask);
+	af_desc.is_managed = false;
 	if (!mlx5_irq_pool_is_sf_pool(pool)) {
-		/* In case we are allocating a control IRQ for PF/VF */
+		/* In case we are allocating a control IRQ from a pci device's pool.
+		 * This can happen also for a SF if the SFs pool is empty.
+		 */
 		if (!pool->xa_num_irqs.max) {
-			cpumask_clear(req_mask);
+			cpumask_clear(&af_desc.mask);
 			/* In case we only have a single IRQ for PF/VF */
-			cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
+			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
 		}
-		/* Allocate the IRQ in the last index of the pool */
-		irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+		/* Allocate the IRQ in index 0. The vector was already allocated */
+		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
 	} else {
-		irq = mlx5_irq_affinity_request(pool, req_mask);
+		irq = mlx5_irq_affinity_request(pool, &af_desc);
 	}
 
-	free_cpumask_var(req_mask);
 	return irq;
 }
@@ -415,28 +460,82 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
  * @dev: mlx5 device that requesting the IRQ.
  * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
  * provided.
- * @affinity: cpumask requested for this IRQ.
+ * @af_desc: affinity descriptor for this IRQ.
+ * @rmap: pointer to reverse map pointer for completion interrupts
  *
  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
  */
 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
-				  struct cpumask *affinity)
+				  struct irq_affinity_desc *af_desc,
+				  struct cpu_rmap **rmap)
 {
 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 	struct mlx5_irq_pool *pool;
 	struct mlx5_irq *irq;
 
-	pool = irq_table->pf_pool;
-	irq = irq_pool_request_vector(pool, vecidx, affinity);
+	pool = irq_table->pcif_pool;
+	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
 	if (IS_ERR(irq))
 		return irq;
 	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
-		      irq->irqn, cpumask_pr_args(affinity),
+		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
 		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
 	return irq;
 }
 
 /**
+ * mlx5_msix_alloc - allocate msix interrupt
+ * @dev: mlx5 device from which to request
+ * @handler: interrupt handler
+ * @affdesc: affinity descriptor
+ * @name: interrupt name
+ *
+ * Returns: struct msi_map with result encoded.
+ * Note: the caller must make sure to release the irq by calling
+ *       mlx5_msix_free() if shutdown was initiated.
+ */
+struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
+			       irqreturn_t (*handler)(int, void *),
+			       const struct irq_affinity_desc *affdesc,
+			       const char *name)
+{
+	struct msi_map map;
+	int err;
+
+	if (!dev->pdev) {
+		map.virq = 0;
+		map.index = -EINVAL;
+		return map;
+	}
+
+	map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
+	if (!map.virq)
+		return map;
+
+	err = request_irq(map.virq, handler, 0, name, NULL);
+	if (err) {
+		mlx5_core_warn(dev, "err %d\n", err);
+		pci_msix_free_irq(dev->pdev, map);
+		map.virq = 0;
+		map.index = -ENOMEM;
+	}
+	return map;
+}
+EXPORT_SYMBOL(mlx5_msix_alloc);
+
+/**
+ * mlx5_msix_free - free a previously allocated msix interrupt
+ * @dev: mlx5 device associated with interrupt
+ * @map: map previously returned by mlx5_msix_alloc()
+ */
+void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
+{
+	free_irq(map.virq, NULL);
+	pci_msix_free_irq(dev->pdev, map);
+}
+EXPORT_SYMBOL(mlx5_msix_free);
+
+/**
  * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
  * @irqs: IRQs to be released.
  * @nirqs: number of IRQs to be released.
@@ -452,6 +551,7 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
  * @cpus: CPUs array for binding the IRQs
  * @nirqs: number of IRQs to request.
  * @irqs: an output array of IRQs pointers.
+ * @rmap: pointer to reverse map pointer for completion interrupts
  *
  * Each IRQ is bound to at most 1 CPU.
  * This function is requests nirqs IRQs, starting from @vecidx.
@@ -460,24 +560,22 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
  * @nirqs), if successful, or a negative error code in case of an error.
  */
 int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
-			      struct mlx5_irq **irqs)
+			      struct mlx5_irq **irqs, struct cpu_rmap **rmap)
 {
-	cpumask_var_t req_mask;
+	struct irq_affinity_desc af_desc;
 	struct mlx5_irq *irq;
 	int i;
 
-	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
-		return -ENOMEM;
+	af_desc.is_managed = 1;
 	for (i = 0; i < nirqs; i++) {
-		cpumask_set_cpu(cpus[i], req_mask);
-		irq = mlx5_irq_request(dev, i, req_mask);
+		cpumask_set_cpu(cpus[i], &af_desc.mask);
+		irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
 		if (IS_ERR(irq))
 			break;
-		cpumask_clear(req_mask);
+		cpumask_clear(&af_desc.mask);
 		irqs[i] = irq;
 	}
-	free_cpumask_var(req_mask);
 	return i ? i : PTR_ERR(irq);
 }
@@ -521,7 +619,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
 	kvfree(pool);
 }
 
-static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
 {
 	struct mlx5_irq_table *table = dev->priv.irq_table;
 	int num_sf_ctrl_by_msix;
@@ -529,12 +627,12 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 	int num_sf_ctrl;
 	int err;
 
-	/* init pf_pool */
-	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
-					MLX5_EQ_SHARE_IRQ_MIN_COMP,
-					MLX5_EQ_SHARE_IRQ_MAX_COMP);
-	if (IS_ERR(table->pf_pool))
-		return PTR_ERR(table->pf_pool);
+	/* init pcif_pool */
+	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
+					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
+					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
+	if (IS_ERR(table->pcif_pool))
+		return PTR_ERR(table->pcif_pool);
 	if (!mlx5_sf_max_functions(dev))
 		return 0;
 	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
@@ -548,7 +646,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 					  MLX5_SFS_PER_CTRL_IRQ);
 	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
 	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
-	table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
+	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
 					     "mlx5_sf_ctrl",
 					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
 					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
@@ -557,7 +655,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 		err = PTR_ERR(table->sf_ctrl_pool);
 		goto err_pf;
 	}
 	/* init sf_comp_pool */
-	table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
+	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
 					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
 					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
 					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
@@ -579,7 +677,7 @@ err_irqs_per_cpu:
 err_sf_ctrl:
 	irq_pool_free(table->sf_ctrl_pool);
 err_pf:
-	irq_pool_free(table->pf_pool);
+	irq_pool_free(table->pcif_pool);
 	return err;
 }
 
@@ -589,7 +687,7 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
 		irq_pool_free(table->sf_comp_pool);
 		irq_pool_free(table->sf_ctrl_pool);
 	}
-	irq_pool_free(table->pf_pool);
+	irq_pool_free(table->pcif_pool);
 }
 
 /* irq_table API */
@@ -620,9 +718,9 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
 
 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
 {
-	if (!table->pf_pool->xa_num_irqs.max)
+	if (!table->pcif_pool->xa_num_irqs.max)
 		return 1;
-	return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
+	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
 }
 
 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
@@ -631,26 +729,30 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 		      MLX5_CAP_GEN(dev, max_num_eqs) :
 		      1 << MLX5_CAP_GEN(dev, log_max_eq);
 	int total_vec;
-	int pf_vec;
+	int pcif_vec;
+	int req_vec;
 	int err;
+	int n;
 
 	if (mlx5_core_is_sf(dev))
 		return 0;
 
-	pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
-	pf_vec = min_t(int, pf_vec, num_eqs);
+	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
+	pcif_vec = min_t(int, pcif_vec, num_eqs);
 
-	total_vec = pf_vec;
+	total_vec = pcif_vec;
 	if (mlx5_sf_max_functions(dev))
 		total_vec += MLX5_IRQ_CTRL_SF_MAX +
 			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
+	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
+	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
 
-	total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
-	if (total_vec < 0)
-		return total_vec;
-	pf_vec = min(pf_vec, total_vec);
+	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
+	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
+	if (n < 0)
+		return n;
 
-	err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
+	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
 	if (err)
 		pci_free_irq_vectors(dev->pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
index 5c7e68bee43a..d3a77a0ab848 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -12,6 +12,7 @@
 #define MLX5_EQ_REFS_PER_IRQ (2)
 
 struct mlx5_irq;
+struct cpu_rmap;
 
 struct mlx5_irq_pool {
 	char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
@@ -31,7 +32,8 @@ static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
 }
 
 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
-				const struct cpumask *affinity);
+				struct irq_affinity_desc *af_desc,
+				struct cpu_rmap **rmap);
 int mlx5_irq_get_locked(struct mlx5_irq *irq);
 int mlx5_irq_read_locked(struct mlx5_irq *irq);
 int mlx5_irq_put(struct mlx5_irq *irq);