From 822b3b2ebfff8e9b3d006086c527738a7ca00cd0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Mar 2015 14:57:33 +0200 Subject: net: Add max rate tx queue attribute This adds a tx_maxrate attribute to the tx queue sysfs entry allowing for max-rate limiting. Along with DCB-ETS and BQL this provides another knob to tune queue performance. The limit units are Mbps. By default it is disabled. To disable the rate limitation after it has been set for a queue, it should be set to zero. Signed-off-by: John Fastabend Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net-queues | 8 +++ Documentation/networking/scaling.txt | 9 ++++ include/linux/netdevice.h | 8 +++ net/core/net-sysfs.c | 67 +++++++++++++++++++----- 4 files changed, 80 insertions(+), 12 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 5e9aeb91d355..0c0df91b1516 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -24,6 +24,14 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. +What: /sys/class//queues/tx-/tx_maxrate +Date: March 2015 +KernelVersion: 4.1 +Contact: netdev@vger.kernel.org +Description: + A Mbps max-rate set for the queue, a value of zero means disabled, + default is disabled. + What: /sys/class//queues/tx-/xps_cpus Date: November 2010 KernelVersion: 2.6.38 diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index 99ca40e8e810..cbfac0949635 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt @@ -421,6 +421,15 @@ best CPUs to share a given queue are probably those that share the cache with the CPU that processes transmit completions for that queue (transmit interrupts). +Per TX Queue rate limitation: +============================= + +These are rate-limitation mechanisms implemented by HW, where currently +a max-rate attribute is supported, by setting a Mbps value to + +/sys/class/net//queues/tx-/tx_maxrate + +A value of zero means disabled, and this is the default. Further Information =================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd1d069758be..76c5de4978a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -587,6 +587,7 @@ struct netdev_queue { #ifdef CONFIG_BQL struct dql dql; #endif + unsigned long tx_maxrate; } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) @@ -1022,6 +1023,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. + * int (*ndo_set_tx_maxrate)(struct net_device *dev, + * int queue_index, u32 maxrate); + * Called when a user wants to set a max-rate limitation of specific + * TX queue. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1178,6 +1183,9 @@ struct net_device_ops { netdev_features_t (*ndo_features_check) (struct sk_buff *skb, struct net_device *dev, netdev_features_t features); + int (*ndo_set_tx_maxrate)(struct net_device *dev, + int queue_index, + u32 maxrate); }; /** diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index cf30620a88e1..7e58bd7ec232 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -951,6 +951,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } +#ifdef CONFIG_XPS +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + +static ssize_t show_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + return sprintf(buf, "%lu\n", queue->tx_maxrate); +} + +static ssize_t set_tx_maxrate(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + int err, index = get_netdev_queue_index(queue); + u32 rate = 0; + + err = kstrtou32(buf, 10, &rate); + if (err < 0) + return err; + + if (!rtnl_trylock()) + return restart_syscall(); + + err = -EOPNOTSUPP; + if (dev->netdev_ops->ndo_set_tx_maxrate) + err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + + rtnl_unlock(); + if (!err) { + queue->tx_maxrate = rate; + return len; + } + return err; +} + +static struct netdev_queue_attribute queue_tx_maxrate = + __ATTR(tx_maxrate, S_IRUGO | S_IWUSR, + show_tx_maxrate, set_tx_maxrate); +#endif + static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); @@ -1065,18 +1119,6 @@ static struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static unsigned int get_netdev_queue_index(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - unsigned int i; - - i = queue - dev->_tx; - BUG_ON(i >= dev->num_tx_queues); - - return i; -} - - static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { @@ -1153,6 +1195,7 @@ static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, + &queue_tx_maxrate.attr, #endif NULL }; -- cgit From fc31e2560a2443410fe45c27116fae736541a7b5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 18 Mar 2015 14:57:34 +0200 Subject: net/mlx4_core: Add basic support for QP max-rate limiting Add the low-level device commands and definitions used for QP max-rate limiting. This is done through the following elements: - read rate-limit device caps in QUERY_DEV_CAP: number of different rates and the min/max rates in Kbs/Mbs/Gbs units - enhance the QP context struct to contain rate limit units and value - allow to do run time rate-limit setting to QPs through the update-qp firmware command - QP rate-limiting is disallowed for VFs Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 33 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/qp.c | 5 ++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 6 +++- include/linux/mlx4/device.h | 17 +++++++++++ include/linux/mlx4/qp.h | 14 ++++++--- 7 files changed, 72 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 242bcee5d774..4a471f5d1b56 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -144,7 +144,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [19] = "Performance optimized for limited rule configuration flow steering support", [20] = "Recoverable error events support", [21] = "Port Remap support", - [22] = "QCN support" + [22] = "QCN support", + [23] = "QP rate limiting support" }; int i; @@ -697,6 +698,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac +#define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc +#define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0 +#define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2 + dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -904,6 +909,18 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET); dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); + dev_cap->rl_caps.num_rates = size; + if (dev_cap->rl_caps.num_rates) { + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET); + dev_cap->rl_caps.max_val = size & 0xfff; + dev_cap->rl_caps.max_unit = size >> 14; + MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET); + dev_cap->rl_caps.min_val = size & 0xfff; + dev_cap->rl_caps.min_unit = size >> 14; + } + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; @@ -979,6 +996,15 @@ void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->dmfs_high_rate_qpn_base); mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n", dev_cap->dmfs_high_rate_qpn_range); + + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT) { + struct mlx4_rate_limit_caps *rl_caps = &dev_cap->rl_caps; + + mlx4_dbg(dev, "QP Rate-Limit: #rates %d, unit/val max %d/%d, min %d/%d\n", + rl_caps->num_rates, rl_caps->max_unit, rl_caps->max_val, + rl_caps->min_unit, rl_caps->min_val); + } + dump_dev_cap_flags(dev, dev_cap->flags); dump_dev_cap_flags2(dev, dev_cap->flags2); } @@ -1075,6 +1101,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, u64 flags; int err = 0; u8 field; + u16 field16; u32 bmme_flags, field32; int real_port; int slave_port; @@ -1158,6 +1185,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0xfe; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); + /* turn off QP max-rate limiting for guests */ + field16 = 0; + MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index f44f7f6017ed..863655bd3947 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -127,6 +127,7 @@ struct mlx4_dev_cap { u32 max_counters; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + struct mlx4_rate_limit_caps rl_caps; struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7e487223489a..43aa76775b5f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -489,6 +489,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE; } + dev->caps.rl_caps = dev_cap->rl_caps; + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] = dev->caps.dmfs_high_rate_qpn_range; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index eda29dbbfcd2..69e4462e4ee4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -442,6 +442,11 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN); } + if (attr & MLX4_UPDATE_QP_RATE_LIMIT) { + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_RATE_LIMIT; + cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val); + } + cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask); cmd->qp_mask = cpu_to_be64(qp_mask); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index d43e25914d19..c258f8625aac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2947,8 +2947,12 @@ static int verify_qp_parameters(struct mlx4_dev *dev, qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); - if (slave != mlx4_master_func_num(dev)) + if (slave != mlx4_master_func_num(dev)) { qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + /* setting QP rate-limit is disallowed for VFs */ + if (qp_ctx->rate_limit_params) + return -EPERM; + } switch (qp_type) { case MLX4_QP_ST_RC: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1cc54822b931..4550c67b92e4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,6 +205,7 @@ enum { MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, + MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23 }; enum { @@ -450,6 +451,21 @@ enum mlx4_module_id { MLX4_MODULE_ID_QSFP28 = 0x11, }; +enum { /* rl */ + MLX4_QP_RATE_LIMIT_NONE = 0, + MLX4_QP_RATE_LIMIT_KBS = 1, + MLX4_QP_RATE_LIMIT_MBS = 2, + MLX4_QP_RATE_LIMIT_GBS = 3 +}; + +struct mlx4_rate_limit_caps { + u16 num_rates; /* Number of different rates */ + u8 min_unit; + u16 min_val; + u8 max_unit; + u16 max_val; +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -565,6 +581,7 @@ struct mlx4_caps { u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; u32 vf_caps; + struct mlx4_rate_limit_caps rl_caps; }; struct mlx4_buf_list { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 551f85456c11..1023ebe035b7 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -207,14 +207,16 @@ struct mlx4_qp_context { __be32 msn; __be16 rq_wqe_counter; __be16 sq_wqe_counter; - u32 reserved3[2]; + u32 reserved3; + __be16 rate_limit_params; + __be16 reserved4; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; - u8 reserved4[2]; + u8 reserved5[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; - u32 reserved5[10]; + u32 reserved6[10]; }; struct mlx4_update_qp_context { @@ -229,6 +231,7 @@ struct mlx4_update_qp_context { enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, + MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; enum { @@ -428,7 +431,8 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 + MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1 }; enum mlx4_update_qp_params_flags { @@ -438,6 +442,8 @@ enum mlx4_update_qp_params_flags { struct mlx4_update_qp_params { u8 smac_index; u32 flags; + u16 rate_unit; + u16 rate_val; }; int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, -- cgit From c10e4fc6c45616dbadb8ccee189e2c09fb8f056f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 18 Mar 2015 14:57:35 +0200 Subject: net/mlx4_en: Add tx queue maxrate support Add ndo_set_tx_maxrate support. To support per tx queue maxrate limit, we use the update-qp firmware command to do run-time rate setting for the qp that serves this tx ring. Signed-off-by: Or Gerlitz Signed-off-by: Ido Shamay Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c59ed925adaf..d8dc3f985c2b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2379,6 +2379,33 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, } #endif +int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[queue_index]; + struct mlx4_update_qp_params params; + int err; + + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + + /* rate provided to us in Mbs, check if it fits into 12 bits, if not use Gbs */ + if (maxrate >> 12) { + params.rate_unit = MLX4_QP_RATE_LIMIT_GBS; + params.rate_val = maxrate / 1000; + } else if (maxrate) { + params.rate_unit = MLX4_QP_RATE_LIMIT_MBS; + params.rate_val = maxrate; + } else { /* zero serves to revoke the QP rate-limitation */ + params.rate_unit = 0; + params.rate_val = 0; + } + + err = mlx4_update_qp(priv->mdev->dev, tx_ring->qpn, MLX4_UPDATE_QP_RATE_LIMIT, + ¶ms); + return err; +} + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2410,6 +2437,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, #endif + .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2444,6 +2472,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, #endif + .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, }; struct mlx4_en_bond { -- cgit