summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/sfc/ef10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/sfc/ef10.c')
-rw-r--r--drivers/net/ethernet/sfc/ef10.c242
1 files changed, 233 insertions, 9 deletions
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 00279da6a1e8..de2947ccc5ad 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2086,6 +2086,92 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
ER_DZ_TX_DESC_UPD, tx_queue->queue);
}
+/* Add Firmware-Assisted TSO v2 option descriptors to a queue.
+ */
+static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
+ struct sk_buff *skb,
+ bool *data_mapped)
+{
+ struct efx_tx_buffer *buffer;
+ struct tcphdr *tcp;
+ struct iphdr *ip;
+
+ u16 ipv4_id;
+ u32 seqnum;
+ u32 mss;
+
+ EFX_WARN_ON_ONCE_PARANOID(tx_queue->tso_version != 2);
+
+ mss = skb_shinfo(skb)->gso_size;
+
+ if (unlikely(mss < 4)) {
+ WARN_ONCE(1, "MSS of %u is too small for TSO v2\n", mss);
+ return -EINVAL;
+ }
+
+ ip = ip_hdr(skb);
+ if (ip->version == 4) {
+ /* Modify IPv4 header if needed. */
+ ip->tot_len = 0;
+ ip->check = 0;
+ ipv4_id = ip->id;
+ } else {
+ /* Modify IPv6 header if needed. */
+ struct ipv6hdr *ipv6 = ipv6_hdr(skb);
+
+ ipv6->payload_len = 0;
+ ipv4_id = 0;
+ }
+
+ tcp = tcp_hdr(skb);
+ seqnum = ntohl(tcp->seq);
+
+ buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+ buffer->flags = EFX_TX_BUF_OPTION;
+ buffer->len = 0;
+ buffer->unmap_len = 0;
+ EFX_POPULATE_QWORD_5(buffer->option,
+ ESF_DZ_TX_DESC_IS_OPT, 1,
+ ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
+ ESF_DZ_TX_TSO_OPTION_TYPE,
+ ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
+ ESF_DZ_TX_TSO_IP_ID, ipv4_id,
+ ESF_DZ_TX_TSO_TCP_SEQNO, seqnum
+ );
+ ++tx_queue->insert_count;
+
+ buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+ buffer->flags = EFX_TX_BUF_OPTION;
+ buffer->len = 0;
+ buffer->unmap_len = 0;
+ EFX_POPULATE_QWORD_4(buffer->option,
+ ESF_DZ_TX_DESC_IS_OPT, 1,
+ ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
+ ESF_DZ_TX_TSO_OPTION_TYPE,
+ ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
+ ESF_DZ_TX_TSO_TCP_MSS, mss
+ );
+ ++tx_queue->insert_count;
+
+ return 0;
+}
+
+static u32 efx_ef10_tso_versions(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ u32 tso_versions = 0;
+
+ if (nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))
+ tso_versions |= BIT(1);
+ if (nic_data->datapath_caps2 &
+ (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))
+ tso_versions |= BIT(2);
+ return tso_versions;
+}
+
static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
@@ -2095,6 +2181,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
struct efx_channel *channel = tx_queue->channel;
struct efx_nic *efx = tx_queue->efx;
struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ bool tso_v2 = false;
size_t inlen;
dma_addr_t dma_addr;
efx_qword_t *txd;
@@ -2102,13 +2189,21 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
int i;
BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+ /* TSOv2 is a limited resource that can only be configured on a limited
+ * number of queues. TSO without checksum offload is not really a thing,
+ * so we only enable it for those queues.
+ */
+ if (csum_offload && (nic_data->datapath_caps2 &
+ (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) {
+ tso_v2 = true;
+ netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
+ channel->channel);
+ }
+
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
- MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS,
- INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
- INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
@@ -2124,10 +2219,30 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
- rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
- NULL, 0, NULL);
- if (rc)
- goto fail;
+ do {
+ MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS,
+ /* This flag was removed from mcdi_pcol.h for
+ * the non-_EXT version of INIT_TXQ. However,
+ * firmware still honours it.
+ */
+ INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
+ INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
+ NULL, 0, NULL);
+ if (rc == -ENOSPC && tso_v2) {
+ /* Retry without TSOv2 if we're short on contexts. */
+ tso_v2 = false;
+ netif_warn(efx, probe, efx->net_dev,
+ "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n");
+ } else if (rc) {
+ efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
+ MC_CMD_INIT_TXQ_EXT_IN_LEN,
+ NULL, 0, rc);
+ goto fail;
+ }
+ } while (rc);
/* A previous user of this TX queue might have set us up the
* bomb by writing a descriptor to the TX push collector but
@@ -2146,8 +2261,11 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
tx_queue->write_count = 1;
- if (nic_data->datapath_caps &
- (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
+ if (tso_v2) {
+ tx_queue->handle_tso = efx_ef10_tx_tso_desc;
+ tx_queue->tso_version = 2;
+ } else if (nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
tx_queue->tso_version = 1;
}
@@ -2202,6 +2320,25 @@ static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue);
}
+#define EFX_EF10_MAX_TX_DESCRIPTOR_LEN 0x3fff
+
+static unsigned int efx_ef10_tx_limit_len(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, unsigned int len)
+{
+ if (len > EFX_EF10_MAX_TX_DESCRIPTOR_LEN) {
+ /* If we need to break across multiple descriptors we should
+ * stop at a page boundary. This assumes the length limit is
+ * greater than the page size.
+ */
+ dma_addr_t end = dma_addr + EFX_EF10_MAX_TX_DESCRIPTOR_LEN;
+
+ BUILD_BUG_ON(EFX_EF10_MAX_TX_DESCRIPTOR_LEN < EFX_PAGE_SIZE);
+ len = (end & (~(EFX_PAGE_SIZE - 1))) - dma_addr;
+ }
+
+ return len;
+}
+
static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
{
unsigned int old_write_count = tx_queue->write_count;
@@ -2245,6 +2382,86 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
}
}
+#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\
+ 1 << RSS_MODE_HASH_DST_ADDR_LBN)
+#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\
+ 1 << RSS_MODE_HASH_DST_PORT_LBN)
+#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\
+ 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\
+ 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\
+ 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\
+ (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\
+ (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)
+
+static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
+{
+ /* Firmware had a bug (sfc bug 61952) where it would not actually
+ * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS.
+ * This meant that it would always contain whatever was previously
+ * in the MCDI buffer. Fortunately, all firmware versions with
+ * this bug have the same default flags value for a newly-allocated
+ * RSS context, and the only time we want to get the flags is just
+ * after allocating. Moreover, the response has a 32-bit hole
+ * where the context ID would be in the request, so we can use an
+ * overlength buffer in the request and pre-fill the flags field
+ * with what we believe the default to be. Thus if the firmware
+ * has the bug, it will leave our pre-filled value in the flags
+ * field of the response, and we will get the right answer.
+ *
+ * However, this does mean that this function should NOT be used if
+ * the RSS context flags might not be their defaults - it is ONLY
+ * reliably correct for a newly-allocated RSS context.
+ */
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ /* Check we have a hole for the context ID */
+ BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS,
+ RSS_CONTEXT_FLAGS_DEFAULT);
+ rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf,
+ sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
+ if (rc == 0) {
+ if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN)
+ rc = -EIO;
+ else
+ *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS);
+ }
+ return rc;
+}
+
+/* Attempt to enable 4-tuple UDP hashing on the specified RSS context.
+ * If we fail, we just leave the RSS context at its default hash settings,
+ * which is safe but may slightly reduce performance.
+ * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
+ * just need to set the UDP ports flags (for both IP versions).
+ */
+static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
+ u32 flags;
+
+ BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
+
+ if (efx_ef10_get_rss_flags(efx, context, &flags) != 0)
+ return;
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context);
+ flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
+ flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
+ if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
+ NULL, 0, NULL))
+ /* Succeeded, so UDP 4-tuple is now enabled */
+ efx->rx_hash_udp_4tuple = true;
+}
+
static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
bool exclusive, unsigned *context_size)
{
@@ -2290,6 +2507,10 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
if (context_size)
*context_size = rss_spread;
+ if (nic_data->datapath_caps &
+ 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
+ efx_ef10_set_rss_flags(efx, *context);
+
return 0;
}
@@ -5385,6 +5606,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.tx_init = efx_ef10_tx_init,
.tx_remove = efx_ef10_tx_remove,
.tx_write = efx_ef10_tx_write,
+ .tx_limit_len = efx_ef10_tx_limit_len,
.rx_push_rss_config = efx_ef10_vf_rx_push_rss_config,
.rx_probe = efx_ef10_rx_probe,
.rx_init = efx_ef10_rx_init,
@@ -5491,6 +5713,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.tx_init = efx_ef10_tx_init,
.tx_remove = efx_ef10_tx_remove,
.tx_write = efx_ef10_tx_write,
+ .tx_limit_len = efx_ef10_tx_limit_len,
.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
.rx_probe = efx_ef10_rx_probe,
.rx_init = efx_ef10_rx_init,
@@ -5550,6 +5773,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
#endif
.get_mac_address = efx_ef10_get_mac_address_pf,
.set_mac_address = efx_ef10_set_mac_address,
+ .tso_versions = efx_ef10_tso_versions,
.revision = EFX_REV_HUNT_A0,
.max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH),