Diffstat (limited to 'drivers/net/ethernet/meta/fbnic/fbnic_txrx.c')
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 1472
1 file changed, 1140 insertions(+), 332 deletions(-)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index b5050fabe8fe..13d508ce637f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -2,10 +2,14 @@ /* Copyright (c) Meta Platforms, Inc. and affiliates. */ #include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/iopoll.h> #include <linux/pci.h> #include <net/netdev_queues.h> #include <net/page_pool/helpers.h> +#include <net/tcp.h> +#include <net/xdp.h> #include "fbnic.h" #include "fbnic_csr.h" @@ -13,11 +17,19 @@ #include "fbnic_txrx.h" enum { + FBNIC_XDP_PASS = 0, + FBNIC_XDP_CONSUME, + FBNIC_XDP_TX, + FBNIC_XDP_LEN_ERR, +}; + +enum { FBNIC_XMIT_CB_TS = 0x01, }; struct fbnic_xmit_cb { u32 bytecount; + u16 gso_segs; u8 desc_count; u8 flags; int hw_head; @@ -25,6 +37,8 @@ struct fbnic_xmit_cb { #define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb)) +#define FBNIC_XMIT_NOUNMAP ((void *)1) + static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring) { unsigned long csr_base = (unsigned long)ring->doorbell; @@ -113,6 +127,11 @@ static int fbnic_maybe_stop_tx(const struct net_device *dev, res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size, FBNIC_TX_DESC_WAKEUP); + if (!res) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.stop++; + u64_stats_update_end(&ring->stats.syncp); + } return !res; } @@ -174,8 +193,72 @@ static bool fbnic_tx_tstamp(struct sk_buff *skb) } static bool +fbnic_tx_lso(struct fbnic_ring *ring, struct sk_buff *skb, + struct skb_shared_info *shinfo, __le64 *meta, + unsigned int *l2len, unsigned int *i3len) +{ + unsigned int l3_type, l4_type, l4len, hdrlen; + unsigned char *l4hdr; + __be16 payload_len; + + if (unlikely(skb_cow_head(skb, 0))) + return true; + + if (shinfo->gso_type & SKB_GSO_PARTIAL) { + l3_type = FBNIC_TWD_L3_TYPE_OTHER; + } else if (!skb->encapsulation) { + if (ip_hdr(skb)->version == 4) + l3_type = FBNIC_TWD_L3_TYPE_IPV4; + else + l3_type = FBNIC_TWD_L3_TYPE_IPV6; + } else { + unsigned int o3len; + + o3len = skb_inner_network_header(skb) - skb_network_header(skb); + *i3len -= o3len; + *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_OHLEN_MASK, + o3len / 2)); + l3_type = FBNIC_TWD_L3_TYPE_V6V6; + } + + l4hdr = skb_checksum_start(skb); + payload_len = cpu_to_be16(skb->len - (l4hdr - skb->data)); + + if (shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + struct tcphdr *tcph = (struct tcphdr *)l4hdr; + + l4_type = FBNIC_TWD_L4_TYPE_TCP; + l4len = __tcp_hdrlen((struct tcphdr *)l4hdr); + csum_replace_by_diff(&tcph->check, (__force __wsum)payload_len); + } else { + struct udphdr *udph = (struct udphdr *)l4hdr; + + l4_type = FBNIC_TWD_L4_TYPE_UDP; + l4len = sizeof(struct udphdr); + csum_replace_by_diff(&udph->check, (__force __wsum)payload_len); + } + + hdrlen = (l4hdr - skb->data) + l4len; + *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L3_TYPE_MASK, l3_type) | + FIELD_PREP(FBNIC_TWD_L4_TYPE_MASK, l4_type) | + FIELD_PREP(FBNIC_TWD_L4_HLEN_MASK, l4len / 4) | + FIELD_PREP(FBNIC_TWD_MSS_MASK, shinfo->gso_size) | + FBNIC_TWD_FLAG_REQ_LSO); + + FBNIC_XMIT_CB(skb)->bytecount += (shinfo->gso_segs - 1) * hdrlen; + FBNIC_XMIT_CB(skb)->gso_segs = shinfo->gso_segs; + + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.lso += shinfo->gso_segs; + u64_stats_update_end(&ring->stats.syncp); + + return false; +} + +static bool fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) { + struct 
skb_shared_info *shinfo = skb_shinfo(skb); unsigned int l2len, i3len; if (fbnic_tx_tstamp(skb)) @@ -190,7 +273,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2)); - *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO); + if (shinfo->gso_size) { + if (fbnic_tx_lso(ring, skb, shinfo, meta, &l2len, &i3len)) + return true; + } else { + *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO); + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.csum_partial++; + u64_stats_update_end(&ring->stats.syncp); + } *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) | FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2)); @@ -198,12 +289,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) } static void -fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq) +fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq, + u64 *csum_cmpl, u64 *csum_none) { skb_checksum_none_assert(skb); - if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) + if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) { + (*csum_none)++; return; + } if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -212,6 +306,7 @@ fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq) skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = (__force __wsum)csum; + (*csum_cmpl)++; } } @@ -222,6 +317,7 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) unsigned int tail = ring->tail, first; unsigned int size, data_len; skb_frag_t *frag; + bool is_net_iov; dma_addr_t dma; __le64 *twd; @@ -237,6 +333,7 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) goto dma_error; + is_net_iov = false; dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); for (frag = &skb_shinfo(skb)->frags[0];; frag++) { @@ -249,6 +346,8 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_AL)); + if (is_net_iov) + ring->tx_buf[tail] = FBNIC_XMIT_NOUNMAP; tail++; tail &= ring->size_mask; @@ -262,6 +361,7 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) goto dma_error; + is_net_iov = skb_frag_is_net_iov(frag); dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); } @@ -297,6 +397,8 @@ dma_error: twd = &ring->desc[tail]; if (tail == first) fbnic_unmap_single_twd(dev, twd); + else if (ring->tx_buf[tail] == FBNIC_XMIT_NOUNMAP) + ring->tx_buf[tail] = NULL; else fbnic_unmap_page_twd(dev, twd); } @@ -329,7 +431,9 @@ fbnic_xmit_frame_ring(struct sk_buff *skb, struct fbnic_ring *ring) /* Write all members within DWORD to condense this into 2 4B writes */ FBNIC_XMIT_CB(skb)->bytecount = skb->len; + FBNIC_XMIT_CB(skb)->gso_segs = 1; FBNIC_XMIT_CB(skb)->desc_count = 0; + FBNIC_XMIT_CB(skb)->flags = 0; if (fbnic_tx_offloads(ring, skb, meta)) goto err_free; @@ -356,6 +460,59 @@ netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev) return fbnic_xmit_frame_ring(skb, fbn->tx[q_map]); } +static netdev_features_t +fbnic_features_check_encap_gso(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features, unsigned int l3len) +{ + netdev_features_t skb_gso_features; + struct ipv6hdr *ip6_hdr; + unsigned char l4_hdr; + unsigned int start; + __be16 frag_off; + + /* 
Require MANGLEID for GSO_PARTIAL of IPv4. + * In theory we could support TSO with single, innermost v4 header + * by pretending everything before it is L2, but that needs to be + * parsed case by case.. so leaving it for when the need arises. + */ + if (!(features & NETIF_F_TSO_MANGLEID)) + features &= ~NETIF_F_TSO; + + skb_gso_features = skb_shinfo(skb)->gso_type; + skb_gso_features <<= NETIF_F_GSO_SHIFT; + + /* We'd only clear the native GSO features, so don't bother validating + * if the match can only be on those supported thru GSO_PARTIAL. + */ + if (!(skb_gso_features & FBNIC_TUN_GSO_FEATURES)) + return features; + + /* We can only do IPv6-in-IPv6, not v4-in-v6. It'd be nice + * to fall back to partial for this, or any failure below. + * This is just an optimization, UDPv4 will be caught later on. + */ + if (skb_gso_features & NETIF_F_TSO) + return features & ~FBNIC_TUN_GSO_FEATURES; + + /* Inner headers multiple of 2 */ + if ((skb_inner_network_header(skb) - skb_network_header(skb)) % 2) + return features & ~FBNIC_TUN_GSO_FEATURES; + + /* Encapsulated GSO packet, make 100% sure it's IPv6-in-IPv6. */ + ip6_hdr = ipv6_hdr(skb); + if (ip6_hdr->version != 6) + return features & ~FBNIC_TUN_GSO_FEATURES; + + l4_hdr = ip6_hdr->nexthdr; + start = (unsigned char *)ip6_hdr - skb->data + sizeof(struct ipv6hdr); + start = ipv6_skip_exthdr(skb, start, &l4_hdr, &frag_off); + if (frag_off || l4_hdr != IPPROTO_IPV6 || + skb->data + start != skb_inner_network_header(skb)) + return features & ~FBNIC_TUN_GSO_FEATURES; + + return features; +} + netdev_features_t fbnic_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) @@ -376,9 +533,12 @@ fbnic_features_check(struct sk_buff *skb, struct net_device *dev, !FIELD_FIT(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) || !FIELD_FIT(FBNIC_TWD_L3_IHLEN_MASK, l3len / 2) || !FIELD_FIT(FBNIC_TWD_CSUM_OFFSET_MASK, skb->csum_offset / 2)) - return features & ~NETIF_F_CSUM_MASK; + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); - return features; + if (likely(!skb->encapsulation) || !skb_is_gso(skb)) + return features; + + return fbnic_features_check_encap_gso(skb, dev, features, l3len); } static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, @@ -423,13 +583,17 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, desc_cnt--; while (desc_cnt--) { - fbnic_unmap_page_twd(nv->dev, &ring->desc[head]); + if (ring->tx_buf[head] != FBNIC_XMIT_NOUNMAP) + fbnic_unmap_page_twd(nv->dev, + &ring->desc[head]); + else + ring->tx_buf[head] = NULL; head++; head &= ring->size_mask; } total_bytes += FBNIC_XMIT_CB(skb)->bytecount; - total_packets += 1; + total_packets += FBNIC_XMIT_CB(skb)->gso_segs; napi_consume_skb(skb, napi_budget); } @@ -444,7 +608,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, if (unlikely(discard)) { u64_stats_update_begin(&ring->stats.syncp); ring->stats.dropped += total_packets; - ring->stats.ts_lost += ts_lost; + ring->stats.twq.ts_lost += ts_lost; u64_stats_update_end(&ring->stats.syncp); netdev_tx_completed_queue(txq, total_packets, total_bytes); @@ -456,9 +620,62 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, ring->stats.packets += total_packets; u64_stats_update_end(&ring->stats.syncp); - netif_txq_completed_wake(txq, total_packets, total_bytes, - fbnic_desc_unused(ring), - FBNIC_TX_DESC_WAKEUP); + if (!netif_txq_completed_wake(txq, total_packets, total_bytes, + fbnic_desc_unused(ring), + FBNIC_TX_DESC_WAKEUP)) 
{ + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.wake++; + u64_stats_update_end(&ring->stats.syncp); + } +} + +static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct, + struct fbnic_ring *ring, bool discard, + unsigned int hw_head) +{ + u64 total_bytes = 0, total_packets = 0; + unsigned int head = ring->head; + + while (hw_head != head) { + struct page *page; + u64 twd; + + if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL)))) + goto next_desc; + + twd = le64_to_cpu(ring->desc[head]); + page = ring->tx_buf[head]; + + /* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives + * us one increment per packet, with no branches. + */ + total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) - + FBNIC_TWD_TYPE_AL; + total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd); + + page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, -1, + pp_allow_direct); +next_desc: + head++; + head &= ring->size_mask; + } + + if (!total_bytes) + return; + + ring->head = head; + + if (discard) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped += total_packets; + u64_stats_update_end(&ring->stats.syncp); + return; + } + + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.bytes += total_bytes; + ring->stats.packets += total_packets; + u64_stats_update_end(&ring->stats.syncp); } static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, @@ -507,49 +724,70 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, skb_tstamp_tx(skb, &hwtstamp); u64_stats_update_begin(&ring->stats.syncp); - ring->stats.ts_packets++; + ring->stats.twq.ts_packets++; u64_stats_update_end(&ring->stats.syncp); } static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx, - struct page *page) + netmem_ref netmem) { struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; - page_pool_fragment_page(page, PAGECNT_BIAS_MAX); - rx_buf->pagecnt_bias = PAGECNT_BIAS_MAX; - rx_buf->page = page; + page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX); + rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX; + rx_buf->netmem = netmem; } -static struct page *fbnic_page_pool_get(struct fbnic_ring *ring, - unsigned int idx) +static struct page * +fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx) { - struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; + struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx]; rx_buf->pagecnt_bias--; - return rx_buf->page; + /* sub0 is always fed system pages, from the NAPI-level page_pool */ + return netmem_to_page(rx_buf->netmem); +} + +static netmem_ref +fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx) +{ + struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx]; + + rx_buf->pagecnt_bias--; + + return rx_buf->netmem; } static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx, - struct fbnic_napi_vector *nv, int budget) + int budget) { struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; - struct page *page = rx_buf->page; + netmem_ref netmem = rx_buf->netmem; - if (!page_pool_unref_page(page, rx_buf->pagecnt_bias)) - page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget); + if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias)) + page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1, + !!budget); - rx_buf->page = NULL; + rx_buf->netmem = 0; } static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget, - struct fbnic_q_triad *qt, s32 ts_head, s32 head0) + struct fbnic_q_triad *qt, s32 ts_head, s32 head0, + s32 head1) { if (head0 >= 0) fbnic_clean_twq0(nv, napi_budget, &qt->sub0, 
false, head0); else if (ts_head >= 0) fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head); + + if (head1 >= 0) { + qt->cmpl.deferred_head = -1; + if (napi_budget) + fbnic_clean_twq1(nv, true, &qt->sub1, false, head1); + else + qt->cmpl.deferred_head = head1; + } } static void @@ -557,6 +795,7 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int napi_budget) { struct fbnic_ring *cmpl = &qt->cmpl; + s32 head1 = cmpl->deferred_head; s32 head0 = -1, ts_head = -1; __le64 *raw_tcd, done; u32 head = cmpl->head; @@ -574,7 +813,10 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) { case FBNIC_TCD_TYPE_0: - if (!(tcd & FBNIC_TCD_TWQ1)) + if (tcd & FBNIC_TCD_TWQ1) + head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK, + tcd); + else head0 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK, tcd); /* Currently all err status bits are related to @@ -607,11 +849,11 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, } /* Unmap and free processed buffers */ - fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0); + fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1); } -static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, - struct fbnic_ring *ring, unsigned int hw_head) +static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head, + int napi_budget) { unsigned int head = ring->head; @@ -619,7 +861,7 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, return; do { - fbnic_page_pool_drain(ring, head, nv, napi_budget); + fbnic_page_pool_drain(ring, head, napi_budget); head++; head &= ring->size_mask; @@ -628,10 +870,10 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, ring->head = head; } -static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page) +static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem) { __le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT]; - dma_addr_t dma = page_pool_get_dma_addr(page); + dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem); u64 bd, i = FBNIC_BD_FRAG_COUNT; bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) | @@ -646,10 +888,11 @@ static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page) *bdq_desc = cpu_to_le64(bd); bd += FIELD_PREP(FBNIC_BD_DESC_ADDR_MASK, 1) | FIELD_PREP(FBNIC_BD_DESC_ID_MASK, 1); + bdq_desc++; } while (--i); } -static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) +static void fbnic_fill_bdq(struct fbnic_ring *bdq) { unsigned int count = fbnic_desc_unused(bdq); unsigned int i = bdq->tail; @@ -658,14 +901,19 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) return; do { - struct page *page; + netmem_ref netmem; + + netmem = page_pool_dev_alloc_netmems(bdq->page_pool); + if (!netmem) { + u64_stats_update_begin(&bdq->stats.syncp); + bdq->stats.bdq.alloc_failed++; + u64_stats_update_end(&bdq->stats.syncp); - page = page_pool_dev_alloc_pages(nv->page_pool); - if (!page) break; + } - fbnic_page_pool_init(bdq, i, page); - fbnic_bd_prep(bdq, i, page); + fbnic_page_pool_init(bdq, i, netmem); + fbnic_bd_prep(bdq, i, netmem); i++; i &= bdq->size_mask; @@ -712,7 +960,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, { unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); - struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx); + struct 
page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx); unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom; unsigned char *hdr_start; @@ -727,7 +975,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD; frame_sz = hdr_pg_end - hdr_pg_start; - xdp_init_buff(&pkt->buff, frame_sz, NULL); + xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq); hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * FBNIC_BD_FRAG_SIZE; @@ -738,13 +986,12 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, /* Build frame around buffer */ hdr_start = page_address(page) + hdr_pg_start; - + net_prefetch(pkt->buff.data); xdp_prepare_buff(&pkt->buff, hdr_start, headroom, len - FBNIC_RX_PAD, true); - pkt->data_truesize = 0; - pkt->data_len = 0; - pkt->nr_frags = 0; + pkt->hwtstamp = 0; + pkt->add_frag_failed = false; } static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, @@ -754,9 +1001,9 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); - struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx); - struct skb_shared_info *shinfo; + netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx); unsigned int truesize; + bool added; truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ? FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128); @@ -765,88 +1012,171 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, FBNIC_BD_FRAG_SIZE; /* Sync DMA buffer */ - dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page), - pg_off, truesize, DMA_BIDIRECTIONAL); - - /* Add page to xdp shared info */ - shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - - /* We use gso_segs to store truesize */ - pkt->data_truesize += truesize; - - __skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len); - - /* Store data_len in gso_size */ - pkt->data_len += len; + page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem, + pg_off, truesize); + + added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize); + if (unlikely(!added)) { + pkt->add_frag_failed = true; + netdev_err_once(nv->napi.dev, + "Failed to add fragment to xdp_buff\n"); + } } -static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv, +static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt, struct fbnic_pkt_buff *pkt, int budget) { - struct skb_shared_info *shinfo; struct page *page; - int nr_frags; if (!pkt->buff.data_hard_start) return; - shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - nr_frags = pkt->nr_frags; + if (xdp_buff_has_frags(&pkt->buff)) { + struct skb_shared_info *shinfo; + netmem_ref netmem; + int nr_frags; - while (nr_frags--) { - page = skb_frag_page(&shinfo->frags[nr_frags]); - page_pool_put_full_page(nv->page_pool, page, !!budget); + shinfo = xdp_get_shared_info_from_buff(&pkt->buff); + nr_frags = shinfo->nr_frags; + + while (nr_frags--) { + netmem = skb_frag_netmem(&shinfo->frags[nr_frags]); + page_pool_put_full_netmem(qt->sub1.page_pool, netmem, + !!budget); + } } page = virt_to_page(pkt->buff.data_hard_start); - page_pool_put_full_page(nv->page_pool, page, !!budget); + page_pool_put_full_page(qt->sub0.page_pool, page, !!budget); } static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv, struct fbnic_pkt_buff 
*pkt) { - unsigned int nr_frags = pkt->nr_frags; - struct skb_shared_info *shinfo; - unsigned int truesize; struct sk_buff *skb; - truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM - - pkt->buff.data_hard_start; - - /* Build frame around buffer */ - skb = napi_build_skb(pkt->buff.data_hard_start, truesize); - if (unlikely(!skb)) + skb = xdp_build_skb_from_buff(&pkt->buff); + if (!skb) return NULL; - /* Push data pointer to start of data, put tail to end of data */ - skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start); - __skb_put(skb, pkt->buff.data_end - pkt->buff.data); + /* Add timestamp if present */ + if (pkt->hwtstamp) + skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; - /* Add tracking for metadata at the start of the frame */ - skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta); + return skb; +} - /* Add Rx frags */ - if (nr_frags) { - /* Verify that shared info didn't move */ +static long fbnic_pkt_tx(struct fbnic_napi_vector *nv, + struct fbnic_pkt_buff *pkt) +{ + struct fbnic_ring *ring = &nv->qt[0].sub1; + int size, offset, nsegs = 1, data_len = 0; + unsigned int tail = ring->tail; + struct skb_shared_info *shinfo; + skb_frag_t *frag = NULL; + struct page *page; + dma_addr_t dma; + __le64 *twd; + + if (unlikely(xdp_buff_has_frags(&pkt->buff))) { shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - WARN_ON(skb_shinfo(skb) != shinfo); + nsegs += shinfo->nr_frags; + data_len = shinfo->xdp_frags_size; + frag = &shinfo->frags[0]; + } - skb->truesize += pkt->data_truesize; - skb->data_len += pkt->data_len; - shinfo->nr_frags = nr_frags; - skb->len += pkt->data_len; + if (fbnic_desc_unused(ring) < nsegs) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped++; + u64_stats_update_end(&ring->stats.syncp); + return -FBNIC_XDP_CONSUME; } - skb_mark_for_recycle(skb); + page = virt_to_page(pkt->buff.data_hard_start); + offset = offset_in_page(pkt->buff.data); + dma = page_pool_get_dma_addr(page); - /* Set MAC header specific fields */ - skb->protocol = eth_type_trans(skb, nv->napi.dev); + size = pkt->buff.data_end - pkt->buff.data; - /* Add timestamp if present */ - if (pkt->hwtstamp) - skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; + while (nsegs--) { + dma_sync_single_range_for_device(nv->dev, dma, offset, size, + DMA_BIDIRECTIONAL); + dma += offset; - return skb; + ring->tx_buf[tail] = page; + + twd = &ring->desc[tail]; + *twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) | + FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | + FIELD_PREP(FBNIC_TWD_TYPE_MASK, + FBNIC_TWD_TYPE_AL)); + + tail++; + tail &= ring->size_mask; + + if (!data_len) + break; + + offset = skb_frag_off(frag); + page = skb_frag_page(frag); + dma = page_pool_get_dma_addr(page); + + size = skb_frag_size(frag); + data_len -= size; + frag++; + } + + *twd |= FBNIC_TWD_TYPE(LAST_AL); + + ring->tail = tail; + + return -FBNIC_XDP_TX; +} + +static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv, + unsigned int pkt_tail) +{ + struct fbnic_ring *ring = &nv->qt[0].sub1; + + /* Force DMA writes to flush before writing to tail */ + dma_wmb(); + + writel(pkt_tail, ring->doorbell); +} + +static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv, + struct fbnic_pkt_buff *pkt) +{ + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); + struct bpf_prog *xdp_prog; + int act; + + xdp_prog = READ_ONCE(fbn->xdp_prog); + if (!xdp_prog) + goto xdp_pass; + + /* Should never happen, config paths enforce HDS threshold > MTU */ + if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags) + 
return ERR_PTR(-FBNIC_XDP_LEN_ERR); + + act = bpf_prog_run_xdp(xdp_prog, &pkt->buff); + switch (act) { + case XDP_PASS: +xdp_pass: + return fbnic_build_skb(nv, pkt); + case XDP_TX: + return ERR_PTR(fbnic_pkt_tx(nv, pkt)); + default: + bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_DROP: + break; + } + + return ERR_PTR(-FBNIC_XDP_CONSUME); } static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd) @@ -875,12 +1205,13 @@ static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd, static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv, u64 rcd, struct sk_buff *skb, - struct fbnic_q_triad *qt) + struct fbnic_q_triad *qt, + u64 *csum_cmpl, u64 *csum_none) { struct net_device *netdev = nv->napi.dev; struct fbnic_ring *rcq = &qt->cmpl; - fbnic_rx_csum(rcd, skb, rcq); + fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none); if (netdev->features & NETIF_F_RXHASH) skb_set_hash(skb, @@ -898,10 +1229,11 @@ static bool fbnic_rcd_metadata_err(u64 rcd) static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int budget) { - unsigned int packets = 0, bytes = 0, dropped = 0; + unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0; + u64 csum_complete = 0, csum_none = 0, length_errors = 0; + s32 head0 = -1, head1 = -1, pkt_tail = -1; struct fbnic_ring *rcq = &qt->cmpl; struct fbnic_pkt_buff *pkt; - s32 head0 = -1, head1 = -1; __le64 *raw_rcd, done; u32 head = rcq->head; @@ -912,6 +1244,7 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, /* Walk the completion queue collecting the heads reported by NIC */ while (likely(packets < budget)) { struct sk_buff *skb = ERR_PTR(-EINVAL); + u32 pkt_bytes; u64 rcd; if ((*raw_rcd & cpu_to_le64(FBNIC_RCD_DONE)) == done) @@ -942,22 +1275,38 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, /* We currently ignore the action table index */ break; case FBNIC_RCD_TYPE_META: - if (likely(!fbnic_rcd_metadata_err(rcd))) - skb = fbnic_build_skb(nv, pkt); + if (likely(!fbnic_rcd_metadata_err(rcd) && + !pkt->add_frag_failed)) { + pkt_bytes = xdp_get_buff_len(&pkt->buff); + skb = fbnic_run_xdp(nv, pkt); + } /* Populate skb and invalidate XDP */ if (!IS_ERR_OR_NULL(skb)) { - fbnic_populate_skb_fields(nv, rcd, skb, qt); - - packets++; - bytes += skb->len; - + fbnic_populate_skb_fields(nv, rcd, skb, qt, + &csum_complete, + &csum_none); napi_gro_receive(&nv->napi, skb); + } else if (skb == ERR_PTR(-FBNIC_XDP_TX)) { + pkt_tail = nv->qt[0].sub1.tail; + } else if (PTR_ERR(skb) == -FBNIC_XDP_CONSUME) { + fbnic_put_pkt_buff(qt, pkt, 1); } else { - dropped++; - fbnic_put_pkt_buff(nv, pkt, 1); + if (!skb) + alloc_failed++; + + if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR)) + length_errors++; + else + dropped++; + + fbnic_put_pkt_buff(qt, pkt, 1); + goto next_dont_count; } + packets++; + bytes += pkt_bytes; +next_dont_count: pkt->buff.data_hard_start = NULL; break; @@ -974,19 +1323,24 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, u64_stats_update_begin(&rcq->stats.syncp); rcq->stats.packets += packets; rcq->stats.bytes += bytes; - /* Re-add ethernet header length (removed in fbnic_build_skb) */ - rcq->stats.bytes += ETH_HLEN * packets; rcq->stats.dropped += dropped; + rcq->stats.rx.alloc_failed += alloc_failed; + rcq->stats.rx.csum_complete += csum_complete; + rcq->stats.rx.csum_none += csum_none; + rcq->stats.rx.length_errors += length_errors; u64_stats_update_end(&rcq->stats.syncp); + if 
(pkt_tail >= 0) + fbnic_pkt_commit_tail(nv, pkt_tail); + /* Unmap and free processed buffers */ if (head0 >= 0) - fbnic_clean_bdq(nv, budget, &qt->sub0, head0); - fbnic_fill_bdq(nv, &qt->sub0); + fbnic_clean_bdq(&qt->sub0, head0, budget); + fbnic_fill_bdq(&qt->sub0); if (head1 >= 0) - fbnic_clean_bdq(nv, budget, &qt->sub1, head1); - fbnic_fill_bdq(nv, &qt->sub1); + fbnic_clean_bdq(&qt->sub1, head1, budget); + fbnic_fill_bdq(&qt->sub1); /* Record the current head/tail of the queue */ if (rcq->head != head) { @@ -1033,20 +1387,20 @@ static int fbnic_poll(struct napi_struct *napi, int budget) if (likely(napi_complete_done(napi, work_done))) fbnic_nv_irq_rearm(nv); - return 0; + return work_done; } -static irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data) +irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data) { - struct fbnic_napi_vector *nv = data; + struct fbnic_napi_vector *nv = *(void **)data; napi_schedule_irqoff(&nv->napi); return IRQ_HANDLED; } -static void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, - struct fbnic_ring *rxr) +void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, + struct fbnic_ring *rxr) { struct fbnic_queue_stats *stats = &rxr->stats; @@ -1054,10 +1408,27 @@ static void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, fbn->rx_stats.bytes += stats->bytes; fbn->rx_stats.packets += stats->packets; fbn->rx_stats.dropped += stats->dropped; + fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed; + fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete; + fbn->rx_stats.rx.csum_none += stats->rx.csum_none; + fbn->rx_stats.rx.length_errors += stats->rx.length_errors; + /* Remember to add new stats here */ + BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4); } -static void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, - struct fbnic_ring *txr) +void fbnic_aggregate_ring_bdq_counters(struct fbnic_net *fbn, + struct fbnic_ring *bdq) +{ + struct fbnic_queue_stats *stats = &bdq->stats; + + /* Capture stats from queues before dissasociating them */ + fbn->bdq_stats.bdq.alloc_failed += stats->bdq.alloc_failed; + /* Remember to add new stats here */ + BUILD_BUG_ON(sizeof(fbn->rx_stats.bdq) / 8 != 1); +} + +void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, + struct fbnic_ring *txr) { struct fbnic_queue_stats *stats = &txr->stats; @@ -1065,8 +1436,28 @@ static void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, fbn->tx_stats.bytes += stats->bytes; fbn->tx_stats.packets += stats->packets; fbn->tx_stats.dropped += stats->dropped; - fbn->tx_stats.ts_lost += stats->ts_lost; - fbn->tx_stats.ts_packets += stats->ts_packets; + fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial; + fbn->tx_stats.twq.lso += stats->twq.lso; + fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost; + fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets; + fbn->tx_stats.twq.stop += stats->twq.stop; + fbn->tx_stats.twq.wake += stats->twq.wake; + /* Remember to add new stats here */ + BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6); +} + +void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn, + struct fbnic_ring *xdpr) +{ + struct fbnic_queue_stats *stats = &xdpr->stats; + + if (!(xdpr->flags & FBNIC_RING_F_STATS)) + return; + + /* Capture stats from queues before dissasociating them */ + fbn->tx_stats.dropped += stats->dropped; + fbn->tx_stats.bytes += stats->bytes; + fbn->tx_stats.packets += stats->packets; } static void fbnic_remove_tx_ring(struct fbnic_net *fbn, @@ -1082,6 +1473,19 @@ static void 
fbnic_remove_tx_ring(struct fbnic_net *fbn, fbn->tx[txr->q_idx] = NULL; } +static void fbnic_remove_xdp_ring(struct fbnic_net *fbn, + struct fbnic_ring *xdpr) +{ + if (!(xdpr->flags & FBNIC_RING_F_STATS)) + return; + + fbnic_aggregate_ring_xdp_counters(fbn, xdpr); + + /* Remove pointer to the Tx ring */ + WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr); + fbn->tx[xdpr->q_idx] = NULL; +} + static void fbnic_remove_rx_ring(struct fbnic_net *fbn, struct fbnic_ring *rxr) { @@ -1095,66 +1499,70 @@ static void fbnic_remove_rx_ring(struct fbnic_net *fbn, fbn->rx[rxr->q_idx] = NULL; } +static void fbnic_remove_bdq_ring(struct fbnic_net *fbn, + struct fbnic_ring *bdq) +{ + if (!(bdq->flags & FBNIC_RING_F_STATS)) + return; + + fbnic_aggregate_ring_bdq_counters(fbn, bdq); +} + +static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt) +{ + page_pool_destroy(qt->sub0.page_pool); + page_pool_destroy(qt->sub1.page_pool); +} + static void fbnic_free_napi_vector(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) { struct fbnic_dev *fbd = nv->fbd; - u32 v_idx = nv->v_idx; int i, j; for (i = 0; i < nv->txt_count; i++) { fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0); + fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1); fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl); } for (j = 0; j < nv->rxt_count; j++, i++) { - fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0); - fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1); + fbnic_remove_bdq_ring(fbn, &nv->qt[i].sub0); + fbnic_remove_bdq_ring(fbn, &nv->qt[i].sub1); fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl); } - fbnic_free_irq(fbd, v_idx, nv); - page_pool_destroy(nv->page_pool); - netif_napi_del(&nv->napi); - list_del(&nv->napis); + fbnic_napi_free_irq(fbd, nv); + netif_napi_del_locked(&nv->napi); + fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); } void fbnic_free_napi_vectors(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv, *temp; - - list_for_each_entry_safe(nv, temp, &fbn->napis, napis) - fbnic_free_napi_vector(fbn, nv); -} - -static void fbnic_name_napi_vector(struct fbnic_napi_vector *nv) -{ - unsigned char *dev_name = nv->napi.dev->name; + int i; - if (!nv->rxt_count) - snprintf(nv->name, sizeof(nv->name), "%s-Tx-%u", dev_name, - nv->v_idx - FBNIC_NON_NAPI_VECTORS); - else - snprintf(nv->name, sizeof(nv->name), "%s-TxRx-%u", dev_name, - nv->v_idx - FBNIC_NON_NAPI_VECTORS); + for (i = 0; i < fbn->num_napi; i++) + if (fbn->napi[i]) + fbnic_free_napi_vector(fbn, fbn->napi[i]); } -#define FBNIC_PAGE_POOL_FLAGS \ - (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) - -static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, - struct fbnic_napi_vector *nv) +static int +fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt, + unsigned int rxq_idx) { struct page_pool_params pp_params = { .order = 0, - .flags = FBNIC_PAGE_POOL_FLAGS, - .pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count, + .flags = PP_FLAG_DMA_MAP | + PP_FLAG_DMA_SYNC_DEV, + .pool_size = fbn->hpq_size + fbn->ppq_size, .nid = NUMA_NO_NODE, - .dev = nv->dev, + .dev = fbn->netdev->dev.parent, .dma_dir = DMA_BIDIRECTIONAL, .offset = 0, - .max_len = PAGE_SIZE + .max_len = PAGE_SIZE, + .netdev = fbn->netdev, + .queue_idx = rxq_idx, }; struct page_pool *pp; @@ -1174,9 +1582,24 @@ static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, if (IS_ERR(pp)) return PTR_ERR(pp); - nv->page_pool = pp; + qt->sub0.page_pool = pp; + if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) { + pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; + pp_params.dma_dir = DMA_FROM_DEVICE; + + pp = 
page_pool_create(&pp_params); + if (IS_ERR(pp)) + goto err_destroy_sub0; + } else { + page_pool_get(pp); + } + qt->sub1.page_pool = pp; return 0; + +err_destroy_sub0: + page_pool_destroy(pp); + return PTR_ERR(pp); } static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, @@ -1186,6 +1609,7 @@ static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, ring->doorbell = doorbell; ring->q_idx = q_idx; ring->flags = flags; + ring->deferred_head = -1; } static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, @@ -1195,11 +1619,18 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, { int txt_count = txq_count, rxt_count = rxq_count; u32 __iomem *uc_addr = fbd->uc_addr0; + int xdp_count = 0, qt_count, err; struct fbnic_napi_vector *nv; struct fbnic_q_triad *qt; - int qt_count, err; u32 __iomem *db; + /* We need to reserve at least one Tx Queue Triad for an XDP ring */ + if (rxq_count) { + xdp_count = 1; + if (!txt_count) + txt_count = 1; + } + qt_count = txt_count + rxq_count; if (!qt_count) return -EINVAL; @@ -1222,42 +1653,34 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, nv->v_idx = v_idx; /* Tie napi to netdev */ - list_add(&nv->napis, &fbn->napis); - netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll); + fbn->napi[fbnic_napi_idx(nv)] = nv; + netif_napi_add_config_locked(fbn->netdev, &nv->napi, fbnic_poll, + fbnic_napi_idx(nv)); /* Record IRQ to NAPI struct */ - netif_napi_set_irq(&nv->napi, - pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); + netif_napi_set_irq_locked(&nv->napi, + pci_irq_vector(to_pci_dev(fbd->dev), + nv->v_idx)); /* Tie nv back to PCIe dev */ nv->dev = fbd->dev; - /* Allocate page pool */ - if (rxq_count) { - err = fbnic_alloc_nv_page_pool(fbn, nv); - if (err) - goto napi_del; - } - - /* Initialize vector name */ - fbnic_name_napi_vector(nv); - /* Request the IRQ for napi vector */ - err = fbnic_request_irq(fbd, v_idx, &fbnic_msix_clean_rings, - IRQF_SHARED, nv->name, nv); + err = fbnic_napi_request_irq(fbd, nv); if (err) - goto pp_destroy; + goto napi_del; /* Initialize queue triads */ qt = nv->qt; while (txt_count) { + u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS; + /* Configure Tx queue */ db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL]; /* Assign Tx queue to netdev if applicable */ if (txq_count > 0) { - u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS; fbnic_ring_init(&qt->sub0, db, txq_idx, flags); fbn->tx[txq_idx] = &qt->sub0; @@ -1267,6 +1690,28 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, FBNIC_RING_F_DISABLED); } + /* Configure XDP queue */ + db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL]; + + /* Assign XDP queue to netdev if applicable + * + * The setup for this is in itself a bit different. + * 1. We only need one XDP Tx queue per NAPI vector. + * 2. We associate it to the first Rx queue index. + * 3. The hardware side is associated based on the Tx Queue. + * 4. The netdev queue is offset by FBNIC_MAX_TXQs. 
+ */ + if (xdp_count > 0) { + unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx; + + fbnic_ring_init(&qt->sub1, db, xdp_idx, flags); + fbn->tx[xdp_idx] = &qt->sub1; + xdp_count--; + } else { + fbnic_ring_init(&qt->sub1, db, 0, + FBNIC_RING_F_DISABLED); + } + /* Configure Tx completion queue */ db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD]; fbnic_ring_init(&qt->cmpl, db, 0, 0); @@ -1282,11 +1727,13 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, while (rxt_count) { /* Configure header queue */ db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_HPQ_TAIL]; - fbnic_ring_init(&qt->sub0, db, 0, FBNIC_RING_F_CTX); + fbnic_ring_init(&qt->sub0, db, 0, + FBNIC_RING_F_CTX | FBNIC_RING_F_STATS); /* Configure payload queue */ db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_BDQ_PPQ_TAIL]; - fbnic_ring_init(&qt->sub1, db, 0, FBNIC_RING_F_CTX); + fbnic_ring_init(&qt->sub1, db, 0, + FBNIC_RING_F_CTX | FBNIC_RING_F_STATS); /* Configure Rx completion queue */ db = &uc_addr[FBNIC_QUEUE(rxq_idx) + FBNIC_QUEUE_RCQ_HEAD]; @@ -1303,11 +1750,9 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, return 0; -pp_destroy: - page_pool_destroy(nv->page_pool); napi_del: - netif_napi_del(&nv->napi); - list_del(&nv->napis); + netif_napi_del_locked(&nv->napi); + fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); return err; } @@ -1363,7 +1808,7 @@ int fbnic_alloc_napi_vectors(struct fbnic_net *fbn) free_vectors: fbnic_free_napi_vectors(fbn); - return -ENOMEM; + return err; } static void fbnic_free_ring_resources(struct device *dev, @@ -1520,6 +1965,12 @@ static void fbnic_free_qt_resources(struct fbnic_net *fbn, fbnic_free_ring_resources(dev, &qt->cmpl); fbnic_free_ring_resources(dev, &qt->sub1); fbnic_free_ring_resources(dev, &qt->sub0); + + if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) { + xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); + xdp_rxq_info_unreg(&qt->xdp_rxq); + fbnic_free_qt_page_pools(qt); + } } static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, @@ -1532,6 +1983,10 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, if (err) return err; + err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1); + if (err) + goto free_sub0; + err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl); if (err) goto free_sub1; @@ -1539,20 +1994,37 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, return 0; free_sub1: + fbnic_free_ring_resources(dev, &qt->sub1); +free_sub0: fbnic_free_ring_resources(dev, &qt->sub0); return err; } static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn, + struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt) { struct device *dev = fbn->netdev->dev.parent; int err; - err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); + err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx); if (err) return err; + err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx, + nv->napi.napi_id); + if (err) + goto free_page_pools; + + err = xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, MEM_TYPE_PAGE_POOL, + qt->sub0.page_pool); + if (err) + goto unreg_rxq; + + err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); + if (err) + goto unreg_mm; + err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1); if (err) goto free_sub0; @@ -1567,19 +2039,21 @@ free_sub1: fbnic_free_ring_resources(dev, &qt->sub1); free_sub0: fbnic_free_ring_resources(dev, &qt->sub0); +unreg_mm: + xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); +unreg_rxq: + xdp_rxq_info_unreg(&qt->xdp_rxq); +free_page_pools: + fbnic_free_qt_page_pools(qt); return err; 
} static void fbnic_free_nv_resources(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) { - int i, j; - - /* Free Tx Resources */ - for (i = 0; i < nv->txt_count; i++) - fbnic_free_qt_resources(fbn, &nv->qt[i]); + int i; - for (j = 0; j < nv->rxt_count; j++, i++) + for (i = 0; i < nv->txt_count + nv->rxt_count; i++) fbnic_free_qt_resources(fbn, &nv->qt[i]); } @@ -1592,19 +2066,19 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn, for (i = 0; i < nv->txt_count; i++) { err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]); if (err) - goto free_resources; + goto free_qt_resources; } /* Allocate Rx Resources */ for (j = 0; j < nv->rxt_count; j++, i++) { - err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]); + err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]); if (err) - goto free_resources; + goto free_qt_resources; } return 0; -free_resources: +free_qt_resources: while (i--) fbnic_free_qt_resources(fbn, &nv->qt[i]); return err; @@ -1612,19 +2086,18 @@ free_resources: void fbnic_free_resources(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; + int i; - list_for_each_entry(nv, &fbn->napis, napis) - fbnic_free_nv_resources(fbn, nv); + for (i = 0; i < fbn->num_napi; i++) + fbnic_free_nv_resources(fbn, fbn->napi[i]); } int fbnic_alloc_resources(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; - int err = -ENODEV; + int i, err = -ENODEV; - list_for_each_entry(nv, &fbn->napis, napis) { - err = fbnic_alloc_nv_resources(fbn, nv); + for (i = 0; i < fbn->num_napi; i++) { + err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]); if (err) goto free_resources; } @@ -1632,12 +2105,77 @@ int fbnic_alloc_resources(struct fbnic_net *fbn) return 0; free_resources: - list_for_each_entry_continue_reverse(nv, &fbn->napis, napis) - fbnic_free_nv_resources(fbn, nv); + while (i--) + fbnic_free_nv_resources(fbn, fbn->napi[i]); return err; } +static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv) +{ + int i, j; + + /* Associate Tx queue with NAPI */ + for (i = 0; i < nv->txt_count; i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, + NETDEV_QUEUE_TYPE_TX, &nv->napi); + } + + /* Associate Rx queue with NAPI */ + for (j = 0; j < nv->rxt_count; j++, i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, + NETDEV_QUEUE_TYPE_RX, &nv->napi); + } +} + +static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv) +{ + int i, j; + + /* Disassociate Tx queue from NAPI */ + for (i = 0; i < nv->txt_count; i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, + NETDEV_QUEUE_TYPE_TX, NULL); + } + + /* Disassociate Rx queue from NAPI */ + for (j = 0; j < nv->rxt_count; j++, i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, + NETDEV_QUEUE_TYPE_RX, NULL); + } +} + +int fbnic_set_netif_queues(struct fbnic_net *fbn) +{ + int i, err; + + err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues, + fbn->num_rx_queues); + if (err) + return err; + + for (i = 0; i < fbn->num_napi; i++) + fbnic_set_netif_napi(fbn->napi[i]); + + return 0; +} + +void fbnic_reset_netif_queues(struct fbnic_net *fbn) +{ + int i; + + for (i = 0; i < fbn->num_napi; i++) + fbnic_reset_netif_napi(fbn->napi[i]); +} + static void fbnic_disable_twq0(struct fbnic_ring *txr) { u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL); @@ -1647,6 +2185,15 @@ static void fbnic_disable_twq0(struct fbnic_ring *txr) fbnic_ring_wr32(txr, 
FBNIC_QUEUE_TWQ0_CTL, twq_ctl); } +static void fbnic_disable_twq1(struct fbnic_ring *txr) +{ + u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL); + + twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE; + + fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl); +} + static void fbnic_disable_tcq(struct fbnic_ring *txr) { fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0); @@ -1670,38 +2217,51 @@ static void fbnic_disable_rcq(struct fbnic_ring *rxr) void fbnic_napi_disable(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; + int i; - list_for_each_entry(nv, &fbn->napis, napis) { - napi_disable(&nv->napi); + for (i = 0; i < fbn->num_napi; i++) { + napi_disable_locked(&fbn->napi[i]->napi); - fbnic_nv_irq_disable(nv); + fbnic_nv_irq_disable(fbn->napi[i]); } } -void fbnic_disable(struct fbnic_net *fbn) +static void __fbnic_nv_disable(struct fbnic_napi_vector *nv) { - struct fbnic_dev *fbd = fbn->fbd; - struct fbnic_napi_vector *nv; - int i, j; + int i, t; - list_for_each_entry(nv, &fbn->napis, napis) { - /* Disable Tx queue triads */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + /* Disable Tx queue triads */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_disable_twq0(&qt->sub0); - fbnic_disable_tcq(&qt->cmpl); - } + fbnic_disable_twq0(&qt->sub0); + fbnic_disable_twq1(&qt->sub1); + fbnic_disable_tcq(&qt->cmpl); + } - /* Disable Rx queue triads */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + /* Disable Rx queue triads */ + for (i = 0; i < nv->rxt_count; i++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_disable_bdq(&qt->sub0, &qt->sub1); - fbnic_disable_rcq(&qt->cmpl); - } + fbnic_disable_bdq(&qt->sub0, &qt->sub1); + fbnic_disable_rcq(&qt->cmpl); } +} + +static void +fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) +{ + __fbnic_nv_disable(nv); + fbnic_wrfl(fbn->fbd); +} + +void fbnic_disable(struct fbnic_net *fbn) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i; + + for (i = 0; i < fbn->num_napi; i++) + __fbnic_nv_disable(fbn->napi[i]); fbnic_wrfl(fbd); } @@ -1790,94 +2350,117 @@ int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail) return err; } -void fbnic_flush(struct fbnic_net *fbn) +static int +fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx) { - struct fbnic_napi_vector *nv; + static const unsigned int tx_regs[] = { + FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0), + FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0), + }, rx_regs[] = { + FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0), + FBNIC_QM_RCQ_IDLE(0), + }; + struct fbnic_dev *fbd = fbn->fbd; + unsigned int val, mask, off; + const unsigned int *regs; + unsigned int reg_cnt; + int i, err; - list_for_each_entry(nv, &fbn->napis, napis) { - int i, j; + regs = rx ? rx_regs : tx_regs; + reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs); - /* Flush any processed Tx Queue Triads and drop the rest */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; - struct netdev_queue *tx_queue; + off = idx / 32; + mask = BIT(idx % 32); - /* Clean the work queues of unprocessed work */ - fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); + for (i = 0; i < reg_cnt; i++) { + err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask, + 2, 500000, false, + fbd, regs[i] + off); + if (err) { + netdev_err(fbd->netdev, + "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n", + rx ? 
"Rx" : "Tx", idx, regs[i] + off, i, + val, mask); + return err; + } + } - /* Reset completion queue descriptor ring */ - memset(qt->cmpl.desc, 0, qt->cmpl.size); + return 0; +} - /* Nothing else to do if Tx queue is disabled */ - if (qt->sub0.flags & FBNIC_RING_F_DISABLED) - continue; +static void fbnic_nv_flush(struct fbnic_napi_vector *nv) +{ + int j, t; - /* Reset BQL associated with Tx queue */ - tx_queue = netdev_get_tx_queue(nv->napi.dev, - qt->sub0.q_idx); - netdev_tx_reset_queue(tx_queue); + /* Flush any processed Tx Queue Triads and drop the rest */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; + struct netdev_queue *tx_queue; - /* Disassociate Tx queue from NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, - NETDEV_QUEUE_TYPE_TX, NULL); - } + /* Clean the work queues of unprocessed work */ + fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); + fbnic_clean_twq1(nv, false, &qt->sub1, true, + qt->sub1.tail); - /* Flush any processed Rx Queue Triads and drop the rest */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + /* Reset completion queue descriptor ring */ + memset(qt->cmpl.desc, 0, qt->cmpl.size); - /* Clean the work queues of unprocessed work */ - fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail); - fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail); + /* Nothing else to do if Tx queue is disabled */ + if (qt->sub0.flags & FBNIC_RING_F_DISABLED) + continue; + + /* Reset BQL associated with Tx queue */ + tx_queue = netdev_get_tx_queue(nv->napi.dev, + qt->sub0.q_idx); + netdev_tx_reset_queue(tx_queue); + } - /* Reset completion queue descriptor ring */ - memset(qt->cmpl.desc, 0, qt->cmpl.size); + /* Flush any processed Rx Queue Triads and drop the rest */ + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0); - qt->cmpl.pkt->buff.data_hard_start = NULL; + /* Clean the work queues of unprocessed work */ + fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0); + fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0); - /* Disassociate Rx queue from NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, - NETDEV_QUEUE_TYPE_RX, NULL); - } + /* Reset completion queue descriptor ring */ + memset(qt->cmpl.desc, 0, qt->cmpl.size); + + fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 0); + memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff)); } } -void fbnic_fill(struct fbnic_net *fbn) +void fbnic_flush(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; - - list_for_each_entry(nv, &fbn->napis, napis) { - int i, j; + int i; - /* Configure NAPI mapping for Tx */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (i = 0; i < fbn->num_napi; i++) + fbnic_nv_flush(fbn->napi[i]); +} - /* Nothing to do if Tx queue is disabled */ - if (qt->sub0.flags & FBNIC_RING_F_DISABLED) - continue; +static void fbnic_nv_fill(struct fbnic_napi_vector *nv) +{ + int j, t; - /* Associate Tx queue with NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, - NETDEV_QUEUE_TYPE_TX, &nv->napi); - } + /* Configure NAPI mapping and populate pages + * in the BDQ rings to use for Rx + */ + for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - /* Configure NAPI mapping and populate pages - * in the BDQ rings to use for Rx - */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + /* Populate the header and payload BDQs */ + fbnic_fill_bdq(&qt->sub0); + 
fbnic_fill_bdq(&qt->sub1); + } +} - /* Associate Rx queue with NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, - NETDEV_QUEUE_TYPE_RX, &nv->napi); +void fbnic_fill(struct fbnic_net *fbn) +{ + int i; - /* Populate the header and payload BDQs */ - fbnic_fill_bdq(nv, &qt->sub0); - fbnic_fill_bdq(nv, &qt->sub1); - } - } + for (i = 0; i < fbn->num_napi; i++) + fbnic_nv_fill(fbn->napi[i]); } static void fbnic_enable_twq0(struct fbnic_ring *twq) @@ -1902,6 +2485,28 @@ static void fbnic_enable_twq0(struct fbnic_ring *twq) fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE); } +static void fbnic_enable_twq1(struct fbnic_ring *twq) +{ + u32 log_size = fls(twq->size_mask); + + if (!twq->size_mask) + return; + + /* Reset head/tail */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET); + twq->tail = 0; + twq->head = 0; + + /* Store descriptor ring address and size */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma)); + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma)); + + /* Write lower 4 bits of log size as 64K ring size is 0 */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf); + + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE); +} + static void fbnic_enable_tcq(struct fbnic_napi_vector *nv, struct fbnic_ring *tcq) { @@ -1970,11 +2575,15 @@ write_ctl: } static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv, - struct fbnic_ring *rcq) + struct fbnic_ring *rcq, bool tx_pause) { + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); u32 drop_mode, rcq_ctl; - drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE; + if (!tx_pause && fbn->num_rx_queues > 1) + drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE; + else + drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_NEVER; /* Specify packet layout */ rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) | @@ -1984,17 +2593,83 @@ static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv, fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl); } +void fbnic_config_drop_mode(struct fbnic_net *fbn, bool tx_pause) +{ + int i, t; + + for (i = 0; i < fbn->num_napi; i++) { + struct fbnic_napi_vector *nv = fbn->napi[i]; + + for (t = 0; t < nv->rxt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[nv->txt_count + t]; + + fbnic_config_drop_mode_rcq(nv, &qt->cmpl, tx_pause); + } + } +} + +static void fbnic_config_rim_threshold(struct fbnic_ring *rcq, u16 nv_idx, u32 rx_desc) +{ + u32 threshold; + + /* Set the threhsold to half the ring size if rx_frames + * is not configured + */ + threshold = rx_desc ? 
: rcq->size_mask / 2; + + fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv_idx); + fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, threshold); +} + +void fbnic_config_txrx_usecs(struct fbnic_napi_vector *nv, u32 arm) +{ + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); + struct fbnic_dev *fbd = nv->fbd; + u32 val = arm; + + val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT, fbn->rx_usecs) | + FBNIC_INTR_CQ_REARM_RCQ_TIMEOUT_UPD_EN; + val |= FIELD_PREP(FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT, fbn->tx_usecs) | + FBNIC_INTR_CQ_REARM_TCQ_TIMEOUT_UPD_EN; + + fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val); +} + +void fbnic_config_rx_frames(struct fbnic_napi_vector *nv) +{ + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); + int i; + + for (i = nv->txt_count; i < nv->rxt_count + nv->txt_count; i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + fbnic_config_rim_threshold(&qt->cmpl, nv->v_idx, + fbn->rx_max_frames * + FBNIC_MIN_RXD_PER_FRAME); + } +} + static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, struct fbnic_ring *rcq) { + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); u32 log_size = fls(rcq->size_mask); - u32 rcq_ctl; + u32 hds_thresh = fbn->hds_thresh; + u32 rcq_ctl = 0; + + fbnic_config_drop_mode_rcq(nv, rcq, fbn->tx_pause); - fbnic_config_drop_mode_rcq(nv, rcq); + /* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should + * be split at L4. It would also result in the frames being split at + * L2/L3 depending on the frame size. + */ + if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) { + rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT; + hds_thresh = FBNIC_HDR_BYTES_MIN; + } - rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | - FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, - FBNIC_RX_MAX_HDR) | + rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | + FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) | FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK, FBNIC_RX_PAYLD_OFFSET) | FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK, @@ -2014,61 +2689,73 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_SIZE, log_size & 0xf); /* Store interrupt information for the completion queue */ - fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_CTL, nv->v_idx); - fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_THRESHOLD, rcq->size_mask / 2); + fbnic_config_rim_threshold(rcq, nv->v_idx, fbn->rx_max_frames * + FBNIC_MIN_RXD_PER_FRAME); fbnic_ring_wr32(rcq, FBNIC_QUEUE_RIM_MASK, 0); /* Enable queue */ fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE); } -void fbnic_enable(struct fbnic_net *fbn) +static void __fbnic_nv_enable(struct fbnic_napi_vector *nv) { - struct fbnic_dev *fbd = fbn->fbd; - struct fbnic_napi_vector *nv; - int i, j; + int j, t; - list_for_each_entry(nv, &fbn->napis, napis) { - /* Setup Tx Queue Triads */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + /* Setup Tx Queue Triads */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_enable_twq0(&qt->sub0); - fbnic_enable_tcq(nv, &qt->cmpl); - } + fbnic_enable_twq0(&qt->sub0); + fbnic_enable_twq1(&qt->sub1); + fbnic_enable_tcq(nv, &qt->cmpl); + } - /* Setup Rx Queue Triads */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + /* Setup Rx Queue Triads */ + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_enable_bdq(&qt->sub0, &qt->sub1); - fbnic_config_drop_mode_rcq(nv, &qt->cmpl); - 
 
-void fbnic_enable(struct fbnic_net *fbn)
+static void __fbnic_nv_enable(struct fbnic_napi_vector *nv)
 {
-	struct fbnic_dev *fbd = fbn->fbd;
-	struct fbnic_napi_vector *nv;
-	int i, j;
+	int j, t;
 
-	list_for_each_entry(nv, &fbn->napis, napis) {
-		/* Setup Tx Queue Triads */
-		for (i = 0; i < nv->txt_count; i++) {
-			struct fbnic_q_triad *qt = &nv->qt[i];
+	/* Setup Tx Queue Triads */
+	for (t = 0; t < nv->txt_count; t++) {
+		struct fbnic_q_triad *qt = &nv->qt[t];
 
-			fbnic_enable_twq0(&qt->sub0);
-			fbnic_enable_tcq(nv, &qt->cmpl);
-		}
+		fbnic_enable_twq0(&qt->sub0);
+		fbnic_enable_twq1(&qt->sub1);
+		fbnic_enable_tcq(nv, &qt->cmpl);
+	}
 
-		/* Setup Rx Queue Triads */
-		for (j = 0; j < nv->rxt_count; j++, i++) {
-			struct fbnic_q_triad *qt = &nv->qt[i];
+	/* Setup Rx Queue Triads */
+	for (j = 0; j < nv->rxt_count; j++, t++) {
+		struct fbnic_q_triad *qt = &nv->qt[t];
 
-			fbnic_enable_bdq(&qt->sub0, &qt->sub1);
-			fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
-			fbnic_enable_rcq(nv, &qt->cmpl);
-		}
+		page_pool_enable_direct_recycling(qt->sub0.page_pool,
+						  &nv->napi);
+		page_pool_enable_direct_recycling(qt->sub1.page_pool,
+						  &nv->napi);
+
+		fbnic_enable_bdq(&qt->sub0, &qt->sub1);
+		fbnic_enable_rcq(nv, &qt->cmpl);
 	}
+}
 
-	fbnic_wrfl(fbd);
+static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv)
+{
+	__fbnic_nv_enable(nv);
+	fbnic_wrfl(fbn->fbd);
 }
 
-static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
+void fbnic_enable(struct fbnic_net *fbn)
 {
-	struct fbnic_dev *fbd = nv->fbd;
-	u32 val;
+	struct fbnic_dev *fbd = fbn->fbd;
+	int i;
 
-	val = FBNIC_INTR_CQ_REARM_INTR_UNMASK;
+	for (i = 0; i < fbn->num_napi; i++)
+		__fbnic_nv_enable(fbn->napi[i]);
 
-	fbnic_wr32(fbd, FBNIC_INTR_CQ_REARM(nv->v_idx), val);
+	fbnic_wrfl(fbd);
+}
+
+static void fbnic_nv_irq_enable(struct fbnic_napi_vector *nv)
+{
+	fbnic_config_txrx_usecs(nv, FBNIC_INTR_CQ_REARM_INTR_UNMASK);
 }
 
 void fbnic_napi_enable(struct fbnic_net *fbn)
 {
 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
 	struct fbnic_dev *fbd = fbn->fbd;
-	struct fbnic_napi_vector *nv;
 	int i;
 
-	list_for_each_entry(nv, &fbn->napis, napis) {
-		napi_enable(&nv->napi);
+	for (i = 0; i < fbn->num_napi; i++) {
+		struct fbnic_napi_vector *nv = fbn->napi[i];
+
+		napi_enable_locked(&nv->napi);
 
 		fbnic_nv_irq_enable(nv);
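With fbnic_nv_irq_enable() now funneling through fbnic_config_txrx_usecs(), re-arming the interrupt and refreshing both coalescing timeouts happen in a single REARM register write. A standalone sketch of that packing; the masks and bit positions below are illustrative stand-ins, not the real FBNIC_INTR_CQ_REARM_* layout:

#include <stdint.h>
#include <stdio.h>

#define RCQ_TIMEOUT_MASK	0x00003fffu	/* assumed field, bits 13:0 */
#define RCQ_TIMEOUT_UPD_EN	(1u << 14)	/* assumed */
#define TCQ_TIMEOUT_MASK	0x3fff0000u	/* assumed field, bits 29:16 */
#define TCQ_TIMEOUT_UPD_EN	(1u << 30)	/* assumed */
#define INTR_UNMASK		(1u << 31)	/* assumed */

/* Minimal FIELD_PREP(): shift a value into a contiguous mask */
static uint32_t field_prep(uint32_t mask, uint32_t val)
{
	return (val << __builtin_ctz(mask)) & mask;
}

int main(void)
{
	uint32_t rx_usecs = 30, tx_usecs = 60;
	uint32_t val = INTR_UNMASK;	/* the "arm" argument */

	val |= field_prep(RCQ_TIMEOUT_MASK, rx_usecs) | RCQ_TIMEOUT_UPD_EN;
	val |= field_prep(TCQ_TIMEOUT_MASK, tx_usecs) | TCQ_TIMEOUT_UPD_EN;

	printf("REARM write: %#010x\n", val);	/* one CSR write covers all three */
	return 0;
}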
@@ -2096,17 +2783,18 @@ void fbnic_napi_depletion_check(struct net_device *netdev)
 	struct fbnic_net *fbn = netdev_priv(netdev);
 	u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {};
 	struct fbnic_dev *fbd = fbn->fbd;
-	struct fbnic_napi_vector *nv;
-	int i, j;
+	int i, j, t;
+
+	for (i = 0; i < fbn->num_napi; i++) {
+		struct fbnic_napi_vector *nv = fbn->napi[i];
 
-	list_for_each_entry(nv, &fbn->napis, napis) {
 		/* Find RQs which are completely out of pages */
-		for (i = nv->txt_count, j = 0; j < nv->rxt_count; j++, i++) {
+		for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) {
 			/* Assume 4 pages is always enough to fit a packet
 			 * and therefore generate a completion and an IRQ.
 			 */
-			if (fbnic_desc_used(&nv->qt[i].sub0) < 4 ||
-			    fbnic_desc_used(&nv->qt[i].sub1) < 4)
+			if (fbnic_desc_used(&nv->qt[t].sub0) < 4 ||
+			    fbnic_desc_used(&nv->qt[t].sub1) < 4)
 				irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32);
 		}
 	}
@@ -2120,3 +2808,123 @@ void fbnic_napi_depletion_check(struct net_device *netdev)
 
 	fbnic_wrfl(fbd);
 }
+
+static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
+{
+	struct fbnic_net *fbn = netdev_priv(dev);
+	const struct fbnic_q_triad *real;
+	struct fbnic_q_triad *qt = qmem;
+	struct fbnic_napi_vector *nv;
+
+	if (!netif_running(dev))
+		return fbnic_alloc_qt_page_pools(fbn, qt, idx);
+
+	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
+	nv = fbn->napi[idx % fbn->num_napi];
+
+	fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx,
+			real->sub0.flags);
+	fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx,
+			real->sub1.flags);
+	fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx,
+			real->cmpl.flags);
+
+	return fbnic_alloc_rx_qt_resources(fbn, nv, qt);
+}
+
+static void fbnic_queue_mem_free(struct net_device *dev, void *qmem)
+{
+	struct fbnic_net *fbn = netdev_priv(dev);
+	struct fbnic_q_triad *qt = qmem;
+
+	if (!netif_running(dev))
+		fbnic_free_qt_page_pools(qt);
+	else
+		fbnic_free_qt_resources(fbn, qt);
+}
+
+static void __fbnic_nv_restart(struct fbnic_net *fbn,
+			       struct fbnic_napi_vector *nv)
+{
+	struct fbnic_dev *fbd = fbn->fbd;
+	int i;
+
+	fbnic_nv_enable(fbn, nv);
+	fbnic_nv_fill(nv);
+
+	napi_enable_locked(&nv->napi);
+	fbnic_nv_irq_enable(nv);
+	fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32));
+	fbnic_wrfl(fbd);
+
+	for (i = 0; i < nv->txt_count; i++)
+		netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx);
+}
+
+static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx)
+{
+	struct fbnic_net *fbn = netdev_priv(dev);
+	struct fbnic_napi_vector *nv;
+	struct fbnic_q_triad *real;
+
+	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
+	nv = fbn->napi[idx % fbn->num_napi];
+
+	fbnic_aggregate_ring_bdq_counters(fbn, &real->sub0);
+	fbnic_aggregate_ring_bdq_counters(fbn, &real->sub1);
+	fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl);
+
+	memcpy(real, qmem, sizeof(*real));
+
+	__fbnic_nv_restart(fbn, nv);
+
+	return 0;
+}
+
+static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx)
+{
+	struct fbnic_net *fbn = netdev_priv(dev);
+	const struct fbnic_q_triad *real;
+	struct fbnic_napi_vector *nv;
+	int i, t;
+	int err;
+
+	real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl);
+	nv = fbn->napi[idx % fbn->num_napi];
+
+	napi_disable_locked(&nv->napi);
+	fbnic_nv_irq_disable(nv);
+
+	for (i = 0; i < nv->txt_count; i++)
+		netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx);
+	fbnic_nv_disable(fbn, nv);
+
+	for (t = 0; t < nv->txt_count + nv->rxt_count; t++) {
+		err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count,
+					    nv->qt[t].sub0.q_idx);
+		if (err)
+			goto err_restart;
+	}
+
+	fbnic_synchronize_irq(fbn->fbd, nv->v_idx);
+	fbnic_nv_flush(nv);
+
+	page_pool_disable_direct_recycling(real->sub0.page_pool);
+	page_pool_disable_direct_recycling(real->sub1.page_pool);
+
+	memcpy(qmem, real, sizeof(*real));
+
+	return 0;
+
+err_restart:
+	__fbnic_nv_restart(fbn, nv);
+	return err;
+}
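The stop/start pair above implements the core queue API contract: on stop, the live rings are quiesced and their state snapshotted into the core-provided qmem buffer (for a later mem_free), and on start the freshly allocated state is memcpy()'d over the live triad before the NAPI vector restarts. A user-space model of just that swap, with a simplified stand-in for struct fbnic_q_triad:

#include <stdio.h>
#include <string.h>

struct q_triad { int sub0, sub1, cmpl; };	/* simplified stand-in */

static void queue_stop(struct q_triad *real, void *qmem)
{
	/* hardware is quiesced first, then the old state is saved */
	memcpy(qmem, real, sizeof(*real));
}

static void queue_start(struct q_triad *real, const void *qmem)
{
	/* the new rings become live, then the vector is restarted */
	memcpy(real, qmem, sizeof(*real));
}

int main(void)
{
	struct q_triad live = { 1, 2, 3 };	/* currently attached rings */
	struct q_triad fresh = { 4, 5, 6 };	/* from ndo_queue_mem_alloc */
	struct q_triad stash;			/* qmem handed to stop */

	queue_stop(&live, &stash);	/* old state kept for mem_free */
	queue_start(&live, &fresh);	/* swap in the new rings */

	printf("live %d/%d/%d, stash %d/%d/%d\n",
	       live.sub0, live.sub1, live.cmpl,
	       stash.sub0, stash.sub1, stash.cmpl);
	return 0;
}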
+
+const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = {
+	.ndo_queue_mem_size = sizeof(struct fbnic_q_triad),
+	.ndo_queue_mem_alloc = fbnic_queue_mem_alloc,
+	.ndo_queue_mem_free = fbnic_queue_mem_free,
+	.ndo_queue_start = fbnic_queue_start,
+	.ndo_queue_stop = fbnic_queue_stop,
+};
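For this table to take effect, the driver presumably assigns it at netdev setup time elsewhere in the series, along the lines of netdev->queue_mgmt_ops = &fbnic_queue_mgmt_ops; (struct net_device carries a queue_mgmt_ops pointer for exactly this purpose). With that hook in place, the core can resize or restart an individual Rx queue without a full ifdown/ifup cycle.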
