diff options
Diffstat (limited to 'drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c')
| -rw-r--r-- | drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c | 1183 |
1 files changed, 1183 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c new file mode 100644 index 000000000000..e3ddf18dcbf5 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -0,0 +1,1183 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include <net/libeth/xdp.h> + +#include "idpf.h" + +/** + * idpf_tx_singleq_csum - Enable tx checksum offloads + * @skb: pointer to skb + * @off: pointer to struct that holds offload parameters + * + * Returns 0 or error (negative) if checksum offload cannot be executed, 1 + * otherwise. + */ +static int idpf_tx_singleq_csum(struct sk_buff *skb, + struct idpf_tx_offload_params *off) +{ + u32 l4_len, l3_len, l2_len; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + u32 offset, cmd = 0; + u8 l4_proto = 0; + __be16 frag_off; + bool is_tso; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + l2_len = ip.hdr - skb->data; + offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2); + is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO); + if (skb->encapsulation) { + u32 tunnel = 0; + + /* define outer network header type */ + if (off->tx_flags & IDPF_TX_FLAGS_IPV4) { + /* The stack computes the IP header already, the only + * time we need the hardware to recompute it is in the + * case of TSO. + */ + tunnel |= is_tso ? + IDPF_TX_CTX_EXT_IP_IPV4 : + IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) { + tunnel |= IDPF_TX_CTX_EXT_IP_IPV6; + + l4_proto = ip.v6->nexthdr; + if (ipv6_ext_hdr(l4_proto)) + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(*ip.v6), + &l4_proto, &frag_off); + } + + /* define outer transport */ + switch (l4_proto) { + case IPPROTO_UDP: + tunnel |= IDPF_TXD_CTX_UDP_TUNNELING; + break; + case IPPROTO_GRE: + tunnel |= IDPF_TXD_CTX_GRE_TUNNELING; + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + l4.hdr = skb_inner_network_header(skb); + break; + default: + if (is_tso) + return -1; + + skb_checksum_help(skb); + + return 0; + } + off->tx_flags |= IDPF_TX_FLAGS_TUNNEL; + + /* compute outer L3 header size */ + tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M, + (l4.hdr - ip.hdr) / 4); + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* compute tunnel header size */ + tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M, + (ip.hdr - l4.hdr) / 2); + + /* indicate if we need to offload outer UDP header */ + if (is_tso && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M; + + /* record tunnel offload values */ + off->cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + off->tx_flags |= IDPF_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + off->tx_flags |= IDPF_TX_FLAGS_IPV6; + } + + /* Enable IP checksum offloads */ + if (off->tx_flags & IDPF_TX_FLAGS_IPV4) { + l4_proto = ip.v4->protocol; + /* See comment above regarding need for HW to recompute IP + * header checksum in the case of TSO. + */ + if (is_tso) + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM; + else + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4; + + } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) { + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6; + l4_proto = ip.v6->nexthdr; + if (ipv6_ext_hdr(l4_proto)) + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(*ip.v6), &l4_proto, + &frag_off); + } else { + return -1; + } + + /* compute inner L3 header size */ + l3_len = l4.hdr - ip.hdr; + offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S; + + /* Enable L4 checksum offloads */ + switch (l4_proto) { + case IPPROTO_TCP: + /* enable checksum offloads */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP; + l4_len = l4.tcp->doff; + break; + case IPPROTO_UDP: + /* enable UDP checksum offload */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP; + l4_len = sizeof(struct udphdr) >> 2; + break; + case IPPROTO_SCTP: + /* enable SCTP checksum offload */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP; + l4_len = sizeof(struct sctphdr) >> 2; + break; + default: + if (is_tso) + return -1; + + skb_checksum_help(skb); + + return 0; + } + + offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S; + off->td_cmd |= cmd; + off->hdr_offsets |= offset; + + return 1; +} + +/** + * idpf_tx_singleq_dma_map_error - handle TX DMA map errors + * @txq: queue to send buffer on + * @skb: send buffer + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq, + struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) +{ + struct libeth_sq_napi_stats ss = { }; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ + for (;;) { + struct idpf_tx_buf *tx_buf; + + tx_buf = &txq->tx_buf[idx]; + libeth_tx_complete(tx_buf, &cp); + if (tx_buf == first) + break; + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + if (skb_is_gso(skb)) { + union idpf_tx_flex_desc *tx_desc; + + /* If we failed a DMA mapping for a TSO packet, we will have + * used one additional descriptor for a context + * descriptor. Reset that here. + */ + tx_desc = &txq->flex_tx[idx]; + memset(tx_desc, 0, sizeof(*tx_desc)); + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + /* Update tail in case netdev_xmit_more was previously true */ + idpf_tx_buf_hw_update(txq, idx, false); +} + +/** + * idpf_tx_singleq_map - Build the Tx base descriptor + * @tx_q: queue to send buffer on + * @first: first buffer info buffer to use + * @offloads: pointer to struct that holds offload parameters + * + * This function loops over the skb data pointed to by *first + * and gets a physical address for each memory location and programs + * it and the length into the transmit base mode descriptor. + */ +static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, + struct idpf_tx_buf *first, + struct idpf_tx_offload_params *offloads) +{ + u32 offsets = offloads->hdr_offsets; + struct idpf_tx_buf *tx_buf = first; + struct idpf_base_tx_desc *tx_desc; + struct sk_buff *skb = first->skb; + u64 td_cmd = offloads->td_cmd; + unsigned int data_len, size; + u16 i = tx_q->next_to_use; + struct netdev_queue *nq; + skb_frag_t *frag; + dma_addr_t dma; + u64 td_tag = 0; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = &tx_q->base_tx[i]; + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + /* write each descriptor with CRC bit */ + if (idpf_queue_has(CRC_EN, tx_q)) + td_cmd |= IDPF_TX_DESC_CMD_ICRC; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + + if (unlikely(dma_mapping_error(tx_q->dev, dma))) + return idpf_tx_singleq_dma_map_error(tx_q, skb, + first, i); + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + tx_buf->type = LIBETH_SQE_FRAG; + + /* align size to end of page */ + max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1); + tx_desc->buf_addr = cpu_to_le64(dma); + + /* account for data chunks larger than the hardware + * can handle + */ + while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) { + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, + offsets, + max_data, + td_tag); + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = &tx_q->tx_buf[0]; + tx_desc = &tx_q->base_tx[0]; + i = 0; + } else { + tx_buf++; + tx_desc++; + } + + tx_buf->type = LIBETH_SQE_EMPTY; + + dma += max_data; + size -= max_data; + + max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + tx_desc->buf_addr = cpu_to_le64(dma); + } + + if (!data_len) + break; + + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, + size, td_tag); + + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = &tx_q->tx_buf[0]; + tx_desc = &tx_q->base_tx[0]; + i = 0; + } else { + tx_buf++; + tx_desc++; + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, + DMA_TO_DEVICE); + } + + skb_tx_timestamp(first->skb); + + /* write last descriptor with RS and EOP bits */ + td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS); + + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, + size, td_tag); + + first->type = LIBETH_SQE_SKB; + first->rs_idx = i; + + IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i); + + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytes); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +} + +/** + * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring + * @txq: queue to put context descriptor on + * + * Since the TX buffer rings mimics the descriptor ring, update the tx buffer + * ring entry to reflect that this index is a context descriptor + */ +static struct idpf_base_tx_ctx_desc * +idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq) +{ + struct idpf_base_tx_ctx_desc *ctx_desc; + int ntu = txq->next_to_use; + + txq->tx_buf[ntu].type = LIBETH_SQE_CTX; + + ctx_desc = &txq->base_ctx[ntu]; + + IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu); + txq->next_to_use = ntu; + + return ctx_desc; +} + +/** + * idpf_tx_singleq_build_ctx_desc - populate context descriptor + * @txq: queue to send buffer on + * @offload: offload parameter structure + **/ +static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq, + struct idpf_tx_offload_params *offload) +{ + struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq); + u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX; + + if (offload->tso_segs) { + qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S; + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M, + offload->tso_len); + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss); + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.lso_pkts); + u64_stats_update_end(&txq->stats_sync); + } + + desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling); + + desc->qw0.l2tag2 = 0; + desc->qw0.rsvd1 = 0; + desc->qw1 = cpu_to_le64(qw1); +} + +/** + * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors + * @skb: send buffer + * @tx_q: queue to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_tx_queue *tx_q) +{ + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; + u32 count, buf_count = 1; + int csum, tso, needed; + __be16 protocol; + + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); + if (unlikely(!count)) + return idpf_tx_drop_skb(tx_q, skb); + + needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX; + if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + IDPF_DESC_UNUSED(tx_q), + needed, needed)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + + return NETDEV_TX_BUSY; + } + + protocol = vlan_get_protocol(skb); + if (protocol == htons(ETH_P_IP)) + offload.tx_flags |= IDPF_TX_FLAGS_IPV4; + else if (protocol == htons(ETH_P_IPV6)) + offload.tx_flags |= IDPF_TX_FLAGS_IPV6; + + tso = idpf_tso(skb, &offload); + if (tso < 0) + goto out_drop; + + csum = idpf_tx_singleq_csum(skb, &offload); + if (csum < 0) + goto out_drop; + + if (tso || offload.cd_tunneling) + idpf_tx_singleq_build_ctx_desc(tx_q, &offload); + + /* record the location of the first descriptor for this packet */ + first = &tx_q->tx_buf[tx_q->next_to_use]; + first->skb = skb; + + if (tso) { + first->packets = offload.tso_segs; + first->bytes = skb->len + ((first->packets - 1) * offload.tso_hdr_len); + } else { + first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + first->packets = 1; + } + idpf_tx_singleq_map(tx_q, first, &offload); + + return NETDEV_TX_OK; + +out_drop: + return idpf_tx_drop_skb(tx_q, skb); +} + +/** + * idpf_tx_singleq_clean - Reclaim resources from queue + * @tx_q: Tx queue to clean + * @napi_budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + */ +static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget, + int *cleaned) +{ + struct libeth_sq_napi_stats ss = { }; + struct idpf_base_tx_desc *tx_desc; + u32 budget = tx_q->clean_budget; + s16 ntc = tx_q->next_to_clean; + struct libeth_cq_pp cp = { + .dev = tx_q->dev, + .ss = &ss, + .napi = napi_budget, + }; + struct idpf_netdev_priv *np; + struct idpf_tx_buf *tx_buf; + struct netdev_queue *nq; + bool dont_wake; + + tx_desc = &tx_q->base_tx[ntc]; + tx_buf = &tx_q->tx_buf[ntc]; + ntc -= tx_q->desc_count; + + do { + struct idpf_base_tx_desc *eop_desc; + + /* If this entry in the ring was used as a context descriptor, + * it's corresponding entry in the buffer ring will indicate as + * such. We can skip this descriptor since there is no buffer + * to clean. + */ + if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) { + tx_buf->type = LIBETH_SQE_EMPTY; + goto fetch_next_txq_desc; + } + + if (unlikely(tx_buf->type != LIBETH_SQE_SKB)) + break; + + /* prevent any other reads prior to type */ + smp_rmb(); + + eop_desc = &tx_q->base_tx[tx_buf->rs_idx]; + + /* if the descriptor isn't done, no work yet to do */ + if (!(eop_desc->qw1 & + cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE))) + break; + + /* update the statistics for this packet */ + libeth_tx_complete(tx_buf, &cp); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buf++; + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; + tx_desc = &tx_q->base_tx[0]; + } + + /* unmap any remaining paged data */ + libeth_tx_complete(tx_buf, &cp); + } + + /* update budget only if we did something */ + budget--; + +fetch_next_txq_desc: + tx_buf++; + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; + tx_desc = &tx_q->base_tx[0]; + } + } while (likely(budget)); + + ntc += tx_q->desc_count; + tx_q->next_to_clean = ntc; + + *cleaned += ss.packets; + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_add(&tx_q->q_stats.packets, ss.packets); + u64_stats_add(&tx_q->q_stats.bytes, ss.bytes); + u64_stats_update_end(&tx_q->stats_sync); + + np = netdev_priv(tx_q->netdev); + nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); + + dont_wake = !test_bit(IDPF_VPORT_UP, np->state) || + !netif_carrier_ok(tx_q->netdev); + __netif_txq_completed_wake(nq, ss.packets, ss.bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, + dont_wake); + + return !!budget; +} + +/** + * idpf_tx_singleq_clean_all - Clean all Tx queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_txq = q_vec->num_txq; + bool clean_complete = true; + int i, budget_per_q; + + budget_per_q = num_txq ? max(budget / num_txq, 1) : 0; + for (i = 0; i < num_txq; i++) { + struct idpf_tx_queue *q; + + q = q_vec->tx[i]; + clean_complete &= idpf_tx_singleq_clean(q, budget_per_q, + cleaned); + } + + return clean_complete; +} + +/** + * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor + * status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_ptype_len doesn't need to be shifted because it begins + * at offset zero. + */ +static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc, + const u64 stat_err_bits) +{ + return !!(rx_desc->base_wb.qword1.status_error_ptype_len & + cpu_to_le64(stat_err_bits)); +} + +/** + * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers + * @rx_desc: Rx descriptor for current buffer + */ +static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc) +{ + /* if we are the last buffer then there is nothing else to do */ + if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ))) + return false; + + return true; +} + +/** + * idpf_rx_singleq_csum - Indicate in skb if checksum is good + * @rxq: Rx ring being processed + * @skb: skb currently being received and modified + * @csum_bits: checksum bits from descriptor + * @decoded: the packet type decoded by hardware + * + * skb->protocol must be set before this function is called + */ +static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, + struct sk_buff *skb, + struct libeth_rx_csum csum_bits, + struct libeth_rx_pt decoded) +{ + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ + if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded)) + return; + + /* check if HW has decoded the packet and checksum */ + if (unlikely(!csum_bits.l3l4p)) + return; + + ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4; + ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6; + + /* Check if there were any checksum errors */ + if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe))) + goto checksum_fail; + + /* Device could not do any checksum offload for certain extension + * headers as indicated by setting IPV6EXADD bit + */ + if (unlikely(ipv6 && csum_bits.ipv6exadd)) + return; + + /* check for L4 errors and handle packets that were not able to be + * checksummed due to arrival speed + */ + if (unlikely(csum_bits.l4e)) + goto checksum_fail; + + if (unlikely(csum_bits.nat && csum_bits.eudpe)) + goto checksum_fail; + + /* Handle packets that were not able to be checksummed due to arrival + * speed, in this case the stack can compute the csum. + */ + if (unlikely(csum_bits.pprs)) + return; + + /* If there is an outer header present that might contain a checksum + * we need to bump the checksum level by 1 to reflect the fact that + * we are indicating we validated the inner checksum. + */ + if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT) + skb->csum_level = 1; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + return; + +checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); +} + +/** + * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum + * @rx_desc: the receive descriptor + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + * + * Return: parsed checksum status. + **/ +static struct libeth_rx_csum +idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc) +{ + struct libeth_rx_csum csum_bits = { }; + u32 rx_error, rx_status; + u64 qword; + + qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword); + rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword); + + csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error); + csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M, + rx_error); + csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error); + csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M, + rx_error); + csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M, + rx_status); + csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M, + rx_status); + + return csum_bits; +} + +/** + * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum + * @rx_desc: the receive descriptor + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + * + * Return: parsed checksum status. + **/ +static struct libeth_rx_csum +idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc) +{ + struct libeth_rx_csum csum_bits = { }; + u16 rx_status0, rx_status1; + + rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1); + + csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M, + rx_status0); + csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M, + rx_status0); + csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M, + rx_status0); + csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M, + rx_status0); + csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M, + rx_status0); + csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M, + rx_status0); + csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M, + rx_status1); + + return csum_bits; +} + +/** + * idpf_rx_singleq_base_hash - set the hash value in the skb + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: specific descriptor + * @decoded: Decoded Rx packet type related fields + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + **/ +static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, + const union virtchnl2_rx_desc *rx_desc, + struct libeth_rx_pt decoded) +{ + u64 mask, qw1; + + if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded)) + return; + + mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; + qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + if (FIELD_GET(mask, qw1) == mask) { + u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss); + + libeth_rx_pt_set_hash(skb, hash, decoded); + } +} + +/** + * idpf_rx_singleq_flex_hash - set the hash value in the skb + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: specific descriptor + * @decoded: Decoded Rx packet type related fields + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, + const union virtchnl2_rx_desc *rx_desc, + struct libeth_rx_pt decoded) +{ + if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded)) + return; + + if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, + le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) { + u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash); + + libeth_rx_pt_set_hash(skb, hash, decoded); + } +} + +/** + * __idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx + * descriptor + * @rx_q: Rx ring being processed + * @skb: pointer to current skb being populated + * @rx_desc: descriptor for skb + * @ptype: packet type + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. + */ +static void +__idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, + const union virtchnl2_rx_desc *rx_desc, + u16 ptype) +{ + struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype]; + struct libeth_rx_csum csum_bits; + + /* Check if we're using base mode descriptor IDs */ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { + idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded); + csum_bits = idpf_rx_singleq_base_csum(rx_desc); + } else { + idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded); + csum_bits = idpf_rx_singleq_flex_csum(rx_desc); + } + + idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded); +} + +/** + * idpf_rx_buf_hw_update - Store the new tail and head values + * @rxq: queue to bump + * @val: new head index + */ +static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val) +{ + rxq->next_to_use = val; + + if (unlikely(!rxq->tail)) + return; + + /* writel has an implicit memory barrier */ + writel(val, rxq->tail); +} + +/** + * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers + * @rx_q: queue for which the hw buffers are allocated + * @cleaned_count: number of buffers to replace + * + * Returns false if all allocations were successful, true if any fail + */ +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q, + u16 cleaned_count) +{ + struct virtchnl2_singleq_rx_buf_desc *desc; + const struct libeth_fq_fp fq = { + .pp = rx_q->pp, + .fqes = rx_q->rx_buf, + .truesize = rx_q->truesize, + .count = rx_q->desc_count, + }; + u16 nta = rx_q->next_to_alloc; + + if (!cleaned_count) + return false; + + desc = &rx_q->single_buf[nta]; + + do { + dma_addr_t addr; + + addr = libeth_rx_alloc(&fq, nta); + if (addr == DMA_MAPPING_ERROR) + break; + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + desc->pkt_addr = cpu_to_le64(addr); + desc->hdr_addr = 0; + desc++; + + nta++; + if (unlikely(nta == rx_q->desc_count)) { + desc = &rx_q->single_buf[0]; + nta = 0; + } + + cleaned_count--; + } while (cleaned_count); + + if (rx_q->next_to_alloc != nta) { + idpf_rx_buf_hw_update(rx_q, nta); + rx_q->next_to_alloc = nta; + } + + return !!cleaned_count; +} + +/** + * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + * Decode the Rx descriptor and extract relevant information including the + * size and Rx packet type. + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + */ +static void +idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc, + struct libeth_rqe_info *fields) +{ + u64 qword; + + qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + fields->len = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword); + fields->ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword); +} + +/** + * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + * Decode the Rx descriptor and extract relevant information including the + * size and Rx packet type. + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + */ +static void +idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc, + struct libeth_rqe_info *fields) +{ + fields->len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, + le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); + fields->ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M, + le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0)); +} + +/** + * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor + * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + */ +static void +idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, + const union virtchnl2_rx_desc *rx_desc, + struct libeth_rqe_info *fields) +{ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) + idpf_rx_singleq_extract_base_fields(rx_desc, fields); + else + idpf_rx_singleq_extract_flex_fields(rx_desc, fields); +} + +static bool +idpf_rx_singleq_process_skb_fields(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs) +{ + struct libeth_rqe_info fields; + struct idpf_rx_queue *rxq; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); + + idpf_rx_singleq_extract_fields(rxq, xdp->desc, &fields); + __idpf_rx_singleq_process_skb_fields(rxq, skb, xdp->desc, + fields.ptype); + + return true; +} + +static void idpf_xdp_run_pass(struct libeth_xdp_buff *xdp, + struct napi_struct *napi, + struct libeth_rq_napi_stats *rs, + const union virtchnl2_rx_desc *desc) +{ + libeth_xdp_run_pass(xdp, NULL, napi, rs, desc, NULL, + idpf_rx_singleq_process_skb_fields); +} + +/** + * idpf_rx_singleq_clean - Reclaim resources after receive completes + * @rx_q: rx queue to clean + * @budget: Total limit on number of packets to process + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) +{ + struct libeth_rq_napi_stats rs = { }; + u16 ntc = rx_q->next_to_clean; + LIBETH_XDP_ONSTACK_BUFF(xdp); + u16 cleaned_count = 0; + + libeth_xdp_init_buff(xdp, &rx_q->xdp, &rx_q->xdp_rxq); + + /* Process Rx packets bounded by budget */ + while (likely(rs.packets < budget)) { + struct libeth_rqe_info fields = { }; + union virtchnl2_rx_desc *rx_desc; + struct idpf_rx_buf *rx_buf; + + /* get the Rx desc from Rx queue based on 'next_to_clean' */ + rx_desc = &rx_q->rx[ntc]; + + /* status_error_ptype_len will always be zero for unused + * descriptors because it's cleared in cleanup, and overlaps + * with hdr_addr which is always zero because packet split + * isn't used, if the hardware wrote DD then the length will be + * non-zero + */ +#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M + if (!idpf_rx_singleq_test_staterr(rx_desc, + IDPF_RXD_DD)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc + */ + dma_rmb(); + + idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); + + rx_buf = &rx_q->rx_buf[ntc]; + libeth_xdp_process_buff(xdp, rx_buf, fields.len); + rx_buf->netmem = 0; + + IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc); + cleaned_count++; + + /* skip if it is non EOP desc */ + if (idpf_rx_singleq_is_non_eop(rx_desc) || + unlikely(!xdp->data)) + continue; + +#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ + VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M) + if (unlikely(idpf_rx_singleq_test_staterr(rx_desc, + IDPF_RXD_ERR_S))) { + libeth_xdp_return_buff_slow(xdp); + continue; + } + + idpf_xdp_run_pass(xdp, rx_q->pp->p.napi, &rs, rx_desc); + } + + rx_q->next_to_clean = ntc; + libeth_xdp_save_buff(&rx_q->xdp, xdp); + + page_pool_nid_changed(rx_q->pp, numa_mem_id()); + if (cleaned_count) + idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); + + u64_stats_update_begin(&rx_q->stats_sync); + u64_stats_add(&rx_q->q_stats.packets, rs.packets); + u64_stats_add(&rx_q->q_stats.bytes, rs.bytes); + u64_stats_update_end(&rx_q->stats_sync); + + return rs.packets; +} + +/** + * idpf_rx_singleq_clean_all - Clean all Rx queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_rxq = q_vec->num_rxq; + bool clean_complete = true; + int budget_per_q, i; + + /* We attempt to distribute budget to each Rx queue fairly, but don't + * allow the budget to go below 1 because that would exit polling early. + */ + budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { + struct idpf_rx_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q); + + /* if we clean as many as budgeted, we must not be done */ + if (pkts_cleaned_per_q >= budget_per_q) + clean_complete = false; + *cleaned += pkts_cleaned_per_q; + } + + return clean_complete; +} + +/** + * idpf_vport_singleq_napi_poll - NAPI handler + * @napi: struct from which you get q_vector + * @budget: budget provided by stack + */ +int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget) +{ + struct idpf_q_vector *q_vector = + container_of(napi, struct idpf_q_vector, napi); + bool clean_complete; + int work_done = 0; + + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) { + idpf_tx_singleq_clean_all(q_vector, budget, &work_done); + + return budget; + } + + clean_complete = idpf_rx_singleq_clean_all(q_vector, budget, + &work_done); + clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget, + &work_done); + + /* If work not completed, return budget and polling will return */ + if (!clean_complete) { + idpf_vport_intr_set_wb_on_itr(q_vector); + return budget; + } + + work_done = min_t(int, work_done, budget - 1); + + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + idpf_vport_intr_update_itr_ena_irq(q_vector); + else + idpf_vport_intr_set_wb_on_itr(q_vector); + + return work_done; +} |
