summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google/gve/gve_rx_dqo.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_rx_dqo.c')
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c717
1 files changed, 409 insertions, 308 deletions
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 8e8071308aeb..7380c2b7a2d8 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -8,6 +8,7 @@
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
+#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
@@ -15,221 +16,122 @@
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>
+#include <net/xdp_sock_drv.h>
-static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
-{
- return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
-}
-
-static void gve_free_page_dqo(struct gve_priv *priv,
- struct gve_rx_buf_state_dqo *bs,
- bool free_page)
-{
- page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
- if (free_page)
- gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
- DMA_FROM_DEVICE);
- bs->page_info.page = NULL;
-}
-
-static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
-{
- struct gve_rx_buf_state_dqo *buf_state;
- s16 buffer_id;
-
- buffer_id = rx->dqo.free_buf_states;
- if (unlikely(buffer_id == -1))
- return NULL;
-
- buf_state = &rx->dqo.buf_states[buffer_id];
-
- /* Remove buf_state from free list */
- rx->dqo.free_buf_states = buf_state->next;
-
- /* Point buf_state to itself to mark it as allocated */
- buf_state->next = buffer_id;
-
- return buf_state;
-}
-
-static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
-
- return buf_state->next == buffer_id;
-}
-
-static void gve_free_buf_state(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
+static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
+ struct device *hdev = &priv->pdev->dev;
+ int buf_count = rx->dqo.bufq.mask + 1;
- buf_state->next = rx->dqo.free_buf_states;
- rx->dqo.free_buf_states = buffer_id;
+ if (rx->dqo.hdr_bufs.data) {
+ dma_free_coherent(hdev, priv->header_buf_size * buf_count,
+ rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
+ rx->dqo.hdr_bufs.data = NULL;
+ }
}
-static struct gve_rx_buf_state_dqo *
-gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
+static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
+ const u32 buffer_queue_slots,
+ const u32 completion_queue_slots)
{
- struct gve_rx_buf_state_dqo *buf_state;
- s16 buffer_id;
-
- buffer_id = list->head;
- if (unlikely(buffer_id == -1))
- return NULL;
-
- buf_state = &rx->dqo.buf_states[buffer_id];
-
- /* Remove buf_state from list */
- list->head = buf_state->next;
- if (buf_state->next == -1)
- list->tail = -1;
-
- /* Point buf_state to itself to mark it as allocated */
- buf_state->next = buffer_id;
-
- return buf_state;
-}
+ int i;
-static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
- struct gve_index_list *list,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
+ /* Set buffer queue state */
+ rx->dqo.bufq.mask = buffer_queue_slots - 1;
+ rx->dqo.bufq.head = 0;
+ rx->dqo.bufq.tail = 0;
- buf_state->next = -1;
+ /* Set completion queue state */
+ rx->dqo.complq.num_free_slots = completion_queue_slots;
+ rx->dqo.complq.mask = completion_queue_slots - 1;
+ rx->dqo.complq.cur_gen_bit = 0;
+ rx->dqo.complq.head = 0;
- if (list->head == -1) {
- list->head = buffer_id;
- list->tail = buffer_id;
- } else {
- int tail = list->tail;
+ /* Set RX SKB context */
+ rx->ctx.skb_head = NULL;
+ rx->ctx.skb_tail = NULL;
- rx->dqo.buf_states[tail].next = buffer_id;
- list->tail = buffer_id;
+ /* Set up linked list of buffer IDs */
+ if (rx->dqo.buf_states) {
+ for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
+ rx->dqo.buf_states[i].next = i + 1;
+ rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
}
+
+ rx->dqo.free_buf_states = 0;
+ rx->dqo.recycled_buf_states.head = -1;
+ rx->dqo.recycled_buf_states.tail = -1;
+ rx->dqo.used_buf_states.head = -1;
+ rx->dqo.used_buf_states.tail = -1;
}
-static struct gve_rx_buf_state_dqo *
-gve_get_recycled_buf_state(struct gve_rx_ring *rx)
+static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
- struct gve_rx_buf_state_dqo *buf_state;
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ size_t size;
int i;
- /* Recycled buf states are immediately usable. */
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
- if (likely(buf_state))
- return buf_state;
-
- if (unlikely(rx->dqo.used_buf_states.head == -1))
- return NULL;
+ const u32 buffer_queue_slots = priv->rx_desc_cnt;
+ const u32 completion_queue_slots = priv->rx_desc_cnt;
- /* Used buf states are only usable when ref count reaches 0, which means
- * no SKBs refer to them.
- *
- * Search a limited number before giving up.
- */
- for (i = 0; i < 5; i++) {
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
- if (gve_buf_ref_cnt(buf_state) == 0) {
- rx->dqo.used_buf_states_cnt--;
- return buf_state;
- }
-
- gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ /* Reset buffer queue */
+ if (rx->dqo.bufq.desc_ring) {
+ size = sizeof(rx->dqo.bufq.desc_ring[0]) *
+ buffer_queue_slots;
+ memset(rx->dqo.bufq.desc_ring, 0, size);
}
- /* For QPL, we cannot allocate any new buffers and must
- * wait for the existing ones to be available.
- */
- if (rx->dqo.qpl)
- return NULL;
-
- /* If there are no free buf states discard an entry from
- * `used_buf_states` so it can be used.
- */
- if (unlikely(rx->dqo.free_buf_states == -1)) {
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
- if (gve_buf_ref_cnt(buf_state) == 0)
- return buf_state;
-
- gve_free_page_dqo(rx->gve, buf_state, true);
- gve_free_buf_state(rx, buf_state);
+ /* Reset completion queue */
+ if (rx->dqo.complq.desc_ring) {
+ size = sizeof(rx->dqo.complq.desc_ring[0]) *
+ completion_queue_slots;
+ memset(rx->dqo.complq.desc_ring, 0, size);
}
- return NULL;
-}
+ /* Reset q_resources */
+ if (rx->q_resources)
+ memset(rx->q_resources, 0, sizeof(*rx->q_resources));
-static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- struct gve_priv *priv = rx->gve;
- u32 idx;
+ /* Reset buf states */
+ if (rx->dqo.buf_states) {
+ for (i = 0; i < rx->dqo.num_buf_states; i++) {
+ struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
- if (!rx->dqo.qpl) {
- int err;
-
- err = gve_alloc_page(priv, &priv->pdev->dev,
- &buf_state->page_info.page,
- &buf_state->addr,
- DMA_FROM_DEVICE, GFP_ATOMIC);
- if (err)
- return err;
- } else {
- idx = rx->dqo.next_qpl_page_idx;
- if (idx >= priv->rx_pages_per_qpl) {
- net_err_ratelimited("%s: Out of QPL pages\n",
- priv->dev->name);
- return -ENOMEM;
+ if (rx->dqo.page_pool)
+ gve_free_to_page_pool(rx, bs, false);
+ else
+ gve_free_qpl_page_dqo(bs);
}
- buf_state->page_info.page = rx->dqo.qpl->pages[idx];
- buf_state->addr = rx->dqo.qpl->page_buses[idx];
- rx->dqo.next_qpl_page_idx++;
}
- buf_state->page_info.page_offset = 0;
- buf_state->page_info.page_address =
- page_address(buf_state->page_info.page);
- buf_state->last_single_ref_offset = 0;
-
- /* The page already has 1 ref. */
- page_ref_add(buf_state->page_info.page, INT_MAX - 1);
- buf_state->page_info.pagecnt_bias = INT_MAX;
- return 0;
-}
-
-static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
-{
- struct device *hdev = &priv->pdev->dev;
- int buf_count = rx->dqo.bufq.mask + 1;
-
- if (rx->dqo.hdr_bufs.data) {
- dma_free_coherent(hdev, priv->header_buf_size * buf_count,
- rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
- rx->dqo.hdr_bufs.data = NULL;
- }
+ gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
+ completion_queue_slots);
}
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+ struct gve_rx_ring *rx = &priv->rx[idx];
if (!gve_rx_was_added_to_block(priv, idx))
return;
+ if (rx->dqo.page_pool)
+ page_pool_disable_direct_recycling(rx->dqo.page_pool);
gve_remove_napi(priv, ntfy_idx);
gve_rx_remove_from_block(priv, idx);
+ gve_rx_reset_ring_dqo(priv, idx);
}
-static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
- struct gve_rx_alloc_rings_cfg *cfg)
+void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
struct device *hdev = &priv->pdev->dev;
size_t completion_queue_slots;
size_t buffer_queue_slots;
int idx = rx->q_num;
size_t size;
+ u32 qpl_id;
int i;
completion_queue_slots = rx->dqo.complq.mask + 1;
@@ -243,12 +145,20 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
for (i = 0; i < rx->dqo.num_buf_states; i++) {
struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
- /* Only free page for RDA. QPL pages are freed in gve_main. */
- if (bs->page_info.page)
- gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
+
+ if (rx->dqo.page_pool)
+ gve_free_to_page_pool(rx, bs, false);
+ else
+ gve_free_qpl_page_dqo(bs);
+ if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) {
+ xsk_buff_free(bs->xsk_buff);
+ bs->xsk_buff = NULL;
+ }
}
+
if (rx->dqo.qpl) {
- gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
rx->dqo.qpl = NULL;
}
@@ -270,15 +180,20 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
kvfree(rx->dqo.buf_states);
rx->dqo.buf_states = NULL;
+ if (rx->dqo.page_pool) {
+ page_pool_destroy(rx->dqo.page_pool);
+ rx->dqo.page_pool = NULL;
+ }
+
gve_rx_free_hdr_bufs(priv, rx);
netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
-static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
+static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
+ const u32 buf_count)
{
struct device *hdev = &priv->pdev->dev;
- int buf_count = rx->dqo.bufq.mask + 1;
rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
&rx->dqo.hdr_bufs.addr, GFP_KERNEL);
@@ -296,17 +211,18 @@ void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}
-static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
- struct gve_rx_alloc_rings_cfg *cfg,
- struct gve_rx_ring *rx,
- int idx)
+int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx)
{
struct device *hdev = &priv->pdev->dev;
+ struct page_pool *pool;
+ int qpl_page_cnt;
size_t size;
- int i;
+ u32 qpl_id;
- const u32 buffer_queue_slots = cfg->raw_addressing ?
- priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
+ const u32 buffer_queue_slots = cfg->ring_size;
const u32 completion_queue_slots = cfg->ring_size;
netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");
@@ -314,36 +230,29 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
memset(rx, 0, sizeof(*rx));
rx->gve = priv;
rx->q_num = idx;
- rx->dqo.bufq.mask = buffer_queue_slots - 1;
- rx->dqo.complq.num_free_slots = completion_queue_slots;
- rx->dqo.complq.mask = completion_queue_slots - 1;
- rx->ctx.skb_head = NULL;
- rx->ctx.skb_tail = NULL;
+ rx->packet_buffer_size = cfg->packet_buffer_size;
- rx->dqo.num_buf_states = cfg->raw_addressing ?
- min_t(s16, S16_MAX, buffer_queue_slots * 4) :
- priv->rx_pages_per_qpl;
- rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
- sizeof(rx->dqo.buf_states[0]),
- GFP_KERNEL);
+ if (cfg->xdp) {
+ rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
+ rx->rx_headroom = XDP_PACKET_HEADROOM;
+ } else {
+ rx->packet_buffer_truesize = rx->packet_buffer_size;
+ rx->rx_headroom = 0;
+ }
+
+ rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
+ gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
+ rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states,
+ sizeof(rx->dqo.buf_states[0]),
+ GFP_KERNEL, priv->numa_node);
if (!rx->dqo.buf_states)
return -ENOMEM;
/* Allocate header buffers for header-split */
if (cfg->enable_header_split)
- if (gve_rx_alloc_hdr_bufs(priv, rx))
+ if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
goto err;
- /* Set up linked list of buffer IDs */
- for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
- rx->dqo.buf_states[i].next = i + 1;
-
- rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
- rx->dqo.recycled_buf_states.head = -1;
- rx->dqo.recycled_buf_states.tail = -1;
- rx->dqo.used_buf_states.head = -1;
- rx->dqo.used_buf_states.tail = -1;
-
/* Allocate RX completion queue */
size = sizeof(rx->dqo.complq.desc_ring[0]) *
completion_queue_slots;
@@ -359,8 +268,18 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
if (!rx->dqo.bufq.desc_ring)
goto err;
- if (!cfg->raw_addressing) {
- rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
+ if (cfg->raw_addressing) {
+ pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
+ if (IS_ERR(pool))
+ goto err;
+
+ rx->dqo.page_pool = pool;
+ } else {
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
+
+ rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
if (!rx->dqo.qpl)
goto err;
rx->dqo.next_qpl_page_idx = 0;
@@ -371,6 +290,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
if (!rx->q_resources)
goto err;
+ gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
+ completion_queue_slots);
+
return 0;
err:
@@ -393,18 +315,12 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
int err;
int i;
- if (!cfg->raw_addressing && !cfg->qpls) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc QPL ring before allocing QPLs\n");
- return -EINVAL;
- }
-
- rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
+ rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
GFP_KERNEL);
if (!rx)
return -ENOMEM;
- for (i = 0; i < cfg->qcfg->num_queues; i++) {
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -433,7 +349,7 @@ void gve_rx_free_rings_dqo(struct gve_priv *priv,
if (!rx)
return;
- for (i = 0; i < cfg->qcfg->num_queues; i++)
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
gve_rx_free_ring_dqo(priv, &rx[i], cfg);
kvfree(rx);
@@ -455,26 +371,14 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
while (num_posted < num_avail_slots) {
struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
- struct gve_rx_buf_state_dqo *buf_state;
-
- buf_state = gve_get_recycled_buf_state(rx);
- if (unlikely(!buf_state)) {
- buf_state = gve_alloc_buf_state(rx);
- if (unlikely(!buf_state))
- break;
-
- if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
- u64_stats_update_begin(&rx->statss);
- rx->rx_buf_alloc_fail++;
- u64_stats_update_end(&rx->statss);
- gve_free_buf_state(rx, buf_state);
- break;
- }
+
+ if (unlikely(gve_alloc_buffer(rx, desc))) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
+ break;
}
- desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
- desc->buf_addr = cpu_to_le64(buf_state->addr +
- buf_state->page_info.page_offset);
if (rx->dqo.hdr_bufs.data)
desc->header_buf_addr =
cpu_to_le64(rx->dqo.hdr_bufs.addr +
@@ -491,48 +395,6 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
rx->fill_cnt += num_posted;
}
-static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- const u16 data_buffer_size = priv->data_buffer_size_dqo;
- int pagecount;
-
- /* Can't reuse if we only fit one buffer per page */
- if (data_buffer_size * 2 > PAGE_SIZE)
- goto mark_used;
-
- pagecount = gve_buf_ref_cnt(buf_state);
-
- /* Record the offset when we have a single remaining reference.
- *
- * When this happens, we know all of the other offsets of the page are
- * usable.
- */
- if (pagecount == 1) {
- buf_state->last_single_ref_offset =
- buf_state->page_info.page_offset;
- }
-
- /* Use the next buffer sized chunk in the page. */
- buf_state->page_info.page_offset += data_buffer_size;
- buf_state->page_info.page_offset &= (PAGE_SIZE - 1);
-
- /* If we wrap around to the same offset without ever dropping to 1
- * reference, then we don't know if this offset was ever freed.
- */
- if (buf_state->page_info.page_offset ==
- buf_state->last_single_ref_offset) {
- goto mark_used;
- }
-
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
- return;
-
-mark_used:
- gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
- rx->dqo.used_buf_states_cnt++;
-}
-
static void gve_rx_skb_csum(struct sk_buff *skb,
const struct gve_rx_compl_desc_dqo *desc,
struct gve_ptype ptype)
@@ -581,11 +443,36 @@ static void gve_rx_skb_hash(struct sk_buff *skb,
skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}
-static void gve_rx_free_skb(struct gve_rx_ring *rx)
+/* Expand the hardware timestamp to the full 64 bits of width, and add it to the
+ * skb.
+ *
+ * This algorithm works by using the passed hardware timestamp to generate a
+ * diff relative to the last read of the nic clock. This diff can be positive or
+ * negative, as it is possible that we have read the clock more recently than
+ * the hardware has received this packet. To detect this, we use the high bit of
+ * the diff, and assume that the read is more recent if the high bit is set. In
+ * this case we invert the process.
+ *
+ * Note that this means if the time delta between packet reception and the last
+ * clock read is greater than ~2 seconds, this will provide invalid results.
+ */
+static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts)
+{
+ u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter);
+ struct sk_buff *skb = rx->ctx.skb_head;
+ u32 low = (u32)last_read;
+ s32 diff = hwts - low;
+
+ skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff);
+}
+
+static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
if (!rx->ctx.skb_head)
return;
+ if (rx->ctx.skb_head == napi->skb)
+ napi->skb = NULL;
dev_kfree_skb_any(rx->ctx.skb_head);
rx->ctx.skb_head = NULL;
rx->ctx.skb_tail = NULL;
@@ -606,7 +493,7 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state,
u16 buf_len)
{
- struct page *page = alloc_page(GFP_ATOMIC);
+ struct page *page = alloc_pages_node(rx->gve->numa_node, GFP_ATOMIC, 0);
int num_frags;
if (!page)
@@ -628,6 +515,25 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
return 0;
}
+static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ int num_frags, u16 buf_len)
+{
+ if (rx->dqo.page_pool) {
+ skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.netmem,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ } else {
+ skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.page,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ }
+}
+
/* Chains multi skbs for single rx packet.
* Returns 0 if buffer is appended, -1 otherwise.
*/
@@ -645,6 +551,9 @@ static int gve_rx_append_frags(struct napi_struct *napi,
if (!skb)
return -1;
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(skb);
+
if (rx->ctx.skb_tail == rx->ctx.skb_head)
skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
else
@@ -655,23 +564,157 @@ static int gve_rx_append_frags(struct napi_struct *napi,
if (rx->ctx.skb_tail != rx->ctx.skb_head) {
rx->ctx.skb_head->len += buf_len;
rx->ctx.skb_head->data_len += buf_len;
- rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
+ rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
}
/* Trigger ondemand page allocation if we are running low on buffers */
if (gve_rx_should_trigger_copy_ondemand(rx))
return gve_rx_copy_ondemand(rx, buf_state, buf_len);
- skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
- buf_state->page_info.page,
- buf_state->page_info.page_offset,
- buf_len, priv->data_buffer_size_dqo);
- gve_dec_pagecnt_bias(&buf_state->page_info);
+ gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
+ gve_reuse_buffer(rx, buf_state);
+ return 0;
+}
- /* Advances buffer page-offset if page is partially used.
- * Marks buffer as used if page is full.
- */
- gve_try_recycle_buf(priv, rx, buf_state);
+static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp)
+{
+ struct gve_tx_ring *tx;
+ struct xdp_frame *xdpf;
+ u32 tx_qid;
+ int err;
+
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf)) {
+ if (rx->xsk_pool)
+ xsk_buff_free(xdp);
+ return -ENOSPC;
+ }
+
+ tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
+ tx = &priv->tx[tx_qid];
+ spin_lock(&tx->dqo_tx.xdp_lock);
+ err = gve_xdp_xmit_one_dqo(priv, tx, xdpf);
+ spin_unlock(&tx->dqo_tx.xdp_lock);
+
+ return err;
+}
+
+static void gve_xsk_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act)
+{
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ xsk_buff_free(xdp);
+ break;
+ case XDP_TX:
+ if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp)))
+ goto err;
+ break;
+ case XDP_REDIRECT:
+ if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog)))
+ goto err;
+ break;
+ }
+
+ u64_stats_update_begin(&rx->statss);
+ if ((u32)xdp_act < GVE_XDP_ACTIONS)
+ rx->xdp_actions[xdp_act]++;
+ u64_stats_update_end(&rx->statss);
+ return;
+
+err:
+ u64_stats_update_begin(&rx->statss);
+ if (xdp_act == XDP_TX)
+ rx->xdp_tx_errors++;
+ if (xdp_act == XDP_REDIRECT)
+ rx->xdp_redirect_errors++;
+ u64_stats_update_end(&rx->statss);
+}
+
+static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ int err;
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ gve_free_buffer(rx, buf_state);
+ break;
+ case XDP_TX:
+ err = gve_xdp_tx_dqo(priv, rx, xdp);
+ if (unlikely(err))
+ goto err;
+ gve_reuse_buffer(rx, buf_state);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(priv->dev, xdp, xprog);
+ if (unlikely(err))
+ goto err;
+ gve_reuse_buffer(rx, buf_state);
+ break;
+ }
+ u64_stats_update_begin(&rx->statss);
+ if ((u32)xdp_act < GVE_XDP_ACTIONS)
+ rx->xdp_actions[xdp_act]++;
+ u64_stats_update_end(&rx->statss);
+ return;
+err:
+ u64_stats_update_begin(&rx->statss);
+ if (xdp_act == XDP_TX)
+ rx->xdp_tx_errors++;
+ else if (xdp_act == XDP_REDIRECT)
+ rx->xdp_redirect_errors++;
+ u64_stats_update_end(&rx->statss);
+ gve_free_buffer(rx, buf_state);
+ return;
+}
+
+static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state, int buf_len,
+ struct bpf_prog *xprog)
+{
+ struct xdp_buff *xdp = buf_state->xsk_buff;
+ struct gve_priv *priv = rx->gve;
+ int xdp_act;
+
+ xdp->data_end = xdp->data + buf_len;
+ xsk_buff_dma_sync_for_cpu(xdp);
+
+ if (xprog) {
+ xdp_act = bpf_prog_run_xdp(xprog, xdp);
+ buf_len = xdp->data_end - xdp->data;
+ if (xdp_act != XDP_PASS) {
+ gve_xsk_done_dqo(priv, rx, xdp, xprog, xdp_act);
+ gve_free_buf_state(rx, buf_state);
+ return 0;
+ }
+ }
+
+ /* Copy the data to skb */
+ rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
+ xdp->data, buf_len);
+ if (unlikely(!rx->ctx.skb_head)) {
+ xsk_buff_free(xdp);
+ gve_free_buf_state(rx, buf_state);
+ return -ENOMEM;
+ }
+ rx->ctx.skb_tail = rx->ctx.skb_head;
+
+ /* Free XSK buffer and Buffer state */
+ xsk_buff_free(xdp);
+ gve_free_buf_state(rx, buf_state);
+
+ /* Update Stats */
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
return 0;
}
@@ -689,6 +732,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
const bool hsplit = compl_desc->split_header;
struct gve_rx_buf_state_dqo *buf_state;
struct gve_priv *priv = rx->gve;
+ struct bpf_prog *xprog;
u16 buf_len;
u16 hdr_len;
@@ -705,18 +749,26 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
}
if (unlikely(compl_desc->rx_error)) {
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
- buf_state);
+ gve_free_buffer(rx, buf_state);
return -EINVAL;
}
buf_len = compl_desc->packet_len;
hdr_len = compl_desc->header_len;
+ xprog = READ_ONCE(priv->xdp_prog);
+ if (buf_state->xsk_buff)
+ return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog);
+
/* Page might have not been used for awhile and was likely last written
* by a different thread.
*/
- prefetch(buf_state->page_info.page);
+ if (rx->dqo.page_pool) {
+ if (!netmem_is_net_iov(buf_state->page_info.netmem))
+ prefetch(netmem_to_page(buf_state->page_info.netmem));
+ } else {
+ prefetch(buf_state->page_info.page);
+ }
/* Copy the header into the skb in the case of header split */
if (hsplit) {
@@ -730,6 +782,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
if (unlikely(!rx->ctx.skb_head))
goto error;
rx->ctx.skb_tail = rx->ctx.skb_head;
+
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(rx->ctx.skb_head);
} else {
unsplit = 1;
}
@@ -742,7 +797,8 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
/* Sync the portion of dma buffer for CPU to read. */
dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
- buf_state->page_info.page_offset,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad,
buf_len, DMA_FROM_DEVICE);
/* Append to current skb if one exists. */
@@ -754,6 +810,33 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
+ if (xprog) {
+ struct xdp_buff xdp;
+ void *old_data;
+ int xdp_act;
+
+ xdp_init_buff(&xdp, buf_state->page_info.buf_size,
+ &rx->xdp_rxq);
+ xdp_prepare_buff(&xdp,
+ buf_state->page_info.page_address +
+ buf_state->page_info.page_offset,
+ buf_state->page_info.pad,
+ buf_len, false);
+ old_data = xdp.data;
+ xdp_act = bpf_prog_run_xdp(xprog, &xdp);
+ buf_state->page_info.pad += xdp.data - old_data;
+ buf_len = xdp.data_end - xdp.data;
+ if (xdp_act != XDP_PASS) {
+ gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
+ buf_state);
+ return 0;
+ }
+
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
+ }
+
if (eop && buf_len <= priv->rx_copybreak) {
rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
&buf_state->page_info, buf_len);
@@ -766,8 +849,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
rx->rx_copybreak_pkt++;
u64_stats_update_end(&rx->statss);
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
- buf_state);
+ gve_free_buffer(rx, buf_state);
return 0;
}
@@ -782,16 +864,15 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
- skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
- buf_state->page_info.page_offset, buf_len,
- priv->data_buffer_size_dqo);
- gve_dec_pagecnt_bias(&buf_state->page_info);
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(rx->ctx.skb_head);
- gve_try_recycle_buf(priv, rx, buf_state);
+ gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
+ gve_reuse_buffer(rx, buf_state);
return 0;
error:
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+ gve_free_buffer(rx, buf_state);
return -ENOMEM;
}
@@ -837,6 +918,9 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
if (feat & NETIF_F_RXCSUM)
gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);
+ if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)
+ gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts));
+
/* RSC packets must set gso_size otherwise the TCP stack will complain
* that packets are larger than MTU.
*/
@@ -856,16 +940,27 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
- struct napi_struct *napi = &block->napi;
- netdev_features_t feat = napi->dev->features;
-
- struct gve_rx_ring *rx = block->rx;
- struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
-
+ struct gve_rx_compl_queue_dqo *complq;
+ struct napi_struct *napi;
+ netdev_features_t feat;
+ struct gve_rx_ring *rx;
+ struct gve_priv *priv;
+ u64 xdp_redirects;
u32 work_done = 0;
u64 bytes = 0;
+ u64 xdp_txs;
int err;
+ napi = &block->napi;
+ feat = napi->dev->features;
+
+ rx = block->rx;
+ priv = rx->gve;
+ complq = &rx->dqo.complq;
+
+ xdp_redirects = rx->xdp_actions[XDP_REDIRECT];
+ xdp_txs = rx->xdp_actions[XDP_TX];
+
while (work_done < budget) {
struct gve_rx_compl_desc_dqo *compl_desc =
&complq->desc_ring[complq->head];
@@ -884,7 +979,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
if (err < 0) {
- gve_rx_free_skb(rx);
+ gve_rx_free_skb(napi, rx);
u64_stats_update_begin(&rx->statss);
if (err == -ENOMEM)
rx->rx_skb_alloc_fail++;
@@ -927,7 +1022,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
/* gve_rx_complete_skb() will consume skb if successful */
if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
- gve_rx_free_skb(rx);
+ gve_rx_free_skb(napi, rx);
u64_stats_update_begin(&rx->statss);
rx->rx_desc_err_dropped_pkt++;
u64_stats_update_end(&rx->statss);
@@ -939,6 +1034,12 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
rx->ctx.skb_tail = NULL;
}
+ if (xdp_txs != rx->xdp_actions[XDP_TX])
+ gve_xdp_tx_flush_dqo(priv, rx->q_num);
+
+ if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT])
+ xdp_do_flush();
+
gve_rx_post_buffers_dqo(rx);
u64_stats_update_begin(&rx->statss);