Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r-- | drivers/net/ethernet/google/Kconfig                   |   1
-rw-r--r-- | drivers/net/ethernet/google/gve/Makefile              |   4
-rw-r--r-- | drivers/net/ethernet/google/gve/gve.h                 |  83
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.c          | 101
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.h          |  30
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c |  25
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_desc_dqo.h        |   3
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_dqo.h             |   3
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ethtool.c         |  34
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c            | 429
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ptp.c             | 139
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx.c              |  14
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx_dqo.c          | 201
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx.c              |   4
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx_dqo.c          | 384
15 files changed, 1209 insertions(+), 246 deletions(-)
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig index 564862a57124..14c9431e15e5 100644 --- a/drivers/net/ethernet/google/Kconfig +++ b/drivers/net/ethernet/google/Kconfig @@ -18,6 +18,7 @@ if NET_VENDOR_GOOGLE config GVE tristate "Google Virtual NIC (gVNIC) support" depends on (PCI_MSI && (X86 || CPU_LITTLE_ENDIAN)) + depends on PTP_1588_CLOCK_OPTIONAL select PAGE_POOL help This driver supports Google Virtual NIC (gVNIC)" diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile index 4520f1c07a63..e0ec227a50f7 100644 --- a/drivers/net/ethernet/google/gve/Makefile +++ b/drivers/net/ethernet/google/gve/Makefile @@ -1,5 +1,7 @@ # Makefile for the Google virtual Ethernet (gve) driver obj-$(CONFIG_GVE) += gve.o -gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \ +gve-y := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \ gve_buffer_mgmt_dqo.o + +gve-$(CONFIG_PTP_1588_CLOCK) += gve_ptp.o diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 2fab38c8ee78..bceaf9b05cb4 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -11,7 +11,9 @@ #include <linux/dmapool.h> #include <linux/ethtool_netlink.h> #include <linux/netdevice.h> +#include <linux/net_tstamp.h> #include <linux/pci.h> +#include <linux/ptp_clock_kernel.h> #include <linux/u64_stats_sync.h> #include <net/page_pool/helpers.h> #include <net/xdp.h> @@ -188,6 +190,9 @@ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ struct gve_rx_slot_page_info page_info; + /* XSK buffer */ + struct xdp_buff *xsk_buff; + /* The DMA address corresponding to `page_info`. */ dma_addr_t addr; @@ -329,7 +334,6 @@ struct gve_rx_ring { /* XDP stuff */ struct xdp_rxq_info xdp_rxq; - struct xdp_rxq_info xsk_rxq; struct xsk_buff_pool *xsk_pool; struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */ }; @@ -398,10 +402,24 @@ enum gve_packet_state { GVE_PACKET_STATE_PENDING_REINJECT_COMPL, /* No valid completion received within the specified timeout. */ GVE_PACKET_STATE_TIMED_OUT_COMPL, + /* XSK pending packet has received a packet/reinjection completion, or + * has timed out. At this point, the pending packet can be counted by + * xsk_tx_complete and freed. + */ + GVE_PACKET_STATE_XSK_COMPLETE, +}; + +enum gve_tx_pending_packet_dqo_type { + GVE_TX_PENDING_PACKET_DQO_SKB, + GVE_TX_PENDING_PACKET_DQO_XDP_FRAME, + GVE_TX_PENDING_PACKET_DQO_XSK, }; struct gve_tx_pending_packet_dqo { - struct sk_buff *skb; /* skb for this packet */ + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; /* 0th element corresponds to the linear portion of `skb`, should be * unmapped with `dma_unmap_single`. @@ -431,7 +449,10 @@ struct gve_tx_pending_packet_dqo { /* Identifies the current state of the packet as defined in * `enum gve_packet_state`. */ - u8 state; + u8 state : 3; + + /* gve_tx_pending_packet_dqo_type */ + u8 type : 2; /* If packet is an outstanding miss completion, then the packet is * freed if the corresponding re-injection completion is not received @@ -453,6 +474,9 @@ struct gve_tx_ring { /* DQO fields. */ struct { + /* Spinlock for XDP tx traffic */ + spinlock_t xdp_lock; + /* Linked list of gve_tx_pending_packet_dqo. Index into * pending_packets, or -1 if empty. 
* @@ -497,6 +521,8 @@ struct gve_tx_ring { /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */ u32 free_tx_qpl_buf_cnt; }; + + atomic_t xsk_reorder_queue_tail; } dqo_tx; }; @@ -530,6 +556,9 @@ struct gve_tx_ring { /* Last TX ring index fetched by HW */ atomic_t hw_tx_head; + u16 xsk_reorder_queue_head; + u16 xsk_reorder_queue_tail; + /* List to track pending packets which received a miss * completion but not a corresponding reinjection. */ @@ -583,6 +612,8 @@ struct gve_tx_ring { struct gve_tx_pending_packet_dqo *pending_packets; s16 num_pending_packets; + u16 *xsk_reorder_queue; + u32 complq_mask; /* complq size is complq_mask + 1 */ /* QPL fields */ @@ -750,6 +781,12 @@ struct gve_rss_config { u32 *hash_lut; }; +struct gve_ptp { + struct ptp_clock_info info; + struct ptp_clock *clock; + struct gve_priv *priv; +}; + struct gve_priv { struct net_device *dev; struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ @@ -781,7 +818,9 @@ struct gve_priv { struct gve_tx_queue_config tx_cfg; struct gve_rx_queue_config rx_cfg; - u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ + unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */ + u32 num_ntfy_blks; /* split between TX and RX so must be even */ + int numa_node; struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ __be32 __iomem *db_bar2; /* "array" of doorbells */ @@ -813,6 +852,7 @@ struct gve_priv { u32 adminq_set_driver_parameter_cnt; u32 adminq_report_stats_cnt; u32 adminq_report_link_speed_cnt; + u32 adminq_report_nic_timestamp_cnt; u32 adminq_get_ptype_map_cnt; u32 adminq_verify_driver_compatibility_cnt; u32 adminq_query_flow_rules_cnt; @@ -870,6 +910,14 @@ struct gve_priv { u16 rss_lut_size; bool cache_rss_config; struct gve_rss_config rss_config; + + /* True if the device supports reading the nic clock */ + bool nic_timestamp_supported; + struct gve_ptp *ptp; + struct kernel_hwtstamp_config ts_config; + struct gve_nic_ts_report *nic_ts_report; + dma_addr_t nic_ts_report_bus; + u64 last_sync_nic_counter; /* Clock counter from last NIC TS report */ }; enum gve_service_task_flags_bit { @@ -1138,6 +1186,7 @@ static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) { switch (priv->queue_format) { case GVE_GQI_QPL_FORMAT: + case GVE_DQO_RDA_FORMAT: return true; default: return false; @@ -1161,11 +1210,15 @@ void gve_free_queue_page_list(struct gve_priv *priv, u32 id); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); -int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, - u32 flags); +int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); +int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, void *data, int len, void *frame_p); void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); +int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + struct xdp_frame *xdpf); bool gve_tx_poll(struct gve_notify_block *block, int budget); bool gve_xdp_poll(struct gve_notify_block *block, int budget); int gve_xsk_tx_poll(struct gve_notify_block *block, int budget); @@ -1249,6 +1302,24 @@ int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); int gve_flow_rules_reset(struct gve_priv *priv); /* RSS config */ int gve_init_rss_config(struct gve_priv *priv, u16 num_queues); +/* PTP and timestamping */ +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +int gve_clock_nic_ts_read(struct 
gve_priv *priv); +int gve_init_clock(struct gve_priv *priv); +void gve_teardown_clock(struct gve_priv *priv); +#else /* CONFIG_PTP_1588_CLOCK */ +static inline int gve_clock_nic_ts_read(struct gve_priv *priv) +{ + return -EOPNOTSUPP; +} + +static inline int gve_init_clock(struct gve_priv *priv) +{ + return 0; +} + +static inline void gve_teardown_clock(struct gve_priv *priv) { } +#endif /* CONFIG_PTP_1588_CLOCK */ /* report stats handling */ void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 3e8fc33cc11f..4f33d094a2ef 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -46,6 +46,7 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, struct gve_device_option_flow_steering **dev_op_flow_steering, struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_nic_timestamp **dev_op_nic_timestamp, struct gve_device_option_modify_ring **dev_op_modify_ring) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); @@ -225,6 +226,23 @@ void gve_parse_device_option(struct gve_priv *priv, "RSS config"); *dev_op_rss_config = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_NIC_TIMESTAMP: + if (option_length < sizeof(**dev_op_nic_timestamp) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Nic Timestamp", + (int)sizeof(**dev_op_nic_timestamp), + GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_nic_timestamp)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Nic Timestamp"); + *dev_op_nic_timestamp = (void *)(option + 1); + break; default: /* If we don't recognize the option just continue * without doing anything. @@ -246,6 +264,7 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, struct gve_device_option_flow_steering **dev_op_flow_steering, struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_nic_timestamp **dev_op_nic_timestamp, struct gve_device_option_modify_ring **dev_op_modify_ring) { const int num_options = be16_to_cpu(descriptor->num_device_options); @@ -269,6 +288,7 @@ gve_process_device_options(struct gve_priv *priv, dev_op_dqo_rda, dev_op_jumbo_frames, dev_op_dqo_qpl, dev_op_buffer_sizes, dev_op_flow_steering, dev_op_rss_config, + dev_op_nic_timestamp, dev_op_modify_ring); dev_opt = next_opt; } @@ -306,6 +326,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) priv->adminq_set_driver_parameter_cnt = 0; priv->adminq_report_stats_cnt = 0; priv->adminq_report_link_speed_cnt = 0; + priv->adminq_report_nic_timestamp_cnt = 0; priv->adminq_get_ptype_map_cnt = 0; priv->adminq_query_flow_rules_cnt = 0; priv->adminq_cfg_flow_rule_cnt = 0; @@ -442,6 +463,8 @@ static int gve_adminq_kick_and_wait(struct gve_priv *priv) int tail, head; int i; + lockdep_assert_held(&priv->adminq_lock); + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); head = priv->adminq_prod_cnt; @@ -467,9 +490,6 @@ static int gve_adminq_kick_and_wait(struct gve_priv *priv) return 0; } -/* This function is not threadsafe - the caller is responsible for any - * necessary locks. 
- */ static int gve_adminq_issue_cmd(struct gve_priv *priv, union gve_adminq_command *cmd_orig) { @@ -477,6 +497,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, u32 opcode; u32 tail; + lockdep_assert_held(&priv->adminq_lock); + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); // Check if next command will overflow the buffer. @@ -544,6 +566,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, case GVE_ADMINQ_REPORT_LINK_SPEED: priv->adminq_report_link_speed_cnt++; break; + case GVE_ADMINQ_REPORT_NIC_TIMESTAMP: + priv->adminq_report_nic_timestamp_cnt++; + break; case GVE_ADMINQ_GET_PTYPE_MAP: priv->adminq_get_ptype_map_cnt++; break; @@ -564,6 +589,7 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, break; default: dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); + return -EINVAL; } return 0; @@ -625,7 +651,7 @@ static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode, /* The device specifies that the management vector can either be the first irq * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to - * the ntfy blks. It if is 0 then the management vector is last, if it is 1 then + * the ntfy blks. If it is 0 then the management vector is last, if it is 1 then * the management vector is first. * * gve arranges the msix vectors so that the management vector is last. @@ -709,13 +735,19 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = start_id; i < start_id + num_queues; i++) { err = gve_adminq_create_tx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, @@ -788,13 +820,19 @@ int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues) int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = 0; i < num_queues; i++) { err = gve_adminq_create_rx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) @@ -820,13 +858,19 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = start_id; i < start_id + num_queues; i++) { err = gve_adminq_destroy_tx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd, @@ -861,13 +905,19 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = 0; i < num_queues; i++) { err = gve_adminq_destroy_rx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static void gve_set_default_desc_cnt(struct gve_priv *priv, @@ -904,6 +954,8 @@ static void gve_enable_supported_features(struct gve_priv *priv, *dev_op_flow_steering, const struct gve_device_option_rss_config *dev_op_rss_config, + const struct gve_device_option_nic_timestamp + 
*dev_op_nic_timestamp, const struct gve_device_option_modify_ring *dev_op_modify_ring) { @@ -980,10 +1032,15 @@ static void gve_enable_supported_features(struct gve_priv *priv, "RSS device option enabled with key size of %u, lut size of %u.\n", priv->rss_key_size, priv->rss_lut_size); } + + if (dev_op_nic_timestamp && + (supported_features_mask & GVE_SUP_NIC_TIMESTAMP_MASK)) + priv->nic_timestamp_supported = true; } int gve_adminq_describe_device(struct gve_priv *priv) { + struct gve_device_option_nic_timestamp *dev_op_nic_timestamp = NULL; struct gve_device_option_flow_steering *dev_op_flow_steering = NULL; struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; @@ -1024,6 +1081,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) &dev_op_buffer_sizes, &dev_op_flow_steering, &dev_op_rss_config, + &dev_op_nic_timestamp, &dev_op_modify_ring); if (err) goto free_device_descriptor; @@ -1088,7 +1146,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) gve_enable_supported_features(priv, supported_features_mask, dev_op_jumbo_frames, dev_op_dqo_qpl, dev_op_buffer_sizes, dev_op_flow_steering, - dev_op_rss_config, dev_op_modify_ring); + dev_op_rss_config, dev_op_nic_timestamp, + dev_op_modify_ring); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); @@ -1200,6 +1259,22 @@ int gve_adminq_report_link_speed(struct gve_priv *priv) return err; } +int gve_adminq_report_nic_ts(struct gve_priv *priv, + dma_addr_t nic_ts_report_addr) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_NIC_TIMESTAMP); + cmd.report_nic_ts = (struct gve_adminq_report_nic_ts) { + .nic_ts_report_len = + cpu_to_be64(sizeof(struct gve_nic_ts_report)), + .nic_ts_report_addr = cpu_to_be64(nic_ts_report_addr), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, struct gve_ptype_lut *ptype_lut) { diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 228217458275..22a74b6aa17e 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -27,6 +27,7 @@ enum gve_adminq_opcodes { GVE_ADMINQ_GET_PTYPE_MAP = 0xE, GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, GVE_ADMINQ_QUERY_FLOW_RULES = 0x10, + GVE_ADMINQ_REPORT_NIC_TIMESTAMP = 0x11, GVE_ADMINQ_QUERY_RSS = 0x12, /* For commands that are larger than 56 bytes */ @@ -174,6 +175,12 @@ struct gve_device_option_rss_config { static_assert(sizeof(struct gve_device_option_rss_config) == 8); +struct gve_device_option_nic_timestamp { + __be32 supported_features_mask; +}; + +static_assert(sizeof(struct gve_device_option_nic_timestamp) == 4); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -192,6 +199,7 @@ enum gve_dev_opt_id { GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, GVE_DEV_OPT_ID_FLOW_STEERING = 0xb, + GVE_DEV_OPT_ID_NIC_TIMESTAMP = 0xd, GVE_DEV_OPT_ID_RSS_CONFIG = 0xe, }; @@ -206,6 +214,7 @@ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP = 0x0, }; enum gve_sup_feature_mask { @@ -214,6 +223,7 @@ enum gve_sup_feature_mask { GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, GVE_SUP_FLOW_STEERING_MASK = 1 << 5, 
GVE_SUP_RSS_CONFIG_MASK = 1 << 7, + GVE_SUP_NIC_TIMESTAMP_MASK = 1 << 8, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -392,6 +402,21 @@ struct gve_adminq_report_link_speed { static_assert(sizeof(struct gve_adminq_report_link_speed) == 8); +struct gve_adminq_report_nic_ts { + __be64 nic_ts_report_len; + __be64 nic_ts_report_addr; +}; + +static_assert(sizeof(struct gve_adminq_report_nic_ts) == 16); + +struct gve_nic_ts_report { + __be64 nic_timestamp; /* NIC clock in nanoseconds */ + __be64 reserved1; + __be64 reserved2; + __be64 reserved3; + __be64 reserved4; +}; + struct stats { __be32 stat_name; __be32 queue_id; @@ -451,7 +476,7 @@ struct gve_ptype_entry { }; struct gve_ptype_map { - struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */ + struct gve_ptype_entry ptypes[GVE_NUM_PTYPES]; /* PTYPES are always 10 bits. */ }; struct gve_adminq_get_ptype_map { @@ -585,6 +610,7 @@ union gve_adminq_command { struct gve_adminq_query_flow_rules query_flow_rules; struct gve_adminq_configure_rss configure_rss; struct gve_adminq_query_rss query_rss; + struct gve_adminq_report_nic_ts report_nic_ts; struct gve_adminq_extended_command extended_command; }; }; @@ -624,6 +650,8 @@ int gve_adminq_reset_flow_rules(struct gve_priv *priv); int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc); int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); +int gve_adminq_report_nic_ts(struct gve_priv *priv, + dma_addr_t nic_ts_report_addr); struct gve_ptype_lut; int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index a71883e1d920..8f5021e59e0a 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -4,6 +4,7 @@ * Copyright (C) 2015-2024 Google, Inc. */ +#include <net/xdp_sock_drv.h> #include "gve.h" #include "gve_utils.h" @@ -29,6 +30,10 @@ struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) /* Point buf_state to itself to mark it as allocated */ buf_state->next = buffer_id; + /* Clear the buffer pointers */ + buf_state->page_info.page = NULL; + buf_state->xsk_buff = NULL; + return buf_state; } @@ -246,6 +251,7 @@ struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = 0, .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt, + .nid = priv->numa_node, .dev = &priv->pdev->dev, .netdev = priv->dev, .napi = &priv->ntfy_blocks[ntfy_id].napi, @@ -285,7 +291,24 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) { struct gve_rx_buf_state_dqo *buf_state; - if (rx->dqo.page_pool) { + if (rx->xsk_pool) { + buf_state = gve_alloc_buf_state(rx); + if (unlikely(!buf_state)) + return -ENOMEM; + + buf_state->xsk_buff = xsk_buff_alloc(rx->xsk_pool); + if (unlikely(!buf_state->xsk_buff)) { + xsk_set_rx_need_wakeup(rx->xsk_pool); + gve_free_buf_state(rx, buf_state); + return -ENOMEM; + } + /* Allocated xsk buffer. Clear wakeup in case it was set. 
*/ + xsk_clear_rx_need_wakeup(rx->xsk_pool); + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); + desc->buf_addr = + cpu_to_le64(xsk_buff_xdp_get_dma(buf_state->xsk_buff)); + return 0; + } else if (rx->dqo.page_pool) { buf_state = gve_alloc_buf_state(rx); if (WARN_ON_ONCE(!buf_state)) return -ENOMEM; diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h index f79cd0591110..d17da841b5a0 100644 --- a/drivers/net/ethernet/google/gve/gve_desc_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h @@ -247,7 +247,8 @@ struct gve_rx_compl_desc_dqo { }; __le32 hash; __le32 reserved6; - __le64 reserved7; + __le32 reserved7; + __le32 ts; /* timestamp in nanosecs */ } __packed; static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32); diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index e83773fb891f..6eb442096e02 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -37,6 +37,8 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); +bool gve_xdp_poll_dqo(struct gve_notify_block *block); +bool gve_xsk_tx_poll_dqo(struct gve_notify_block *block, int budget); int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings_dqo(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg); @@ -60,6 +62,7 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, struct napi_struct *napi); void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx); void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx); +void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid); static inline void gve_tx_put_doorbell_dqo(const struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 3c1da0cf3f61..d0a223250845 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -76,7 +76,7 @@ static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt", "adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt", - "adminq_query_rss_cnt", + "adminq_query_rss_cnt", "adminq_report_nic_timestamp_cnt", }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -456,6 +456,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_cfg_flow_rule_cnt; data[i++] = priv->adminq_cfg_rss_cnt; data[i++] = priv->adminq_query_rss_cnt; + data[i++] = priv->adminq_report_nic_timestamp_cnt; } static void gve_get_channels(struct net_device *netdev, @@ -667,7 +668,7 @@ static u32 gve_get_priv_flags(struct net_device *netdev) struct gve_priv *priv = netdev_priv(netdev); u32 ret_flags = 0; - /* Only 1 flag exists currently: report-stats (BIT(O)), so set that flag. */ + /* Only 1 flag exists currently: report-stats (BIT(0)), so set that flag. 
*/ if (priv->ethtool_flags & BIT(0)) ret_flags |= BIT(0); return ret_flags; @@ -798,9 +799,6 @@ static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXCLSRLDEL: err = gve_del_flow_rule(priv, cmd); break; - case ETHTOOL_SRXFH: - err = -EOPNOTSUPP; - break; default: err = -EOPNOTSUPP; break; @@ -835,9 +833,6 @@ static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u case ETHTOOL_GRXCLSRLALL: err = gve_get_flow_rule_ids(priv, cmd, (u32 *)rule_locs); break; - case ETHTOOL_GRXFH: - err = -EOPNOTSUPP; - break; default: err = -EOPNOTSUPP; break; @@ -928,6 +923,27 @@ static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rx return 0; } +static int gve_get_ts_info(struct net_device *netdev, + struct kernel_ethtool_ts_info *info) +{ + struct gve_priv *priv = netdev_priv(netdev); + + ethtool_op_get_ts_info(netdev, info); + + if (priv->nic_timestamp_supported) { + info->so_timestamping |= SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->rx_filters |= BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + if (priv->ptp) + info->phc_index = ptp_clock_index(priv->ptp->clock); + } + + return 0; +} + const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, @@ -956,5 +972,5 @@ const struct ethtool_ops gve_ethtool_ops = { .get_priv_flags = gve_get_priv_flags, .set_priv_flags = gve_set_priv_flags, .get_link_ksettings = gve_get_link_ksettings, - .get_ts_info = ethtool_op_get_ts_info, + .get_ts_info = gve_get_ts_info, }; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index dc35a23ec47f..1f411d7c4373 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -4,6 +4,7 @@ * Copyright (C) 2015-2024 Google LLC */ +#include <linux/bitmap.h> #include <linux/bpf.h> #include <linux/cpumask.h> #include <linux/etherdevice.h> @@ -414,14 +415,24 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) bool reschedule = false; int work_done = 0; - if (block->tx) - reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + if (block->tx) { + if (block->tx->q_num < priv->tx_cfg.num_queues) + reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + else + reschedule |= gve_xdp_poll_dqo(block); + } if (!budget) return 0; if (block->rx) { work_done = gve_rx_poll_dqo(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if + * either datapath has more work to do. 
+ */ + if (priv->xdp_prog) + reschedule |= gve_xsk_tx_poll_dqo(block, budget); reschedule |= work_done == budget; } @@ -457,10 +468,19 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) return work_done; } +static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) +{ + if (priv->numa_node == NUMA_NO_NODE) + return cpu_all_mask; + else + return cpumask_of_node(priv->numa_node); +} + static int gve_alloc_notify_blocks(struct gve_priv *priv) { int num_vecs_requested = priv->num_ntfy_blks + 1; - unsigned int active_cpus; + const struct cpumask *node_mask; + unsigned int cur_cpu; int vecs_enabled; int i, j; int err; @@ -499,8 +519,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; } - /* Half the notification blocks go to TX and half to RX */ - active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); /* Setup Management Vector - the last vector */ snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", @@ -529,6 +547,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) } /* Setup the other blocks - the first n-1 vectors */ + node_mask = gve_get_node_mask(priv); + cur_cpu = cpumask_first(node_mask); for (i = 0; i < priv->num_ntfy_blks; i++) { struct gve_notify_block *block = &priv->ntfy_blocks[i]; int msix_idx = i; @@ -545,9 +565,17 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) goto abort_with_some_ntfy_blocks; } block->irq = priv->msix_vectors[msix_idx].vector; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - get_cpu_mask(i % active_cpus)); + irq_set_affinity_and_hint(block->irq, + cpumask_of(cur_cpu)); block->irq_db_index = &priv->irq_db_indices[i].index; + + cur_cpu = cpumask_next(cur_cpu, node_mask); + /* Wrap once CPUs in the node have been exhausted, or when + * starting RX queue affinities. TX and RX queues of the same + * index share affinity. 
+ */ + if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues) + cur_cpu = cpumask_first(node_mask); } return 0; abort_with_some_ntfy_blocks: @@ -619,9 +647,12 @@ static int gve_setup_device_resources(struct gve_priv *priv) err = gve_alloc_counter_array(priv); if (err) goto abort_with_rss_config_cache; - err = gve_alloc_notify_blocks(priv); + err = gve_init_clock(priv); if (err) goto abort_with_counter; + err = gve_alloc_notify_blocks(priv); + if (err) + goto abort_with_clock; err = gve_alloc_stats_report(priv); if (err) goto abort_with_ntfy_blocks; @@ -674,6 +705,8 @@ abort_with_stats_report: gve_free_stats_report(priv); abort_with_ntfy_blocks: gve_free_notify_blocks(priv); +abort_with_clock: + gve_teardown_clock(priv); abort_with_counter: gve_free_counter_array(priv); abort_with_rss_config_cache: @@ -722,6 +755,7 @@ static void gve_teardown_device_resources(struct gve_priv *priv) gve_free_counter_array(priv); gve_free_notify_blocks(priv); gve_free_stats_report(priv); + gve_teardown_clock(priv); gve_clear_device_resources_ok(priv); } @@ -1030,7 +1064,7 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, enum dma_data_direction dir, gfp_t gfp_flags) { - *page = alloc_page(gfp_flags); + *page = alloc_pages_node(priv->numa_node, gfp_flags, 0); if (!*page) { priv->page_alloc_fail++; return -ENOMEM; @@ -1131,18 +1165,84 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up); static void gve_turndown(struct gve_priv *priv); static void gve_turnup(struct gve_priv *priv); +static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid) +{ + struct gve_rx_ring *rx; + + if (!priv->rx) + return; + + rx = &priv->rx[qid]; + rx->xsk_pool = NULL; + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq); + + if (!priv->tx) + return; + priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL; +} + +static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev, + struct xsk_buff_pool *pool, u16 qid) +{ + struct gve_rx_ring *rx; + u16 tx_qid; + int err; + + rx = &priv->rx[qid]; + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, pool); + if (err) { + gve_unreg_xsk_pool(priv, qid); + return err; + } + + rx->xsk_pool = pool; + + tx_qid = gve_xdp_tx_queue_id(priv, qid); + priv->tx[tx_qid].xsk_pool = pool; + + return 0; +} + +static void gve_unreg_xdp_info(struct gve_priv *priv) +{ + int i; + + if (!priv->tx_cfg.num_xdp_queues || !priv->rx) + return; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct gve_rx_ring *rx = &priv->rx[i]; + + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg(&rx->xdp_rxq); + + gve_unreg_xsk_pool(priv, i); + } +} + +static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid) +{ + if (!test_bit(qid, priv->xsk_pools)) + return NULL; + + return xsk_get_pool_from_qid(priv->dev, qid); +} + static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) { struct napi_struct *napi; struct gve_rx_ring *rx; int err = 0; - int i, j; - u32 tx_qid; + int i; if (!priv->tx_cfg.num_xdp_queues) return 0; for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct xsk_buff_pool *xsk_pool; + rx = &priv->rx[i]; napi = &priv->ntfy_blocks[rx->ntfy_id].napi; @@ -1150,7 +1250,11 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) napi->napi_id); if (err) goto err; - if (gve_is_qpl(priv)) + + xsk_pool = gve_get_xsk_pool(priv, i); + if (xsk_pool) + err = gve_reg_xsk_pool(priv, dev, xsk_pool, i); + else if 
(gve_is_qpl(priv)) err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); @@ -1160,60 +1264,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) rx->dqo.page_pool); if (err) goto err; - rx->xsk_pool = xsk_get_pool_from_qid(dev, i); - if (rx->xsk_pool) { - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, - napi->napi_id); - if (err) - goto err; - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, - MEM_TYPE_XSK_BUFF_POOL, NULL); - if (err) - goto err; - xsk_pool_set_rxq_info(rx->xsk_pool, - &rx->xsk_rxq); - } - } - - for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { - tx_qid = gve_xdp_tx_queue_id(priv, i); - priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); } return 0; err: - for (j = i; j >= 0; j--) { - rx = &priv->rx[j]; - if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) - xdp_rxq_info_unreg(&rx->xdp_rxq); - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) - xdp_rxq_info_unreg(&rx->xsk_rxq); - } + gve_unreg_xdp_info(priv); return err; } -static void gve_unreg_xdp_info(struct gve_priv *priv) -{ - int i, tx_qid; - - if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) - return; - - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - struct gve_rx_ring *rx = &priv->rx[i]; - - xdp_rxq_info_unreg(&rx->xdp_rxq); - if (rx->xsk_pool) { - xdp_rxq_info_unreg(&rx->xsk_rxq); - rx->xsk_pool = NULL; - } - } - - for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { - tx_qid = gve_xdp_tx_queue_id(priv, i); - priv->tx[tx_qid].xsk_pool = NULL; - } -} static void gve_drain_page_cache(struct gve_priv *priv) { @@ -1510,14 +1568,24 @@ out: return err; } +static int gve_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (priv->queue_format == GVE_GQI_QPL_FORMAT) + return gve_xdp_xmit_gqi(dev, n, frames, flags); + else if (priv->queue_format == GVE_DQO_RDA_FORMAT) + return gve_xdp_xmit_dqo(dev, n, frames, flags); + + return -EOPNOTSUPP; +} + static int gve_xsk_pool_enable(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) { struct gve_priv *priv = netdev_priv(dev); - struct napi_struct *napi; - struct gve_rx_ring *rx; - int tx_qid; int err; if (qid >= priv->rx_cfg.num_queues) { @@ -1535,34 +1603,31 @@ static int gve_xsk_pool_enable(struct net_device *dev, if (err) return err; + set_bit(qid, priv->xsk_pools); + /* If XDP prog is not installed or interface is down, return. */ if (!priv->xdp_prog || !netif_running(dev)) return 0; - rx = &priv->rx[qid]; - napi = &priv->ntfy_blocks[rx->ntfy_id].napi; - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); - if (err) - goto err; - - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, - MEM_TYPE_XSK_BUFF_POOL, NULL); + err = gve_reg_xsk_pool(priv, dev, pool, qid); if (err) - goto err; - - xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); - rx->xsk_pool = pool; - - tx_qid = gve_xdp_tx_queue_id(priv, qid); - priv->tx[tx_qid].xsk_pool = pool; + goto err_xsk_pool_dma_mapped; + /* Stop and start RDA queues to repost buffers. 
*/ + if (!gve_is_qpl(priv)) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + if (err) + goto err_xsk_pool_registered; + } return 0; -err: - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) - xdp_rxq_info_unreg(&rx->xsk_rxq); +err_xsk_pool_registered: + gve_unreg_xsk_pool(priv, qid); +err_xsk_pool_dma_mapped: + clear_bit(qid, priv->xsk_pools); xsk_pool_dma_unmap(pool, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); return err; } @@ -1574,18 +1639,28 @@ static int gve_xsk_pool_disable(struct net_device *dev, struct napi_struct *napi_tx; struct xsk_buff_pool *pool; int tx_qid; + int err; - pool = xsk_get_pool_from_qid(dev, qid); - if (!pool) - return -EINVAL; if (qid >= priv->rx_cfg.num_queues) return -EINVAL; - /* If XDP prog is not installed or interface is down, unmap DMA and - * return. - */ - if (!priv->xdp_prog || !netif_running(dev)) - goto done; + clear_bit(qid, priv->xsk_pools); + + pool = xsk_get_pool_from_qid(dev, qid); + if (pool) + xsk_pool_dma_unmap(pool, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); + + if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues) + return 0; + + /* Stop and start RDA queues to repost buffers. */ + if (!gve_is_qpl(priv) && priv->xdp_prog) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + if (err) + return err; + } napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; napi_disable(napi_rx); /* make sure current rx poll is done */ @@ -1594,22 +1669,19 @@ static int gve_xsk_pool_disable(struct net_device *dev, napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; napi_disable(napi_tx); /* make sure current tx poll is done */ - priv->rx[qid].xsk_pool = NULL; - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); - priv->tx[tx_qid].xsk_pool = NULL; + gve_unreg_xsk_pool(priv, qid); smp_mb(); /* Make sure it is visible to the workers on datapath */ napi_enable(napi_rx); - if (gve_rx_work_pending(&priv->rx[qid])) - napi_schedule(napi_rx); - napi_enable(napi_tx); - if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) - napi_schedule(napi_tx); + if (gve_is_gqi(priv)) { + if (gve_rx_work_pending(&priv->rx[qid])) + napi_schedule(napi_rx); + + if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) + napi_schedule(napi_tx); + } -done: - xsk_pool_dma_unmap(pool, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); return 0; } @@ -1645,9 +1717,8 @@ static int verify_xdp_configuration(struct net_device *dev) return -EOPNOTSUPP; } - if (priv->queue_format != GVE_GQI_QPL_FORMAT) { - netdev_warn(dev, "XDP is not supported in mode %d.\n", - priv->queue_format); + if (priv->header_split_enabled) { + netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n"); return -EOPNOTSUPP; } @@ -1727,7 +1798,7 @@ int gve_adjust_config(struct gve_priv *priv, { int err; - /* Allocate resources for the new confiugration */ + /* Allocate resources for the new configuration */ err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); if (err) { netif_err(priv, drv, priv->dev, @@ -1917,49 +1988,56 @@ static void gve_turnup_and_check_status(struct gve_priv *priv) gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); } -static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) +static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv, + unsigned int txqueue) { - struct gve_notify_block *block; - struct gve_tx_ring *tx = NULL; - struct gve_priv *priv; - u32 last_nic_done; - u32 current_time; u32 ntfy_idx; - netdev_info(dev, 
"Timeout on tx queue, %d", txqueue); - priv = netdev_priv(dev); if (txqueue > priv->tx_cfg.num_queues) - goto reset; + return NULL; ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); if (ntfy_idx >= priv->num_ntfy_blks) - goto reset; + return NULL; + + return &priv->ntfy_blocks[ntfy_idx]; +} - block = &priv->ntfy_blocks[ntfy_idx]; - tx = block->tx; +static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv, + unsigned int txqueue) +{ + struct gve_notify_block *block; + u32 current_time; + + block = gve_get_tx_notify_block(priv, txqueue); + + if (!block) + return false; current_time = jiffies_to_msecs(jiffies); - if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) - goto reset; + if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) + return false; - /* Check to see if there are missed completions, which will allow us to - * kick the queue. - */ - last_nic_done = gve_tx_load_event_counter(priv, tx); - if (last_nic_done - tx->done) { - netdev_info(dev, "Kicking queue %d", txqueue); - iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); - napi_schedule(&block->napi); - tx->last_kick_msec = current_time; - goto out; - } // Else reset. + netdev_info(priv->dev, "Kicking queue %d", txqueue); + napi_schedule(&block->napi); + block->tx->last_kick_msec = current_time; + return true; +} -reset: - gve_schedule_reset(priv); +static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct gve_notify_block *block; + struct gve_priv *priv; -out: - if (tx) - tx->queue_timeout++; + netdev_info(dev, "Timeout on tx queue, %d", txqueue); + priv = netdev_priv(dev); + + if (!gve_tx_timeout_try_q_kick(priv, txqueue)) + gve_schedule_reset(priv); + + block = gve_get_tx_notify_block(priv, txqueue); + if (block) + block->tx->queue_timeout++; priv->tx_timeo_cnt++; } @@ -1971,10 +2049,13 @@ u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) return GVE_DEFAULT_RX_BUFFER_SIZE; } -/* header-split is not supported on non-DQO_RDA yet even if device advertises it */ +/* Header split is only supported on DQ RDA queue format. If XDP is enabled, + * header split is not allowed. 
+ */ bool gve_header_split_supported(const struct gve_priv *priv) { - return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; + return priv->header_buf_size && + priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; } int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) @@ -2023,6 +2104,12 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; + if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) { + netdev_warn(netdev, + "XDP is not supported when LRO is on.\n"); + err = -EOPNOTSUPP; + goto revert_features; + } if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) @@ -2042,6 +2129,46 @@ revert_features: return err; } +static int gve_get_ts_config(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config) +{ + struct gve_priv *priv = netdev_priv(dev); + + *kernel_config = priv->ts_config; + return 0; +} + +static int gve_set_ts_config(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (kernel_config->tx_type != HWTSTAMP_TX_OFF) { + NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported"); + return -ERANGE; + } + + if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) { + if (!priv->nic_ts_report) { + NL_SET_ERR_MSG_MOD(extack, + "RX timestamping is not supported"); + kernel_config->rx_filter = HWTSTAMP_FILTER_NONE; + return -EOPNOTSUPP; + } + + kernel_config->rx_filter = HWTSTAMP_FILTER_ALL; + gve_clock_nic_ts_read(priv); + ptp_schedule_worker(priv->ptp->clock, 0); + } else { + ptp_cancel_worker_sync(priv->ptp->clock); + } + + priv->ts_config.rx_filter = kernel_config->rx_filter; + + return 0; +} + static const struct net_device_ops gve_netdev_ops = { .ndo_start_xmit = gve_start_xmit, .ndo_features_check = gve_features_check, @@ -2053,6 +2180,8 @@ static const struct net_device_ops gve_netdev_ops = { .ndo_bpf = gve_xdp, .ndo_xdp_xmit = gve_xdp_xmit, .ndo_xsk_wakeup = gve_xsk_wakeup, + .ndo_hwtstamp_get = gve_get_ts_config, + .ndo_hwtstamp_set = gve_set_ts_config, }; static void gve_handle_status(struct gve_priv *priv, u32 status) @@ -2182,6 +2311,10 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) xdp_features = NETDEV_XDP_ACT_BASIC; xdp_features |= NETDEV_XDP_ACT_REDIRECT; xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + xdp_features = NETDEV_XDP_ACT_BASIC; + xdp_features |= NETDEV_XDP_ACT_REDIRECT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { xdp_features = 0; } @@ -2236,7 +2369,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } - /* Big TCP is only supported on DQ*/ + /* Big TCP is only supported on DQO */ if (!gve_is_gqi(priv)) netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); @@ -2246,6 +2379,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) */ priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; priv->mgmt_msix_idx = priv->num_ntfy_blks; + priv->numa_node = dev_to_node(&priv->pdev->dev); priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); @@ -2272,11 +2406,26 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; } + priv->ts_config.tx_type = HWTSTAMP_TX_OFF; + priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; + 
setup_device: + priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL); + if (!priv->xsk_pools) { + err = -ENOMEM; + goto err; + } + gve_set_netdev_xdp_features(priv); err = gve_setup_device_resources(priv); - if (!err) - return 0; + if (err) + goto err_free_xsk_bitmap; + + return 0; + +err_free_xsk_bitmap: + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; err: gve_adminq_free(&priv->pdev->dev, priv); return err; @@ -2286,6 +2435,8 @@ static void gve_teardown_priv_resources(struct gve_priv *priv) { gve_teardown_device_resources(priv); gve_adminq_free(&priv->pdev->dev, priv); + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; } static void gve_trigger_reset(struct gve_priv *priv) diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c new file mode 100644 index 000000000000..e96247c9d68d --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2025 Google LLC + */ + +#include "gve.h" +#include "gve_adminq.h" + +/* Interval to schedule a nic timestamp calibration, 250ms. */ +#define GVE_NIC_TS_SYNC_INTERVAL_MS 250 + +/* Read the nic timestamp from hardware via the admin queue. */ +int gve_clock_nic_ts_read(struct gve_priv *priv) +{ + u64 nic_raw; + int err; + + err = gve_adminq_report_nic_ts(priv, priv->nic_ts_report_bus); + if (err) + return err; + + nic_raw = be64_to_cpu(priv->nic_ts_report->nic_timestamp); + WRITE_ONCE(priv->last_sync_nic_counter, nic_raw); + + return 0; +} + +static long gve_ptp_do_aux_work(struct ptp_clock_info *info) +{ + const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); + struct gve_priv *priv = ptp->priv; + int err; + + if (gve_get_reset_in_progress(priv) || !gve_get_admin_queue_ok(priv)) + goto out; + + err = gve_clock_nic_ts_read(priv); + if (err && net_ratelimit()) + dev_err(&priv->pdev->dev, + "%s read err %d\n", __func__, err); + +out: + return msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS); +} + +static const struct ptp_clock_info gve_ptp_caps = { + .owner = THIS_MODULE, + .name = "gve clock", + .do_aux_work = gve_ptp_do_aux_work, +}; + +static int gve_ptp_init(struct gve_priv *priv) +{ + struct gve_ptp *ptp; + int err; + + if (!priv->nic_timestamp_supported) { + dev_dbg(&priv->pdev->dev, "Device does not support PTP\n"); + return -EOPNOTSUPP; + } + + priv->ptp = kzalloc(sizeof(*priv->ptp), GFP_KERNEL); + if (!priv->ptp) + return -ENOMEM; + + ptp = priv->ptp; + ptp->info = gve_ptp_caps; + ptp->clock = ptp_clock_register(&ptp->info, &priv->pdev->dev); + + if (IS_ERR(ptp->clock)) { + dev_err(&priv->pdev->dev, "PTP clock registration failed\n"); + err = PTR_ERR(ptp->clock); + goto free_ptp; + } + + ptp->priv = priv; + return 0; + +free_ptp: + kfree(ptp); + priv->ptp = NULL; + return err; +} + +static void gve_ptp_release(struct gve_priv *priv) +{ + struct gve_ptp *ptp = priv->ptp; + + if (!ptp) + return; + + if (ptp->clock) + ptp_clock_unregister(ptp->clock); + + kfree(ptp); + priv->ptp = NULL; +} + +int gve_init_clock(struct gve_priv *priv) +{ + int err; + + if (!priv->nic_timestamp_supported) + return 0; + + err = gve_ptp_init(priv); + if (err) + return err; + + priv->nic_ts_report = + dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct gve_nic_ts_report), + &priv->nic_ts_report_bus, + GFP_KERNEL); + if (!priv->nic_ts_report) { + dev_err(&priv->pdev->dev, "%s dma alloc error\n", __func__); + err = -ENOMEM; + goto release_ptp; + } + + return 
0; + +release_ptp: + gve_ptp_release(priv); + return err; +} + +void gve_teardown_clock(struct gve_priv *priv) +{ + gve_ptp_release(priv); + + if (priv->nic_ts_report) { + dma_free_coherent(&priv->pdev->dev, + sizeof(struct gve_nic_ts_report), + priv->nic_ts_report, priv->nic_ts_report_bus); + priv->nic_ts_report = NULL; + } +} diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 90e875c1832f..ec424d2f4f57 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -192,8 +192,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, */ slots = rx->mask + 1; - rx->data.page_info = kvzalloc(slots * - sizeof(*rx->data.page_info), GFP_KERNEL); + rx->data.page_info = kvcalloc_node(slots, sizeof(*rx->data.page_info), + GFP_KERNEL, priv->numa_node); if (!rx->data.page_info) return -ENOMEM; @@ -216,7 +216,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, if (!rx->data.raw_addressing) { for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) { - struct page *page = alloc_page(GFP_KERNEL); + struct page *page = alloc_pages_node(priv->numa_node, + GFP_KERNEL, 0); if (!page) { err = -ENOMEM; @@ -303,10 +304,9 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1; rx->qpl_copy_pool_head = 0; - rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1, - sizeof(rx->qpl_copy_pool[0]), - GFP_KERNEL); - + rx->qpl_copy_pool = kvcalloc_node(rx->qpl_copy_pool_mask + 1, + sizeof(rx->qpl_copy_pool[0]), + GFP_KERNEL, priv->numa_node); if (!rx->qpl_copy_pool) { err = -ENOMEM; goto abort_with_slots; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index dcb0545baa50..7380c2b7a2d8 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -8,6 +8,7 @@ #include "gve_dqo.h" #include "gve_adminq.h" #include "gve_utils.h" +#include <linux/bpf.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/skbuff.h> @@ -15,6 +16,7 @@ #include <net/ip6_checksum.h> #include <net/ipv6.h> #include <net/tcp.h> +#include <net/xdp_sock_drv.h> static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) { @@ -148,6 +150,10 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, gve_free_to_page_pool(rx, bs, false); else gve_free_qpl_page_dqo(bs); + if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) { + xsk_buff_free(bs->xsk_buff); + bs->xsk_buff = NULL; + } } if (rx->dqo.qpl) { @@ -236,9 +242,9 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); - rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, - sizeof(rx->dqo.buf_states[0]), - GFP_KERNEL); + rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states, + sizeof(rx->dqo.buf_states[0]), + GFP_KERNEL, priv->numa_node); if (!rx->dqo.buf_states) return -ENOMEM; @@ -437,6 +443,29 @@ static void gve_rx_skb_hash(struct sk_buff *skb, skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); } +/* Expand the hardware timestamp to the full 64 bits of width, and add it to the + * skb. + * + * This algorithm works by using the passed hardware timestamp to generate a + * diff relative to the last read of the nic clock. 
This diff can be positive or + * negative, as it is possible that we have read the clock more recently than + * the hardware has received this packet. To detect this, we use the high bit of + * the diff, and assume that the read is more recent if the high bit is set. In + * this case we invert the process. + * + * Note that this means if the time delta between packet reception and the last + * clock read is greater than ~2 seconds, this will provide invalid results. + */ +static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts) +{ + u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter); + struct sk_buff *skb = rx->ctx.skb_head; + u32 low = (u32)last_read; + s32 diff = hwts - low; + + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff); +} + static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) { if (!rx->ctx.skb_head) @@ -464,7 +493,7 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state, u16 buf_len) { - struct page *page = alloc_page(GFP_ATOMIC); + struct page *page = alloc_pages_node(rx->gve->numa_node, GFP_ATOMIC, 0); int num_frags; if (!page) @@ -547,27 +576,146 @@ static int gve_rx_append_frags(struct napi_struct *napi, return 0; } +static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp) +{ + struct gve_tx_ring *tx; + struct xdp_frame *xdpf; + u32 tx_qid; + int err; + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + if (rx->xsk_pool) + xsk_buff_free(xdp); + return -ENOSPC; + } + + tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num); + tx = &priv->tx[tx_qid]; + spin_lock(&tx->dqo_tx.xdp_lock); + err = gve_xdp_xmit_one_dqo(priv, tx, xdpf); + spin_unlock(&tx->dqo_tx.xdp_lock); + + return err; +} + +static void gve_xsk_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp, struct bpf_prog *xprog, + int xdp_act) +{ + switch (xdp_act) { + case XDP_ABORTED: + case XDP_DROP: + default: + xsk_buff_free(xdp); + break; + case XDP_TX: + if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp))) + goto err; + break; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog))) + goto err; + break; + } + + u64_stats_update_begin(&rx->statss); + if ((u32)xdp_act < GVE_XDP_ACTIONS) + rx->xdp_actions[xdp_act]++; + u64_stats_update_end(&rx->statss); + return; + +err: + u64_stats_update_begin(&rx->statss); + if (xdp_act == XDP_TX) + rx->xdp_tx_errors++; + if (xdp_act == XDP_REDIRECT) + rx->xdp_redirect_errors++; + u64_stats_update_end(&rx->statss); +} + static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, struct xdp_buff *xdp, struct bpf_prog *xprog, int xdp_act, struct gve_rx_buf_state_dqo *buf_state) { - u64_stats_update_begin(&rx->statss); + int err; switch (xdp_act) { case XDP_ABORTED: case XDP_DROP: default: - rx->xdp_actions[xdp_act]++; + gve_free_buffer(rx, buf_state); break; case XDP_TX: - rx->xdp_tx_errors++; + err = gve_xdp_tx_dqo(priv, rx, xdp); + if (unlikely(err)) + goto err; + gve_reuse_buffer(rx, buf_state); break; case XDP_REDIRECT: - rx->xdp_redirect_errors++; + err = xdp_do_redirect(priv->dev, xdp, xprog); + if (unlikely(err)) + goto err; + gve_reuse_buffer(rx, buf_state); break; } + u64_stats_update_begin(&rx->statss); + if ((u32)xdp_act < GVE_XDP_ACTIONS) + rx->xdp_actions[xdp_act]++; + u64_stats_update_end(&rx->statss); + return; +err: + u64_stats_update_begin(&rx->statss); + if (xdp_act == XDP_TX) + rx->xdp_tx_errors++; + else if (xdp_act == XDP_REDIRECT) + rx->xdp_redirect_errors++; 
u64_stats_update_end(&rx->statss); gve_free_buffer(rx, buf_state); + return; +} + +static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, int buf_len, + struct bpf_prog *xprog) +{ + struct xdp_buff *xdp = buf_state->xsk_buff; + struct gve_priv *priv = rx->gve; + int xdp_act; + + xdp->data_end = xdp->data + buf_len; + xsk_buff_dma_sync_for_cpu(xdp); + + if (xprog) { + xdp_act = bpf_prog_run_xdp(xprog, xdp); + buf_len = xdp->data_end - xdp->data; + if (xdp_act != XDP_PASS) { + gve_xsk_done_dqo(priv, rx, xdp, xprog, xdp_act); + gve_free_buf_state(rx, buf_state); + return 0; + } + } + + /* Copy the data to skb */ + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, + xdp->data, buf_len); + if (unlikely(!rx->ctx.skb_head)) { + xsk_buff_free(xdp); + gve_free_buf_state(rx, buf_state); + return -ENOMEM; + } + rx->ctx.skb_tail = rx->ctx.skb_head; + + /* Free XSK buffer and Buffer state */ + xsk_buff_free(xdp); + gve_free_buf_state(rx, buf_state); + + /* Update Stats */ + u64_stats_update_begin(&rx->statss); + rx->xdp_actions[XDP_PASS]++; + u64_stats_update_end(&rx->statss); + return 0; } /* Returns 0 if descriptor is completed successfully. @@ -608,6 +756,10 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, buf_len = compl_desc->packet_len; hdr_len = compl_desc->header_len; + xprog = READ_ONCE(priv->xdp_prog); + if (buf_state->xsk_buff) + return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog); + /* Page might have not been used for awhile and was likely last written * by a different thread. */ @@ -658,7 +810,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } - xprog = READ_ONCE(priv->xdp_prog); if (xprog) { struct xdp_buff xdp; void *old_data; @@ -767,6 +918,9 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi, if (feat & NETIF_F_RXCSUM) gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype); + if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL) + gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts)); + /* RSC packets must set gso_size otherwise the TCP stack will complain * that packets are larger than MTU. 
*/ @@ -786,16 +940,27 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi, int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) { - struct napi_struct *napi = &block->napi; - netdev_features_t feat = napi->dev->features; - - struct gve_rx_ring *rx = block->rx; - struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq; - + struct gve_rx_compl_queue_dqo *complq; + struct napi_struct *napi; + netdev_features_t feat; + struct gve_rx_ring *rx; + struct gve_priv *priv; + u64 xdp_redirects; u32 work_done = 0; u64 bytes = 0; + u64 xdp_txs; int err; + napi = &block->napi; + feat = napi->dev->features; + + rx = block->rx; + priv = rx->gve; + complq = &rx->dqo.complq; + + xdp_redirects = rx->xdp_actions[XDP_REDIRECT]; + xdp_txs = rx->xdp_actions[XDP_TX]; + while (work_done < budget) { struct gve_rx_compl_desc_dqo *compl_desc = &complq->desc_ring[complq->head]; @@ -869,6 +1034,12 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) rx->ctx.skb_tail = NULL; } + if (xdp_txs != rx->xdp_actions[XDP_TX]) + gve_xdp_tx_flush_dqo(priv, rx->q_num); + + if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT]) + xdp_do_flush(); + gve_rx_post_buffers_dqo(rx); u64_stats_update_begin(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 1b40bf0c811a..c6ff0968929d 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -823,8 +823,8 @@ static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx, return ndescs; } -int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, - u32 flags) +int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) { struct gve_priv *priv = netdev_priv(dev); struct gve_tx_ring *tx; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 9d705d94b065..6f1d515673d2 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -9,9 +9,11 @@ #include "gve_utils.h" #include "gve_dqo.h" #include <net/ip.h> +#include <linux/bpf.h> #include <linux/tcp.h> #include <linux/slab.h> #include <linux/skbuff.h> +#include <net/xdp_sock_drv.h> /* Returns true if tx_bufs are available. 
*/ static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count) @@ -110,6 +112,14 @@ static bool gve_has_pending_packet(struct gve_tx_ring *tx) return false; } +void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid) +{ + u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid); + struct gve_tx_ring *tx = &priv->tx[tx_qid]; + + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); +} + static struct gve_tx_pending_packet_dqo * gve_alloc_pending_packet(struct gve_tx_ring *tx) { @@ -198,7 +208,8 @@ void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx) gve_remove_napi(priv, ntfy_idx); gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL); - netdev_tx_reset_queue(tx->netdev_txq); + if (tx->netdev_txq) + netdev_tx_reset_queue(tx->netdev_txq); gve_tx_clean_pending_packets(tx); gve_tx_remove_from_block(priv, idx); } @@ -231,6 +242,9 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, tx->dqo.tx_ring = NULL; } + kvfree(tx->dqo.xsk_reorder_queue); + tx->dqo.xsk_reorder_queue = NULL; + kvfree(tx->dqo.pending_packets); tx->dqo.pending_packets = NULL; @@ -276,7 +290,8 @@ void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx) gve_tx_add_to_block(priv, idx); - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + if (idx < priv->tx_cfg.num_queues) + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); } @@ -295,6 +310,7 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, memset(tx, 0, sizeof(*tx)); tx->q_num = idx; tx->dev = hdev; + spin_lock_init(&tx->dqo_tx.xdp_lock); atomic_set_release(&tx->dqo_compl.hw_tx_head, 0); /* Queue sizes must be a power of 2 */ @@ -333,6 +349,17 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1; atomic_set_release(&tx->dqo_compl.free_pending_packets, -1); + + /* Only alloc xsk pool for XDP queues */ + if (idx >= cfg->qcfg->num_queues && cfg->num_xdp_rings) { + tx->dqo.xsk_reorder_queue = + kvcalloc(tx->dqo.complq_mask + 1, + sizeof(tx->dqo.xsk_reorder_queue[0]), + GFP_KERNEL); + if (!tx->dqo.xsk_reorder_queue) + goto err; + } + tx->dqo_compl.miss_completions.head = -1; tx->dqo_compl.miss_completions.tail = -1; tx->dqo_compl.timed_out_completions.head = -1; @@ -439,12 +466,28 @@ static u32 num_avail_tx_slots(const struct gve_tx_ring *tx) return tx->mask - num_used; } +/* Checks if the requested number of slots are available in the ring */ +static bool gve_has_tx_slots_available(struct gve_tx_ring *tx, u32 slots_req) +{ + u32 num_avail = num_avail_tx_slots(tx); + + slots_req += GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP; + + if (num_avail >= slots_req) + return true; + + /* Update cached TX head pointer */ + tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); + + return num_avail_tx_slots(tx) >= slots_req; +} + static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx, int desc_count, int buf_count) { return gve_has_pending_packet(tx) && - num_avail_tx_slots(tx) >= desc_count && - gve_has_free_tx_qpl_bufs(tx, buf_count); + gve_has_tx_slots_available(tx, desc_count) && + gve_has_free_tx_qpl_bufs(tx, buf_count); } /* Stops the queue if available descriptors is less than 'count'. 
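The gve_has_tx_slots_available() helper added in the hunk above illustrates a common ring-buffer pattern: the transmit path works against a cached copy of the hardware's consumer head and only re-reads the shared head, with acquire semantics, when the cached value no longer shows enough room. Below is a simplified, standalone userspace sketch of that pattern, assuming a power-of-two ring; the names (demo_ring, ring_space, ring_has_room) are illustrative and are not part of the driver.

/*
 * Minimal sketch of the cached-head pattern: the producer keeps a stale
 * copy of the consumer's head and only pays for an acquire load of the
 * shared head on the slow path, when the cached value suggests the ring
 * is full.  The real driver additionally reserves a few extra descriptors
 * (GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP) on top of the request.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define RING_SIZE 256u              /* must be a power of two */
#define RING_MASK (RING_SIZE - 1)

struct demo_ring {
	uint32_t tail;              /* producer index, producer-local */
	uint32_t cached_head;       /* last consumer head the producer saw */
	_Atomic uint32_t hw_head;   /* consumer head, advanced elsewhere */
};

static uint32_t ring_space(const struct demo_ring *r)
{
	/* Free slots, computed from the (possibly stale) cached head. */
	return RING_MASK - ((r->tail - r->cached_head) & RING_MASK);
}

static bool ring_has_room(struct demo_ring *r, uint32_t slots_req)
{
	if (ring_space(r) >= slots_req)
		return true;

	/* Slow path: refresh the cached head, then check once more. */
	r->cached_head = atomic_load_explicit(&r->hw_head,
					      memory_order_acquire);
	return ring_space(r) >= slots_req;
}

In the driver the refresh uses atomic_read_acquire(&tx->dqo_compl.hw_tx_head), which pairs with the release store performed when the completion path advances the head, so the slow path never observes a head that is ahead of the completions already processed.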
@@ -456,12 +499,6 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) return 0; - /* Update cached TX head pointer */ - tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); - - if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) - return 0; - /* No space, so stop the queue */ tx->stop_queue++; netif_tx_stop_queue(tx->netdev_txq); @@ -472,8 +509,6 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, /* After stopping queue, check if we can transmit again in order to * avoid TOCTOU bug. */ - tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); - if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) return -EBUSY; @@ -500,11 +535,9 @@ static void gve_extract_tx_metadata_dqo(const struct sk_buff *skb, } static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, - struct sk_buff *skb, u32 len, u64 addr, + bool enable_csum, u32 len, u64 addr, s16 compl_tag, bool eop, bool is_gso) { - const bool checksum_offload_en = skb->ip_summed == CHECKSUM_PARTIAL; - while (len > 0) { struct gve_tx_pkt_desc_dqo *desc = &tx->dqo.tx_ring[*desc_idx].pkt; @@ -515,7 +548,7 @@ static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, .buf_addr = cpu_to_le64(addr), .dtype = GVE_TX_PKT_DESC_DTYPE_DQO, .end_of_packet = cur_eop, - .checksum_offload_enable = checksum_offload_en, + .checksum_offload_enable = enable_csum, .compl_tag = cpu_to_le16(compl_tag), .buf_size = cur_len, }; @@ -612,6 +645,25 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc, }; } +static void gve_tx_update_tail(struct gve_tx_ring *tx, u32 desc_idx) +{ + u32 last_desc_idx = (desc_idx - 1) & tx->mask; + u32 last_report_event_interval = + (last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask; + + /* Commit the changes to our state */ + tx->dqo_tx.tail = desc_idx; + + /* Request a descriptor completion on the last descriptor of the + * packet if we are allowed to by the HW enforced interval. 
+ */ + + if (unlikely(last_report_event_interval >= GVE_TX_MIN_RE_INTERVAL)) { + tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true; + tx->dqo_tx.last_re_idx = last_desc_idx; + } +} + static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, struct sk_buff *skb, struct gve_tx_pending_packet_dqo *pkt, @@ -619,6 +671,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, u32 *desc_idx, bool is_gso) { + bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL; const struct skb_shared_info *shinfo = skb_shinfo(skb); int i; @@ -644,7 +697,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); ++pkt->num_bufs; - gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr, completion_tag, /*eop=*/shinfo->nr_frags == 0, is_gso); } @@ -664,7 +717,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, dma[pkt->num_bufs], addr); ++pkt->num_bufs; - gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr, completion_tag, is_eop, is_gso); } @@ -709,6 +762,7 @@ static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx, u32 *desc_idx, bool is_gso) { + bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL; u32 copy_offset = 0; dma_addr_t dma_addr; u32 copy_len; @@ -730,7 +784,7 @@ static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx, copy_offset += copy_len; dma_sync_single_for_device(tx->dev, dma_addr, copy_len, DMA_TO_DEVICE); - gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, copy_len, dma_addr, completion_tag, @@ -768,6 +822,7 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, return -ENOMEM; pkt->skb = skb; + pkt->type = GVE_TX_PENDING_PACKET_DQO_SKB; completion_tag = pkt - tx->dqo.pending_packets; gve_extract_tx_metadata_dqo(skb, &metadata); @@ -800,24 +855,7 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; - /* Commit the changes to our state */ - tx->dqo_tx.tail = desc_idx; - - /* Request a descriptor completion on the last descriptor of the - * packet if we are allowed to by the HW enforced interval. - */ - { - u32 last_desc_idx = (desc_idx - 1) & tx->mask; - u32 last_report_event_interval = - (last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask; - - if (unlikely(last_report_event_interval >= - GVE_TX_MIN_RE_INTERVAL)) { - tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true; - tx->dqo_tx.last_re_idx = last_desc_idx; - } - } - + gve_tx_update_tail(tx, desc_idx); return 0; err: @@ -951,9 +989,8 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, /* Metadata + (optional TSO) + data descriptors. 
*/ total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs; - if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs + - GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP, - num_buffer_descs))) { + if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs, + num_buffer_descs))) { return -1; } @@ -970,6 +1007,38 @@ drop: return 0; } +static void gve_xsk_reorder_queue_push_dqo(struct gve_tx_ring *tx, + u16 completion_tag) +{ + u32 tail = atomic_read(&tx->dqo_tx.xsk_reorder_queue_tail); + + tx->dqo.xsk_reorder_queue[tail] = completion_tag; + tail = (tail + 1) & tx->dqo.complq_mask; + atomic_set_release(&tx->dqo_tx.xsk_reorder_queue_tail, tail); +} + +static struct gve_tx_pending_packet_dqo * +gve_xsk_reorder_queue_head(struct gve_tx_ring *tx) +{ + u32 head = tx->dqo_compl.xsk_reorder_queue_head; + + if (head == tx->dqo_compl.xsk_reorder_queue_tail) { + tx->dqo_compl.xsk_reorder_queue_tail = + atomic_read_acquire(&tx->dqo_tx.xsk_reorder_queue_tail); + + if (head == tx->dqo_compl.xsk_reorder_queue_tail) + return NULL; + } + + return &tx->dqo.pending_packets[tx->dqo.xsk_reorder_queue[head]]; +} + +static void gve_xsk_reorder_queue_pop_dqo(struct gve_tx_ring *tx) +{ + tx->dqo_compl.xsk_reorder_queue_head++; + tx->dqo_compl.xsk_reorder_queue_head &= tx->dqo.complq_mask; +} + /* Transmit a given skb and ring the doorbell. */ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) { @@ -993,6 +1062,62 @@ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static bool gve_xsk_tx_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + int budget) +{ + struct xsk_buff_pool *pool = tx->xsk_pool; + struct xdp_desc desc; + bool repoll = false; + int sent = 0; + + spin_lock(&tx->dqo_tx.xdp_lock); + for (; sent < budget; sent++) { + struct gve_tx_pending_packet_dqo *pkt; + s16 completion_tag; + dma_addr_t addr; + u32 desc_idx; + + if (unlikely(!gve_has_avail_slots_tx_dqo(tx, 1, 1))) { + repoll = true; + break; + } + + if (!xsk_tx_peek_desc(pool, &desc)) + break; + + pkt = gve_alloc_pending_packet(tx); + pkt->type = GVE_TX_PENDING_PACKET_DQO_XSK; + pkt->num_bufs = 0; + completion_tag = pkt - tx->dqo.pending_packets; + + addr = xsk_buff_raw_get_dma(pool, desc.addr); + xsk_buff_raw_dma_sync_for_device(pool, addr, desc.len); + + desc_idx = tx->dqo_tx.tail; + gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, + true, desc.len, + addr, completion_tag, true, + false); + ++pkt->num_bufs; + gve_tx_update_tail(tx, desc_idx); + tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; + gve_xsk_reorder_queue_push_dqo(tx, completion_tag); + } + + if (sent) { + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); + xsk_tx_release(pool); + } + + spin_unlock(&tx->dqo_tx.xdp_lock); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xsk_sent += sent; + u64_stats_update_end(&tx->statss); + + return (sent == budget) || repoll; +} + static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list, struct gve_tx_pending_packet_dqo *pending_packet) { @@ -1107,16 +1232,35 @@ static void gve_handle_packet_completion(struct gve_priv *priv, } } tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs; - if (tx->dqo.qpl) - gve_free_tx_qpl_bufs(tx, pending_packet); - else + + switch (pending_packet->type) { + case GVE_TX_PENDING_PACKET_DQO_SKB: + if (tx->dqo.qpl) + gve_free_tx_qpl_bufs(tx, pending_packet); + else + gve_unmap_packet(tx->dev, pending_packet); + (*pkts)++; + *bytes += pending_packet->skb->len; + + napi_consume_skb(pending_packet->skb, is_napi); + pending_packet->skb = 
NULL; + gve_free_pending_packet(tx, pending_packet); + break; + case GVE_TX_PENDING_PACKET_DQO_XDP_FRAME: gve_unmap_packet(tx->dev, pending_packet); + (*pkts)++; + *bytes += pending_packet->xdpf->len; - *bytes += pending_packet->skb->len; - (*pkts)++; - napi_consume_skb(pending_packet->skb, is_napi); - pending_packet->skb = NULL; - gve_free_pending_packet(tx, pending_packet); + xdp_return_frame(pending_packet->xdpf); + pending_packet->xdpf = NULL; + gve_free_pending_packet(tx, pending_packet); + break; + case GVE_TX_PENDING_PACKET_DQO_XSK: + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; + break; + default: + WARN_ON_ONCE(1); + } } static void gve_handle_miss_completion(struct gve_priv *priv, @@ -1213,8 +1357,34 @@ static void remove_timed_out_completions(struct gve_priv *priv, remove_from_list(tx, &tx->dqo_compl.timed_out_completions, pending_packet); + + /* Need to count XSK packets in xsk_tx_completed. */ + if (pending_packet->type == GVE_TX_PENDING_PACKET_DQO_XSK) + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; + else + gve_free_pending_packet(tx, pending_packet); + } +} + +static void gve_tx_process_xsk_completions(struct gve_tx_ring *tx) +{ + u32 num_xsks = 0; + + while (true) { + struct gve_tx_pending_packet_dqo *pending_packet = + gve_xsk_reorder_queue_head(tx); + + if (!pending_packet || + pending_packet->state != GVE_PACKET_STATE_XSK_COMPLETE) + break; + + num_xsks++; + gve_xsk_reorder_queue_pop_dqo(tx); gve_free_pending_packet(tx, pending_packet); } + + if (num_xsks) + xsk_tx_completed(tx->xsk_pool, num_xsks); } int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, @@ -1287,13 +1457,17 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, num_descs_cleaned++; } - netdev_tx_completed_queue(tx->netdev_txq, - pkt_compl_pkts + miss_compl_pkts, - pkt_compl_bytes + miss_compl_bytes); + if (tx->netdev_txq) + netdev_tx_completed_queue(tx->netdev_txq, + pkt_compl_pkts + miss_compl_pkts, + pkt_compl_bytes + miss_compl_bytes); remove_miss_completions(priv, tx); remove_timed_out_completions(priv, tx); + if (tx->xsk_pool) + gve_tx_process_xsk_completions(tx); + u64_stats_update_begin(&tx->statss); tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes; tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts; @@ -1325,3 +1499,111 @@ bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean) compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head]; return compl_desc->generation != tx->dqo_compl.cur_gen_bit; } + +bool gve_xsk_tx_poll_dqo(struct gve_notify_block *rx_block, int budget) +{ + struct gve_rx_ring *rx = rx_block->rx; + struct gve_priv *priv = rx->gve; + struct gve_tx_ring *tx; + + tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)]; + if (tx->xsk_pool) + return gve_xsk_tx_dqo(priv, tx, budget); + + return 0; +} + +bool gve_xdp_poll_dqo(struct gve_notify_block *block) +{ + struct gve_tx_compl_desc *compl_desc; + struct gve_tx_ring *tx = block->tx; + struct gve_priv *priv = block->priv; + + gve_clean_tx_done_dqo(priv, tx, &block->napi); + + /* Return true if we still have work. 
*/ + compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head]; + return compl_desc->generation != tx->dqo_compl.cur_gen_bit; +} + +int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + struct xdp_frame *xdpf) +{ + struct gve_tx_pending_packet_dqo *pkt; + u32 desc_idx = tx->dqo_tx.tail; + s16 completion_tag; + int num_descs = 1; + dma_addr_t addr; + int err; + + if (unlikely(!gve_has_tx_slots_available(tx, num_descs))) + return -EBUSY; + + pkt = gve_alloc_pending_packet(tx); + if (unlikely(!pkt)) + return -EBUSY; + + pkt->type = GVE_TX_PENDING_PACKET_DQO_XDP_FRAME; + pkt->num_bufs = 0; + pkt->xdpf = xdpf; + completion_tag = pkt - tx->dqo.pending_packets; + + /* Generate Packet Descriptor */ + addr = dma_map_single(tx->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); + err = dma_mapping_error(tx->dev, addr); + if (unlikely(err)) + goto err; + + dma_unmap_len_set(pkt, len[pkt->num_bufs], xdpf->len); + dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); + pkt->num_bufs++; + + gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, + false, xdpf->len, + addr, completion_tag, true, + false); + + gve_tx_update_tail(tx, desc_idx); + return 0; + +err: + pkt->xdpf = NULL; + pkt->num_bufs = 0; + gve_free_pending_packet(tx, pkt); + return err; +} + +int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx; + int i, err = 0, qid; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + qid = gve_xdp_tx_queue_id(priv, + smp_processor_id() % priv->tx_cfg.num_xdp_queues); + + tx = &priv->tx[qid]; + + spin_lock(&tx->dqo_tx.xdp_lock); + for (i = 0; i < n; i++) { + err = gve_xdp_xmit_one_dqo(priv, tx, frames[i]); + if (err) + break; + } + + if (flags & XDP_XMIT_FLUSH) + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); + + spin_unlock(&tx->dqo_tx.xdp_lock); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xmit += n; + tx->xdp_xmit_errors += n - i; + u64_stats_update_end(&tx->statss); + + return i ? i : err; +} |
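The timestamp handling added in gve_rx_dqo.c (gve_rx_skb_hwtstamp(), near the top of this diff) extends the 32-bit nanosecond counter delivered in the RX completion descriptor to a full 64-bit time by taking a signed difference against the low word of the last 64-bit NIC counter read and adding it back. Because the difference is signed, the result moves forward or backward as needed, and it stays correct as long as the packet arrived within roughly +/- 2^31 ns (about 2.1 s) of that last read. The standalone sketch below reproduces the arithmetic; the function name, types, and the values in main() are illustrative only and not part of the driver.

/*
 * Sketch of the 32-bit hardware timestamp extension: compute the signed
 * wrap-around difference between the packet's 32-bit counter and the low
 * 32 bits of the last 64-bit counter read, then add it to that 64-bit
 * value.  Valid only while |diff| < 2^31 ns (~2.1 s).
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t extend_hw_timestamp(uint64_t last_counter_read, uint32_t hwts)
{
	/* Signed difference handles both "packet older than the last read"
	 * and "packet newer than the last read", including 32-bit wrap.
	 */
	int32_t diff = (int32_t)(hwts - (uint32_t)last_counter_read);

	return last_counter_read + diff;
}

int main(void)
{
	/* Example: packet stamped 1 ns before the last clock read. */
	uint64_t last = 0x500000010ull;
	uint32_t hwts = 0x0000000f;

	/* Prints 50000000f: diff is -1, so the result steps back by 1 ns. */
	printf("%llx\n", (unsigned long long)extend_hw_timestamp(last, hwts));
	return 0;
}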