diff options
Diffstat (limited to 'drivers/net/ethernet/intel/igc')
21 files changed, 4514 insertions, 1217 deletions
diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile index 95d1e8c490a4..efc5e7983dad 100644 --- a/drivers/net/ethernet/intel/igc/Makefile +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_IGC) += igc.o -igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \ -igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o igc_xdp.o +igc-y := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \ + igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o igc_xdp.o +igc-$(CONFIG_IGC_LEDS) += igc_leds.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 9e0bbb2e55e3..a427f05814c1 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -13,6 +13,9 @@ #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> #include <linux/net_tstamp.h> +#include <linux/bitfield.h> +#include <linux/hrtimer.h> +#include <net/xdp.h> #include "igc_hw.h" @@ -33,6 +36,16 @@ void igc_ethtool_set_ops(struct net_device *); #define IGC_N_PEROUT 2 #define IGC_N_SDP 4 +#define MAX_FLEX_FILTER 32 + +#define IGC_MAX_TX_TSTAMP_REGS 4 + +struct igc_fpe_t { + struct ethtool_mmsv mmsv; + u32 tx_min_frag_size; + bool tx_enabled; +}; + enum igc_mac_filter_type { IGC_MAC_FILTER_TYPE_DST = 0, IGC_MAC_FILTER_TYPE_SRC @@ -65,6 +78,63 @@ struct igc_rx_packet_stats { u64 other_packets; }; +enum igc_tx_buffer_type { + IGC_TX_BUFFER_TYPE_SKB, + IGC_TX_BUFFER_TYPE_XDP, + IGC_TX_BUFFER_TYPE_XSK, +}; + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer + */ +struct igc_tx_buffer { + union igc_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + enum igc_tx_buffer_type type; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; + unsigned int bytecount; + u16 gso_segs; + __be16 protocol; + + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; + bool xsk_pending_ts; +}; + +struct igc_tx_timestamp_request { + union { /* reference to the packet being timestamped */ + struct sk_buff *skb; + struct igc_tx_buffer *xsk_tx_buffer; + }; + enum igc_tx_buffer_type buffer_type; + unsigned long start; /* when the tstamp request started (jiffies) */ + u32 mask; /* _TSYNCTXCTL_TXTT_{X} bit for this request */ + u32 regl; /* which TXSTMPL_{X} register should be used */ + u32 regh; /* which TXSTMPH_{X} register should be used */ + u32 flags; /* flags that should be added to the tx_buffer */ + u8 xsk_queue_index; /* Tx queue which requesting timestamp */ + struct xsk_tx_metadata_compl xsk_meta; /* ref to xsk Tx metadata */ +}; + +struct igc_inline_rx_tstamps { + /* Timestamps are saved in little endian at the beginning of the packet + * buffer following the layout: + * + * DWORD: | 0 | 1 | 2 | 3 | + * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | + * + * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds + * part of the timestamp. + * + */ + __le32 timer1[2]; + __le32 timer0[2]; +}; + struct igc_ring_container { struct igc_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -92,9 +162,22 @@ struct igc_ring { u8 queue_index; /* logical index of the ring*/ u8 reg_idx; /* physical index of the ring */ bool launchtime_enable; /* true if LaunchTime is enabled */ + ktime_t last_tx_cycle; /* end of the cycle with a launchtime transmission */ + ktime_t last_ff_cycle; /* Last cycle with an active first flag */ + bool preemptible; /* True if preemptible queue, false if express queue */ u32 start_time; u32 end_time; + u32 max_sdu; + bool oper_gate_closed; /* Operating gate. True if the TX Queue is closed */ + bool admin_gate_closed; /* Future gate. True if the TX Queue will be closed */ + + /* CBS parameters */ + bool cbs_enable; /* indicates if CBS is enabled */ + s32 idleslope; /* idleSlope in kbps */ + s32 sendslope; /* sendSlope in kbps */ + s32 hicredit; /* hiCredit in bytes */ + s32 locredit; /* loCredit in bytes */ /* everything past this point are written often */ u16 next_to_clean; @@ -125,8 +208,7 @@ struct igc_ring { struct igc_adapter { struct net_device *netdev; - struct ethtool_eee eee; - u16 eee_advert; + struct ethtool_keee eee; unsigned long state; unsigned int flags; @@ -147,6 +229,7 @@ struct igc_adapter { struct timer_list watchdog_timer; struct timer_list dma_err_timer; struct timer_list phy_info_timer; + struct hrtimer hrtimer; u32 wol; u32 en_mng_pt; @@ -171,8 +254,21 @@ struct igc_adapter { u32 max_frame_size; u32 min_frame_size; + int tc_setup_type; ktime_t base_time; ktime_t cycle_time; + bool taprio_offload_enable; + u32 qbv_config_change_errors; + bool qbv_transition; + unsigned int qbv_count; + /* Access to oper_gate_closed, admin_gate_closed and qbv_transition + * are protected by the qbv_tx_lock. + */ + spinlock_t qbv_tx_lock; + + bool strict_priority_enable; + u8 num_tc; + u16 queue_per_tc[IGC_MAX_TX_QUEUES]; /* OS defined structs */ struct pci_dev *pdev; @@ -214,17 +310,23 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; - struct work_struct ptp_tx_work; - struct sk_buff *ptp_tx_skb; - struct hwtstamp_config tstamp_config; - unsigned long ptp_tx_start; + /* Access to ptp_tx_skb and ptp_tx_start are protected by the + * ptp_tx_lock. + */ + spinlock_t ptp_tx_lock; + struct igc_tx_timestamp_request tx_tstamp[IGC_MAX_TX_TSTAMP_REGS]; + struct kernel_hwtstamp_config tstamp_config; unsigned int ptp_flags; /* System time value lock */ spinlock_t tmreg_lock; + /* Free-running timer lock */ + spinlock_t free_timer_lock; struct cyclecounter cc; struct timecounter tc; struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ ktime_t ptp_reset_start; /* Reset time in clock mono */ + struct system_time_snapshot snapshot; + struct mutex ptm_lock; /* Only allow one PTM transaction at a time */ char fw_version[32]; @@ -237,6 +339,13 @@ struct igc_adapter { struct timespec64 start; struct timespec64 period; } perout[IGC_N_PEROUT]; + + struct igc_fpe_t fpe; + + /* LEDs */ + struct mutex led_mutex; + struct igc_led_classdev *leds; + bool leds_available; }; void igc_up(struct igc_adapter *adapter); @@ -254,7 +363,6 @@ int igc_reinit_queues(struct igc_adapter *adapter); void igc_write_rss_indir_tbl(struct igc_adapter *adapter); bool igc_has_link(struct igc_adapter *adapter); void igc_reset(struct igc_adapter *adapter); -int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx); void igc_update_stats(struct igc_adapter *adapter); void igc_disable_rx_ring(struct igc_ring *ring); void igc_enable_rx_ring(struct igc_ring *ring); @@ -262,6 +370,9 @@ void igc_disable_tx_ring(struct igc_ring *ring); void igc_enable_tx_ring(struct igc_ring *ring); int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); +/* AF_XDP TX metadata operations */ +extern const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops; + /* igc_dump declarations */ void igc_rings_dump(struct igc_adapter *adapter); void igc_regs_dump(struct igc_adapter *adapter); @@ -280,20 +391,48 @@ extern char igc_driver_name[]; #define IGC_FLAG_PTP BIT(8) #define IGC_FLAG_WOL_SUPPORTED BIT(8) #define IGC_FLAG_NEED_LINK_UPDATE BIT(9) -#define IGC_FLAG_MEDIA_RESET BIT(10) -#define IGC_FLAG_MAS_ENABLE BIT(12) #define IGC_FLAG_HAS_MSIX BIT(13) #define IGC_FLAG_EEE BIT(14) #define IGC_FLAG_VLAN_PROMISC BIT(15) #define IGC_FLAG_RX_LEGACY BIT(16) #define IGC_FLAG_TSN_QBV_ENABLED BIT(17) +#define IGC_FLAG_TSN_QAV_ENABLED BIT(18) +#define IGC_FLAG_TSN_PREEMPT_ENABLED BIT(19) +#define IGC_FLAG_TSN_REVERSE_TXQ_PRIO BIT(20) + +#define IGC_FLAG_TSN_ANY_ENABLED \ + (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED | \ + IGC_FLAG_TSN_PREEMPT_ENABLED) #define IGC_FLAG_RSS_FIELD_IPV4_UDP BIT(6) #define IGC_FLAG_RSS_FIELD_IPV6_UDP BIT(7) -#define IGC_MRQC_ENABLE_RSS_MQ 0x00000002 -#define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 -#define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +/* RX-desc Write-Back format RSS Type's */ +enum igc_rss_type_num { + IGC_RSS_TYPE_NO_HASH = 0, + IGC_RSS_TYPE_HASH_TCP_IPV4 = 1, + IGC_RSS_TYPE_HASH_IPV4 = 2, + IGC_RSS_TYPE_HASH_TCP_IPV6 = 3, + IGC_RSS_TYPE_HASH_IPV6_EX = 4, + IGC_RSS_TYPE_HASH_IPV6 = 5, + IGC_RSS_TYPE_HASH_TCP_IPV6_EX = 6, + IGC_RSS_TYPE_HASH_UDP_IPV4 = 7, + IGC_RSS_TYPE_HASH_UDP_IPV6 = 8, + IGC_RSS_TYPE_HASH_UDP_IPV6_EX = 9, + IGC_RSS_TYPE_MAX = 10, +}; +#define IGC_RSS_TYPE_MAX_TABLE 16 +#define IGC_RSS_TYPE_MASK GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */ + +/* igc_rss_type - Rx descriptor RSS type field */ +static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc) +{ + /* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved) + * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info) + * is slightly slower than via u32 (wb.lower.lo_dword.data) + */ + return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK); +} /* Interrupt defines */ #define IGC_START_ITR 648 /* ~6000 ints/sec */ @@ -310,11 +449,11 @@ extern char igc_driver_name[]; /* TX/RX descriptor defines */ #define IGC_DEFAULT_TXD 256 #define IGC_DEFAULT_TX_WORK 128 -#define IGC_MIN_TXD 80 +#define IGC_MIN_TXD 64 #define IGC_MAX_TXD 4096 #define IGC_DEFAULT_RXD 256 -#define IGC_MIN_RXD 80 +#define IGC_MIN_RXD 64 #define IGC_MAX_RXD 4096 /* Supported Rx Buffer Sizes */ @@ -346,12 +485,30 @@ extern char igc_driver_name[]; * descriptors until either it has this many to write back, or the * ITR timer expires. */ -#define IGC_RX_PTHRESH 8 -#define IGC_RX_HTHRESH 8 -#define IGC_TX_PTHRESH 8 -#define IGC_TX_HTHRESH 1 -#define IGC_RX_WTHRESH 4 -#define IGC_TX_WTHRESH 16 +#define IGC_RXDCTL_PTHRESH 8 +#define IGC_RXDCTL_HTHRESH 8 +#define IGC_RXDCTL_WTHRESH 4 +/* Ena specific Rx Queue */ +#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 +/* Receive Software Flush */ +#define IGC_RXDCTL_SWFLUSH 0x04000000 + +#define IGC_TXDCTL_PTHRESH_MASK GENMASK(4, 0) +#define IGC_TXDCTL_HTHRESH_MASK GENMASK(12, 8) +#define IGC_TXDCTL_WTHRESH_MASK GENMASK(20, 16) +#define IGC_TXDCTL_QUEUE_ENABLE_MASK GENMASK(25, 25) +#define IGC_TXDCTL_SWFLUSH_MASK GENMASK(26, 26) +#define IGC_TXDCTL_PRIORITY_MASK GENMASK(27, 27) + +#define IGC_TXDCTL_PTHRESH(x) FIELD_PREP(IGC_TXDCTL_PTHRESH_MASK, (x)) +#define IGC_TXDCTL_HTHRESH(x) FIELD_PREP(IGC_TXDCTL_HTHRESH_MASK, (x)) +#define IGC_TXDCTL_WTHRESH(x) FIELD_PREP(IGC_TXDCTL_WTHRESH_MASK, (x)) +/* Ena specific Tx Queue */ +#define IGC_TXDCTL_QUEUE_ENABLE FIELD_PREP(IGC_TXDCTL_QUEUE_ENABLE_MASK, 1) +/* Transmit Software Flush */ +#define IGC_TXDCTL_SWFLUSH FIELD_PREP(IGC_TXDCTL_SWFLUSH_MASK, 1) +#define IGC_TXDCTL_PRIORITY(x) FIELD_PREP(IGC_TXDCTL_PRIORITY_MASK, (x)) +#define IGC_TXDCTL_PRIORITY_HIGH IGC_TXDCTL_PRIORITY(1) #define IGC_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) @@ -385,7 +542,6 @@ enum igc_state_t { __IGC_TESTING, __IGC_RESETTING, __IGC_DOWN, - __IGC_PTP_TX_IN_PROGRESS, }; enum igc_tx_flags { @@ -397,6 +553,12 @@ enum igc_tx_flags { /* olinfo flags */ IGC_TX_FLAGS_IPV4 = 0x10, IGC_TX_FLAGS_CSUM = 0x20, + + IGC_TX_FLAGS_TSTAMP_1 = 0x100, + IGC_TX_FLAGS_TSTAMP_2 = 0x200, + IGC_TX_FLAGS_TSTAMP_3 = 0x400, + + IGC_TX_FLAGS_TSTAMP_TIMER_1 = 0x800, }; enum igc_boards { @@ -413,32 +575,6 @@ enum igc_boards { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -enum igc_tx_buffer_type { - IGC_TX_BUFFER_TYPE_SKB, - IGC_TX_BUFFER_TYPE_XDP, - IGC_TX_BUFFER_TYPE_XSK, -}; - -/* wrapper around a pointer to a socket buffer, - * so a DMA handle can be stored along with the buffer - */ -struct igc_tx_buffer { - union igc_adv_tx_desc *next_to_watch; - unsigned long time_stamp; - enum igc_tx_buffer_type type; - union { - struct sk_buff *skb; - struct xdp_frame *xdpf; - }; - unsigned int bytecount; - u16 gso_segs; - __be16 protocol; - - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); - u32 tx_flags; -}; - struct igc_rx_buffer { union { struct { @@ -455,6 +591,21 @@ struct igc_rx_buffer { }; }; +/* context wrapper around xdp_buff to provide access to descriptor metadata */ +struct igc_xdp_buff { + struct xdp_buff xdp; + union igc_adv_rx_desc *rx_desc; + struct igc_inline_rx_tstamps *rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ +}; + +struct igc_metadata_request { + struct igc_tx_buffer *tx_buffer; + struct xsk_tx_metadata *meta; + struct igc_ring *tx_ring; + u32 cmd_type; + u16 used_desc; +}; + struct igc_q_vector { struct igc_adapter *adapter; /* backlink */ void __iomem *itr_register; @@ -469,25 +620,36 @@ struct igc_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; - struct net_device poll_dev; /* for dynamic allocation of rings associated with this q_vector */ struct igc_ring ring[] ____cacheline_internodealigned_in_smp; }; enum igc_filter_match_flags { - IGC_FILTER_FLAG_ETHER_TYPE = 0x1, - IGC_FILTER_FLAG_VLAN_TCI = 0x2, - IGC_FILTER_FLAG_SRC_MAC_ADDR = 0x4, - IGC_FILTER_FLAG_DST_MAC_ADDR = 0x8, + IGC_FILTER_FLAG_ETHER_TYPE = BIT(0), + IGC_FILTER_FLAG_VLAN_TCI = BIT(1), + IGC_FILTER_FLAG_SRC_MAC_ADDR = BIT(2), + IGC_FILTER_FLAG_DST_MAC_ADDR = BIT(3), + IGC_FILTER_FLAG_USER_DATA = BIT(4), + IGC_FILTER_FLAG_VLAN_ETYPE = BIT(5), + IGC_FILTER_FLAG_DEFAULT_QUEUE = BIT(6), }; struct igc_nfc_filter { u8 match_flags; u16 etype; + u16 vlan_etype; u16 vlan_tci; + u16 vlan_tci_mask; u8 src_addr[ETH_ALEN]; u8 dst_addr[ETH_ALEN]; + u8 user_data[8]; + u8 user_mask[8]; + u8 flex_index; + u8 rx_queue; + u8 prio; + u8 immediate_irq; + u8 drop; }; struct igc_nfc_rule { @@ -495,12 +657,28 @@ struct igc_nfc_rule { struct igc_nfc_filter filter; u32 location; u16 action; + bool flex; }; -/* IGC supports a total of 32 NFC rules: 16 MAC address based,, 8 VLAN priority - * based, and 8 ethertype based. +/* IGC supports a total of 65 NFC rules, listed below in order of priority: + * - 16 MAC address based filtering rules (highest priority) + * - 8 ethertype based filtering rules + * - 32 Flex filter based filtering rules + * - 8 VLAN priority based filtering rules + * - 1 default queue rule (lowest priority) */ -#define IGC_MAX_RXNFC_RULES 32 +#define IGC_MAX_RXNFC_RULES 65 + +struct igc_flex_filter { + u8 index; + u8 data[128]; + u8 mask[16]; + u8 length; + u8 rx_queue; + u8 prio; + u8 immediate_irq; + u8 drop; +}; /* igc_desc_unused - calculate if we have unused descriptors */ static inline u16 igc_desc_unused(const struct igc_ring *ring) @@ -540,6 +718,8 @@ enum igc_ring_flags_t { IGC_RING_FLAG_TX_CTX_IDX, IGC_RING_FLAG_TX_DETECT_HANG, IGC_RING_FLAG_AF_XDP_ZC, + IGC_RING_FLAG_TX_HWTSTAMP, + IGC_RING_FLAG_RX_ALLOC_FAILED, }; #define ring_uses_large_buffer(ring) \ @@ -578,7 +758,7 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data) if (hw->phy.ops.read_reg) return hw->phy.ops.read_reg(hw, offset, data); - return 0; + return -EOPNOTSUPP; } void igc_reinit_locked(struct igc_adapter *); @@ -586,16 +766,26 @@ struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, u32 location); int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); - +void igc_disable_empty_addr_recv(struct igc_adapter *adapter); +int igc_enable_empty_addr_recv(struct igc_adapter *adapter); +struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu); +void igc_flush_tx_descriptors(struct igc_ring *ring); void igc_ptp_init(struct igc_adapter *adapter); void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); void igc_ptp_stop(struct igc_adapter *adapter); ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf); -int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); -int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); +int igc_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int igc_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void igc_ptp_tx_hang(struct igc_adapter *adapter); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); + +int igc_led_setup(struct igc_adapter *adapter); +void igc_led_free(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index d0700d48ecf9..1613b562d17c 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -40,7 +40,7 @@ static s32 igc_reset_hw_base(struct igc_hw *hw) ctrl = rd32(IGC_CTRL); hw_dbg("Issuing a global reset to MAC\n"); - wr32(IGC_CTRL, ctrl | IGC_CTRL_DEV_RST); + wr32(IGC_CTRL, ctrl | IGC_CTRL_RST); ret_val = igc_get_auto_rd_done(hw); if (ret_val) { @@ -68,8 +68,11 @@ static s32 igc_init_nvm_params_base(struct igc_hw *hw) u32 eecd = rd32(IGC_EECD); u16 size; - size = (u16)((eecd & IGC_EECD_SIZE_EX_MASK) >> - IGC_EECD_SIZE_EX_SHIFT); + /* failed to read reg and got all F's */ + if (!(~eecd)) + return -ENXIO; + + size = FIELD_GET(IGC_EECD_SIZE_EX_MASK, eecd); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. @@ -158,17 +161,11 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) struct igc_phy_info *phy = &hw->phy; s32 ret_val = 0; - if (hw->phy.media_type != igc_media_type_copper) { - phy->type = igc_phy_none; - goto out; - } - phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT_2500; phy->reset_delay_us = 100; /* set lan id */ - hw->bus.func = (rd32(IGC_STATUS) & IGC_STATUS_FUNC_MASK) >> - IGC_STATUS_FUNC_SHIFT; + hw->bus.func = FIELD_GET(IGC_STATUS_FUNC_MASK, rd32(IGC_STATUS)); /* Make sure the PHY is in a good state. Several people have reported * firmware leaving the PHY's page select register set to something @@ -187,16 +184,6 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) igc_check_for_copper_link(hw); - /* Verify phy id and set remaining function pointers */ - switch (phy->id) { - case I225_I_PHY_ID: - phy->type = igc_phy_i225; - break; - default: - ret_val = -IGC_ERR_PHY; - goto out; - } - out: return ret_val; } @@ -215,6 +202,7 @@ static s32 igc_get_invariants_base(struct igc_hw *hw) case IGC_DEV_ID_I225_K2: case IGC_DEV_ID_I226_K: case IGC_DEV_ID_I225_LMVP: + case IGC_DEV_ID_I226_LMVP: case IGC_DEV_ID_I225_IT: case IGC_DEV_ID_I226_LM: case IGC_DEV_ID_I226_V: @@ -237,6 +225,8 @@ static s32 igc_get_invariants_base(struct igc_hw *hw) /* NVM initialization */ ret_val = igc_init_nvm_params_base(hw); + if (ret_val) + goto out; switch (hw->mac.type) { case igc_i225: ret_val = igc_init_nvm_params_i225(hw); @@ -410,6 +400,35 @@ void igc_rx_fifo_flush_base(struct igc_hw *hw) rd32(IGC_MPC); } +bool igc_is_device_id_i225(struct igc_hw *hw) +{ + switch (hw->device_id) { + case IGC_DEV_ID_I225_LM: + case IGC_DEV_ID_I225_V: + case IGC_DEV_ID_I225_I: + case IGC_DEV_ID_I225_K: + case IGC_DEV_ID_I225_K2: + case IGC_DEV_ID_I225_LMVP: + case IGC_DEV_ID_I225_IT: + return true; + default: + return false; + } +} + +bool igc_is_device_id_i226(struct igc_hw *hw) +{ + switch (hw->device_id) { + case IGC_DEV_ID_I226_LM: + case IGC_DEV_ID_I226_V: + case IGC_DEV_ID_I226_K: + case IGC_DEV_ID_I226_IT: + return true; + default: + return false; + } +} + static struct igc_mac_operations igc_mac_ops_base = { .init_hw = igc_init_hw_base, .check_for_link = igc_check_for_copper_link, diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index ce530f5fd7bd..eaf17cd031c3 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -7,6 +7,8 @@ /* forward declaration */ void igc_rx_fifo_flush_base(struct igc_hw *hw); void igc_power_down_phy_copper_base(struct igc_hw *hw); +bool igc_is_device_id_i225(struct igc_hw *hw); +bool igc_is_device_id_i226(struct igc_hw *hw); /* Transmit Descriptor - Advanced */ union igc_adv_tx_desc { @@ -32,6 +34,13 @@ struct igc_adv_tx_context_desc { /* Adv Transmit Descriptor Config Masks */ #define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ +#define IGC_ADVTXD_TSTAMP_REG_1 0x00010000 /* Select register 1 for timestamp */ +#define IGC_ADVTXD_TSTAMP_REG_2 0x00020000 /* Select register 2 for timestamp */ +#define IGC_ADVTXD_TSTAMP_REG_3 0x00030000 /* Select register 3 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_1 0x00010000 /* Select timer 1 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_2 0x00020000 /* Select timer 2 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_3 0x00030000 /* Select timer 3 for timestamp */ + #define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ #define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ #define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ @@ -40,6 +49,7 @@ struct igc_adv_tx_context_desc { #define IGC_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ #define IGC_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ #define IGC_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define IGC_ADVTXD_PAYLEN_MASK 0XFFFFC000 /* Adv desc PAYLEN mask */ #define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ #define IGC_RAR_ENTRIES 16 @@ -76,17 +86,14 @@ union igc_adv_rx_desc { } wb; /* writeback */ }; -/* Additional Transmit Descriptor Control definitions */ -#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ -#define IGC_TXDCTL_SWFLUSH 0x04000000 /* Transmit Software Flush */ - -/* Additional Receive Descriptor Control definitions */ -#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ -#define IGC_RXDCTL_SWFLUSH 0x04000000 /* Receive Software Flush */ - /* SRRCTL bit definitions */ -#define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ -#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ -#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define IGC_SRRCTL_BSIZEPKT_MASK GENMASK(6, 0) +#define IGC_SRRCTL_BSIZEPKT(x) FIELD_PREP(IGC_SRRCTL_BSIZEPKT_MASK, \ + (x) / 1024) /* in 1 KB resolution */ +#define IGC_SRRCTL_BSIZEHDR_MASK GENMASK(13, 8) +#define IGC_SRRCTL_BSIZEHDR(x) FIELD_PREP(IGC_SRRCTL_BSIZEHDR_MASK, \ + (x) / 64) /* in 64 bytes resolution */ +#define IGC_SRRCTL_DESCTYPE_MASK GENMASK(27, 25) +#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF FIELD_PREP(IGC_SRRCTL_DESCTYPE_MASK, 1) #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index c3a5a5518790..498ba1522ca4 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -4,6 +4,8 @@ #ifndef _IGC_DEFINES_H_ #define _IGC_DEFINES_H_ +#include <linux/bitfield.h> + /* Number of Transmit and Receive Descriptors must be a multiple of 8 */ #define REQ_TX_DESCRIPTOR_MULTIPLE 8 #define REQ_RX_DESCRIPTOR_MULTIPLE 8 @@ -17,11 +19,22 @@ #define IGC_WUC_PME_EN 0x00000002 /* PME Enable */ /* Wake Up Filter Control */ -#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ -#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ -#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ -#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ -#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IGC_WUFC_FLEX_HQ BIT(14) /* Flex Filters Host Queuing */ +#define IGC_WUFC_FLX0 BIT(16) /* Flexible Filter 0 Enable */ +#define IGC_WUFC_FLX1 BIT(17) /* Flexible Filter 1 Enable */ +#define IGC_WUFC_FLX2 BIT(18) /* Flexible Filter 2 Enable */ +#define IGC_WUFC_FLX3 BIT(19) /* Flexible Filter 3 Enable */ +#define IGC_WUFC_FLX4 BIT(20) /* Flexible Filter 4 Enable */ +#define IGC_WUFC_FLX5 BIT(21) /* Flexible Filter 5 Enable */ +#define IGC_WUFC_FLX6 BIT(22) /* Flexible Filter 6 Enable */ +#define IGC_WUFC_FLX7 BIT(23) /* Flexible Filter 7 Enable */ + +#define IGC_WUFC_FILTER_MASK GENMASK(23, 14) #define IGC_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ @@ -46,6 +59,34 @@ /* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */ #define IGC_WUPM_BYTES 128 +/* Wakeup Filter Control Extended */ +#define IGC_WUFC_EXT_FLX8 BIT(8) /* Flexible Filter 8 Enable */ +#define IGC_WUFC_EXT_FLX9 BIT(9) /* Flexible Filter 9 Enable */ +#define IGC_WUFC_EXT_FLX10 BIT(10) /* Flexible Filter 10 Enable */ +#define IGC_WUFC_EXT_FLX11 BIT(11) /* Flexible Filter 11 Enable */ +#define IGC_WUFC_EXT_FLX12 BIT(12) /* Flexible Filter 12 Enable */ +#define IGC_WUFC_EXT_FLX13 BIT(13) /* Flexible Filter 13 Enable */ +#define IGC_WUFC_EXT_FLX14 BIT(14) /* Flexible Filter 14 Enable */ +#define IGC_WUFC_EXT_FLX15 BIT(15) /* Flexible Filter 15 Enable */ +#define IGC_WUFC_EXT_FLX16 BIT(16) /* Flexible Filter 16 Enable */ +#define IGC_WUFC_EXT_FLX17 BIT(17) /* Flexible Filter 17 Enable */ +#define IGC_WUFC_EXT_FLX18 BIT(18) /* Flexible Filter 18 Enable */ +#define IGC_WUFC_EXT_FLX19 BIT(19) /* Flexible Filter 19 Enable */ +#define IGC_WUFC_EXT_FLX20 BIT(20) /* Flexible Filter 20 Enable */ +#define IGC_WUFC_EXT_FLX21 BIT(21) /* Flexible Filter 21 Enable */ +#define IGC_WUFC_EXT_FLX22 BIT(22) /* Flexible Filter 22 Enable */ +#define IGC_WUFC_EXT_FLX23 BIT(23) /* Flexible Filter 23 Enable */ +#define IGC_WUFC_EXT_FLX24 BIT(24) /* Flexible Filter 24 Enable */ +#define IGC_WUFC_EXT_FLX25 BIT(25) /* Flexible Filter 25 Enable */ +#define IGC_WUFC_EXT_FLX26 BIT(26) /* Flexible Filter 26 Enable */ +#define IGC_WUFC_EXT_FLX27 BIT(27) /* Flexible Filter 27 Enable */ +#define IGC_WUFC_EXT_FLX28 BIT(28) /* Flexible Filter 28 Enable */ +#define IGC_WUFC_EXT_FLX29 BIT(29) /* Flexible Filter 29 Enable */ +#define IGC_WUFC_EXT_FLX30 BIT(30) /* Flexible Filter 30 Enable */ +#define IGC_WUFC_EXT_FLX31 BIT(31) /* Flexible Filter 31 Enable */ + +#define IGC_WUFC_EXT_FILTER_MASK GENMASK(31, 8) + /* Loop limit on how long we wait for auto-negotiation to complete */ #define COPPER_LINK_UP_LIMIT 10 #define PHY_AUTO_NEG_LIMIT 45 @@ -88,7 +129,7 @@ #define IGC_ERR_SWFW_SYNC 13 /* Device Control */ -#define IGC_CTRL_DEV_RST 0x20000000 /* Device reset */ +#define IGC_CTRL_RST 0x04000000 /* Global reset */ #define IGC_CTRL_PHY_RST 0x80000000 /* PHY Reset */ #define IGC_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ @@ -137,7 +178,6 @@ /* PHY GPY 211 registers */ #define STANDARD_AN_REG_MASK 0x0007 /* MMD */ -#define ANEG_MULTIGBT_AN_CTRL 0x0020 /* MULTI GBT AN Control Register */ #define MMD_DEVADDR_SHIFT 16 /* Shift MMD to higher bits */ #define CR_2500T_FD_CAPS 0x0080 /* Advertise 2500T FD capability */ @@ -268,6 +308,8 @@ #define IGC_TXD_DTYP_C 0x00000000 /* Context Descriptor */ #define IGC_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ #define IGC_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define IGC_TXD_POPTS_SMD_MASK 0x3000 /* Indicates whether it's SMD-V or SMD-R */ + #define IGC_TXD_CMD_EOP 0x01000000 /* End of Packet */ #define IGC_TXD_CMD_IC 0x04000000 /* Insert Checksum */ #define IGC_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ @@ -278,10 +320,14 @@ #define IGC_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ #define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ +#define IGC_TXD_PTP2_TIMER_1 0x00000020 + /* IPSec Encrypt Enable */ #define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ +#define IGC_ADVTXD_TSN_CNTX_FIRST 0x00000080 + /* Transmit Control */ #define IGC_TCTL_EN 0x00000002 /* enable Tx */ #define IGC_TCTL_PSP 0x00000008 /* pad short packets */ @@ -319,6 +365,8 @@ #define IGC_SRRCTL_TIMER0SEL(timer) (((timer) & 0x3) << 17) /* Receive Descriptor bit definitions */ +#define IGC_RXD_STAT_SMD_TYPE_V 0x01 /* SMD-V Packet */ +#define IGC_RXD_STAT_SMD_TYPE_R 0x02 /* SMD-R Packet */ #define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ #define IGC_RXD_STAT_IXSM 0x04 /* Ignore checksum */ #define IGC_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ @@ -328,17 +376,22 @@ #define IGC_RXDEXT_STATERR_LB 0x00040000 /* Advanced Receive Descriptor bit definitions */ -#define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ +#define IGC_RXDADV_STAT_SMD_TYPE_MASK 0x06000 +#define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ #define IGC_RXDEXT_STATERR_L4E 0x20000000 #define IGC_RXDEXT_STATERR_IPE 0x40000000 #define IGC_RXDEXT_STATERR_RXE 0x80000000 +#define IGC_MRQC_ENABLE_RSS_MQ 0x00000002 #define IGC_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 #define IGC_MRQC_RSS_FIELD_IPV4 0x00020000 #define IGC_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 #define IGC_MRQC_RSS_FIELD_IPV6 0x00100000 #define IGC_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 +#define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 +#define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +#define IGC_MRQC_DEFAULT_QUEUE_MASK GENMASK(5, 3) /* Header split receive */ #define IGC_RFCTL_IPV6_EX_DIS 0x00010000 @@ -352,15 +405,66 @@ #define IGC_RCTL_PMCF 0x00800000 /* pass MAC control frames */ #define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ -#define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ -#define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ -#define IGC_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ - -#define IGC_TXPBSIZE_TSN 0x04145145 /* 5k bytes buffer for each queue */ +/* Mask for RX packet buffer size */ +#define IGC_RXPBSIZE_EXP_MASK GENMASK(5, 0) +#define IGC_BMC2OSPBSIZE_MASK GENMASK(11, 6) +#define IGC_RXPBSIZE_BE_MASK GENMASK(17, 12) +/* Mask for timestamp in RX buffer */ +#define IGC_RXPBS_CFG_TS_EN_MASK GENMASK(31, 31) +/* High-priority RX packet buffer size (KB). Used for Express traffic when preemption is enabled */ +#define IGC_RXPBSIZE_EXP(x) FIELD_PREP(IGC_RXPBSIZE_EXP_MASK, (x)) +/* BMC to OS packet buffer size in KB */ +#define IGC_BMC2OSPBSIZE(x) FIELD_PREP(IGC_BMC2OSPBSIZE_MASK, (x)) +/* Low-priority RX packet buffer size (KB). Used for BE traffic when preemption is enabled */ +#define IGC_RXPBSIZE_BE(x) FIELD_PREP(IGC_RXPBSIZE_BE_MASK, (x)) +/* Enable RX packet buffer for timestamp descriptor, saving 16 bytes per packet if set */ +#define IGC_RXPBS_CFG_TS_EN FIELD_PREP(IGC_RXPBS_CFG_TS_EN_MASK, 1) +/* Default value following I225/I226 SW User Manual Section 8.3.1 */ +#define IGC_RXPBSIZE_EXP_BMC_DEFAULT ( \ + IGC_RXPBSIZE_EXP(34) | IGC_BMC2OSPBSIZE(2)) +#define IGC_RXPBSIZE_EXP_BMC_BE_TSN ( \ + IGC_RXPBSIZE_EXP(15) | IGC_BMC2OSPBSIZE(2) | IGC_RXPBSIZE_BE(15)) + +/* Mask for TX packet buffer size */ +#define IGC_TXPB0SIZE_MASK GENMASK(5, 0) +#define IGC_TXPB1SIZE_MASK GENMASK(11, 6) +#define IGC_TXPB2SIZE_MASK GENMASK(17, 12) +#define IGC_TXPB3SIZE_MASK GENMASK(23, 18) +/* Mask for OS to BMC packet buffer size */ +#define IGC_OS2BMCPBSIZE_MASK GENMASK(29, 24) +/* TX Packet buffer size in KB */ +#define IGC_TXPB0SIZE(x) FIELD_PREP(IGC_TXPB0SIZE_MASK, (x)) +#define IGC_TXPB1SIZE(x) FIELD_PREP(IGC_TXPB1SIZE_MASK, (x)) +#define IGC_TXPB2SIZE(x) FIELD_PREP(IGC_TXPB2SIZE_MASK, (x)) +#define IGC_TXPB3SIZE(x) FIELD_PREP(IGC_TXPB3SIZE_MASK, (x)) +/* OS to BMC packet buffer size in KB */ +#define IGC_OS2BMCPBSIZE(x) FIELD_PREP(IGC_OS2BMCPBSIZE_MASK, (x)) +/* Default value following I225/I226 SW User Manual Section 8.3.2 */ +#define IGC_TXPBSIZE_DEFAULT ( \ + IGC_TXPB0SIZE(20) | IGC_TXPB1SIZE(0) | IGC_TXPB2SIZE(0) | \ + IGC_TXPB3SIZE(0) | IGC_OS2BMCPBSIZE(4)) +#define IGC_TXPBSIZE_TSN ( \ + IGC_TXPB0SIZE(7) | IGC_TXPB1SIZE(7) | IGC_TXPB2SIZE(7) | \ + IGC_TXPB3SIZE(7) | IGC_OS2BMCPBSIZE(4)) #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ +/* Retry Buffer Control */ +#define IGC_RETX_CTL 0x041C +#define IGC_RETX_CTL_WATERMARK_MASK 0xF +#define IGC_RETX_CTL_QBVFULLTH_SHIFT 8 /* QBV Retry Buffer Full Threshold */ +#define IGC_RETX_CTL_QBVFULLEN 0x1000 /* Enable QBV Retry Buffer Full Threshold */ + +/* Transmit Scheduling Latency */ +/* Latency between transmission scheduling (LaunchTime) and the time + * the packet is transmitted to the network in nanosecond. + */ +#define IGC_TXOFFSET_SPEED_10 0x000034BC +#define IGC_TXOFFSET_SPEED_100 0x00000578 +#define IGC_TXOFFSET_SPEED_1000 0x0000012C +#define IGC_TXOFFSET_SPEED_2500 0x00000578 + /* Time Sync Interrupt Causes */ #define IGC_TSICR_SYS_WRAP BIT(0) /* SYSTIM Wrap around. */ #define IGC_TSICR_TXTS BIT(1) /* Transmit Timestamp. */ @@ -404,6 +508,9 @@ /* Time Sync Transmit Control bit definitions */ #define IGC_TSYNCTXCTL_TXTT_0 0x00000001 /* Tx timestamp reg 0 valid */ +#define IGC_TSYNCTXCTL_TXTT_1 0x00000002 /* Tx timestamp reg 1 valid */ +#define IGC_TSYNCTXCTL_TXTT_2 0x00000004 /* Tx timestamp reg 2 valid */ +#define IGC_TSYNCTXCTL_TXTT_3 0x00000008 /* Tx timestamp reg 3 valid */ #define IGC_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ #define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ #define IGC_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ @@ -411,6 +518,10 @@ #define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ #define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */ +#define IGC_TSYNCTXCTL_TXTT_ANY ( \ + IGC_TSYNCTXCTL_TXTT_0 | IGC_TSYNCTXCTL_TXTT_1 | \ + IGC_TSYNCTXCTL_TXTT_2 | IGC_TSYNCTXCTL_TXTT_3) + /* Timer selection bits */ #define IGC_AUX_IO_TIMER_SEL_SYSTIM0 (0u << 30) /* Select SYSTIM0 for auxiliary time stamp */ #define IGC_AUX_IO_TIMER_SEL_SYSTIM1 (1u << 30) /* Select SYSTIM1 for auxiliary time stamp */ @@ -425,7 +536,9 @@ #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */ #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */ #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */ @@ -471,16 +584,72 @@ /* Transmit Scheduling */ #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN 0x00000001 +#define IGC_TQAVCTRL_PREEMPT_ENA 0x00000002 #define IGC_TQAVCTRL_ENHANCED_QAV 0x00000008 +#define IGC_TQAVCTRL_FUTSCDDIS 0x00000080 +#define IGC_TQAVCTRL_MIN_FRAG_MASK 0x0000C000 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x00000001 #define IGC_TXQCTL_STRICT_CYCLE 0x00000002 #define IGC_TXQCTL_STRICT_END 0x00000004 +#define IGC_TXQCTL_PREEMPTIBLE 0x00000008 +#define IGC_TXQCTL_QAV_SEL_MASK 0x000000C0 +#define IGC_TXQCTL_QAV_SEL_CBS0 0x00000080 +#define IGC_TXQCTL_QAV_SEL_CBS1 0x000000C0 + +#define IGC_TQAVCC_IDLESLOPE_MASK 0xFFFF +#define IGC_TQAVCC_KEEP_CREDITS BIT(30) + +#define IGC_MAX_SR_QUEUES 2 + +#define IGC_TXARB_TXQ_PRIO_0_MASK GENMASK(1, 0) +#define IGC_TXARB_TXQ_PRIO_1_MASK GENMASK(3, 2) +#define IGC_TXARB_TXQ_PRIO_2_MASK GENMASK(5, 4) +#define IGC_TXARB_TXQ_PRIO_3_MASK GENMASK(7, 6) +#define IGC_TXARB_TXQ_PRIO_0(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_0_MASK, (x)) +#define IGC_TXARB_TXQ_PRIO_1(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_1_MASK, (x)) +#define IGC_TXARB_TXQ_PRIO_2(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_2_MASK, (x)) +#define IGC_TXARB_TXQ_PRIO_3(x) FIELD_PREP(IGC_TXARB_TXQ_PRIO_3_MASK, (x)) /* Receive Checksum Control */ #define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ #define IGC_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ +/* PCIe PTM Control */ +#define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */ +#define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */ +#define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */ +#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) +#define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) + +/* A short cycle time of 1us theoretically should work, but appears to be too + * short in practice. + */ +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */ +#define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */ +#define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */ + +/* PCIe Digital Delay */ +#define IGC_PCIE_DIG_DELAY_DEFAULT 0x01440000 + +/* PCIe PHY Delay */ +#define IGC_PCIE_PHY_DELAY_DEFAULT 0x40900000 + +#define IGC_TIMADJ_ADJUST_METH 0x40000000 + +/* PCIe PTM Status */ +#define IGC_PTM_STAT_VALID BIT(0) /* PTM Status */ +#define IGC_PTM_STAT_RET_ERR BIT(1) /* Root port timeout */ +#define IGC_PTM_STAT_BAD_PTM_RES BIT(2) /* PTM Response msg instead of PTM Response Data */ +#define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */ +#define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */ +#define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */ +#define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */ + +/* PCIe PTM Cycle Control */ +#define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */ +#define IGC_PTM_CYCLE_CTRL_AUTO_CYC_EN BIT(31) /* PTM Cycle Control */ + /* GPY211 - I225 defines */ #define GPY_MMD_MASK 0xFFFF0000 #define GPY_MMD_SHIFT 16 @@ -503,7 +672,6 @@ #define IGC_GEN_POLL_TIMEOUT 1920 /* PHY Control Register */ -#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ #define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ #define MII_CR_POWER_DOWN 0x0800 /* Power down */ #define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ @@ -524,9 +692,6 @@ #define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ #define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ -/* Bit definitions for valid PHY IDs. I = Integrated E = External */ -#define I225_I_PHY_ID 0x67C9DC00 - /* MDI Control */ #define IGC_MDIC_DATA_MASK 0x0000FFFF #define IGC_MDIC_REG_MASK 0x001F0000 @@ -536,9 +701,18 @@ #define IGC_MDIC_OP_WRITE 0x04000000 #define IGC_MDIC_OP_READ 0x08000000 #define IGC_MDIC_READY 0x10000000 -#define IGC_MDIC_INT_EN 0x20000000 #define IGC_MDIC_ERROR 0x40000000 +/* EEE Link Ability */ +#define IGC_EEE_2500BT_MASK BIT(0) +#define IGC_EEE_1000BT_MASK BIT(2) +#define IGC_EEE_100BT_MASK BIT(1) + +/* EEE Link-Partner Ability */ +#define IGC_LP_EEE_2500BT_MASK BIT(0) +#define IGC_LP_EEE_1000BT_MASK BIT(2) +#define IGC_LP_EEE_100BT_MASK BIT(1) + #define IGC_N0_QUEUE -1 #define IGC_MAX_MAC_HDR_LEN 127 @@ -575,9 +749,6 @@ */ #define IGC_TW_SYSTEM_100_MASK 0x0000FF00 #define IGC_TW_SYSTEM_100_SHIFT 8 -#define IGC_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */ -#define IGC_DMACR_DMACTHR_MASK 0x00FF0000 -#define IGC_DMACR_DMACTHR_SHIFT 16 /* Reg val to set scale to 1024 nsec */ #define IGC_LTRMINV_SCALE_1024 2 /* Reg val to set scale to 32768 nsec */ diff --git a/drivers/net/ethernet/intel/igc/igc_diag.c b/drivers/net/ethernet/intel/igc/igc_diag.c index cc621970c0cd..a43d7244ee70 100644 --- a/drivers/net/ethernet/intel/igc/igc_diag.c +++ b/drivers/net/ethernet/intel/igc/igc_diag.c @@ -173,8 +173,7 @@ bool igc_link_test(struct igc_adapter *adapter, u64 *data) *data = 0; /* add delay to give enough time for autonegotioation to finish */ - if (adapter->hw.mac.autoneg) - ssleep(5); + ssleep(5); link_up = igc_has_link(adapter); if (!link_up) { diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index fa4171860623..e94c1922b97a 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -8,6 +8,7 @@ #include "igc.h" #include "igc_diag.h" +#include "igc_tsn.h" /* forward declaration */ struct igc_stats { @@ -67,6 +68,7 @@ static const struct igc_stats igc_gstrings_stats[] = { IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), IGC_STAT("tx_lpi_counter", stats.tlpic), IGC_STAT("rx_lpi_counter", stats.rlpic), + IGC_STAT("qbv_config_change_errors", qbv_config_change_errors), }; #define IGC_NETDEV_STAT(_net_stat) { \ @@ -120,9 +122,11 @@ static const char igc_gstrings_test[][ETH_GSTRING_LEN] = { #define IGC_STATS_LEN \ (IGC_GLOBAL_STATS_LEN + IGC_NETDEV_STATS_LEN + IGC_QUEUE_STATS_LEN) +#define IGC_PRIV_FLAGS_LEGACY_RX BIT(0) +#define IGC_PRIV_FLAGS_REVERSE_TSN_TXQ_PRIO BIT(1) static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = { -#define IGC_PRIV_FLAGS_LEGACY_RX BIT(0) "legacy-rx", + "reverse-tsn-txq-prio", }; #define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings) @@ -567,8 +571,11 @@ static int igc_ethtool_set_eeprom(struct net_device *netdev, return ret_val; } -static void igc_ethtool_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) +static void +igc_ethtool_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ering, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -578,8 +585,11 @@ static void igc_ethtool_get_ringparam(struct net_device *netdev, ring->tx_pending = adapter->tx_ring_count; } -static int igc_ethtool_set_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) +static int +igc_ethtool_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ering, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_ring *temp_ring; @@ -617,11 +627,11 @@ static int igc_ethtool_set_ringparam(struct net_device *netdev, } if (adapter->num_tx_queues > adapter->num_rx_queues) - temp_ring = vmalloc(array_size(sizeof(struct igc_ring), - adapter->num_tx_queues)); + temp_ring = vmalloc_array(adapter->num_tx_queues, + sizeof(struct igc_ring)); else - temp_ring = vmalloc(array_size(sizeof(struct igc_ring), - adapter->num_rx_queues)); + temp_ring = vmalloc_array(adapter->num_rx_queues, + sizeof(struct igc_ring)); if (!temp_ring) { err = -ENOMEM; @@ -766,10 +776,9 @@ static void igc_ethtool_get_strings(struct net_device *netdev, u32 stringset, break; case ETH_SS_STATS: for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) - ethtool_sprintf(&p, igc_gstrings_stats[i].stat_string); + ethtool_puts(&p, igc_gstrings_stats[i].stat_string); for (i = 0; i < IGC_NETDEV_STATS_LEN; i++) - ethtool_sprintf(&p, - igc_gstrings_net_stats[i].stat_string); + ethtool_puts(&p, igc_gstrings_net_stats[i].stat_string); for (i = 0; i < adapter->num_tx_queues; i++) { ethtool_sprintf(&p, "tx_queue_%u_packets", i); ethtool_sprintf(&p, "tx_queue_%u_bytes", i); @@ -801,7 +810,7 @@ static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGC_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } @@ -833,15 +842,15 @@ static void igc_ethtool_get_stats(struct net_device *netdev, ring = adapter->tx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); data[i] = ring->tx_stats.packets; data[i + 1] = ring->tx_stats.bytes; data[i + 2] = ring->tx_stats.restart_queue; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); + start = u64_stats_fetch_begin(&ring->tx_syncp2); restart2 = ring->tx_stats.restart_queue2; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp2, start)); data[i + 2] += restart2; i += IGC_TX_QUEUE_STATS_LEN; @@ -849,40 +858,47 @@ static void igc_ethtool_get_stats(struct net_device *netdev, for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); data[i] = ring->rx_stats.packets; data[i + 1] = ring->rx_stats.bytes; data[i + 2] = ring->rx_stats.drops; data[i + 3] = ring->rx_stats.csum_err; data[i + 4] = ring->rx_stats.alloc_failed; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); i += IGC_RX_QUEUE_STATS_LEN; } spin_unlock(&adapter->stats64_lock); } +static int igc_ethtool_get_previous_rx_coalesce(struct igc_adapter *adapter) +{ + return (adapter->rx_itr_setting <= 3) ? + adapter->rx_itr_setting : adapter->rx_itr_setting >> 2; +} + +static int igc_ethtool_get_previous_tx_coalesce(struct igc_adapter *adapter) +{ + return (adapter->tx_itr_setting <= 3) ? + adapter->tx_itr_setting : adapter->tx_itr_setting >> 2; +} + static int igc_ethtool_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); - if (adapter->rx_itr_setting <= 3) - ec->rx_coalesce_usecs = adapter->rx_itr_setting; - else - ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; - - if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) { - if (adapter->tx_itr_setting <= 3) - ec->tx_coalesce_usecs = adapter->tx_itr_setting; - else - ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; - } + ec->rx_coalesce_usecs = igc_ethtool_get_previous_rx_coalesce(adapter); + ec->tx_coalesce_usecs = igc_ethtool_get_previous_tx_coalesce(adapter); return 0; } static int igc_ethtool_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); int i; @@ -899,8 +915,12 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev, ec->tx_coalesce_usecs == 2) return -EINVAL; - if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs) + if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && + ec->tx_coalesce_usecs != igc_ethtool_get_previous_tx_coalesce(adapter)) { + NL_SET_ERR_MSG_MOD(extack, + "Queue Pair mode enabled, both Rx and Tx coalescing controlled by rx-usecs"); return -EINVAL; + } /* If ITR is disabled, disable DMAC */ if (ec->rx_coalesce_usecs == 0) { @@ -939,6 +959,7 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev, } #define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +#define VLAN_TCI_FULL_MASK ((__force __be16)~0) static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, struct ethtool_rxnfc *cmd) { @@ -961,10 +982,16 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; } + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { + fsp->flow_type |= FLOW_EXT; + fsp->h_ext.vlan_etype = htons(rule->filter.vlan_etype); + fsp->m_ext.vlan_etype = ETHER_TYPE_FULL_MASK; + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { fsp->flow_type |= FLOW_EXT; fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci); - fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); + fsp->m_ext.vlan_tci = htons(rule->filter.vlan_tci_mask); } if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { @@ -979,6 +1006,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, eth_broadcast_addr(fsp->m_u.ether_spec.h_source); } + if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) { + fsp->flow_type |= FLOW_EXT; + memcpy(fsp->h_ext.data, rule->filter.user_data, sizeof(fsp->h_ext.data)); + memcpy(fsp->m_ext.data, rule->filter.user_mask, sizeof(fsp->m_ext.data)); + } + mutex_unlock(&adapter->nfc_rule_lock); return 0; @@ -1014,9 +1047,11 @@ static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter, return 0; } -static int igc_ethtool_get_rss_hash_opts(struct igc_adapter *adapter, - struct ethtool_rxnfc *cmd) +static int igc_ethtool_get_rxfh_fields(struct net_device *dev, + struct ethtool_rxfh_fields *cmd) { + struct igc_adapter *adapter = netdev_priv(dev); + cmd->data = 0; /* Report default options for RSS on igc */ @@ -1056,15 +1091,19 @@ static int igc_ethtool_get_rss_hash_opts(struct igc_adapter *adapter, return 0; } +static u32 igc_ethtool_get_rx_ring_count(struct net_device *dev) +{ + struct igc_adapter *adapter = netdev_priv(dev); + + return adapter->num_rx_queues; +} + static int igc_ethtool_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { struct igc_adapter *adapter = netdev_priv(dev); switch (cmd->cmd) { - case ETHTOOL_GRXRINGS: - cmd->data = adapter->num_rx_queues; - return 0; case ETHTOOL_GRXCLSRLCNT: cmd->rule_cnt = adapter->nfc_rule_count; return 0; @@ -1072,8 +1111,6 @@ static int igc_ethtool_get_rxnfc(struct net_device *dev, return igc_ethtool_get_nfc_rule(adapter, cmd); case ETHTOOL_GRXCLSRLALL: return igc_ethtool_get_nfc_rules(adapter, cmd, rule_locs); - case ETHTOOL_GRXFH: - return igc_ethtool_get_rss_hash_opts(adapter, cmd); default: return -EOPNOTSUPP; } @@ -1081,9 +1118,11 @@ static int igc_ethtool_get_rxnfc(struct net_device *dev, #define UDP_RSS_FLAGS (IGC_FLAG_RSS_FIELD_IPV4_UDP | \ IGC_FLAG_RSS_FIELD_IPV6_UDP) -static int igc_ethtool_set_rss_hash_opt(struct igc_adapter *adapter, - struct ethtool_rxnfc *nfc) +static int igc_ethtool_set_rxfh_fields(struct net_device *dev, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack) { + struct igc_adapter *adapter = netdev_priv(dev); u32 flags = adapter->flags; /* RSS does not support anything other than hashing @@ -1193,6 +1232,7 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule, if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci); + rule->filter.vlan_tci_mask = ntohs(fsp->m_ext.vlan_tci); rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI; } @@ -1215,6 +1255,56 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule, ether_addr_copy(rule->filter.dst_addr, fsp->h_u.ether_spec.h_dest); } + + /* VLAN etype matching */ + if ((fsp->flow_type & FLOW_EXT) && fsp->h_ext.vlan_etype) { + rule->filter.vlan_etype = ntohs(fsp->h_ext.vlan_etype); + rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_ETYPE; + } + + /* Check for user defined data */ + if ((fsp->flow_type & FLOW_EXT) && + (fsp->h_ext.data[0] || fsp->h_ext.data[1])) { + rule->filter.match_flags |= IGC_FILTER_FLAG_USER_DATA; + memcpy(rule->filter.user_data, fsp->h_ext.data, sizeof(fsp->h_ext.data)); + memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data)); + } + + /* The i225/i226 has various different filters. Flex filters provide a + * way to match up to the first 128 bytes of a packet. Use them for: + * a) For specific user data + * b) For VLAN EtherType + * c) For full TCI match + * d) Or in case multiple filter criteria are set + * + * Otherwise, use the simple MAC, VLAN PRIO or EtherType filters. + */ + if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) || + (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) || + ((rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) && + rule->filter.vlan_tci_mask == ntohs(VLAN_TCI_FULL_MASK)) || + (rule->filter.match_flags & (rule->filter.match_flags - 1))) + rule->flex = true; + else + rule->flex = false; + + /* The wildcard rule is only applied if: + * a) None of the other filtering rules match (match_flags is zero) + * b) The flow type is ETHER_FLOW only (no additional fields set) + * c) Mask for Source MAC address is not specified (all zeros) + * d) Mask for Destination MAC address is not specified (all zeros) + * e) Mask for L2 EtherType is not specified (zero) + * + * If all these conditions are met, the rule is treated as a wildcard + * rule. Default queue feature will be used, so that all packets that do + * not match any other rule will be routed to the default queue. + */ + if (!rule->filter.match_flags && + fsp->flow_type == ETHER_FLOW && + is_zero_ether_addr(fsp->m_u.ether_spec.h_source) && + is_zero_ether_addr(fsp->m_u.ether_spec.h_dest) && + !fsp->m_u.ether_spec.h_proto) + rule->filter.match_flags = IGC_FILTER_FLAG_DEFAULT_QUEUE; } /** @@ -1244,11 +1334,6 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter, return -EINVAL; } - if (flags & (flags - 1)) { - netdev_dbg(dev, "Rule with multiple matches not supported\n"); - return -EOPNOTSUPP; - } - list_for_each_entry(tmp, &adapter->nfc_rule_list, list) { if (!memcmp(&rule->filter, &tmp->filter, sizeof(rule->filter)) && @@ -1280,15 +1365,29 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter, return -EOPNOTSUPP; } + if (fsp->ring_cookie >= adapter->num_rx_queues) { + netdev_dbg(netdev, "Invalid action\n"); + return -EINVAL; + } + + /* There are two ways to match the VLAN TCI: + * 1. Match on PCP field and use vlan prio filter for it + * 2. Match on complete TCI field and use flex filter for it + */ if ((fsp->flow_type & FLOW_EXT) && - fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { + fsp->m_ext.vlan_tci && + fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK) && + fsp->m_ext.vlan_tci != VLAN_TCI_FULL_MASK) { netdev_dbg(netdev, "VLAN mask not supported\n"); return -EOPNOTSUPP; } - if (fsp->ring_cookie >= adapter->num_rx_queues) { - netdev_dbg(netdev, "Invalid action\n"); - return -EINVAL; + /* VLAN EtherType can only be matched by full mask. */ + if ((fsp->flow_type & FLOW_EXT) && + fsp->m_ext.vlan_etype && + fsp->m_ext.vlan_etype != ETHER_TYPE_FULL_MASK) { + netdev_dbg(netdev, "VLAN EtherType mask not supported\n"); + return -EOPNOTSUPP; } if (fsp->location >= IGC_MAX_RXNFC_RULES) { @@ -1352,8 +1451,6 @@ static int igc_ethtool_set_rxnfc(struct net_device *dev, struct igc_adapter *adapter = netdev_priv(dev); switch (cmd->cmd) { - case ETHTOOL_SRXFH: - return igc_ethtool_set_rss_hash_opt(adapter, cmd); case ETHTOOL_SRXCLSRLINS: return igc_ethtool_add_nfc_rule(adapter, cmd); case ETHTOOL_SRXCLSRLDEL: @@ -1390,45 +1487,46 @@ static u32 igc_ethtool_get_rxfh_indir_size(struct net_device *netdev) return IGC_RETA_SIZE; } -static int igc_ethtool_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, - u8 *hfunc) +static int igc_ethtool_get_rxfh(struct net_device *netdev, + struct ethtool_rxfh_param *rxfh) { struct igc_adapter *adapter = netdev_priv(netdev); int i; - if (hfunc) - *hfunc = ETH_RSS_HASH_TOP; - if (!indir) + rxfh->hfunc = ETH_RSS_HASH_TOP; + if (!rxfh->indir) return 0; for (i = 0; i < IGC_RETA_SIZE; i++) - indir[i] = adapter->rss_indir_tbl[i]; + rxfh->indir[i] = adapter->rss_indir_tbl[i]; return 0; } -static int igc_ethtool_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key, const u8 hfunc) +static int igc_ethtool_set_rxfh(struct net_device *netdev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); u32 num_queues; int i; /* We do not allow change in unsupported parameters */ - if (key || - (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + if (rxfh->key || + (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP)) return -EOPNOTSUPP; - if (!indir) + if (!rxfh->indir) return 0; num_queues = adapter->rss_queues; /* Verify user input. */ for (i = 0; i < IGC_RETA_SIZE; i++) - if (indir[i] >= num_queues) + if (rxfh->indir[i] >= num_queues) return -EINVAL; for (i = 0; i < IGC_RETA_SIZE; i++) - adapter->rss_indir_tbl[i] = indir[i]; + adapter->rss_indir_tbl[i] = rxfh->indir[i]; igc_write_rss_indir_tbl(adapter); @@ -1467,6 +1565,10 @@ static int igc_ethtool_set_channels(struct net_device *netdev, if (ch->other_count != NON_Q_VECTORS) return -EINVAL; + /* Do not allow channel reconfiguration when mqprio is enabled */ + if (adapter->strict_priority_enable) + return -EINVAL; + /* Verify the number of channels doesn't exceed hw limits */ max_combined = igc_get_max_rss_queues(adapter); if (count > max_combined) @@ -1486,21 +1588,17 @@ static int igc_ethtool_set_channels(struct net_device *netdev, } static int igc_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct igc_adapter *adapter = netdev_priv(dev); if (adapter->ptp_clock) info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; switch (adapter->hw.mac.type) { case igc_i225: info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -1526,6 +1624,9 @@ static u32 igc_ethtool_get_priv_flags(struct net_device *netdev) if (adapter->flags & IGC_FLAG_RX_LEGACY) priv_flags |= IGC_PRIV_FLAGS_LEGACY_RX; + if (adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO) + priv_flags |= IGC_PRIV_FLAGS_REVERSE_TSN_TXQ_PRIO; + return priv_flags; } @@ -1534,10 +1635,13 @@ static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags) struct igc_adapter *adapter = netdev_priv(netdev); unsigned int flags = adapter->flags; - flags &= ~IGC_FLAG_RX_LEGACY; + flags &= ~(IGC_FLAG_RX_LEGACY | IGC_FLAG_TSN_REVERSE_TXQ_PRIO); if (priv_flags & IGC_PRIV_FLAGS_LEGACY_RX) flags |= IGC_FLAG_RX_LEGACY; + if (priv_flags & IGC_PRIV_FLAGS_REVERSE_TSN_TXQ_PRIO) + flags |= IGC_FLAG_TSN_REVERSE_TXQ_PRIO; + if (flags != adapter->flags) { adapter->flags = flags; @@ -1550,18 +1654,89 @@ static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags) } static int igc_ethtool_get_eee(struct net_device *netdev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; - u32 eeer; + struct igc_phy_info *phy = &hw->phy; + u16 eee_advert, eee_lp_advert; + u32 eeer, ret_val; + + /* EEE supported */ + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + edata->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + edata->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + edata->supported); + + /* EEE Advertisement 1 - reg 7.60 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_AB1, + &eee_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.60 register\n"); + return -EINVAL; + } + + if (eee_advert & IGC_EEE_1000BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + edata->advertised); + + if (eee_advert & IGC_EEE_100BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + edata->advertised); + + /* EEE Advertisement 2 - reg 7.62 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_AB2, + &eee_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.62 register\n"); + return -EINVAL; + } + + if (eee_advert & IGC_EEE_2500BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + edata->advertised); - if (hw->dev_spec._base.eee_enable) - edata->advertised = - mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); + /* EEE Link-Partner Ability 1 - reg 7.61 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_LP_AB1, + &eee_lp_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.61 register\n"); + return -EINVAL; + } - *edata = adapter->eee; - edata->supported = SUPPORTED_Autoneg; + if (eee_lp_advert & IGC_LP_EEE_1000BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + edata->lp_advertised); + + if (eee_lp_advert & IGC_LP_EEE_100BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + edata->lp_advertised); + + /* EEE Link-Partner Ability 2 - reg 7.63 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + IGC_ANEG_EEE_LP_AB2, + &eee_lp_advert); + if (ret_val) { + netdev_err(adapter->netdev, + "Failed to read IEEE 7.63 register\n"); + return -EINVAL; + } + + if (eee_lp_advert & IGC_LP_EEE_2500BT_MASK) + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + edata->lp_advertised); eeer = rd32(IGC_EEER); @@ -1574,9 +1749,6 @@ static int igc_ethtool_get_eee(struct net_device *netdev, edata->eee_enabled = hw->dev_spec._base.eee_enable; - edata->advertised = SUPPORTED_Autoneg; - edata->lp_advertised = SUPPORTED_Autoneg; - /* Report correct negotiated EEE status for devices that * wrongly report EEE at half-duplex */ @@ -1584,21 +1756,21 @@ static int igc_ethtool_get_eee(struct net_device *netdev, edata->eee_enabled = false; edata->eee_active = false; edata->tx_lpi_enabled = false; - edata->advertised &= ~edata->advertised; + linkmode_zero(edata->advertised); } return 0; } static int igc_ethtool_set_eee(struct net_device *netdev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; - struct ethtool_eee eee_curr; + struct ethtool_keee eee_curr; s32 ret_val; - memset(&eee_curr, 0, sizeof(struct ethtool_eee)); + memset(&eee_curr, 0, sizeof(struct ethtool_keee)); ret_val = igc_ethtool_get_eee(netdev, &eee_curr); if (ret_val) { @@ -1626,7 +1798,6 @@ static int igc_ethtool_set_eee(struct net_device *netdev, return -EINVAL; } - adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); if (hw->dev_spec._base.eee_enable != edata->eee_enabled) { hw->dev_spec._base.eee_enable = edata->eee_enabled; adapter->flags |= IGC_FLAG_EEE; @@ -1641,19 +1812,81 @@ static int igc_ethtool_set_eee(struct net_device *netdev, return 0; } -static int igc_ethtool_begin(struct net_device *netdev) +static int igc_ethtool_get_mm(struct net_device *netdev, + struct ethtool_mm_state *cmd) { struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_fpe_t *fpe = &adapter->fpe; + + ethtool_mmsv_get_mm(&fpe->mmsv, cmd); + cmd->tx_min_frag_size = fpe->tx_min_frag_size; + cmd->rx_min_frag_size = IGC_RX_MIN_FRAG_SIZE; - pm_runtime_get_sync(&adapter->pdev->dev); return 0; } -static void igc_ethtool_complete(struct net_device *netdev) +static int igc_ethtool_set_mm(struct net_device *netdev, + struct ethtool_mm_cfg *cmd, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_fpe_t *fpe = &adapter->fpe; + + fpe->tx_min_frag_size = igc_fpe_get_supported_frag_size(cmd->tx_min_frag_size); + if (fpe->tx_min_frag_size != cmd->tx_min_frag_size) + NL_SET_ERR_MSG_MOD(extack, + "tx-min-frag-size value set is unsupported. Rounded up to supported value (64, 128, 192, 256)"); + + if (fpe->mmsv.pmac_enabled != cmd->pmac_enabled) { + if (cmd->pmac_enabled) + static_branch_inc(&igc_fpe_enabled); + else + static_branch_dec(&igc_fpe_enabled); + } + + ethtool_mmsv_set_mm(&fpe->mmsv, cmd); - pm_runtime_put(&adapter->pdev->dev); + return igc_tsn_offload_apply(adapter); +} + +/** + * igc_ethtool_get_frame_ass_error - Get the frame assembly error count. + * @reg_value: Register value for IGC_PRMEXCPRCNT + * Return: The count of frame assembly errors. + */ +static u64 igc_ethtool_get_frame_ass_error(u32 reg_value) +{ + /* Out of order statistics */ + u32 ooo_frame_cnt, ooo_frag_cnt; + u32 miss_frame_frag_cnt; + + ooo_frame_cnt = FIELD_GET(IGC_PRMEXCPRCNT_OOO_FRAME_CNT, reg_value); + ooo_frag_cnt = FIELD_GET(IGC_PRMEXCPRCNT_OOO_FRAG_CNT, reg_value); + miss_frame_frag_cnt = FIELD_GET(IGC_PRMEXCPRCNT_MISS_FRAME_FRAG_CNT, + reg_value); + + return ooo_frame_cnt + ooo_frag_cnt + miss_frame_frag_cnt; +} + +static u64 igc_ethtool_get_frame_smd_error(u32 reg_value) +{ + return FIELD_GET(IGC_PRMEXCPRCNT_OOO_SMDC, reg_value); +} + +static void igc_ethtool_get_mm_stats(struct net_device *dev, + struct ethtool_mm_stats *stats) +{ + struct igc_adapter *adapter = netdev_priv(dev); + struct igc_hw *hw = &adapter->hw; + u32 reg_value; + + reg_value = rd32(IGC_PRMEXCPRCNT); + + stats->MACMergeFrameAssErrorCount = igc_ethtool_get_frame_ass_error(reg_value); + stats->MACMergeFrameSmdErrorCount = igc_ethtool_get_frame_smd_error(reg_value); + stats->MACMergeFrameAssOkCount = rd32(IGC_PRMPTDRCNT); + stats->MACMergeFragCountRx = rd32(IGC_PRMEVNTRCNT); + stats->MACMergeFragCountTx = rd32(IGC_PRMEVNTTCNT); } static int igc_ethtool_get_link_ksettings(struct net_device *netdev, @@ -1678,6 +1911,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, /* twisted pair */ cmd->base.port = PORT_TP; cmd->base.phy_address = hw->phy.addr; + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); /* advertising link modes */ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) @@ -1694,11 +1929,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); /* set autoneg settings */ - if (hw->mac.autoneg == 1) { - ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - Autoneg); - } + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); /* Set pause flow control settings */ ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); @@ -1751,10 +1983,7 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, cmd->base.duplex = DUPLEX_UNKNOWN; } cmd->base.speed = speed; - if (hw->mac.autoneg) - cmd->base.autoneg = AUTONEG_ENABLE; - else - cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.autoneg = AUTONEG_ENABLE; /* MDI-X => 2; MDI =>1; Invalid =>0 */ if (hw->phy.media_type == igc_media_type_copper) @@ -1778,7 +2007,7 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, struct igc_adapter *adapter = netdev_priv(netdev); struct net_device *dev = adapter->netdev; struct igc_hw *hw = &adapter->hw; - u32 advertising; + u16 advertised = 0; /* When adapter in resetting mode, autoneg/speed/duplex * cannot be changed @@ -1803,18 +2032,32 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) usleep_range(1000, 2000); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); - /* Converting to legacy u32 drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT. - * We have to check this and convert it to ADVERTISE_2500_FULL - * (aka ETHTOOL_LINK_MODE_2500baseX_Full_BIT) explicitly. - */ - if (ethtool_link_ksettings_test_link_mode(cmd, advertising, 2500baseT_Full)) - advertising |= ADVERTISE_2500_FULL; + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 2500baseT_Full)) + advertised |= ADVERTISE_2500_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 1000baseT_Full)) + advertised |= ADVERTISE_1000_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100baseT_Full)) + advertised |= ADVERTISE_100_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100baseT_Half)) + advertised |= ADVERTISE_100_HALF; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10baseT_Full)) + advertised |= ADVERTISE_10_FULL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10baseT_Half)) + advertised |= ADVERTISE_10_HALF; if (cmd->base.autoneg == AUTONEG_ENABLE) { - hw->mac.autoneg = 1; - hw->phy.autoneg_advertised = advertising; + hw->phy.autoneg_advertised = advertised; if (adapter->fc_autoneg) hw->fc.requested_mode = igc_fc_default; } else { @@ -1855,6 +2098,9 @@ static void igc_ethtool_diag_test(struct net_device *netdev, netdev_info(adapter->netdev, "Offline testing starting"); set_bit(__IGC_TESTING, &adapter->state); + /* power up PHY for link test */ + igc_power_up_phy_copper(&adapter->hw); + /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ @@ -1928,9 +2174,12 @@ static const struct ethtool_ops igc_ethtool_ops = { .set_coalesce = igc_ethtool_set_coalesce, .get_rxnfc = igc_ethtool_get_rxnfc, .set_rxnfc = igc_ethtool_set_rxnfc, + .get_rx_ring_count = igc_ethtool_get_rx_ring_count, .get_rxfh_indir_size = igc_ethtool_get_rxfh_indir_size, .get_rxfh = igc_ethtool_get_rxfh, .set_rxfh = igc_ethtool_set_rxfh, + .get_rxfh_fields = igc_ethtool_get_rxfh_fields, + .set_rxfh_fields = igc_ethtool_set_rxfh_fields, .get_ts_info = igc_ethtool_get_ts_info, .get_channels = igc_ethtool_get_channels, .set_channels = igc_ethtool_set_channels, @@ -1938,11 +2187,12 @@ static const struct ethtool_ops igc_ethtool_ops = { .set_priv_flags = igc_ethtool_set_priv_flags, .get_eee = igc_ethtool_get_eee, .set_eee = igc_ethtool_set_eee, - .begin = igc_ethtool_begin, - .complete = igc_ethtool_complete, .get_link_ksettings = igc_ethtool_get_link_ksettings, .set_link_ksettings = igc_ethtool_set_link_ksettings, .self_test = igc_ethtool_diag_test, + .get_mm = igc_ethtool_get_mm, + .get_mm_stats = igc_ethtool_get_mm_stats, + .set_mm = igc_ethtool_set_mm, }; void igc_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 4461f8b9a864..be8a49a86d09 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -22,8 +22,9 @@ #define IGC_DEV_ID_I220_V 0x15F7 #define IGC_DEV_ID_I225_K 0x3100 #define IGC_DEV_ID_I225_K2 0x3101 +#define IGC_DEV_ID_I226_K 0x3102 #define IGC_DEV_ID_I225_LMVP 0x5502 -#define IGC_DEV_ID_I226_K 0x5504 +#define IGC_DEV_ID_I226_LMVP 0x5503 #define IGC_DEV_ID_I225_IT 0x0D9F #define IGC_DEV_ID_I226_LM 0x125B #define IGC_DEV_ID_I226_V 0x125C @@ -52,12 +53,6 @@ enum igc_mac_type { igc_num_macs /* List is 1-based, so subtract 1 for true count. */ }; -enum igc_phy_type { - igc_phy_unknown = 0, - igc_phy_none, - igc_phy_i225, -}; - enum igc_media_type { igc_media_type_unknown = 0, igc_media_type_copper = 1, @@ -67,8 +62,6 @@ enum igc_media_type { enum igc_nvm_type { igc_nvm_unknown = 0, igc_nvm_eeprom_spi, - igc_nvm_flash_hw, - igc_nvm_invm, }; struct igc_info { @@ -96,12 +89,9 @@ struct igc_mac_info { u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; - u8 forced_speed_duplex; - bool asf_firmware_present; bool arc_subsystem_valid; - bool autoneg; bool autoneg_failed; bool get_link_status; }; @@ -140,8 +130,6 @@ struct igc_nvm_info { struct igc_phy_info { struct igc_phy_operations ops; - enum igc_phy_type type; - u32 addr; u32 id; u32 reset_delay_us; /* in usec */ @@ -284,15 +272,11 @@ struct igc_hw_stats { u64 o2bspc; u64 b2ospc; u64 b2ogprc; + u64 txdrop; }; struct net_device *igc_get_hw_dev(struct igc_hw *hw); #define hw_dbg(format, arg...) \ netdev_dbg(igc_get_hw_dev(hw), format, ##arg) -s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); -s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); -void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value); -void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value); - #endif /* _IGC_HW_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index b2ef9fde97b3..5226d10cc95b 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Intel Corporation */ +#include <linux/bitfield.h> #include <linux/delay.h> #include "igc_hw.h" @@ -156,8 +157,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask) { u32 swfw_sync; - while (igc_get_hw_semaphore_i225(hw)) - ; /* Empty */ + /* Releasing the resource requires first getting the HW semaphore. + * If we fail to get the semaphore, there is nothing we can do, + * except log an error and quit. We are not allowed to hang here + * indefinitely, as it may cause denial of service or system crash. + */ + if (igc_get_hw_semaphore_i225(hw)) { + hw_dbg("Failed to release SW_FW_SYNC.\n"); + return; + } swfw_sync = rd32(IGC_SW_FW_SYNC); swfw_sync &= ~mask; @@ -427,7 +435,7 @@ static s32 igc_update_nvm_checksum_i225(struct igc_hw *hw) } checksum += nvm_data; } - checksum = (u16)NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = igc_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) { @@ -473,13 +481,11 @@ s32 igc_init_nvm_params_i225(struct igc_hw *hw) /* NVM Function Pointers */ if (igc_get_flash_presence_i225(hw)) { - hw->nvm.type = igc_nvm_flash_hw; nvm->ops.read = igc_read_nvm_srrd_i225; nvm->ops.write = igc_write_nvm_srwr_i225; nvm->ops.validate = igc_validate_nvm_checksum_i225; nvm->ops.update = igc_update_nvm_checksum_i225; } else { - hw->nvm.type = igc_nvm_invm; nvm->ops.read = igc_read_nvm_eerd; nvm->ops.write = NULL; nvm->ops.validate = NULL; @@ -573,9 +579,8 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) /* Calculate tw_system (nsec). */ if (speed == SPEED_100) { - tw_system = ((rd32(IGC_EEE_SU) & - IGC_TW_SYSTEM_100_MASK) >> - IGC_TW_SYSTEM_100_SHIFT) * 500; + tw_system = FIELD_GET(IGC_TW_SYSTEM_100_MASK, + rd32(IGC_EEE_SU)) * 500; } else { tw_system = (rd32(IGC_EEE_SU) & IGC_TW_SYSTEM_1000_MASK) * 500; @@ -588,20 +593,11 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) size = rd32(IGC_RXPBS) & IGC_RXPBS_SIZE_I225_MASK; - /* Calculations vary based on DMAC settings. */ - if (rd32(IGC_DMACR) & IGC_DMACR_DMAC_EN) { - size -= (rd32(IGC_DMACR) & - IGC_DMACR_DMACTHR_MASK) >> - IGC_DMACR_DMACTHR_SHIFT; - /* Convert size to bits. */ - size *= 1024 * 8; - } else { - /* Convert size to bytes, subtract the MTU, and then - * convert the size to bits. - */ - size *= 1024; - size *= 8; - } + /* Convert size to bytes, subtract the MTU, and then + * convert the size to bits. + */ + size *= 1024; + size *= 8; if (size < 0) { hw_dbg("Invalid effective Rx buffer size %d\n", @@ -636,7 +632,7 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) ltrv = rd32(IGC_LTRMAXV); if (ltr_max != (ltrv & IGC_LTRMAXV_LTRV_MASK)) { ltrv = IGC_LTRMAXV_LSNP_REQ | ltr_max | - (scale_min << IGC_LTRMAXV_SCALE_SHIFT); + (scale_max << IGC_LTRMAXV_SCALE_SHIFT); wr32(IGC_LTRMAXV, ltrv); } } diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c new file mode 100644 index 000000000000..3929b25b6ae6 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_leds.c @@ -0,0 +1,302 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024 Linutronix GmbH */ + +#include <linux/bits.h> +#include <linux/leds.h> +#include <linux/netdevice.h> +#include <linux/pm_runtime.h> +#include <uapi/linux/uleds.h> + +#include "igc.h" + +#define IGC_NUM_LEDS 3 + +#define IGC_LEDCTL_LED0_MODE_SHIFT 0 +#define IGC_LEDCTL_LED0_MODE_MASK GENMASK(3, 0) +#define IGC_LEDCTL_LED0_BLINK BIT(7) +#define IGC_LEDCTL_LED1_MODE_SHIFT 8 +#define IGC_LEDCTL_LED1_MODE_MASK GENMASK(11, 8) +#define IGC_LEDCTL_LED1_BLINK BIT(15) +#define IGC_LEDCTL_LED2_MODE_SHIFT 16 +#define IGC_LEDCTL_LED2_MODE_MASK GENMASK(19, 16) +#define IGC_LEDCTL_LED2_BLINK BIT(23) + +#define IGC_LEDCTL_MODE_ON 0x00 +#define IGC_LEDCTL_MODE_OFF 0x01 +#define IGC_LEDCTL_MODE_LINK_10 0x05 +#define IGC_LEDCTL_MODE_LINK_100 0x06 +#define IGC_LEDCTL_MODE_LINK_1000 0x07 +#define IGC_LEDCTL_MODE_LINK_2500 0x08 +#define IGC_LEDCTL_MODE_ACTIVITY 0x0b + +#define IGC_SUPPORTED_MODES \ + (BIT(TRIGGER_NETDEV_LINK_2500) | BIT(TRIGGER_NETDEV_LINK_1000) | \ + BIT(TRIGGER_NETDEV_LINK_100) | BIT(TRIGGER_NETDEV_LINK_10) | \ + BIT(TRIGGER_NETDEV_RX) | BIT(TRIGGER_NETDEV_TX)) + +#define IGC_ACTIVITY_MODES \ + (BIT(TRIGGER_NETDEV_RX) | BIT(TRIGGER_NETDEV_TX)) + +struct igc_led_classdev { + struct net_device *netdev; + struct led_classdev led; + int index; +}; + +#define lcdev_to_igc_ldev(lcdev) \ + container_of(lcdev, struct igc_led_classdev, led) + +static void igc_led_select(struct igc_adapter *adapter, int led, + u32 *mask, u32 *shift, u32 *blink) +{ + switch (led) { + case 0: + *mask = IGC_LEDCTL_LED0_MODE_MASK; + *shift = IGC_LEDCTL_LED0_MODE_SHIFT; + *blink = IGC_LEDCTL_LED0_BLINK; + break; + case 1: + *mask = IGC_LEDCTL_LED1_MODE_MASK; + *shift = IGC_LEDCTL_LED1_MODE_SHIFT; + *blink = IGC_LEDCTL_LED1_BLINK; + break; + case 2: + *mask = IGC_LEDCTL_LED2_MODE_MASK; + *shift = IGC_LEDCTL_LED2_MODE_SHIFT; + *blink = IGC_LEDCTL_LED2_BLINK; + break; + default: + *mask = *shift = *blink = 0; + netdev_err(adapter->netdev, "Unknown LED %d selected!\n", led); + } +} + +static void igc_led_set(struct igc_adapter *adapter, int led, u32 mode, + bool blink) +{ + u32 shift, mask, blink_bit, ledctl; + struct igc_hw *hw = &adapter->hw; + + igc_led_select(adapter, led, &mask, &shift, &blink_bit); + + pm_runtime_get_sync(&adapter->pdev->dev); + mutex_lock(&adapter->led_mutex); + + /* Set mode */ + ledctl = rd32(IGC_LEDCTL); + ledctl &= ~mask; + ledctl |= mode << shift; + + /* Configure blinking */ + if (blink) + ledctl |= blink_bit; + else + ledctl &= ~blink_bit; + wr32(IGC_LEDCTL, ledctl); + + mutex_unlock(&adapter->led_mutex); + pm_runtime_put(&adapter->pdev->dev); +} + +static u32 igc_led_get(struct igc_adapter *adapter, int led) +{ + u32 shift, mask, blink_bit, ledctl; + struct igc_hw *hw = &adapter->hw; + + igc_led_select(adapter, led, &mask, &shift, &blink_bit); + + pm_runtime_get_sync(&adapter->pdev->dev); + mutex_lock(&adapter->led_mutex); + ledctl = rd32(IGC_LEDCTL); + mutex_unlock(&adapter->led_mutex); + pm_runtime_put(&adapter->pdev->dev); + + return (ledctl & mask) >> shift; +} + +static int igc_led_brightness_set_blocking(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev); + struct igc_adapter *adapter = netdev_priv(ldev->netdev); + u32 mode; + + if (brightness) + mode = IGC_LEDCTL_MODE_ON; + else + mode = IGC_LEDCTL_MODE_OFF; + + netdev_dbg(adapter->netdev, "Set brightness for LED %d to mode %u!\n", + ldev->index, mode); + + igc_led_set(adapter, ldev->index, mode, false); + + return 0; +} + +static int igc_led_hw_control_is_supported(struct led_classdev *led_cdev, + unsigned long flags) +{ + if (flags & ~IGC_SUPPORTED_MODES) + return -EOPNOTSUPP; + + /* If Tx and Rx selected, activity can be offloaded unless some other + * mode is selected as well. + */ + if ((flags & BIT(TRIGGER_NETDEV_TX)) && + (flags & BIT(TRIGGER_NETDEV_RX)) && + !(flags & ~IGC_ACTIVITY_MODES)) + return 0; + + /* Single Rx or Tx activity is not supported. */ + if (flags & IGC_ACTIVITY_MODES) + return -EOPNOTSUPP; + + /* Only one mode can be active at a given time. */ + if (flags & (flags - 1)) + return -EOPNOTSUPP; + + return 0; +} + +static int igc_led_hw_control_set(struct led_classdev *led_cdev, + unsigned long flags) +{ + struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev); + struct igc_adapter *adapter = netdev_priv(ldev->netdev); + u32 mode = IGC_LEDCTL_MODE_OFF; + bool blink = false; + + if (flags & BIT(TRIGGER_NETDEV_LINK_10)) + mode = IGC_LEDCTL_MODE_LINK_10; + if (flags & BIT(TRIGGER_NETDEV_LINK_100)) + mode = IGC_LEDCTL_MODE_LINK_100; + if (flags & BIT(TRIGGER_NETDEV_LINK_1000)) + mode = IGC_LEDCTL_MODE_LINK_1000; + if (flags & BIT(TRIGGER_NETDEV_LINK_2500)) + mode = IGC_LEDCTL_MODE_LINK_2500; + if ((flags & BIT(TRIGGER_NETDEV_TX)) && + (flags & BIT(TRIGGER_NETDEV_RX))) + mode = IGC_LEDCTL_MODE_ACTIVITY; + + netdev_dbg(adapter->netdev, "Set HW control for LED %d to mode %u!\n", + ldev->index, mode); + + /* blink is recommended for activity */ + if (mode == IGC_LEDCTL_MODE_ACTIVITY) + blink = true; + + igc_led_set(adapter, ldev->index, mode, blink); + + return 0; +} + +static int igc_led_hw_control_get(struct led_classdev *led_cdev, + unsigned long *flags) +{ + struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev); + struct igc_adapter *adapter = netdev_priv(ldev->netdev); + u32 mode; + + mode = igc_led_get(adapter, ldev->index); + + switch (mode) { + case IGC_LEDCTL_MODE_ACTIVITY: + *flags = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX); + break; + case IGC_LEDCTL_MODE_LINK_10: + *flags = BIT(TRIGGER_NETDEV_LINK_10); + break; + case IGC_LEDCTL_MODE_LINK_100: + *flags = BIT(TRIGGER_NETDEV_LINK_100); + break; + case IGC_LEDCTL_MODE_LINK_1000: + *flags = BIT(TRIGGER_NETDEV_LINK_1000); + break; + case IGC_LEDCTL_MODE_LINK_2500: + *flags = BIT(TRIGGER_NETDEV_LINK_2500); + break; + } + + return 0; +} + +static struct device *igc_led_hw_control_get_device(struct led_classdev *led_cdev) +{ + struct igc_led_classdev *ldev = lcdev_to_igc_ldev(led_cdev); + + return &ldev->netdev->dev; +} + +static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf, + size_t buf_len) +{ + snprintf(buf, buf_len, "igc-%x%x-led%d", + pci_domain_nr(adapter->pdev->bus), + pci_dev_id(adapter->pdev), index); +} + +static int igc_setup_ldev(struct igc_led_classdev *ldev, + struct net_device *netdev, int index) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct led_classdev *led_cdev = &ldev->led; + char led_name[LED_MAX_NAME_SIZE]; + + ldev->netdev = netdev; + ldev->index = index; + + igc_led_get_name(adapter, index, led_name, LED_MAX_NAME_SIZE); + led_cdev->name = led_name; + led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN; + led_cdev->max_brightness = 1; + led_cdev->brightness_set_blocking = igc_led_brightness_set_blocking; + led_cdev->hw_control_trigger = "netdev"; + led_cdev->hw_control_is_supported = igc_led_hw_control_is_supported; + led_cdev->hw_control_set = igc_led_hw_control_set; + led_cdev->hw_control_get = igc_led_hw_control_get; + led_cdev->hw_control_get_device = igc_led_hw_control_get_device; + + return led_classdev_register(&netdev->dev, led_cdev); +} + +int igc_led_setup(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct igc_led_classdev *leds; + int i, err; + + mutex_init(&adapter->led_mutex); + + leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL); + if (!leds) + return -ENOMEM; + + for (i = 0; i < IGC_NUM_LEDS; i++) { + err = igc_setup_ldev(leds + i, netdev, i); + if (err) + goto err; + } + + adapter->leds = leds; + + return 0; + +err: + for (i--; i >= 0; i--) + led_classdev_unregister(&((leds + i)->led)); + + kfree(leds); + return err; +} + +void igc_led_free(struct igc_adapter *adapter) +{ + struct igc_led_classdev *leds = adapter->leds; + int i; + + for (i = 0; i < IGC_NUM_LEDS; i++) + led_classdev_unregister(&((leds + i)->led)); + + kfree(leds); +} diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index 67b8ffd21d8a..7ac6637f8db7 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -127,7 +127,7 @@ s32 igc_setup_link(struct igc_hw *hw) goto out; /* If requested flow control is set to default, set flow control - * to the both 'rx' and 'tx' pause frames. + * to both 'rx' and 'tx' pause frames. */ if (hw->fc.requested_mode == igc_fc_default) hw->fc.requested_mode = igc_fc_full; @@ -193,7 +193,7 @@ s32 igc_force_mac_fc(struct igc_hw *hw) * 1: Rx flow control is enabled (we can receive pause * frames but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames - * frames but we do not receive pause frames). + * but we do not receive pause frames). * 3: Both Rx and TX flow control (symmetric) is enabled. * other: No other values should be possible at this point. */ @@ -386,14 +386,6 @@ s32 igc_check_for_copper_link(struct igc_hw *hw) */ igc_check_downshift(hw); - /* If we are forcing speed/duplex, then we simply return since - * we have already determined whether we have link or not. - */ - if (!mac->autoneg) { - ret_val = -IGC_ERR_CONFIG; - goto out; - } - /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to * configure Collision Distance in the MAC. @@ -468,173 +460,171 @@ s32 igc_config_fc_after_link_up(struct igc_hw *hw) goto out; } - /* Check for the case where we have copper media and auto-neg is - * enabled. In this case, we need to check and see if Auto-Neg - * has completed, and if so, how the PHY and link partner has - * flow control configured. + /* In auto-neg, we need to check and see if Auto-Neg has completed, + * and if so, how the PHY and link partner has flow control + * configured. */ - if (mac->autoneg) { - /* Read the MII Status Register and check to see if AutoNeg - * has completed. We read this twice because this reg has - * some "sticky" (latched) bits. - */ - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, - &mii_status_reg); - if (ret_val) - goto out; - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, - &mii_status_reg); - if (ret_val) - goto out; - if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { - hw_dbg("Copper PHY and Auto Neg has not completed.\n"); - goto out; - } + /* Read the MII Status Register and check to see if AutoNeg + * has completed. We read this twice because this reg has + * some "sticky" (latched) bits. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, + &mii_status_reg); + if (ret_val) + goto out; + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, + &mii_status_reg); + if (ret_val) + goto out; - /* The AutoNeg process has completed, so we now need to - * read both the Auto Negotiation Advertisement - * Register (Address 4) and the Auto_Negotiation Base - * Page Ability Register (Address 5) to determine how - * flow control was negotiated. - */ - ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, - &mii_nway_adv_reg); - if (ret_val) - goto out; - ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, - &mii_nway_lp_ability_reg); - if (ret_val) - goto out; - /* Two bits in the Auto Negotiation Advertisement Register - * (Address 4) and two bits in the Auto Negotiation Base - * Page Ability Register (Address 5) determine flow control - * for both the PHY and the link partner. The following - * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, - * 1999, describes these PAUSE resolution bits and how flow - * control is determined based upon these settings. - * NOTE: DC = Don't Care - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution - *-------|---------|-------|---------|-------------------- - * 0 | 0 | DC | DC | igc_fc_none - * 0 | 1 | 0 | DC | igc_fc_none - * 0 | 1 | 1 | 0 | igc_fc_none - * 0 | 1 | 1 | 1 | igc_fc_tx_pause - * 1 | 0 | 0 | DC | igc_fc_none - * 1 | DC | 1 | DC | igc_fc_full - * 1 | 1 | 0 | 0 | igc_fc_none - * 1 | 1 | 0 | 1 | igc_fc_rx_pause - * - * Are both PAUSE bits set to 1? If so, this implies - * Symmetric Flow Control is enabled at both ends. The - * ASM_DIR bits are irrelevant per the spec. - * - * For Symmetric Flow Control: - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 1 | DC | 1 | DC | IGC_fc_full - * - */ - if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { - /* Now we need to check if the user selected RX ONLY - * of pause frames. In this case, we had to advertise - * FULL flow control because we could not advertise RX - * ONLY. Hence, we must now check to see if we need to - * turn OFF the TRANSMISSION of PAUSE frames. - */ - if (hw->fc.requested_mode == igc_fc_full) { - hw->fc.current_mode = igc_fc_full; - hw_dbg("Flow Control = FULL.\n"); - } else { - hw->fc.current_mode = igc_fc_rx_pause; - hw_dbg("Flow Control = RX PAUSE frames only.\n"); - } - } + if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { + hw_dbg("Copper PHY and Auto Neg has not completed.\n"); + goto out; + } - /* For receiving PAUSE frames ONLY. - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 0 | 1 | 1 | 1 | igc_fc_tx_pause - */ - else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && - (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && - (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { - hw->fc.current_mode = igc_fc_tx_pause; - hw_dbg("Flow Control = TX PAUSE frames only.\n"); - } - /* For transmitting PAUSE frames ONLY. - * - * LOCAL DEVICE | LINK PARTNER - * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result - *-------|---------|-------|---------|-------------------- - * 1 | 1 | 0 | 1 | igc_fc_rx_pause - */ - else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && - (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && - !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { - hw->fc.current_mode = igc_fc_rx_pause; - hw_dbg("Flow Control = RX PAUSE frames only.\n"); - } - /* Per the IEEE spec, at this point flow control should be - * disabled. However, we want to consider that we could - * be connected to a legacy switch that doesn't advertise - * desired flow control, but can be forced on the link - * partner. So if we advertised no flow control, that is - * what we will resolve to. If we advertised some kind of - * receive capability (Rx Pause Only or Full Flow Control) - * and the link partner advertised none, we will configure - * ourselves to enable Rx Flow Control only. We can do - * this safely for two reasons: If the link partner really - * didn't want flow control enabled, and we enable Rx, no - * harm done since we won't be receiving any PAUSE frames - * anyway. If the intent on the link partner was to have - * flow control enabled, then by us enabling RX only, we - * can at least receive pause frames and process them. - * This is a good idea because in most cases, since we are - * predominantly a server NIC, more times than not we will - * be asked to delay transmission of packets than asking - * our link partner to pause transmission of frames. + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement + * Register (Address 4) and the Auto_Negotiation Base + * Page Ability Register (Address 5) to determine how + * flow control was negotiated. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, + &mii_nway_adv_reg); + if (ret_val) + goto out; + ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, + &mii_nway_lp_ability_reg); + if (ret_val) + goto out; + /* Two bits in the Auto Negotiation Advertisement Register + * (Address 4) and two bits in the Auto Negotiation Base + * Page Ability Register (Address 5) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings. + * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | igc_fc_none + * 0 | 1 | 0 | DC | igc_fc_none + * 0 | 1 | 1 | 0 | igc_fc_none + * 0 | 1 | 1 | 1 | igc_fc_tx_pause + * 1 | 0 | 0 | DC | igc_fc_none + * 1 | DC | 1 | DC | igc_fc_full + * 1 | 1 | 0 | 0 | igc_fc_none + * 1 | 1 | 0 | 1 | igc_fc_rx_pause + * + * Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | IGC_fc_full + * + */ + if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { + /* Now we need to check if the user selected RX ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise RX + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. */ - else if ((hw->fc.requested_mode == igc_fc_none) || - (hw->fc.requested_mode == igc_fc_tx_pause) || - (hw->fc.strict_ieee)) { - hw->fc.current_mode = igc_fc_none; - hw_dbg("Flow Control = NONE.\n"); + if (hw->fc.requested_mode == igc_fc_full) { + hw->fc.current_mode = igc_fc_full; + hw_dbg("Flow Control = FULL.\n"); } else { hw->fc.current_mode = igc_fc_rx_pause; hw_dbg("Flow Control = RX PAUSE frames only.\n"); } + } - /* Now we need to do one last check... If we auto- - * negotiated to HALF DUPLEX, flow control should not be - * enabled per IEEE 802.3 spec. - */ - ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex); - if (ret_val) { - hw_dbg("Error getting link speed and duplex\n"); - goto out; - } + /* For receiving PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | igc_fc_tx_pause + */ + else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = igc_fc_tx_pause; + hw_dbg("Flow Control = TX PAUSE frames only.\n"); + } + /* For transmitting PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | 1 | 0 | 1 | igc_fc_rx_pause + */ + else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = igc_fc_rx_pause; + hw_dbg("Flow Control = RX PAUSE frames only.\n"); + } + /* Per the IEEE spec, at this point flow control should be + * disabled. However, we want to consider that we could + * be connected to a legacy switch that doesn't advertise + * desired flow control, but can be forced on the link + * partner. So if we advertised no flow control, that is + * what we will resolve to. If we advertised some kind of + * receive capability (Rx Pause Only or Full Flow Control) + * and the link partner advertised none, we will configure + * ourselves to enable Rx Flow Control only. We can do + * this safely for two reasons: If the link partner really + * didn't want flow control enabled, and we enable Rx, no + * harm done since we won't be receiving any PAUSE frames + * anyway. If the intent on the link partner was to have + * flow control enabled, then by us enabling RX only, we + * can at least receive pause frames and process them. + * This is a good idea because in most cases, since we are + * predominantly a server NIC, more times than not we will + * be asked to delay transmission of packets than asking + * our link partner to pause transmission of frames. + */ + else if ((hw->fc.requested_mode == igc_fc_none) || + (hw->fc.requested_mode == igc_fc_tx_pause) || + (hw->fc.strict_ieee)) { + hw->fc.current_mode = igc_fc_none; + hw_dbg("Flow Control = NONE.\n"); + } else { + hw->fc.current_mode = igc_fc_rx_pause; + hw_dbg("Flow Control = RX PAUSE frames only.\n"); + } - if (duplex == HALF_DUPLEX) - hw->fc.current_mode = igc_fc_none; + /* Now we need to do one last check... If we auto- + * negotiated to HALF DUPLEX, flow control should not be + * enabled per IEEE 802.3 spec. + */ + ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex); + if (ret_val) { + hw_dbg("Error getting link speed and duplex\n"); + goto out; + } - /* Now we call a subroutine to actually force the MAC - * controller to use the correct flow control settings. - */ - ret_val = igc_force_mac_fc(hw); - if (ret_val) { - hw_dbg("Error forcing flow control settings\n"); - goto out; - } + if (duplex == HALF_DUPLEX) + hw->fc.current_mode = igc_fc_none; + + /* Now we call a subroutine to actually force the MAC + * controller to use the correct flow control settings. + */ + ret_val = igc_force_mac_fc(hw); + if (ret_val) { + hw_dbg("Error forcing flow control settings\n"); + goto out; } out: diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 95323095094d..7aafa60ba0c8 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4,7 +4,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/if_vlan.h> -#include <linux/aer.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/ip.h> @@ -12,6 +11,9 @@ #include <net/pkt_sched.h> #include <linux/bpf_trace.h> #include <net/xdp_sock_drv.h> +#include <linux/pci.h> +#include <linux/mdio.h> + #include <net/ipv6.h> #include "igc.h" @@ -30,7 +32,6 @@ static int debug = -1; -MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION(DRV_SUMMARY); MODULE_LICENSE("GPL v2"); module_param(debug, int, 0); @@ -54,6 +55,7 @@ static const struct pci_device_id igc_pci_tbl[] = { { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, @@ -118,7 +120,7 @@ void igc_reset(struct igc_adapter *adapter) igc_ptp_reset(adapter); /* Re-enable TSN offloading, where applicable. */ - igc_tsn_offload_apply(adapter); + igc_tsn_reset(adapter); igc_get_phy_info(hw); } @@ -149,6 +151,9 @@ static void igc_release_hw_control(struct igc_adapter *adapter) struct igc_hw *hw = &adapter->hw; u32 ctrl_ext; + if (!pci_device_is_present(adapter->pdev)) + return; + /* Let firmware take over control of h/w */ ctrl_ext = rd32(IGC_CTRL_EXT); wr32(IGC_CTRL_EXT, @@ -232,6 +237,8 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); } + tx_buffer->next_to_watch = NULL; + /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; i++; @@ -247,6 +254,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* reset BQL for queue */ netdev_tx_reset_queue(txring_txq(tx_ring)); + /* Zero out the buffer ring */ + memset(tx_ring->tx_buffer_info, 0, + sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -260,7 +274,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) */ void igc_free_tx_resources(struct igc_ring *tx_ring) { - igc_clean_tx_ring(tx_ring); + igc_disable_tx_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; @@ -302,6 +316,33 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter) igc_clean_tx_ring(adapter->tx_ring[i]); } +static void igc_disable_tx_ring_hw(struct igc_ring *ring) +{ + struct igc_hw *hw = &ring->q_vector->adapter->hw; + u8 idx = ring->reg_idx; + u32 txdctl; + + txdctl = rd32(IGC_TXDCTL(idx)); + txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; + txdctl |= IGC_TXDCTL_SWFLUSH; + wr32(IGC_TXDCTL(idx), txdctl); +} + +/** + * igc_disable_all_tx_rings_hw - Disable all transmit queue operation + * @adapter: board private structure + */ +static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + igc_disable_tx_ring_hw(tx_ring); + } +} + /** * igc_setup_tx_resources - allocate Tx resources (Descriptors) * @tx_ring: tx descriptor ring (for a specific queue) to setup @@ -497,6 +538,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) u8 index = rx_ring->queue_index; int size, desc_len, res; + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { @@ -630,15 +674,18 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter, else buf_size = IGC_RXBUFFER_2048; - srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT; + srrctl = rd32(IGC_SRRCTL(reg_idx)); + srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK | + IGC_SRRCTL_DESCTYPE_MASK); + srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN); + srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size); srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; wr32(IGC_SRRCTL(reg_idx), srrctl); - rxdctl |= IGC_RX_PTHRESH; - rxdctl |= IGC_RX_HTHRESH << 8; - rxdctl |= IGC_RX_WTHRESH << 16; + rxdctl |= IGC_RXDCTL_PTHRESH; + rxdctl |= IGC_RXDCTL_HTHRESH << 8; + rxdctl |= IGC_RXDCTL_WTHRESH << 16; /* initialize rx_buffer_info */ memset(ring->rx_buffer_info, 0, @@ -691,7 +738,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); - mdelay(10); wr32(IGC_TDLEN(reg_idx), ring->count * sizeof(union igc_adv_tx_desc)); @@ -703,11 +749,9 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, wr32(IGC_TDH(reg_idx), 0); writel(0, ring->tail); - txdctl |= IGC_TX_PTHRESH; - txdctl |= IGC_TX_HTHRESH << 8; - txdctl |= IGC_TX_WTHRESH << 16; + txdctl |= IGC_TXDCTL_PTHRESH(8) | IGC_TXDCTL_HTHRESH(1) | + IGC_TXDCTL_WTHRESH(16) | IGC_TXDCTL_QUEUE_ENABLE; - txdctl |= IGC_TXDCTL_QUEUE_ENABLE; wr32(IGC_TXDCTL(reg_idx), txdctl); } @@ -942,7 +986,7 @@ static int igc_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); /* set the correct pool for the new PF MAC address in entry 0 */ @@ -989,25 +1033,110 @@ static int igc_write_mc_addr_list(struct net_device *netdev) return netdev_mc_count(netdev); } -static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime) +static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, + bool *first_flag, bool *insert_empty) { + struct igc_adapter *adapter = netdev_priv(ring->netdev); ktime_t cycle_time = adapter->cycle_time; ktime_t base_time = adapter->base_time; - u32 launchtime; + ktime_t now = ktime_get_clocktai(); + ktime_t baset_est, end_of_cycle; + s32 launchtime; + s64 n; + + n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); + + baset_est = ktime_add_ns(base_time, cycle_time * (n)); + end_of_cycle = ktime_add_ns(baset_est, cycle_time); + + if (ktime_compare(txtime, end_of_cycle) >= 0) { + if (baset_est != ring->last_ff_cycle) { + *first_flag = true; + ring->last_ff_cycle = baset_est; - /* FIXME: when using ETF together with taprio, we may have a - * case where 'delta' is larger than the cycle_time, this may - * cause problems if we don't read the current value of - * IGC_BASET, as the value writen into the launchtime - * descriptor field may be misinterpreted. + if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) + *insert_empty = true; + } + } + + /* Introducing a window at end of cycle on which packets + * potentially not honor launchtime. Window of 5us chosen + * considering software update the tail pointer and packets + * are dma'ed to packet buffer. */ - div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime); + if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC)) + netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n", + txtime); + + ring->last_tx_cycle = end_of_cycle; + + launchtime = ktime_sub_ns(txtime, baset_est); + if (launchtime > 0) + div_s64_rem(launchtime, cycle_time, &launchtime); + else + launchtime = 0; return cpu_to_le32(launchtime); } +static int igc_init_empty_frame(struct igc_ring *ring, + struct igc_tx_buffer *buffer, + struct sk_buff *skb) +{ + unsigned int size; + dma_addr_t dma; + + size = skb_headlen(skb); + + dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE); + if (dma_mapping_error(ring->dev, dma)) { + net_err_ratelimited("%s: DMA mapping error for empty frame\n", + netdev_name(ring->netdev)); + return -ENOMEM; + } + + buffer->type = IGC_TX_BUFFER_TYPE_SKB; + buffer->skb = skb; + buffer->protocol = 0; + buffer->bytecount = skb->len; + buffer->gso_segs = 1; + buffer->time_stamp = jiffies; + dma_unmap_len_set(buffer, len, skb->len); + dma_unmap_addr_set(buffer, dma, dma); + + return 0; +} + +static void igc_init_tx_empty_descriptor(struct igc_ring *ring, + struct sk_buff *skb, + struct igc_tx_buffer *first) +{ + union igc_adv_tx_desc *desc; + u32 cmd_type, olinfo_status; + + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | + first->bytecount; + olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; + + desc = IGC_TX_DESC(ring, ring->next_to_use); + desc->read.cmd_type_len = cpu_to_le32(cmd_type); + desc->read.olinfo_status = cpu_to_le32(olinfo_status); + desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma)); + + netdev_tx_sent_queue(txring_txq(ring), skb->len); + + first->next_to_watch = desc; + + ring->next_to_use++; + if (ring->next_to_use == ring->count) + ring->next_to_use = 0; +} + +#define IGC_EMPTY_FRAME_SIZE 60 + static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, - struct igc_tx_buffer *first, + __le32 launch_time, bool first_flag, u32 vlan_macip_lens, u32 type_tucmd, u32 mss_l4len_idx) { @@ -1026,26 +1155,17 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) mss_l4len_idx |= tx_ring->reg_idx << 4; + if (first_flag) + mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST; + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); - - /* We assume there is always a valid Tx time available. Invalid times - * should have been handled by the upper layers. - */ - if (tx_ring->launchtime_enable) { - struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); - ktime_t txtime = first->skb->tstamp; - - skb_txtime_consumed(first->skb); - context_desc->launch_time = igc_tx_launchtime(adapter, - txtime); - } else { - context_desc->launch_time = 0; - } + context_desc->launch_time = launch_time; } -static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) +static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first, + __le32 launch_time, bool first_flag) { struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; @@ -1085,7 +1205,8 @@ no_csum: vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; - igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0); + igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, + vlan_macip_lens, type_tucmd, 0); } static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) @@ -1140,10 +1261,21 @@ static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, (IGC_ADVTXD_DCMD_TSE)); - /* set timestamp bit if present */ + /* set timestamp bit if present, will select the register set + * based on the _TSTAMP(_X) bit. + */ cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, (IGC_ADVTXD_MAC_TSTAMP)); + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1, + (IGC_ADVTXD_TSTAMP_REG_1)); + + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2, + (IGC_ADVTXD_TSTAMP_REG_2)); + + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3, + (IGC_ADVTXD_TSTAMP_REG_3)); + /* insert frame checksum */ cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); @@ -1157,14 +1289,16 @@ static void igc_tx_olinfo_status(struct igc_ring *tx_ring, u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; /* insert L4 checksum */ - olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * - ((IGC_TXD_POPTS_TXSM << 8) / - IGC_TX_FLAGS_CSUM); + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, + (IGC_TXD_POPTS_TXSM << 8)); /* insert IPv4 checksum */ - olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * - (((IGC_TXD_POPTS_IXSM << 8)) / - IGC_TX_FLAGS_IPV4); + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, + (IGC_TXD_POPTS_IXSM << 8)); + + /* Use the second timer (free running, in general) for the timestamp */ + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, + IGC_TXD_PTP2_TIMER_1); tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); } @@ -1309,6 +1443,7 @@ dma_error: static int igc_tso(struct igc_ring *tx_ring, struct igc_tx_buffer *first, + __le32 launch_time, bool first_flag, u8 *hdr_len) { u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; @@ -1395,20 +1530,78 @@ static int igc_tso(struct igc_ring *tx_ring, vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; - igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, - type_tucmd, mss_l4len_idx); + igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, + vlan_macip_lens, type_tucmd, mss_l4len_idx); return 1; } +static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags) +{ + int i; + + for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (tstamp->skb) + continue; + + tstamp->skb = skb_get(skb); + tstamp->start = jiffies; + *flags = tstamp->flags; + + return true; + } + + return false; +} + +static int igc_insert_empty_frame(struct igc_ring *tx_ring) +{ + struct igc_tx_buffer *empty_info; + struct sk_buff *empty_skb; + void *data; + int ret; + + empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + empty_skb = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC); + if (unlikely(!empty_skb)) { + net_err_ratelimited("%s: skb alloc error for empty frame\n", + netdev_name(tx_ring->netdev)); + return -ENOMEM; + } + + data = skb_put(empty_skb, IGC_EMPTY_FRAME_SIZE); + memset(data, 0, IGC_EMPTY_FRAME_SIZE); + + /* Prepare DMA mapping and Tx buffer information */ + ret = igc_init_empty_frame(tx_ring, empty_info, empty_skb); + if (unlikely(ret)) { + dev_kfree_skb_any(empty_skb); + return ret; + } + + /* Prepare advanced context descriptor for empty packet */ + igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0); + + /* Prepare advanced data descriptor for empty packet */ + igc_init_tx_empty_descriptor(tx_ring, empty_skb, empty_info); + + return 0; +} + static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, struct igc_ring *tx_ring) { + struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + bool first_flag = false, insert_empty = false; u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = vlan_get_protocol(skb); struct igc_tx_buffer *first; + __le32 launch_time = 0; u32 tx_flags = 0; unsigned short f; + ktime_t txtime; u8 hdr_len = 0; int tso = 0; @@ -1416,17 +1609,39 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, * + 1 desc for context descriptor, + * + 2 desc for inserting an empty packet for launch time, * otherwise try next time */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) count += TXD_USE_COUNT(skb_frag_size( &skb_shinfo(skb)->frags[f])); - if (igc_maybe_stop_tx(tx_ring, count + 3)) { + if (igc_maybe_stop_tx(tx_ring, count + 5)) { /* this is a hard error */ return NETDEV_TX_BUSY; } + if (!tx_ring->launchtime_enable) + goto done; + + txtime = skb->tstamp; + skb->tstamp = ktime_set(0, 0); + launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty); + + if (insert_empty) { + /* Reset the launch time if the required empty frame fails to + * be inserted. However, this packet is not dropped, so it + * "dirties" the current Qbv cycle. This ensures that the + * upcoming packet, which is scheduled in the next Qbv cycle, + * does not require an empty frame. This way, the launch time + * continues to function correctly despite the current failure + * to insert the empty frame. + */ + if (igc_insert_empty_frame(tx_ring)) + launch_time = 0; + } + +done: /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; first->type = IGC_TX_BUFFER_TYPE_SKB; @@ -1434,24 +1649,31 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + if (adapter->qbv_transition || tx_ring->oper_gate_closed) + goto out_drop; - /* FIXME: add support for retrieving timestamps from - * the other timer registers before skipping the - * timestamping request. - */ - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, - &adapter->state)) { - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - tx_flags |= IGC_TX_FLAGS_TSTAMP; + if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { + adapter->stats.txdrop++; + goto out_drop; + } + + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + unsigned long flags; + u32 tstamp_flags; - adapter->ptp_tx_skb = skb_get(skb); - adapter->ptp_tx_start = jiffies; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; + if (skb->sk && + READ_ONCE(skb->sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC) + tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; } else { adapter->tx_hwtstamp_skipped++; } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } if (skb_vlan_tag_present(skb)) { @@ -1463,11 +1685,20 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, first->tx_flags = tx_flags; first->protocol = protocol; - tso = igc_tso(tx_ring, first, &hdr_len); + /* For preemptible queue, manually pad the skb so that HW includes + * padding bytes in mCRC calculation + */ + if (tx_ring->preemptible && skb->len < ETH_ZLEN) { + if (skb_padto(skb, ETH_ZLEN)) + goto out_drop; + skb_put(skb, ETH_ZLEN - skb->len); + } + + tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len); if (tso < 0) goto out_drop; else if (!tso) - igc_tx_csum(tx_ring, first); + igc_tx_csum(tx_ring, first, launch_time, first_flag); igc_tx_map(tx_ring, first, hdr_len); @@ -1548,14 +1779,36 @@ static void igc_rx_checksum(struct igc_ring *ring, le32_to_cpu(rx_desc->wb.upper.status_error)); } +/* Mapping HW RSS Type to enum pkt_hash_types */ +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4, + [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */ + [13] = PKT_HASH_TYPE_NONE, + [14] = PKT_HASH_TYPE_NONE, + [15] = PKT_HASH_TYPE_NONE, +}; + static inline void igc_rx_hash(struct igc_ring *ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { - if (ring->netdev->features & NETIF_F_RXHASH) - skb_set_hash(skb, - le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - PKT_HASH_TYPE_L3); + if (ring->netdev->features & NETIF_F_RXHASH) { + u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + u32 rss_type = igc_rss_type(rx_desc); + + skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]); + } } static void igc_rx_vlan(struct igc_ring *rx_ring, @@ -1710,24 +1963,26 @@ static void igc_add_rx_frag(struct igc_ring *rx_ring, static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, struct igc_rx_buffer *rx_buffer, - union igc_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + unsigned int size = xdp->data_end - xdp->data; unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); + unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; /* prefetch first cache line of first page */ - net_prefetch(va); + net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ - skb = build_skb(va - IGC_SKB_PAD, truesize); + skb = napi_build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL; /* update pointers within the skb to store the data */ - skb_reserve(skb, IGC_SKB_PAD); + skb_reserve(skb, xdp->data - xdp->data_hard_start); __skb_put(skb, size); + if (metasize) + skb_metadata_set(skb, metasize); igc_rx_buffer_flip(rx_buffer, truesize); return skb; @@ -1735,9 +1990,10 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, struct igc_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - ktime_t timestamp) + struct igc_xdp_buff *ctx) { + struct xdp_buff *xdp = &ctx->xdp; + unsigned int metasize = xdp->data - xdp->data_meta; unsigned int size = xdp->data_end - xdp->data; unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); void *va = xdp->data; @@ -1745,15 +2001,18 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, struct sk_buff *skb; /* prefetch first cache line of first page */ - net_prefetch(va); + net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, + IGC_RX_HDR_LEN + metasize); if (unlikely(!skb)) return NULL; - if (timestamp) - skb_hwtstamps(skb)->hwtstamp = timestamp; + if (ctx->rx_ts) { + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; + skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; + } /* Determine available headroom for copy */ headlen = size; @@ -1761,7 +2020,13 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, + ALIGN(headlen + metasize, sizeof(long))); + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } /* update all of the pointers */ size -= headlen; @@ -1885,10 +2150,6 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { - /* XDP packets use error pointer so abort at this point */ - if (IS_ERR(skb)) - return true; - if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { struct net_device *netdev = rx_ring->netdev; @@ -1953,6 +2214,7 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_failed++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } @@ -1969,6 +2231,7 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, __free_page(page); rx_ring->rx_stats.alloc_failed++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } @@ -2062,6 +2325,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) if (!count) return ok; + XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); + desc = IGC_RX_DESC(ring, i); bi = &ring->rx_buffer_info[i]; i -= ring->count; @@ -2108,69 +2373,105 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) return ok; } -static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, - struct xdp_frame *xdpf, - struct igc_ring *ring) -{ - dma_addr_t dma; - - dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); - if (dma_mapping_error(ring->dev, dma)) { - netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); - return -ENOMEM; - } - - buffer->type = IGC_TX_BUFFER_TYPE_XDP; - buffer->xdpf = xdpf; - buffer->protocol = 0; - buffer->bytecount = xdpf->len; - buffer->gso_segs = 1; - buffer->time_stamp = jiffies; - dma_unmap_len_set(buffer, len, xdpf->len); - dma_unmap_addr_set(buffer, dma, dma); - return 0; -} - /* This function requires __netif_tx_lock is held by the caller. */ static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, struct xdp_frame *xdpf) { - struct igc_tx_buffer *buffer; - union igc_adv_tx_desc *desc; - u32 cmd_type, olinfo_status; - int err; + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); + u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0; + u16 count, index = ring->next_to_use; + struct igc_tx_buffer *head = &ring->tx_buffer_info[index]; + struct igc_tx_buffer *buffer = head; + union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index); + u32 olinfo_status, len = xdpf->len, cmd_type; + void *data = xdpf->data; + u16 i; - if (!igc_desc_unused(ring)) - return -EBUSY; + count = TXD_USE_COUNT(len); + for (i = 0; i < nr_frags; i++) + count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i])); - buffer = &ring->tx_buffer_info[ring->next_to_use]; - err = igc_xdp_init_tx_buffer(buffer, xdpf, ring); - if (err) - return err; + if (igc_maybe_stop_tx(ring, count + 3)) { + /* this is a hard error */ + return -EBUSY; + } - cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | - IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | - buffer->bytecount; - olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; + i = 0; + head->bytecount = xdp_get_frame_len(xdpf); + head->type = IGC_TX_BUFFER_TYPE_XDP; + head->gso_segs = 1; + head->xdpf = xdpf; - desc = IGC_TX_DESC(ring, ring->next_to_use); - desc->read.cmd_type_len = cpu_to_le32(cmd_type); + olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; desc->read.olinfo_status = cpu_to_le32(olinfo_status); - desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma)); - netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount); + for (;;) { + dma_addr_t dma; - buffer->next_to_watch = desc; + dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE); + if (dma_mapping_error(ring->dev, dma)) { + netdev_err_once(ring->netdev, + "Failed to map DMA for TX\n"); + goto unmap; + } - ring->next_to_use++; - if (ring->next_to_use == ring->count) - ring->next_to_use = 0; + dma_unmap_len_set(buffer, len, len); + dma_unmap_addr_set(buffer, dma, dma); + + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | len; + + desc->read.cmd_type_len = cpu_to_le32(cmd_type); + desc->read.buffer_addr = cpu_to_le64(dma); + + buffer->protocol = 0; + + if (++index == ring->count) + index = 0; + + if (i == nr_frags) + break; + + buffer = &ring->tx_buffer_info[index]; + desc = IGC_TX_DESC(ring, index); + desc->read.olinfo_status = 0; + + data = skb_frag_address(&sinfo->frags[i]); + len = skb_frag_size(&sinfo->frags[i]); + i++; + } + desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD); + + netdev_tx_sent_queue(txring_txq(ring), head->bytecount); + /* set the timestamp */ + head->time_stamp = jiffies; + /* set next_to_watch value indicating a packet is present */ + head->next_to_watch = desc; + ring->next_to_use = index; return 0; + +unmap: + for (;;) { + buffer = &ring->tx_buffer_info[index]; + if (dma_unmap_len(buffer, len)) + dma_unmap_page(ring->dev, + dma_unmap_addr(buffer, dma), + dma_unmap_len(buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(buffer, len, 0); + if (buffer == head) + break; + + if (!index) + index += ring->count; + index--; + } + + return -ENOMEM; } -static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter, - int cpu) +struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu) { int index = cpu; @@ -2194,10 +2495,12 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) if (unlikely(!xdpf)) return -EFAULT; - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); res = igc_xdp_init_tx_descriptor(ring, xdpf); __netif_tx_unlock(nq); return res; @@ -2223,7 +2526,7 @@ static int __igc_xdp_run_prog(struct igc_adapter *adapter, return IGC_XDP_REDIRECT; break; default: - bpf_warn_invalid_xdp_action(act); + bpf_warn_invalid_xdp_action(adapter->netdev, prog, act); fallthrough; case XDP_ABORTED: out_failure: @@ -2234,8 +2537,7 @@ out_failure: } } -static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, - struct xdp_buff *xdp) +static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp) { struct bpf_prog *prog; int res; @@ -2249,11 +2551,11 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, res = __igc_xdp_run_prog(adapter, prog, xdp); out: - return ERR_PTR(-res); + return res; } /* This function assumes __netif_tx_lock is held by the caller. */ -static void igc_flush_tx_descriptors(struct igc_ring *ring) +void igc_flush_tx_descriptors(struct igc_ring *ring) { /* Once tail pointer is updated, hardware can fetch the descriptors * any time so we issue a write membar here to ensure all memory @@ -2270,7 +2572,7 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status) struct igc_ring *ring; if (status & IGC_XDP_TX) { - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); @@ -2304,13 +2606,13 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = igc_desc_unused(rx_ring); int xdp_status = 0, rx_buffer_pgcnt; + int xdp_res = 0; while (likely(total_packets < budget)) { - union igc_adv_rx_desc *rx_desc; + struct igc_xdp_buff ctx = { .rx_ts = NULL }; struct igc_rx_buffer *rx_buffer; + union igc_adv_rx_desc *rx_desc; unsigned int size, truesize; - ktime_t timestamp = 0; - struct xdp_buff xdp; int pkt_offset = 0; void *pktbuf; @@ -2337,23 +2639,31 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { - timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, - pktbuf); + ctx.rx_ts = pktbuf; pkt_offset = IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; } - if (!skb) { - xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); - xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), - igc_rx_offset(rx_ring) + pkt_offset, size, false); - - skb = igc_xdp_run_prog(adapter, &xdp); + if (igc_fpe_is_pmac_enabled(adapter) && + igc_fpe_handle_mpacket(adapter, rx_desc, size, pktbuf)) { + /* Advance the ring next-to-clean */ + igc_is_non_eop(rx_ring, rx_desc); + cleaned_count++; + continue; } - if (IS_ERR(skb)) { - unsigned int xdp_res = -PTR_ERR(skb); + if (!skb) { + xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); + xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), + igc_rx_offset(rx_ring) + pkt_offset, + size, true); + xdp_buff_clear_frags_flag(&ctx.xdp); + ctx.rx_desc = rx_desc; + + xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp); + } + if (xdp_res) { switch (xdp_res) { case IGC_XDP_CONSUMED: rx_buffer->pagecnt_bias++; @@ -2370,15 +2680,15 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } else if (skb) igc_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) - skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size); + skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); else - skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, - timestamp); + skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); /* exit if we failed to retrieve a buffer */ - if (!skb) { + if (!xdp_res && !skb) { rx_ring->rx_stats.alloc_failed++; rx_buffer->pagecnt_bias++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); break; } @@ -2390,7 +2700,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) continue; /* verify the packet layout is correct */ - if (igc_cleanup_headers(rx_ring, rx_desc, skb)) { + if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) { skb = NULL; continue; } @@ -2425,44 +2735,49 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, - struct xdp_buff *xdp) + struct igc_xdp_buff *ctx) { + struct xdp_buff *xdp = &ctx->xdp; + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; - unsigned int totalsize = metasize + datasize; struct sk_buff *skb; - skb = __napi_alloc_skb(&ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, - GFP_ATOMIC | __GFP_NOWARN); + net_prefetch(xdp->data_meta); + + skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); - memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } + + if (ctx->rx_ts) { + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; + skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; + } return skb; } static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, union igc_adv_rx_desc *desc, - struct xdp_buff *xdp, - ktime_t timestamp) + struct igc_xdp_buff *ctx) { struct igc_ring *ring = q_vector->rx.ring; struct sk_buff *skb; - skb = igc_construct_skb_zc(ring, xdp); + skb = igc_construct_skb_zc(ring, ctx); if (!skb) { ring->rx_stats.alloc_failed++; + set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags); return; } - if (timestamp) - skb_hwtstamps(skb)->hwtstamp = timestamp; - if (igc_cleanup_headers(ring, desc, skb)) return; @@ -2470,6 +2785,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, napi_gro_receive(&q_vector->napi, skb); } +static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) +{ + /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The + * igc_xdp_buff shares its layout with xdp_buff_xsk and private + * igc_xdp_buff fields fall into xdp_buff_xsk->cb + */ + return (struct igc_xdp_buff *)xdp; +} + static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) { struct igc_adapter *adapter = q_vector->adapter; @@ -2488,7 +2812,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) while (likely(total_packets < budget)) { union igc_adv_rx_desc *desc; struct igc_rx_buffer *bi; - ktime_t timestamp = 0; + struct igc_xdp_buff *ctx; unsigned int size; int res; @@ -2505,9 +2829,11 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) bi = &ring->rx_buffer_info[ntc]; + ctx = xsk_buff_to_igc_ctx(bi->xdp); + ctx->rx_desc = desc; + if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { - timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, - bi->xdp->data); + ctx->rx_ts = bi->xdp->data; bi->xdp->data += IGC_TS_HDR_LEN; @@ -2516,15 +2842,17 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) */ bi->xdp->data_meta += IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; + } else { + ctx->rx_ts = NULL; } bi->xdp->data_end = bi->xdp->data + size; - xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool); + xsk_buff_dma_sync_for_cpu(bi->xdp); res = __igc_xdp_run_prog(adapter, prog, bi->xdp); switch (res) { case IGC_XDP_PASS: - igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp); + igc_dispatch_skb_zc(q_vector, desc, ctx); fallthrough; case IGC_XDP_CONSUMED: xsk_buff_free(bi->xdp); @@ -2580,42 +2908,193 @@ static void igc_update_tx_stats(struct igc_q_vector *q_vector, q_vector->tx.total_packets += packets; } +static void igc_xsk_request_timestamp(void *_priv) +{ + struct igc_metadata_request *meta_req = _priv; + struct igc_ring *tx_ring = meta_req->tx_ring; + struct igc_tx_timestamp_request *tstamp; + u32 tx_flags = IGC_TX_FLAGS_TSTAMP; + struct igc_adapter *adapter; + unsigned long lock_flags; + bool found = false; + int i; + + if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { + adapter = netdev_priv(tx_ring->netdev); + + spin_lock_irqsave(&adapter->ptp_tx_lock, lock_flags); + + /* Search for available tstamp regs */ + for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + tstamp = &adapter->tx_tstamp[i]; + + /* tstamp->skb and tstamp->xsk_tx_buffer are in union. + * When tstamp->skb is equal to NULL, + * tstamp->xsk_tx_buffer is equal to NULL as well. + * This condition means that the particular tstamp reg + * is not occupied by other packet. + */ + if (!tstamp->skb) { + found = true; + break; + } + } + + /* Return if no available tstamp regs */ + if (!found) { + adapter->tx_hwtstamp_skipped++; + spin_unlock_irqrestore(&adapter->ptp_tx_lock, + lock_flags); + return; + } + + tstamp->start = jiffies; + tstamp->xsk_queue_index = tx_ring->queue_index; + tstamp->xsk_tx_buffer = meta_req->tx_buffer; + tstamp->buffer_type = IGC_TX_BUFFER_TYPE_XSK; + + /* Hold the transmit completion until timestamp is ready */ + meta_req->tx_buffer->xsk_pending_ts = true; + + /* Keep the pointer to tx_timestamp, which is located in XDP + * metadata area. It is the location to store the value of + * tx hardware timestamp. + */ + xsk_tx_metadata_to_compl(meta_req->meta, &tstamp->xsk_meta); + + /* Set timestamp bit based on the _TSTAMP(_X) bit. */ + tx_flags |= tstamp->flags; + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP, + (IGC_ADVTXD_MAC_TSTAMP)); + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP_1, + (IGC_ADVTXD_TSTAMP_REG_1)); + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP_2, + (IGC_ADVTXD_TSTAMP_REG_2)); + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP_3, + (IGC_ADVTXD_TSTAMP_REG_3)); + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, lock_flags); + } +} + +static u64 igc_xsk_fill_timestamp(void *_priv) +{ + return *(u64 *)_priv; +} + +static void igc_xsk_request_launch_time(u64 launch_time, void *_priv) +{ + struct igc_metadata_request *meta_req = _priv; + struct igc_ring *tx_ring = meta_req->tx_ring; + __le32 launch_time_offset; + bool insert_empty = false; + bool first_flag = false; + u16 used_desc = 0; + + if (!tx_ring->launchtime_enable) + return; + + launch_time_offset = igc_tx_launchtime(tx_ring, + ns_to_ktime(launch_time), + &first_flag, &insert_empty); + if (insert_empty) { + /* Disregard the launch time request if the required empty frame + * fails to be inserted. + */ + if (igc_insert_empty_frame(tx_ring)) + return; + + meta_req->tx_buffer = + &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + /* Inserting an empty packet requires two descriptors: + * one data descriptor and one context descriptor. + */ + used_desc += 2; + } + + /* Use one context descriptor to specify launch time and first flag. */ + igc_tx_ctxtdesc(tx_ring, launch_time_offset, first_flag, 0, 0, 0); + used_desc += 1; + + /* Update the number of used descriptors in this request */ + meta_req->used_desc += used_desc; +} + +const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = { + .tmo_request_timestamp = igc_xsk_request_timestamp, + .tmo_fill_timestamp = igc_xsk_fill_timestamp, + .tmo_request_launch_time = igc_xsk_request_launch_time, +}; + static void igc_xdp_xmit_zc(struct igc_ring *ring) { struct xsk_buff_pool *pool = ring->xsk_pool; struct netdev_queue *nq = txring_txq(ring); union igc_adv_tx_desc *tx_desc = NULL; int cpu = smp_processor_id(); - u16 ntu = ring->next_to_use; struct xdp_desc xdp_desc; - u16 budget; + u16 budget, ntu; if (!netif_carrier_ok(ring->netdev)) return; __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + + ntu = ring->next_to_use; budget = igc_desc_unused(ring); - while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { - u32 cmd_type, olinfo_status; + /* Packets with launch time require one data descriptor and one context + * descriptor. When the launch time falls into the next Qbv cycle, we + * may need to insert an empty packet, which requires two more + * descriptors. Therefore, to be safe, we always ensure we have at least + * 4 descriptors available. + */ + while (budget >= 4 && xsk_tx_peek_desc(pool, &xdp_desc)) { + struct igc_metadata_request meta_req; + struct xsk_tx_metadata *meta = NULL; struct igc_tx_buffer *bi; + u32 olinfo_status; dma_addr_t dma; - cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | - IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | - xdp_desc.len; + meta_req.cmd_type = IGC_ADVTXD_DTYP_DATA | + IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | + IGC_TXD_DCMD | xdp_desc.len; olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); + meta = xsk_buff_get_metadata(pool, xdp_desc.addr); xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); + bi = &ring->tx_buffer_info[ntu]; + + meta_req.tx_ring = ring; + meta_req.tx_buffer = bi; + meta_req.meta = meta; + meta_req.used_desc = 0; + xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops, + &meta_req); + + /* xsk_tx_metadata_request() may have updated next_to_use */ + ntu = ring->next_to_use; + + /* xsk_tx_metadata_request() may have updated Tx buffer info */ + bi = meta_req.tx_buffer; + + /* xsk_tx_metadata_request() may use a few descriptors */ + budget -= meta_req.used_desc; tx_desc = IGC_TX_DESC(ring, ntu); - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type); tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); tx_desc->read.buffer_addr = cpu_to_le64(dma); - bi = &ring->tx_buffer_info[ntu]; bi->type = IGC_TX_BUFFER_TYPE_XSK; bi->protocol = 0; bi->bytecount = xdp_desc.len; @@ -2628,9 +3107,11 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) ntu++; if (ntu == ring->count) ntu = 0; + + ring->next_to_use = ntu; + budget--; } - ring->next_to_use = ntu; if (tx_desc) { igc_flush_tx_descriptors(ring); xsk_tx_release(pool); @@ -2678,6 +3159,18 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) break; + if (igc_fpe_is_pmac_enabled(adapter) && + igc_fpe_transmitted_smd_v(tx_desc)) + ethtool_mmsv_event_handle(&adapter->fpe.mmsv, + ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET); + + /* Hold the completions while there's a pending tx hardware + * timestamp request from XDP Tx metadata. + */ + if (tx_buffer->type == IGC_TX_BUFFER_TYPE_XSK && + tx_buffer->xsk_pending_ts) + break; + /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; @@ -2761,7 +3254,9 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) if (tx_buffer->next_to_watch && time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && - !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { + !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && + (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && + !tx_ring->oper_gate_closed) { /* detected Tx unit hang */ netdev_err(tx_ring->netdev, "Detected Tx Unit Hang\n" @@ -3073,11 +3568,337 @@ static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) etype); } +static int igc_flex_filter_select(struct igc_adapter *adapter, + struct igc_flex_filter *input, + u32 *fhft) +{ + struct igc_hw *hw = &adapter->hw; + u8 fhft_index; + u32 fhftsl; + + if (input->index >= MAX_FLEX_FILTER) { + netdev_err(adapter->netdev, "Wrong Flex Filter index selected!\n"); + return -EINVAL; + } + + /* Indirect table select register */ + fhftsl = rd32(IGC_FHFTSL); + fhftsl &= ~IGC_FHFTSL_FTSL_MASK; + switch (input->index) { + case 0 ... 7: + fhftsl |= 0x00; + break; + case 8 ... 15: + fhftsl |= 0x01; + break; + case 16 ... 23: + fhftsl |= 0x02; + break; + case 24 ... 31: + fhftsl |= 0x03; + break; + } + wr32(IGC_FHFTSL, fhftsl); + + /* Normalize index down to host table register */ + fhft_index = input->index % 8; + + *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : + IGC_FHFT_EXT(fhft_index - 4); + + return 0; +} + +static int igc_write_flex_filter_ll(struct igc_adapter *adapter, + struct igc_flex_filter *input) +{ + struct igc_hw *hw = &adapter->hw; + u8 *data = input->data; + u8 *mask = input->mask; + u32 queuing; + u32 fhft; + u32 wufc; + int ret; + int i; + + /* Length has to be aligned to 8. Otherwise the filter will fail. Bail + * out early to avoid surprises later. + */ + if (input->length % 8 != 0) { + netdev_err(adapter->netdev, "The length of a flex filter has to be 8 byte aligned!\n"); + return -EINVAL; + } + + /* Select corresponding flex filter register and get base for host table. */ + ret = igc_flex_filter_select(adapter, input, &fhft); + if (ret) + return ret; + + /* When adding a filter globally disable flex filter feature. That is + * recommended within the datasheet. + */ + wufc = rd32(IGC_WUFC); + wufc &= ~IGC_WUFC_FLEX_HQ; + wr32(IGC_WUFC, wufc); + + /* Configure filter */ + queuing = input->length & IGC_FHFT_LENGTH_MASK; + queuing |= FIELD_PREP(IGC_FHFT_QUEUE_MASK, input->rx_queue); + queuing |= FIELD_PREP(IGC_FHFT_PRIO_MASK, input->prio); + + if (input->immediate_irq) + queuing |= IGC_FHFT_IMM_INT; + + if (input->drop) + queuing |= IGC_FHFT_DROP; + + wr32(fhft + 0xFC, queuing); + + /* Write data (128 byte) and mask (128 bit) */ + for (i = 0; i < 16; ++i) { + const size_t data_idx = i * 8; + const size_t row_idx = i * 16; + u32 dw0 = + (data[data_idx + 0] << 0) | + (data[data_idx + 1] << 8) | + (data[data_idx + 2] << 16) | + (data[data_idx + 3] << 24); + u32 dw1 = + (data[data_idx + 4] << 0) | + (data[data_idx + 5] << 8) | + (data[data_idx + 6] << 16) | + (data[data_idx + 7] << 24); + u32 tmp; + + /* Write row: dw0, dw1 and mask */ + wr32(fhft + row_idx, dw0); + wr32(fhft + row_idx + 4, dw1); + + /* mask is only valid for MASK(7, 0) */ + tmp = rd32(fhft + row_idx + 8); + tmp &= ~GENMASK(7, 0); + tmp |= mask[i]; + wr32(fhft + row_idx + 8, tmp); + } + + /* Enable filter. */ + wufc |= IGC_WUFC_FLEX_HQ; + if (input->index > 8) { + /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */ + u32 wufc_ext = rd32(IGC_WUFC_EXT); + + wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); + + wr32(IGC_WUFC_EXT, wufc_ext); + } else { + wufc |= (IGC_WUFC_FLX0 << input->index); + } + wr32(IGC_WUFC, wufc); + + netdev_dbg(adapter->netdev, "Added flex filter %u to HW.\n", + input->index); + + return 0; +} + +static void igc_flex_filter_add_field(struct igc_flex_filter *flex, + const void *src, unsigned int offset, + size_t len, const void *mask) +{ + int i; + + /* data */ + memcpy(&flex->data[offset], src, len); + + /* mask */ + for (i = 0; i < len; ++i) { + const unsigned int idx = i + offset; + const u8 *ptr = mask; + + if (mask) { + if (ptr[i] & 0xff) + flex->mask[idx / 8] |= BIT(idx % 8); + + continue; + } + + flex->mask[idx / 8] |= BIT(idx % 8); + } +} + +static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc, wufc_ext; + int i; + + wufc = rd32(IGC_WUFC); + wufc_ext = rd32(IGC_WUFC_EXT); + + for (i = 0; i < MAX_FLEX_FILTER; i++) { + if (i < 8) { + if (!(wufc & (IGC_WUFC_FLX0 << i))) + return i; + } else { + if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) + return i; + } + } + + return -ENOSPC; +} + +static bool igc_flex_filter_in_use(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc, wufc_ext; + + wufc = rd32(IGC_WUFC); + wufc_ext = rd32(IGC_WUFC_EXT); + + if (wufc & IGC_WUFC_FILTER_MASK) + return true; + + if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) + return true; + + return false; +} + +static int igc_add_flex_filter(struct igc_adapter *adapter, + struct igc_nfc_rule *rule) +{ + struct igc_nfc_filter *filter = &rule->filter; + unsigned int eth_offset, user_offset; + struct igc_flex_filter flex = { }; + int ret, index; + bool vlan; + + index = igc_find_avail_flex_filter_slot(adapter); + if (index < 0) + return -ENOSPC; + + /* Construct the flex filter: + * -> dest_mac [6] + * -> src_mac [6] + * -> tpid [2] + * -> vlan tci [2] + * -> ether type [2] + * -> user data [8] + * -> = 26 bytes => 32 length + */ + flex.index = index; + flex.length = 32; + flex.rx_queue = rule->action; + + vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; + eth_offset = vlan ? 16 : 12; + user_offset = vlan ? 18 : 14; + + /* Add destination MAC */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) + igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, + ETH_ALEN, NULL); + + /* Add source MAC */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) + igc_flex_filter_add_field(&flex, &filter->src_addr, 6, + ETH_ALEN, NULL); + + /* Add VLAN etype */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) { + __be16 vlan_etype = cpu_to_be16(filter->vlan_etype); + + igc_flex_filter_add_field(&flex, &vlan_etype, 12, + sizeof(vlan_etype), NULL); + } + + /* Add VLAN TCI */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) + igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, + sizeof(filter->vlan_tci), NULL); + + /* Add Ether type */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { + __be16 etype = cpu_to_be16(filter->etype); + + igc_flex_filter_add_field(&flex, &etype, eth_offset, + sizeof(etype), NULL); + } + + /* Add user data */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) + igc_flex_filter_add_field(&flex, &filter->user_data, + user_offset, + sizeof(filter->user_data), + filter->user_mask); + + /* Add it down to the hardware and enable it. */ + ret = igc_write_flex_filter_ll(adapter, &flex); + if (ret) + return ret; + + filter->flex_index = index; + + return 0; +} + +static void igc_del_flex_filter(struct igc_adapter *adapter, + u16 reg_index) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc; + + /* Just disable the filter. The filter table itself is kept + * intact. Another flex_filter_add() should override the "old" data + * then. + */ + if (reg_index > 8) { + u32 wufc_ext = rd32(IGC_WUFC_EXT); + + wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); + wr32(IGC_WUFC_EXT, wufc_ext); + } else { + wufc = rd32(IGC_WUFC); + + wufc &= ~(IGC_WUFC_FLX0 << reg_index); + wr32(IGC_WUFC, wufc); + } + + if (igc_flex_filter_in_use(adapter)) + return; + + /* No filters are in use, we may disable flex filters */ + wufc = rd32(IGC_WUFC); + wufc &= ~IGC_WUFC_FLEX_HQ; + wr32(IGC_WUFC, wufc); +} + +static void igc_set_default_queue_filter(struct igc_adapter *adapter, u32 queue) +{ + struct igc_hw *hw = &adapter->hw; + u32 mrqc = rd32(IGC_MRQC); + + mrqc &= ~IGC_MRQC_DEFAULT_QUEUE_MASK; + mrqc |= FIELD_PREP(IGC_MRQC_DEFAULT_QUEUE_MASK, queue); + wr32(IGC_MRQC, mrqc); +} + +static void igc_reset_default_queue_filter(struct igc_adapter *adapter) +{ + /* Reset the default queue to its default value which is Queue 0 */ + igc_set_default_queue_filter(adapter, 0); +} + static int igc_enable_nfc_rule(struct igc_adapter *adapter, - const struct igc_nfc_rule *rule) + struct igc_nfc_rule *rule) { int err; + if (rule->flex) { + return igc_add_flex_filter(adapter, rule); + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { err = igc_add_etype_filter(adapter, rule->filter.etype, rule->action); @@ -3100,26 +3921,32 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter, } if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { - int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> - VLAN_PRIO_SHIFT; + int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); err = igc_add_vlan_prio_filter(adapter, prio, rule->action); if (err) return err; } + if (rule->filter.match_flags & IGC_FILTER_FLAG_DEFAULT_QUEUE) + igc_set_default_queue_filter(adapter, rule->action); + return 0; } static void igc_disable_nfc_rule(struct igc_adapter *adapter, const struct igc_nfc_rule *rule) { + if (rule->flex) { + igc_del_flex_filter(adapter, rule->filter.flex_index); + return; + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) igc_del_etype_filter(adapter, rule->filter.etype); if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { - int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> - VLAN_PRIO_SHIFT; + int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); igc_del_vlan_prio_filter(adapter, prio); } @@ -3131,6 +3958,9 @@ static void igc_disable_nfc_rule(struct igc_adapter *adapter, if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, rule->filter.dst_addr); + + if (rule->filter.match_flags & IGC_FILTER_FLAG_DEFAULT_QUEUE) + igc_reset_default_queue_filter(adapter); } /** @@ -3248,6 +4078,30 @@ static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) } /** + * igc_enable_empty_addr_recv - Enable Rx of packets with all-zeroes MAC address + * @adapter: Pointer to the igc_adapter structure. + * + * Frame preemption verification requires that packets with the all-zeroes + * MAC address are allowed to be received by the driver. This function adds the + * all-zeroes destination address to the list of acceptable addresses. + * + * Return: 0 on success, negative value otherwise. + */ +int igc_enable_empty_addr_recv(struct igc_adapter *adapter) +{ + u8 empty[ETH_ALEN] = {}; + + return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty, -1); +} + +void igc_disable_empty_addr_recv(struct igc_adapter *adapter) +{ + u8 empty[ETH_ALEN] = {}; + + igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, empty); +} + +/** * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure * @@ -4017,8 +4871,7 @@ static int igc_alloc_q_vector(struct igc_adapter *adapter, return -ENOMEM; /* initialize NAPI */ - netif_napi_add(adapter->netdev, &q_vector->napi, - igc_poll, 64); + netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll); /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; @@ -4214,6 +5067,7 @@ static int igc_sw_init(struct igc_adapter *adapter) adapter->nfc_rule_count = 0; spin_lock_init(&adapter->stats64_lock); + spin_lock_init(&adapter->qbv_tx_lock); /* Assume MSI-X interrupts, will be checked during IRQ allocation */ adapter->flags |= IGC_FLAG_HAS_MSIX; @@ -4233,6 +5087,22 @@ static int igc_sw_init(struct igc_adapter *adapter) return 0; } +static void igc_set_queue_napi(struct igc_adapter *adapter, int vector, + struct napi_struct *napi) +{ + struct igc_q_vector *q_vector = adapter->q_vector[vector]; + + if (q_vector->rx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->rx.ring->queue_index, + NETDEV_QUEUE_TYPE_RX, napi); + + if (q_vector->tx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->tx.ring->queue_index, + NETDEV_QUEUE_TYPE_TX, napi); +} + /** * igc_up - Open the interface and prepare it to handle traffic * @adapter: board private structure @@ -4240,6 +5110,7 @@ static int igc_sw_init(struct igc_adapter *adapter) void igc_up(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + struct napi_struct *napi; int i = 0; /* hardware has been reset, we need to reload some things */ @@ -4247,8 +5118,11 @@ void igc_up(struct igc_adapter *adapter) clear_bit(__IGC_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&adapter->q_vector[i]->napi); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igc_set_queue_napi(adapter, i, napi); + } if (adapter->msix_entries) igc_configure_msix(adapter); @@ -4306,10 +5180,10 @@ void igc_update_stats(struct igc_adapter *adapter) } do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -4323,10 +5197,10 @@ void igc_update_stats(struct igc_adapter *adapter) struct igc_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; _packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -4424,7 +5298,8 @@ void igc_update_stats(struct igc_adapter *adapter) net_stats->tx_window_errors = adapter->stats.latecol; net_stats->tx_carrier_errors = adapter->stats.tncrs; - /* Tx Dropped needs to be maintained elsewhere */ + /* Tx Dropped */ + net_stats->tx_dropped = adapter->stats.txdrop; /* Management Stats */ adapter->stats.mgptc += rd32(IGC_MGTPTC); @@ -4447,38 +5322,42 @@ void igc_down(struct igc_adapter *adapter) igc_ptp_suspend(adapter); - /* disable receives in the hardware */ - rctl = rd32(IGC_RCTL); - wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); - /* flush and sleep below */ - + if (pci_device_is_present(adapter->pdev)) { + /* disable receives in the hardware */ + rctl = rd32(IGC_RCTL); + wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); + /* flush and sleep below */ + } /* set trans_start so we don't get spurious watchdogs during reset */ netif_trans_update(netdev); netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); - /* disable transmits in the hardware */ - tctl = rd32(IGC_TCTL); - tctl &= ~IGC_TCTL_EN; - wr32(IGC_TCTL, tctl); - /* flush both disables and wait for them to finish */ - wrfl(); - usleep_range(10000, 20000); + if (pci_device_is_present(adapter->pdev)) { + /* disable transmits in the hardware */ + tctl = rd32(IGC_TCTL); + tctl &= ~IGC_TCTL_EN; + wr32(IGC_TCTL, tctl); + /* flush both disables and wait for them to finish */ + wrfl(); + usleep_range(10000, 20000); - igc_irq_disable(adapter); + igc_irq_disable(adapter); + } adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; for (i = 0; i < adapter->num_q_vectors; i++) { if (adapter->q_vector[i]) { napi_synchronize(&adapter->q_vector[i]->napi); + igc_set_queue_napi(adapter, i, NULL); napi_disable(&adapter->q_vector[i]->napi); } } - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); /* record the stats before reset*/ spin_lock(&adapter->stats64_lock); @@ -4494,8 +5373,12 @@ void igc_down(struct igc_adapter *adapter) /* clear VLAN promisc flag so VFTA will be updated if necessary */ adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; + igc_disable_all_tx_rings_hw(adapter); igc_clean_all_tx_rings(adapter); igc_clean_all_rx_rings(adapter); + + if (adapter->fpe.mmsv.pmac_enabled) + ethtool_mmsv_stop(&adapter->fpe.mmsv); } void igc_reinit_locked(struct igc_adapter *adapter) @@ -4559,7 +5442,7 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) igc_down(adapter); netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) igc_up(adapter); @@ -4572,6 +5455,24 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) } /** + * igc_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + * @txqueue: queue number that timed out + **/ +static void igc_tx_timeout(struct net_device *netdev, + unsigned int __always_unused txqueue) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + + /* Do the reset outside of interrupt context */ + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); + wr32(IGC_EICS, + (adapter->eims_enable_mask & ~adapter->eims_other)); +} + +/** * igc_get_stats64 - Get System Network Statistics * @netdev: network interface device structure * @stats: rtnl_link_stats64 pointer @@ -4638,7 +5539,7 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | @@ -4664,25 +5565,22 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, static void igc_tsync_interrupt(struct igc_adapter *adapter) { - u32 ack, tsauxc, sec, nsec, tsicr; struct igc_hw *hw = &adapter->hw; + u32 tsauxc, sec, nsec, tsicr; struct ptp_clock_event event; struct timespec64 ts; tsicr = rd32(IGC_TSICR); - ack = 0; if (tsicr & IGC_TSICR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) ptp_clock_event(adapter->ptp_clock, &event); - ack |= IGC_TSICR_SYS_WRAP; } if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - ack |= IGC_TSICR_TXTS; + igc_ptp_tx_tstamp_event(adapter); } if (tsicr & IGC_TSICR_TT0) { @@ -4696,7 +5594,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) wr32(IGC_TSAUXC, tsauxc); adapter->perout[0].start = ts; spin_unlock(&adapter->tmreg_lock); - ack |= IGC_TSICR_TT0; } if (tsicr & IGC_TSICR_TT1) { @@ -4710,7 +5607,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) wr32(IGC_TSAUXC, tsauxc); adapter->perout[1].start = ts; spin_unlock(&adapter->tmreg_lock); - ack |= IGC_TSICR_TT1; } if (tsicr & IGC_TSICR_AUTT0) { @@ -4720,7 +5616,6 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) event.index = 0; event.timestamp = sec * NSEC_PER_SEC + nsec; ptp_clock_event(adapter->ptp_clock, &event); - ack |= IGC_TSICR_AUTT0; } if (tsicr & IGC_TSICR_AUTT1) { @@ -4730,11 +5625,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) event.index = 1; event.timestamp = sec * NSEC_PER_SEC + nsec; ptp_clock_event(adapter->ptp_clock, &event); - ack |= IGC_TSICR_AUTT1; } - - /* acknowledge the interrupts */ - wr32(IGC_TSICR, ack); } /** @@ -4809,6 +5700,7 @@ static irqreturn_t igc_msix_ring(int irq, void *data) */ static int igc_request_msix(struct igc_adapter *adapter) { + unsigned int num_q_vectors = adapter->num_q_vectors; int i = 0, err = 0, vector = 0, free_vector = 0; struct net_device *netdev = adapter->netdev; @@ -4817,7 +5709,13 @@ static int igc_request_msix(struct igc_adapter *adapter) if (err) goto err_out; - for (i = 0; i < adapter->num_q_vectors; i++) { + if (num_q_vectors > MAX_Q_VECTORS) { + num_q_vectors = MAX_Q_VECTORS; + dev_warn(&adapter->pdev->dev, + "The number of queue vectors (%d) is higher than max allowed (%d)\n", + adapter->num_q_vectors, MAX_Q_VECTORS); + } + for (i = 0; i < num_q_vectors; i++) { struct igc_q_vector *q_vector = adapter->q_vector[i]; vector++; @@ -4841,6 +5739,9 @@ static int igc_request_msix(struct igc_adapter *adapter) q_vector); if (err) goto err_free; + + netif_napi_set_irq(&q_vector->napi, + adapter->msix_entries[vector].vector); } igc_configure_msix(adapter); @@ -4877,7 +5778,8 @@ static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) */ static void igc_update_phy_info(struct timer_list *t) { - struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); + struct igc_adapter *adapter = timer_container_of(adapter, t, + phy_info_timer); igc_get_phy_info(&adapter->hw); } @@ -4896,20 +5798,12 @@ bool igc_has_link(struct igc_adapter *adapter) * false until the igc_check_for_link establishes link * for copper adapters ONLY */ - switch (hw->phy.media_type) { - case igc_media_type_copper: - if (!hw->mac.get_link_status) - return true; - hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; - break; - default: - case igc_media_type_unknown: - break; - } + if (!hw->mac.get_link_status) + return true; + hw->mac.ops.check_for_link(hw); + link_active = !hw->mac.get_link_status; - if (hw->mac.type == igc_i225 && - hw->phy.id == I225_I_PHY_ID) { + if (hw->mac.type == igc_i225) { if (!netif_carrier_ok(adapter->netdev)) { adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { @@ -4927,7 +5821,8 @@ bool igc_has_link(struct igc_adapter *adapter) */ static void igc_watchdog(struct timer_list *t) { - struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer); + struct igc_adapter *adapter = timer_container_of(adapter, t, + watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); } @@ -4997,10 +5892,23 @@ static void igc_watchdog_task(struct work_struct *work) adapter->tx_timeout_factor = 14; break; case SPEED_100: - /* maybe add some timeout factor ? */ + case SPEED_1000: + case SPEED_2500: + adapter->tx_timeout_factor = 1; break; } + /* Once the launch time has been set on the wire, there + * is a delay before the link speed can be determined + * based on link-up activity. Write into the register + * as soon as we know the correct link speed. + */ + igc_tsn_adjust_txtime_offset(adapter); + + if (adapter->fpe.mmsv.pmac_enabled) + ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, + true); + if (adapter->link_speed != SPEED_1000) goto no_wait; @@ -5036,30 +5944,17 @@ no_wait: netdev_info(netdev, "NIC Link is Down\n"); netif_carrier_off(netdev); + if (adapter->fpe.mmsv.pmac_enabled) + ethtool_mmsv_link_state_handle(&adapter->fpe.mmsv, + false); + /* link state has changed, schedule phy info update */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); - /* link is down, time to check for alternate media */ - if (adapter->flags & IGC_FLAG_MAS_ENABLE) { - if (adapter->flags & IGC_FLAG_MEDIA_RESET) { - schedule_work(&adapter->reset_task); - /* return immediately */ - return; - } - } pm_schedule_suspend(netdev->dev.parent, MSEC_PER_SEC * 5); - - /* also check for alternate media here */ - } else if (!netif_carrier_ok(netdev) && - (adapter->flags & IGC_FLAG_MAS_ENABLE)) { - if (adapter->flags & IGC_FLAG_MEDIA_RESET) { - schedule_work(&adapter->reset_task); - /* return immediately */ - return; - } } } @@ -5092,11 +5987,29 @@ no_wait: if (adapter->flags & IGC_FLAG_HAS_MSIX) { u32 eics = 0; - for (i = 0; i < adapter->num_q_vectors; i++) - eics |= adapter->q_vector[i]->eims_value; - wr32(IGC_EICS, eics); + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igc_q_vector *q_vector = adapter->q_vector[i]; + struct igc_ring *rx_ring; + + if (!q_vector->rx.ring) + continue; + + rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; + + if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + eics |= q_vector->eims_value; + clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + } + } + if (eics) + wr32(IGC_EICS, eics); } else { - wr32(IGC_ICS, IGC_ICS_RXDMT0); + struct igc_ring *rx_ring = adapter->rx_ring[0]; + + if (test_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + clear_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + wr32(IGC_ICS, IGC_ICS_RXDMT0); + } } igc_ptp_tx_hang(adapter); @@ -5141,6 +6054,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5184,6 +6100,9 @@ static irqreturn_t igc_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5275,6 +6194,7 @@ static int __igc_open(struct net_device *netdev, bool resuming) struct igc_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = adapter->pdev; struct igc_hw *hw = &adapter->hw; + struct napi_struct *napi; int err = 0; int i = 0; @@ -5308,19 +6228,13 @@ static int __igc_open(struct net_device *netdev, bool resuming) if (err) goto err_req_irq; - /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); - if (err) - goto err_set_queues; - clear_bit(__IGC_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&adapter->q_vector[i]->napi); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igc_set_queue_napi(adapter, i, napi); + } /* Clear any pending interrupts. */ rd32(IGC_ICR); @@ -5337,8 +6251,6 @@ static int __igc_open(struct net_device *netdev, bool resuming) return IGC_SUCCESS; -err_set_queues: - igc_free_irq(adapter); err_req_irq: igc_release_hw_control(adapter); igc_power_down_phy_copper_base(&adapter->hw); @@ -5355,6 +6267,17 @@ err_setup_tx: int igc_open(struct net_device *netdev) { + struct igc_adapter *adapter = netdev_priv(netdev); + int err; + + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, + adapter->num_rx_queues); + if (err) { + netdev_err(netdev, "error setting real queue count\n"); + return err; + } + return __igc_open(netdev, false); } @@ -5402,29 +6325,10 @@ int igc_close(struct net_device *netdev) return 0; } -/** - * igc_ioctl - Access the hwtstamp interface - * @netdev: network interface device structure - * @ifr: interface request data - * @cmd: ioctl command - **/ -static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCGHWTSTAMP: - return igc_ptp_get_ts_config(netdev, ifr); - case SIOCSHWTSTAMP: - return igc_ptp_set_ts_config(netdev, ifr); - default: - return -EOPNOTSUPP; - } -} - static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, bool enable) { struct igc_ring *ring; - int i; if (queue < 0 || queue >= adapter->num_tx_queues) return -EINVAL; @@ -5432,17 +6336,6 @@ static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, ring = adapter->tx_ring[queue]; ring->launchtime_enable = enable; - if (adapter->base_time) - return 0; - - adapter->cycle_time = NSEC_PER_SEC; - - for (i = 0; i < adapter->num_tx_queues; i++) { - ring = adapter->tx_ring[i]; - ring->start_time = 0; - ring->end_time = NSEC_PER_SEC; - } - return 0; } @@ -5459,6 +6352,7 @@ static bool validate_schedule(struct igc_adapter *adapter, const struct tc_taprio_qopt_offload *qopt) { int queue_uses[IGC_MAX_TX_QUEUES] = { }; + struct igc_hw *hw = &adapter->hw; struct timespec64 now; size_t n; @@ -5471,14 +6365,17 @@ static bool validate_schedule(struct igc_adapter *adapter, * in the future, it will hold all the packets until that * time, causing a lot of TX Hangs, so to avoid that, we * reject schedules that would start in the future. + * Note: Limitation above is no longer in i226. */ - if (!is_base_time_past(qopt->base_time, &now)) + if (!is_base_time_past(qopt->base_time, &now) && + igc_is_device_id_i225(hw)) return false; for (n = 0; n < qopt->num_entries; n++) { - const struct tc_taprio_sched_entry *e; + const struct tc_taprio_sched_entry *e, *prev; int i; + prev = n ? &qopt->entries[n - 1] : NULL; e = &qopt->entries[n]; /* i225 only supports "global" frame preemption @@ -5487,13 +6384,18 @@ static bool validate_schedule(struct igc_adapter *adapter, if (e->command != TC_TAPRIO_CMD_SET_GATES) return false; - for (i = 0; i < IGC_MAX_TX_QUEUES; i++) { - if (e->gate_mask & BIT(i)) + for (i = 0; i < adapter->num_tx_queues; i++) + if (e->gate_mask & BIT(i)) { queue_uses[i]++; - if (queue_uses[i] > 1) - return false; - } + /* There are limitations: A single queue cannot + * be opened and closed multiple times per cycle + * unless the gate stays open. Check for it. + */ + if (queue_uses[i] > 1 && + !(prev->gate_mask & BIT(i))) + return false; + } } return true; @@ -5515,48 +6417,189 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, return igc_tsn_offload_apply(adapter); } +static int igc_qbv_clear_schedule(struct igc_adapter *adapter) +{ + unsigned long flags; + int i; + + adapter->base_time = 0; + adapter->cycle_time = NSEC_PER_SEC; + adapter->taprio_offload_enable = false; + adapter->qbv_config_change_errors = 0; + adapter->qbv_count = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + ring->start_time = 0; + ring->end_time = NSEC_PER_SEC; + ring->max_sdu = 0; + ring->preemptible = false; + } + + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + + adapter->qbv_transition = false; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + + return 0; +} + +static int igc_tsn_clear_schedule(struct igc_adapter *adapter) +{ + igc_qbv_clear_schedule(adapter); + + return 0; +} + +static void igc_taprio_stats(struct net_device *dev, + struct tc_taprio_qopt_stats *stats) +{ + /* When Strict_End is enabled, the tx_overruns counter + * will always be zero. + */ + stats->tx_overruns = 0; +} + +static void igc_taprio_queue_stats(struct net_device *dev, + struct tc_taprio_qopt_queue_stats *queue_stats) +{ + struct tc_taprio_qopt_stats *stats = &queue_stats->stats; + + /* When Strict_End is enabled, the tx_overruns counter + * will always be zero. + */ + stats->tx_overruns = 0; +} + static int igc_save_qbv_schedule(struct igc_adapter *adapter, struct tc_taprio_qopt_offload *qopt) { + bool queue_configured[IGC_MAX_TX_QUEUES] = { }; + struct igc_hw *hw = &adapter->hw; u32 start_time = 0, end_time = 0; + struct timespec64 now; + unsigned long flags; size_t n; + int i; - if (!qopt->enable) { - adapter->base_time = 0; - return 0; - } + if (qopt->base_time < 0) + return -ERANGE; - if (adapter->base_time) + if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) return -EALREADY; if (!validate_schedule(adapter, qopt)) return -EINVAL; + if (qopt->mqprio.preemptible_tcs && + !(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) { + NL_SET_ERR_MSG_MOD(qopt->extack, + "reverse-tsn-txq-prio private flag must be enabled before setting preemptible tc"); + return -ENODEV; + } + + igc_ptp_read(adapter, &now); + + if (igc_tsn_is_taprio_activated_by_user(adapter) && + is_base_time_past(qopt->base_time, &now)) + adapter->qbv_config_change_errors++; + adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; + adapter->taprio_offload_enable = true; - /* FIXME: be a little smarter about cases when the gate for a - * queue stays open for more than one entry. - */ for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; - int i; end_time += e->interval; - for (i = 0; i < IGC_MAX_TX_QUEUES; i++) { + /* If any of the conditions below are true, we need to manually + * control the end time of the cycle. + * 1. Qbv users can specify a cycle time that is not equal + * to the total GCL intervals. Hence, recalculation is + * necessary here to exclude the time interval that + * exceeds the cycle time. + * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2, + * once the end of the list is reached, it will switch + * to the END_OF_CYCLE state and leave the gates in the + * same state until the next cycle is started. + */ + if (end_time > adapter->cycle_time || + n + 1 == qopt->num_entries) + end_time = adapter->cycle_time; + + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; if (!(e->gate_mask & BIT(i))) continue; - ring->start_time = start_time; + /* Check whether a queue stays open for more than one + * entry. If so, keep the start and advance the end + * time. + */ + if (!queue_configured[i]) + ring->start_time = start_time; ring->end_time = end_time; + + if (ring->start_time >= adapter->cycle_time) + queue_configured[i] = false; + else + queue_configured[i] = true; } start_time += e->interval; } + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + + /* Check whether a queue gets configured. + * If not, set the start and end time to be end time. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + if (!is_base_time_past(qopt->base_time, &now)) { + ring->admin_gate_closed = false; + } else { + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + + if (!queue_configured[i]) { + if (!is_base_time_past(qopt->base_time, &now)) + ring->admin_gate_closed = true; + else + ring->oper_gate_closed = true; + + ring->start_time = end_time; + ring->end_time = end_time; + } + } + + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + struct net_device *dev = adapter->netdev; + + if (qopt->max_sdu[i]) + ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; + else + ring->max_sdu = 0; + } + + igc_fpe_save_preempt_queue(adapter, &qopt->mqprio); + return 0; } @@ -5569,25 +6612,240 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, if (hw->mac.type != igc_i225) return -EOPNOTSUPP; - err = igc_save_qbv_schedule(adapter, qopt); + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + err = igc_save_qbv_schedule(adapter, qopt); + break; + case TAPRIO_CMD_DESTROY: + err = igc_tsn_clear_schedule(adapter); + break; + case TAPRIO_CMD_STATS: + igc_taprio_stats(adapter->netdev, &qopt->stats); + return 0; + case TAPRIO_CMD_QUEUE_STATS: + igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); + return 0; + default: + return -EOPNOTSUPP; + } + if (err) return err; return igc_tsn_offload_apply(adapter); } +static int igc_save_cbs_params(struct igc_adapter *adapter, int queue, + bool enable, int idleslope, int sendslope, + int hicredit, int locredit) +{ + bool cbs_status[IGC_MAX_SR_QUEUES] = { false }; + struct net_device *netdev = adapter->netdev; + struct igc_ring *ring; + int i; + + /* i225 has two sets of credit-based shaper logic. + * Supporting it only on the top two priority queues + */ + if (queue < 0 || queue > 1) + return -EINVAL; + + ring = adapter->tx_ring[queue]; + + for (i = 0; i < IGC_MAX_SR_QUEUES; i++) + if (adapter->tx_ring[i]) + cbs_status[i] = adapter->tx_ring[i]->cbs_enable; + + /* CBS should be enabled on the highest priority queue first in order + * for the CBS algorithm to operate as intended. + */ + if (enable) { + if (queue == 1 && !cbs_status[0]) { + netdev_err(netdev, + "Enabling CBS on queue1 before queue0\n"); + return -EINVAL; + } + } else { + if (queue == 0 && cbs_status[1]) { + netdev_err(netdev, + "Disabling CBS on queue0 before queue1\n"); + return -EINVAL; + } + } + + ring->cbs_enable = enable; + ring->idleslope = idleslope; + ring->sendslope = sendslope; + ring->hicredit = hicredit; + ring->locredit = locredit; + + return 0; +} + +static int igc_tsn_enable_cbs(struct igc_adapter *adapter, + struct tc_cbs_qopt_offload *qopt) +{ + struct igc_hw *hw = &adapter->hw; + int err; + + if (hw->mac.type != igc_i225) + return -EOPNOTSUPP; + + if (qopt->queue < 0 || qopt->queue > 1) + return -EINVAL; + + err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable, + qopt->idleslope, qopt->sendslope, + qopt->hicredit, qopt->locredit); + if (err) + return err; + + return igc_tsn_offload_apply(adapter); +} + +static int igc_tc_query_caps(struct igc_adapter *adapter, + struct tc_query_caps_base *base) +{ + struct igc_hw *hw = &adapter->hw; + + switch (base->type) { + case TC_SETUP_QDISC_MQPRIO: { + struct tc_mqprio_caps *caps = base->caps; + + caps->validate_queue_counts = true; + + return 0; + } + case TC_SETUP_QDISC_TAPRIO: { + struct tc_taprio_caps *caps = base->caps; + + if (!(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) + caps->broken_mqprio = true; + + if (hw->mac.type == igc_i225) { + caps->supports_queue_max_sdu = true; + caps->gate_mask_per_txq = true; + } + + return 0; + } + default: + return -EOPNOTSUPP; + } +} + +static void igc_save_mqprio_params(struct igc_adapter *adapter, u8 num_tc, + u16 *offset) +{ + int i; + + adapter->strict_priority_enable = true; + adapter->num_tc = num_tc; + + for (i = 0; i < num_tc; i++) + adapter->queue_per_tc[i] = offset[i]; +} + +static bool +igc_tsn_is_tc_to_queue_priority_ordered(struct tc_mqprio_qopt_offload *mqprio) +{ + int num_tc = mqprio->qopt.num_tc; + int i; + + for (i = 1; i < num_tc; i++) { + if (mqprio->qopt.offset[i - 1] > mqprio->qopt.offset[i]) + return false; + } + + return true; +} + +static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct igc_hw *hw = &adapter->hw; + int err, i; + + if (hw->mac.type != igc_i225) + return -EOPNOTSUPP; + + if (!mqprio->qopt.num_tc) { + adapter->strict_priority_enable = false; + igc_fpe_clear_preempt_queue(adapter); + netdev_reset_tc(adapter->netdev); + goto apply; + } + + /* There are as many TCs as Tx queues. */ + if (mqprio->qopt.num_tc != adapter->num_tx_queues) { + NL_SET_ERR_MSG_FMT_MOD(mqprio->extack, + "Only %d traffic classes supported", + adapter->num_tx_queues); + return -EOPNOTSUPP; + } + + /* Only one queue per TC is supported. */ + for (i = 0; i < mqprio->qopt.num_tc; i++) { + if (mqprio->qopt.count[i] != 1) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "Only one queue per TC supported"); + return -EOPNOTSUPP; + } + } + + if (!igc_tsn_is_tc_to_queue_priority_ordered(mqprio)) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "tc to queue mapping must preserve increasing priority (higher tc -> higher queue)"); + return -EOPNOTSUPP; + } + + igc_save_mqprio_params(adapter, mqprio->qopt.num_tc, + mqprio->qopt.offset); + + err = netdev_set_num_tc(adapter->netdev, adapter->num_tc); + if (err) + return err; + + for (i = 0; i < adapter->num_tc; i++) { + err = netdev_set_tc_queue(adapter->netdev, i, 1, + adapter->queue_per_tc[i]); + if (err) + return err; + } + + /* In case the card is configured with less than four queues. */ + for (; i < IGC_MAX_TX_QUEUES; i++) + adapter->queue_per_tc[i] = i; + + mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS; + igc_fpe_save_preempt_queue(adapter, mqprio); + +apply: + return igc_tsn_offload_apply(adapter); +} + static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct igc_adapter *adapter = netdev_priv(dev); + adapter->tc_setup_type = type; + switch (type) { + case TC_QUERY_CAPS: + return igc_tc_query_caps(adapter, type_data); case TC_SETUP_QDISC_TAPRIO: return igc_tsn_enable_qbv_scheduling(adapter, type_data); case TC_SETUP_QDISC_ETF: return igc_tsn_enable_launchtime(adapter, type_data); + case TC_SETUP_QDISC_CBS: + return igc_tsn_enable_cbs(adapter, type_data); + + case TC_SETUP_QDISC_MQPRIO: + return igc_tsn_enable_mqprio(adapter, type_data); + default: return -EOPNOTSUPP; } @@ -5615,29 +6873,31 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; - if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) + if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); - drops = 0; + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -5645,7 +6905,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, @@ -5685,6 +6945,24 @@ int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) return 0; } +static ktime_t igc_get_tstamp(struct net_device *dev, + const struct skb_shared_hwtstamps *hwtstamps, + bool cycles) +{ + struct igc_adapter *adapter = netdev_priv(dev); + struct igc_inline_rx_tstamps *tstamp; + ktime_t timestamp; + + tstamp = hwtstamps->netdev_data; + + if (cycles) + timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); + else + timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); + + return timestamp; +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -5692,62 +6970,29 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_set_rx_mode = igc_set_rx_mode, .ndo_set_mac_address = igc_set_mac, .ndo_change_mtu = igc_change_mtu, + .ndo_tx_timeout = igc_tx_timeout, .ndo_get_stats64 = igc_get_stats64, .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, .ndo_features_check = igc_features_check, - .ndo_do_ioctl = igc_ioctl, .ndo_setup_tc = igc_setup_tc, .ndo_bpf = igc_bpf, .ndo_xdp_xmit = igc_xdp_xmit, .ndo_xsk_wakeup = igc_xsk_wakeup, + .ndo_get_tstamp = igc_get_tstamp, + .ndo_hwtstamp_get = igc_ptp_hwtstamp_get, + .ndo_hwtstamp_set = igc_ptp_hwtstamp_set, }; -/* PCIe configuration access */ -void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - pci_read_config_word(adapter->pdev, reg, value); -} - -void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - pci_write_config_word(adapter->pdev, reg, *value); -} - -s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - if (!pci_is_pcie(adapter->pdev)) - return -IGC_ERR_CONFIG; - - pcie_capability_read_word(adapter->pdev, reg, value); - - return IGC_SUCCESS; -} - -s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - if (!pci_is_pcie(adapter->pdev)) - return -IGC_ERR_CONFIG; - - pcie_capability_write_word(adapter->pdev, reg, *value); - - return IGC_SUCCESS; -} - u32 igc_rd32(struct igc_hw *hw, u32 reg) { struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; + if (IGC_REMOVED(hw_addr)) + return ~value; + value = readl(&hw_addr[reg]); /* reads should not return all F's */ @@ -5764,54 +7009,85 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) return value; } -int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) +/* Mapping HW RSS Type to enum xdp_rss_hash_type */ +static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, + [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, + [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, + [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, + [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ + [13] = XDP_RSS_TYPE_NONE, + [14] = XDP_RSS_TYPE_NONE, + [15] = XDP_RSS_TYPE_NONE, +}; + +static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) { - struct igc_mac_info *mac = &adapter->hw.mac; + const struct igc_xdp_buff *ctx = (void *)_ctx; - mac->autoneg = false; + if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) + return -ENODATA; - /* Make sure dplx is at most 1 bit and lsb of speed is not set - * for the switch() below to work - */ - if ((spd & 1) || (dplx & ~1)) - goto err_inval; + *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); + *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; - switch (spd + dplx) { - case SPEED_10 + DUPLEX_HALF: - mac->forced_speed_duplex = ADVERTISE_10_HALF; - break; - case SPEED_10 + DUPLEX_FULL: - mac->forced_speed_duplex = ADVERTISE_10_FULL; - break; - case SPEED_100 + DUPLEX_HALF: - mac->forced_speed_duplex = ADVERTISE_100_HALF; - break; - case SPEED_100 + DUPLEX_FULL: - mac->forced_speed_duplex = ADVERTISE_100_FULL; - break; - case SPEED_1000 + DUPLEX_FULL: - mac->autoneg = true; - adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; - break; - case SPEED_1000 + DUPLEX_HALF: /* not supported */ - goto err_inval; - case SPEED_2500 + DUPLEX_FULL: - mac->autoneg = true; - adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; - break; - case SPEED_2500 + DUPLEX_HALF: /* not supported */ - default: - goto err_inval; + return 0; +} + +static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) +{ + const struct igc_xdp_buff *ctx = (void *)_ctx; + struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); + struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; + + if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { + *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); + + return 0; } - /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ - adapter->hw.phy.mdix = AUTO_ALL_MODES; + return -ENODATA; +} - return 0; +static const struct xdp_metadata_ops igc_xdp_metadata_ops = { + .xmo_rx_hash = igc_xdp_rx_hash, + .xmo_rx_timestamp = igc_xdp_rx_timestamp, +}; + +static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) +{ + struct igc_adapter *adapter = container_of(timer, struct igc_adapter, + hrtimer); + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + + adapter->qbv_transition = true; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + if (tx_ring->admin_gate_closed) { + tx_ring->admin_gate_closed = false; + tx_ring->oper_gate_closed = true; + } else { + tx_ring->oper_gate_closed = false; + } + } + adapter->qbv_transition = false; + + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); -err_inval: - netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n"); - return -EINVAL; + return HRTIMER_NORESTART; } /** @@ -5832,30 +7108,26 @@ static int igc_probe(struct pci_dev *pdev, struct net_device *netdev; struct igc_hw *hw; const struct igc_info *ei = igc_info_tbl[ent->driver_data]; - int err, pci_using_dac; + int err; err = pci_enable_device_mem(pdev); if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_mem_regions(pdev, igc_driver_name); if (err) goto err_pci_reg; - pci_enable_pcie_error_reporting(pdev); + err = pci_enable_ptm(pdev, NULL); + if (err < 0) + dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); pci_set_master(pdev); @@ -5877,6 +7149,17 @@ static int igc_probe(struct pci_dev *pdev, adapter->port_num = hw->bus.func; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + /* PCI config space info */ + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + hw->revision_id = pdev->revision; + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + + /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + err = pci_save_state(pdev); if (err) goto err_ioremap; @@ -5891,19 +7174,14 @@ static int igc_probe(struct pci_dev *pdev, hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igc_netdev_ops; + netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; + netdev->xsk_tx_metadata_ops = &igc_xsk_tx_metadata_ops; igc_ethtool_set_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netdev->mem_start = pci_resource_start(pdev, 0); netdev->mem_end = pci_resource_end(pdev, 0); - /* PCI config space info */ - hw->vendor_id = pdev->vendor; - hw->device_id = pdev->device; - hw->revision_id = pdev->revision; - hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; - /* Copy the default MAC and PHY function pointers */ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); @@ -5918,6 +7196,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; netdev->features |= NETIF_F_TSO_ECN; + netdev->features |= NETIF_F_RXHASH; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; @@ -5944,10 +7223,17 @@ static int igc_probe(struct pci_dev *pdev, netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= netdev->features; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HIGHDMA; + + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= netdev->vlan_features; + + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; - netdev->vlan_features |= netdev->features; + /* enable HW vlan tag insertion/stripping by default */ + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; @@ -5972,7 +7258,7 @@ static int igc_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "NVM Read Error\n"); } - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + eth_hw_addr_set(netdev, hw->mac.addr); if (!is_valid_ether_addr(netdev->dev_addr)) { dev_err(&pdev->dev, "Invalid MAC Address\n"); @@ -5981,8 +7267,8 @@ static int igc_probe(struct pci_dev *pdev, } /* configure RXPBSIZE and TXPBSIZE */ - wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); - wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); + wr32(IGC_RXPBS, IGC_RXPBSIZE_EXP_BMC_DEFAULT); + wr32(IGC_TXPBS, IGC_TXPBSIZE_DEFAULT); timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); @@ -5990,9 +7276,11 @@ static int igc_probe(struct pci_dev *pdev, INIT_WORK(&adapter->reset_task, igc_reset_task); INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); + hrtimer_setup(&adapter->hrtimer, &igc_qbv_scheduling_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + /* Initialize link properties that are user-changeable */ adapter->fc_autoneg = true; - hw->mac.autoneg = true; hw->phy.autoneg_advertised = 0xaf; hw->fc.requested_mode = igc_fc_default; @@ -6010,6 +7298,10 @@ static int igc_probe(struct pci_dev *pdev, igc_ptp_init(adapter); + igc_tsn_clear_schedule(adapter); + + igc_fpe_init(adapter); + /* reset the hardware with the new settings */ igc_reset(adapter); @@ -6018,7 +7310,7 @@ static int igc_probe(struct pci_dev *pdev, */ igc_get_hw_control(adapter); - strncpy(netdev->name, "eth%d", IFNAMSIZ); + strscpy(netdev->name, "eth%d", sizeof(netdev->name)); err = register_netdev(netdev); if (err) goto err_register; @@ -6041,10 +7333,23 @@ static int igc_probe(struct pci_dev *pdev, pm_runtime_put_noidle(&pdev->dev); + if (IS_ENABLED(CONFIG_IGC_LEDS)) { + err = igc_led_setup(adapter); + if (err) { + netdev_warn_once(netdev, + "LED init failed (%d); continuing without LED support\n", + err); + adapter->leds_available = false; + } else { + adapter->leds_available = true; + } + } + return 0; err_register: igc_release_hw_control(adapter); + igc_ptp_stop(adapter); err_eeprom: if (!igc_check_reset_block(hw)) igc_reset_phy(hw); @@ -6081,13 +7386,20 @@ static void igc_remove(struct pci_dev *pdev) igc_ptp_stop(adapter); + pci_disable_ptm(pdev); + pci_clear_master(pdev); + set_bit(__IGC_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + timer_delete_sync(&adapter->watchdog_timer); + timer_delete_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); + hrtimer_cancel(&adapter->hrtimer); + + if (IS_ENABLED(CONFIG_IGC_LEDS) && adapter->leds_available) + igc_led_free(adapter); /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. @@ -6101,8 +7413,6 @@ static void igc_remove(struct pci_dev *pdev) free_netdev(netdev); - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); } @@ -6175,8 +7485,7 @@ static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, return 0; } -#ifdef CONFIG_PM -static int __maybe_unused igc_runtime_suspend(struct device *dev) +static int igc_runtime_suspend(struct device *dev) { return __igc_shutdown(to_pci_dev(dev), NULL, 1); } @@ -6211,7 +7520,7 @@ static void igc_deliver_wake_packet(struct net_device *netdev) netif_rx(skb); } -static int __maybe_unused igc_resume(struct device *dev) +static int __igc_resume(struct device *dev, bool rpm) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -6221,7 +7530,6 @@ static int __maybe_unused igc_resume(struct device *dev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - pci_save_state(pdev); if (!pci_device_is_present(pdev)) return -ENODEV; @@ -6235,6 +7543,9 @@ static int __maybe_unused igc_resume(struct device *dev) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + if (igc_init_interrupt_scheme(adapter, true)) { netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; @@ -6253,23 +7564,30 @@ static int __maybe_unused igc_resume(struct device *dev) wr32(IGC_WUS, ~0); - rtnl_lock(); - if (!err && netif_running(netdev)) + if (netif_running(netdev)) { + if (!rpm) + rtnl_lock(); err = __igc_open(netdev, true); - - if (!err) - netif_device_attach(netdev); - rtnl_unlock(); + if (!rpm) + rtnl_unlock(); + if (!err) + netif_device_attach(netdev); + } return err; } -static int __maybe_unused igc_runtime_resume(struct device *dev) +static int igc_resume(struct device *dev) { - return igc_resume(dev); + return __igc_resume(dev, false); } -static int __maybe_unused igc_suspend(struct device *dev) +static int igc_runtime_resume(struct device *dev) +{ + return __igc_resume(dev, true); +} + +static int igc_suspend(struct device *dev) { return __igc_shutdown(to_pci_dev(dev), NULL, 0); } @@ -6284,7 +7602,6 @@ static int __maybe_unused igc_runtime_idle(struct device *dev) return -EBUSY; } -#endif /* CONFIG_PM */ static void igc_shutdown(struct pci_dev *pdev) { @@ -6312,14 +7629,18 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, struct net_device *netdev = pci_get_drvdata(pdev); struct igc_adapter *adapter = netdev_priv(netdev); + rtnl_lock(); netif_device_detach(netdev); - if (state == pci_channel_io_perm_failure) + if (state == pci_channel_io_perm_failure) { + rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT; + } if (netif_running(netdev)) igc_down(adapter); pci_disable_device(pdev); + rtnl_unlock(); /* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -6330,7 +7651,7 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igc_resume routine. + * resembles the first-half of the __igc_resume routine. **/ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) { @@ -6345,11 +7666,13 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) } else { pci_set_master(pdev); pci_restore_state(pdev); - pci_save_state(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2); + /* In case of PCI error, adapter loses its HW address * so we should re-assign it here. */ @@ -6369,7 +7692,7 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the - * second-half of the igc_resume routine. + * second-half of the __igc_resume routine. */ static void igc_io_resume(struct pci_dev *pdev) { @@ -6379,6 +7702,7 @@ static void igc_io_resume(struct pci_dev *pdev) rtnl_lock(); if (netif_running(netdev)) { if (igc_open(netdev)) { + rtnl_unlock(); netdev_err(netdev, "igc_open failed after reset\n"); return; } @@ -6399,22 +7723,16 @@ static const struct pci_error_handlers igc_err_handler = { .resume = igc_io_resume, }; -#ifdef CONFIG_PM -static const struct dev_pm_ops igc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) - SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, - igc_runtime_idle) -}; -#endif +static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume, + igc_runtime_suspend, igc_runtime_resume, + igc_runtime_idle); static struct pci_driver igc_driver = { .name = igc_driver_name, .id_table = igc_pci_tbl, .probe = igc_probe, .remove = igc_remove, -#ifdef CONFIG_PM - .driver.pm = &igc_pm_ops, -#endif + .driver.pm = pm_ptr(&igc_pm_ops), .shutdown = igc_shutdown, .err_handler = &igc_err_handler, }; @@ -6487,18 +7805,6 @@ void igc_enable_rx_ring(struct igc_ring *ring) igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); } -static void igc_disable_tx_ring_hw(struct igc_ring *ring) -{ - struct igc_hw *hw = &ring->q_vector->adapter->hw; - u8 idx = ring->reg_idx; - u32 txdctl; - - txdctl = rd32(IGC_TXDCTL(idx)); - txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; - txdctl |= IGC_TXDCTL_SWFLUSH; - wr32(IGC_TXDCTL(idx), txdctl); -} - void igc_disable_tx_ring(struct igc_ring *ring) { igc_disable_tx_ring_hw(ring); diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c index 58f81aba0144..a47b8d39238c 100644 --- a/drivers/net/ethernet/intel/igc/igc_nvm.c +++ b/drivers/net/ethernet/intel/igc/igc_nvm.c @@ -36,56 +36,6 @@ static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg) } /** - * igc_acquire_nvm - Generic request for access to EEPROM - * @hw: pointer to the HW structure - * - * Set the EEPROM access request bit and wait for EEPROM access grant bit. - * Return successful if access grant bit set, else clear the request for - * EEPROM access and return -IGC_ERR_NVM (-1). - */ -s32 igc_acquire_nvm(struct igc_hw *hw) -{ - s32 timeout = IGC_NVM_GRANT_ATTEMPTS; - u32 eecd = rd32(IGC_EECD); - s32 ret_val = 0; - - wr32(IGC_EECD, eecd | IGC_EECD_REQ); - eecd = rd32(IGC_EECD); - - while (timeout) { - if (eecd & IGC_EECD_GNT) - break; - udelay(5); - eecd = rd32(IGC_EECD); - timeout--; - } - - if (!timeout) { - eecd &= ~IGC_EECD_REQ; - wr32(IGC_EECD, eecd); - hw_dbg("Could not acquire NVM grant\n"); - ret_val = -IGC_ERR_NVM; - } - - return ret_val; -} - -/** - * igc_release_nvm - Release exclusive access to EEPROM - * @hw: pointer to the HW structure - * - * Stop any current commands to the EEPROM and clear the EEPROM request bit. - */ -void igc_release_nvm(struct igc_hw *hw) -{ - u32 eecd; - - eecd = rd32(IGC_EECD); - eecd &= ~IGC_EECD_REQ; - wr32(IGC_EECD, eecd); -} - -/** * igc_read_nvm_eerd - Reads EEPROM using EERD register * @hw: pointer to the HW structure * @offset: offset of word in the EEPROM to read @@ -173,7 +123,7 @@ s32 igc_validate_nvm_checksum(struct igc_hw *hw) checksum += nvm_data; } - if (checksum != (u16)NVM_SUM) { + if (checksum != NVM_SUM) { hw_dbg("NVM Checksum Invalid\n"); ret_val = -IGC_ERR_NVM; goto out; @@ -205,7 +155,7 @@ s32 igc_update_nvm_checksum(struct igc_hw *hw) } checksum += nvm_data; } - checksum = (u16)NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) hw_dbg("NVM Write Error while updating checksum.\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h index f9fc2e9cfb03..ab78d0c64547 100644 --- a/drivers/net/ethernet/intel/igc/igc_nvm.h +++ b/drivers/net/ethernet/intel/igc/igc_nvm.h @@ -4,8 +4,6 @@ #ifndef _IGC_NVM_H_ #define _IGC_NVM_H_ -s32 igc_acquire_nvm(struct igc_hw *hw); -void igc_release_nvm(struct igc_hw *hw); s32 igc_read_mac_addr(struct igc_hw *hw); s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data); s32 igc_validate_nvm_checksum(struct igc_hw *hw); diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 83aeb5e7076f..6c4d204aecfa 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Intel Corporation */ +#include <linux/bitfield.h> #include "igc_phy.h" /** @@ -129,11 +130,7 @@ void igc_power_down_phy_copper(struct igc_hw *hw) /* The PHY will retain its settings across a power down/up cycle */ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); mii_reg |= MII_CR_POWER_DOWN; - - /* Temporary workaround - should be removed when PHY will implement - * IEEE registers as properly - */ - /* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/ + hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); usleep_range(1000, 2000); } @@ -141,24 +138,14 @@ void igc_power_down_phy_copper(struct igc_hw *hw) * igc_check_downshift - Checks whether a downshift in speed occurred * @hw: pointer to the HW structure * - * Success returns 0, Failure returns 1 - * * A downshift is detected by querying the PHY link health. */ -s32 igc_check_downshift(struct igc_hw *hw) +void igc_check_downshift(struct igc_hw *hw) { struct igc_phy_info *phy = &hw->phy; - s32 ret_val; - switch (phy->type) { - case igc_phy_i225: - default: - /* speed downshift not supported */ - phy->speed_downgraded = false; - ret_val = 0; - } - - return ret_val; + /* speed downshift not supported */ + phy->speed_downgraded = false; } /** @@ -249,12 +236,11 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) return ret_val; } - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) { + if (phy->autoneg_mask & ADVERTISE_2500_FULL) { /* Read the MULTI GBT AN Control Register - reg 7.32 */ ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | - ANEG_MULTIGBT_AN_CTRL, + IGC_ANEG_MULTIGBT_AN_CTRL, &aneg_multigbt_an_ctrl); if (ret_val) @@ -390,12 +376,11 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) + if (phy->autoneg_mask & ADVERTISE_2500_FULL) ret_val = phy->ops.write_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | - ANEG_MULTIGBT_AN_CTRL, + IGC_ANEG_MULTIGBT_AN_CTRL, aneg_multigbt_an_ctrl); return ret_val; @@ -509,24 +494,12 @@ s32 igc_setup_copper_link(struct igc_hw *hw) s32 ret_val = 0; bool link; - if (hw->mac.autoneg) { - /* Setup autoneg and flow control advertisement and perform - * autonegotiation. - */ - ret_val = igc_copper_link_autoneg(hw); - if (ret_val) - goto out; - } else { - /* PHY will be set to 10H, 10F, 100H or 100F - * depending on user settings. - */ - hw_dbg("Forcing Speed and Duplex\n"); - ret_val = hw->phy.ops.force_speed_duplex(hw); - if (ret_val) { - hw_dbg("Error Forcing Speed and Duplex\n"); - goto out; - } - } + /* Setup autoneg and flow control advertisement and perform + * autonegotiation. + */ + ret_val = igc_copper_link_autoneg(hw); + if (ret_val) + goto out; /* Check link status. Wait up to 100 microseconds for link to become * valid. @@ -583,7 +556,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; @@ -640,7 +613,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; @@ -738,7 +711,7 @@ static s32 igc_write_xmdio_reg(struct igc_hw *hw, u16 addr, */ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) { - u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + u8 dev_addr = FIELD_GET(GPY_MMD_MASK, offset); s32 ret_val; offset = offset & GPY_REG_MASK; @@ -748,8 +721,6 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) if (ret_val) return ret_val; ret_val = igc_write_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr, @@ -771,7 +742,7 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) */ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) { - u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + u8 dev_addr = FIELD_GET(GPY_MMD_MASK, offset); s32 ret_val; offset = offset & GPY_REG_MASK; @@ -781,8 +752,6 @@ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) if (ret_val) return ret_val; ret_val = igc_read_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr, diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h index 1b031372d206..832a7e359f18 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.h +++ b/drivers/net/ethernet/intel/igc/igc_phy.h @@ -11,7 +11,7 @@ s32 igc_phy_hw_reset(struct igc_hw *hw); s32 igc_get_phy_id(struct igc_hw *hw); s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations, u32 usec_interval, bool *success); -s32 igc_check_downshift(struct igc_hw *hw); +void igc_check_downshift(struct igc_hw *hw); s32 igc_setup_copper_link(struct igc_hw *hw); void igc_power_up_phy_copper(struct igc_hw *hw); void igc_power_down_phy_copper(struct igc_hw *hw); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 69617d2c1be2..b7b46d863bee 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -9,13 +9,18 @@ #include <linux/ptp_classify.h> #include <linux/clocksource.h> #include <linux/ktime.h> +#include <linux/delay.h> +#include <linux/iopoll.h> +#include <net/xdp_sock_drv.h> #define INCVALUE_MASK 0x7fffffff #define ISGN 0x80000000 -#define IGC_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) #define IGC_PTP_TX_TIMEOUT (HZ * 15) +#define IGC_PTM_STAT_SLEEP 2 +#define IGC_PTM_STAT_TIMEOUT 100 + /* SYSTIM read access for I225 */ void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts) { @@ -252,13 +257,6 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, switch (rq->type) { case PTP_CLK_REQ_EXTTS: - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests failing to enable both edges. */ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -295,10 +293,6 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, return 0; case PTP_CLK_REQ_PEROUT: - /* Reject requests with unsupported flags */ - if (rq->perout.flags) - return -EOPNOTSUPP; - if (on) { pin = ptp_find_pin(igc->ptp_clock, PTP_PF_PEROUT, rq->perout.index); @@ -318,7 +312,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, ts = ns_to_timespec64(ns); if (rq->perout.index == 1) { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT1; @@ -329,7 +323,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, freqout = IGC_FREQOUT1; } else { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT0; @@ -343,23 +337,44 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsauxc = rd32(IGC_TSAUXC); tsim = rd32(IGC_TSIM); if (rq->perout.index == 1) { - tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 | + IGC_TSAUXC_ST1); tsim &= ~IGC_TSICR_TT1; } else { - tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 | + IGC_TSAUXC_ST0); tsim &= ~IGC_TSICR_TT0; } if (on) { + struct timespec64 safe_start; int i = rq->perout.index; igc_pin_perout(igc, i, pin, use_freq); - igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc_ptp_read(igc, &safe_start); + + /* PPS output start time is triggered by Target time(TT) + * register. Programming any past time value into TT + * register will cause PPS to never start. Need to make + * sure we program the TT register a time ahead in + * future. There isn't a stringent need to fire PPS out + * right away. Adding +2 seconds should take care of + * corner cases. Let's say if the SYSTIML is close to + * wrap up and the timer keeps ticking as we program the + * register, adding +2seconds is safe bet. + */ + safe_start.tv_sec += 2; + + if (rq->perout.start.sec < safe_start.tv_sec) + igc->perout[i].start.tv_sec = safe_start.tv_sec; + else + igc->perout[i].start.tv_sec = rq->perout.start.sec; igc->perout[i].start.tv_nsec = rq->perout.start.nsec; igc->perout[i].period.tv_sec = ts.tv_sec; igc->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttimh, rq->perout.start.sec); + wr32(trgttimh, (u32)igc->perout[i].start.tv_sec); /* For now, always select timer 0 as source. */ - wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec | + IGC_TT_IO_TIMER_SEL_SYSTIM0)); if (use_freq) wr32(freqout, ns); tsauxc |= tsauxc_mask; @@ -411,10 +426,12 @@ static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, * * We need to convert the system time value stored in the RX/TXSTMP registers * into a hwtstamp which can be used by the upper level timestamping functions. + * + * Returns 0 on success. **/ -static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim) +static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) { switch (adapter->hw.mac.type) { case igc_i225: @@ -424,19 +441,18 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, systim & 0xFFFFFFFF); break; default: - break; + return -EINVAL; } + return 0; } /** * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer * @adapter: Pointer to adapter the packet buffer belongs to - * @buf: Pointer to packet buffer + * @buf: Pointer to start of timestamp in HW format (2 32-bit words) * - * This function retrieves the timestamp saved in the beginning of packet - * buffer. While two timestamps are available, one in timer0 reference and the - * other in timer1 reference, this function considers only the timestamp in - * timer0 reference. + * This function retrieves and converts the timestamp stored at @buf + * to ktime_t, adjusting for hardware latencies. * * Returns timestamp value. */ @@ -446,17 +462,8 @@ ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf) u32 secs, nsecs; int adjust; - /* Timestamps are saved in little endian at the beginning of the packet - * buffer following the layout: - * - * DWORD: | 0 | 1 | 2 | 3 | - * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | - * - * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds - * part of the timestamp. - */ - nsecs = le32_to_cpu(buf[2]); - secs = le32_to_cpu(buf[3]); + nsecs = le32_to_cpu(buf[0]); + secs = le32_to_cpu(buf[1]); timestamp = ktime_set(secs, nsecs); @@ -514,10 +521,11 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { val = rd32(IGC_SRRCTL(i)); - /* FIXME: For now, only support retrieving RX timestamps from - * timer 0. + /* Enable retrieving timestamps from timer 0, the + * "adjustable clock" and timer 1 the "free running + * clock". */ - val |= IGC_SRRCTL_TIMER1SEL(0) | IGC_SRRCTL_TIMER0SEL(0) | + val |= IGC_SRRCTL_TIMER1SEL(1) | IGC_SRRCTL_TIMER0SEL(0) | IGC_SRRCTL_TIMESTAMP; wr32(IGC_SRRCTL(i), val); } @@ -527,9 +535,63 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) wr32(IGC_TSYNCRXCTL, val); } +static void igc_ptp_free_tx_buffer(struct igc_adapter *adapter, + struct igc_tx_timestamp_request *tstamp) +{ + if (tstamp->buffer_type == IGC_TX_BUFFER_TYPE_XSK) { + /* Release the transmit completion */ + tstamp->xsk_tx_buffer->xsk_pending_ts = false; + + /* Note: tstamp->skb and tstamp->xsk_tx_buffer are in union. + * By setting tstamp->xsk_tx_buffer to NULL, tstamp->skb will + * become NULL as well. + */ + tstamp->xsk_tx_buffer = NULL; + tstamp->buffer_type = 0; + + /* Trigger txrx interrupt for transmit completion */ + igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, 0); + + return; + } + + dev_kfree_skb_any(tstamp->skb); + tstamp->skb = NULL; +} + +static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (tstamp->skb) + igc_ptp_free_tx_buffer(adapter, tstamp); + } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); +} + static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; + + /* Clear the flags first to avoid new packets to be enqueued + * for TX timestamping. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + + /* Now we can clean the pending TX timestamp requests. */ + igc_ptp_clear_tx_tstamp(adapter); wr32(IGC_TSYNCTXCTL, 0); } @@ -537,12 +599,23 @@ static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG); /* Read TXSTMP registers to discard any timestamp previously stored. */ rd32(IGC_TXSTMPL); rd32(IGC_TXSTMPH); + + /* The hardware is ready to accept TX timestamp requests, + * notify the transmit path. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + } /** @@ -553,12 +626,8 @@ static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) * Return: 0 in case of success, negative errno code otherwise. */ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { - /* reserved for future extensions */ - if (config->flags) - return -EINVAL; - switch (config->tx_type) { case HWTSTAMP_TX_OFF: igc_ptp_disable_tx_timestamp(adapter); @@ -598,59 +667,64 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, return 0; } -static void igc_ptp_tx_timeout(struct igc_adapter *adapter) +/* Requires adapter->ptp_tx_lock held by caller. */ +static void igc_ptp_tx_timeout(struct igc_adapter *adapter, + struct igc_tx_timestamp_request *tstamp) { - struct igc_hw *hw = &adapter->hw; + if (tstamp->skb) + igc_ptp_free_tx_buffer(adapter, tstamp); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; adapter->tx_hwtstamp_timeouts++; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); - /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ - rd32(IGC_TXSTMPH); + netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); } void igc_ptp_tx_hang(struct igc_adapter *adapter) { - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT); + struct igc_tx_timestamp_request *tstamp; + struct igc_hw *hw = &adapter->hw; + unsigned long flags; + bool found = false; + int i; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - /* If we haven't received a timestamp within the timeout, it is - * reasonable to assume that it will never occur, so we can unlock the - * timestamp bit when this occurs. - */ - if (timeout) { - cancel_work_sync(&adapter->ptp_tx_work); - igc_ptp_tx_timeout(adapter); + for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + tstamp = &adapter->tx_tstamp[i]; + + if (!tstamp->skb) + continue; + + if (time_is_after_jiffies(tstamp->start + IGC_PTP_TX_TIMEOUT)) + continue; + + igc_ptp_tx_timeout(adapter, tstamp); + found = true; } + + if (found) { + /* Reading the high register of the first set of timestamp registers + * clears all the equivalent bits in the TSYNCTXCTL register. + */ + rd32(IGC_TXSTMPH_0); + } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } -/** - * igc_ptp_tx_hwtstamp - utility function which checks for TX time stamp - * @adapter: Board private structure - * - * If we were asked to do hardware stamping and such a time stamp is - * available, then it must have been for this skb here because we only - * allow only one such packet into the queue. - */ -static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) +static void igc_ptp_tx_reg_to_stamp(struct igc_adapter *adapter, + struct igc_tx_timestamp_request *tstamp, u64 regval) { - struct sk_buff *skb = adapter->ptp_tx_skb; struct skb_shared_hwtstamps shhwtstamps; - struct igc_hw *hw = &adapter->hw; + struct sk_buff *skb; int adjust = 0; - u64 regval; - if (WARN_ON_ONCE(!skb)) + skb = tstamp->skb; + if (!skb) return; - regval = rd32(IGC_TXSTMPL); - regval |= (u64)rd32(IGC_TXSTMPH) << 32; - igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval)) + return; switch (adapter->link_speed) { case SPEED_10: @@ -670,86 +744,349 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) shhwtstamps.hwtstamp = ktime_add_ns(shhwtstamps.hwtstamp, adjust); - /* Clear the lock early before calling skb_tstamp_tx so that - * applications are not woken up before the lock bit is clear. We use - * a copy of the skb pointer to ensure other threads can't change it - * while we're notifying the stack. - */ - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + /* Copy the tx hardware timestamp into xdp metadata or skb */ + if (tstamp->buffer_type == IGC_TX_BUFFER_TYPE_XSK) { + struct xsk_buff_pool *xsk_pool; + + xsk_pool = adapter->tx_ring[tstamp->xsk_queue_index]->xsk_pool; + if (xsk_pool && xp_tx_metadata_enabled(xsk_pool)) { + xsk_tx_metadata_complete(&tstamp->xsk_meta, + &igc_xsk_tx_metadata_ops, + &shhwtstamps.hwtstamp); + } + } else { + skb_tstamp_tx(skb, &shhwtstamps); + } - /* Notify the stack and free the skb after we've unlocked */ - skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); + igc_ptp_free_tx_buffer(adapter, tstamp); } /** - * igc_ptp_tx_work - * @work: pointer to work struct + * igc_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure + * + * Check against the ready mask for which of the timestamp register + * sets are ready to be retrieved, then retrieve that and notify the + * rest of the stack. * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. + * Context: Expects adapter->ptp_tx_lock to be held by caller. */ -static void igc_ptp_tx_work(struct work_struct *work) +static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { - struct igc_adapter *adapter = container_of(work, struct igc_adapter, - ptp_tx_work); struct igc_hw *hw = &adapter->hw; - u32 tsynctxctl; + u64 regval; + u32 mask; + int i; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + mask = rd32(IGC_TSYNCTXCTL) & IGC_TSYNCTXCTL_TXTT_ANY; + if (mask & IGC_TSYNCTXCTL_TXTT_0) { + regval = rd32(IGC_TXSTMPL); + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } else { + /* There's a bug in the hardware that could cause + * missing interrupts for TX timestamping. The issue + * is that for new interrupts to be triggered, the + * IGC_TXSTMPH_0 register must be read. + * + * To avoid discarding a valid timestamp that just + * happened at the "wrong" time, we need to confirm + * that there was no timestamp captured, we do that by + * assuming that no two timestamps in sequence have + * the same nanosecond value. + * + * So, we read the "low" register, read the "high" + * register (to latch a new timestamp) and read the + * "low" register again, if "old" and "new" versions + * of the "low" register are different, a valid + * timestamp was captured, we can read the "high" + * register again. + */ + u32 txstmpl_old, txstmpl_new; - tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - return; + txstmpl_old = rd32(IGC_TXSTMPL); + rd32(IGC_TXSTMPH); + txstmpl_new = rd32(IGC_TXSTMPL); + + if (txstmpl_old == txstmpl_new) + goto done; + + regval = txstmpl_new; + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } + + igc_ptp_tx_reg_to_stamp(adapter, &adapter->tx_tstamp[0], regval); + +done: + /* Now that the problematic first register was handled, we can + * use retrieve the timestamps from the other registers + * (starting from '1') with less complications. + */ + for (i = 1; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; + + if (!(tstamp->mask & mask)) + continue; + + regval = rd32(tstamp->regl); + regval |= (u64)rd32(tstamp->regh) << 32; + + igc_ptp_tx_reg_to_stamp(adapter, tstamp, regval); + } +} + +/** + * igc_ptp_tx_tstamp_event + * @adapter: board private structure + * + * Called when a TX timestamp interrupt happens to retrieve the + * timestamp and send it up to the socket. + */ +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter) +{ + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); igc_ptp_tx_hwtstamp(adapter); + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** - * igc_ptp_set_ts_config - set hardware time stamping config + * igc_ptp_hwtstamp_set - set hardware time stamping config * @netdev: network interface device structure - * @ifr: interface request data + * @config: timestamping configuration structure + * @extack: netlink extended ack structure for error reporting * **/ -int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) +int igc_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config config; int err; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = igc_ptp_set_timestamp_mode(adapter, &config); + err = igc_ptp_set_timestamp_mode(adapter, config); if (err) return err; /* save these settings for future reference */ - memcpy(&adapter->tstamp_config, &config, - sizeof(adapter->tstamp_config)); + adapter->tstamp_config = *config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } /** - * igc_ptp_get_ts_config - get hardware time stamping config + * igc_ptp_hwtstamp_get - get hardware time stamping config * @netdev: network interface device structure - * @ifr: interface request data + * @config: timestamping configuration structure * * Get the hwtstamp_config settings to return to the user. Rather than attempt * to deconstruct the settings from the registers, just return a shadow copy * of the last known settings. **/ -int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) +int igc_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { struct igc_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config *config = &adapter->tstamp_config; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? - -EFAULT : 0; + *config = adapter->tstamp_config; + + return 0; +} + +/* The two conditions below must be met for cross timestamping via + * PCIe PTM: + * + * 1. We have an way to convert the timestamps in the PTM messages + * to something related to the system clocks (right now, only + * X86 systems with support for the Always Running Timer allow that); + * + * 2. We have PTM enabled in the path from the device to the PCIe root port. + */ +static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) +{ + if (!IS_ENABLED(CONFIG_X86_TSC)) + return false; + + /* FIXME: it was noticed that enabling support for PCIe PTM in + * some i225-V models could cause lockups when bringing the + * interface up/down. There should be no downsides to + * disabling crosstimestamping support for i225-V, as it + * doesn't have any PTP support. That way we gain some time + * while root causing the issue. + */ + if (adapter->pdev->device == IGC_DEV_ID_I225_V) + return false; + + return pcie_ptm_enabled(adapter->pdev); +} + +static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) +{ +#if IS_ENABLED(CONFIG_X86_TSC) && !defined(CONFIG_UML) + return (struct system_counterval_t) { + .cs_id = CSID_X86_ART, + .cycles = tstamp, + .use_nsecs = true, + }; +#else + return (struct system_counterval_t) { }; +#endif +} + +static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat) +{ + struct net_device *netdev = adapter->netdev; + + switch (ptm_stat) { + case IGC_PTM_STAT_RET_ERR: + netdev_err(netdev, "PTM Error: Root port timeout\n"); + break; + case IGC_PTM_STAT_BAD_PTM_RES: + netdev_err(netdev, "PTM Error: Bad response, PTM Response Data expected\n"); + break; + case IGC_PTM_STAT_T4M1_OVFL: + netdev_err(netdev, "PTM Error: T4 minus T1 overflow\n"); + break; + case IGC_PTM_STAT_ADJUST_1ST: + netdev_err(netdev, "PTM Error: 1588 timer adjusted during first PTM cycle\n"); + break; + case IGC_PTM_STAT_ADJUST_CYC: + netdev_err(netdev, "PTM Error: 1588 timer adjusted during non-first PTM cycle\n"); + break; + default: + netdev_err(netdev, "PTM Error: Unknown error (%#x)\n", ptm_stat); + break; + } +} + +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */ +static void igc_ptm_trigger(struct igc_hw *hw) +{ + u32 ctrl; + + /* To "manually" start the PTM cycle we need to set the + * trigger (TRIG) bit + */ + ctrl = rd32(IGC_PTM_CTRL); + ctrl |= IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Perform flush after write to CTRL register otherwise + * transaction may not start + */ + wrfl(); +} + +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */ +static void igc_ptm_reset(struct igc_hw *hw) +{ + u32 ctrl; + + ctrl = rd32(IGC_PTM_CTRL); + ctrl &= ~IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Write to clear all status */ + wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL); +} + +static int igc_phc_get_syncdevicetime(ktime_t *device, + struct system_counterval_t *system, + void *ctx) +{ + struct igc_adapter *adapter = ctx; + struct igc_hw *hw = &adapter->hw; + u32 stat, t2_curr_h, t2_curr_l; + int err, count = 100; + ktime_t t1, t2_curr; + + /* Doing this in a loop because in the event of a + * badly timed (ha!) system clock adjustment, we may + * get PTM errors from the PCI root, but these errors + * are transitory. Repeating the process returns valid + * data eventually. + */ + do { + /* Get a snapshot of system clocks to use as historic value. */ + ktime_get_snapshot(&adapter->snapshot); + + igc_ptm_trigger(hw); + + err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, + stat, IGC_PTM_STAT_SLEEP, + IGC_PTM_STAT_TIMEOUT); + igc_ptm_reset(hw); + + if (err < 0) { + netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); + return err; + } + + if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID) + break; + + igc_ptm_log_error(adapter, stat); + } while (--count); + + if (!count) { + netdev_err(adapter->netdev, "Exceeded number of tries for PTM cycle\n"); + return -ETIMEDOUT; + } + + t1 = ktime_set(rd32(IGC_PTM_T1_TIM0_H), rd32(IGC_PTM_T1_TIM0_L)); + + t2_curr_l = rd32(IGC_PTM_CURR_T2_L); + t2_curr_h = rd32(IGC_PTM_CURR_T2_H); + + /* FIXME: When the register that tells the endianness of the + * PTM registers are implemented, check them here and add the + * appropriate conversion. + */ + t2_curr_h = swab32(t2_curr_h); + + t2_curr = ((s64)t2_curr_h << 32 | t2_curr_l); + + *device = t1; + *system = igc_device_tstamp_to_system(t2_curr); + + return 0; +} + +static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts) +{ + struct igc_adapter *adapter = container_of(ptp, struct igc_adapter, + ptp_caps); + int ret; + + /* This blocks until any in progress PTM transactions complete */ + mutex_lock(&adapter->ptm_lock); + + ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime, + adapter, &adapter->snapshot, cts); + mutex_unlock(&adapter->ptm_lock); + + return ret; +} + +static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, ptp_caps); + struct igc_hw *hw = &igc->hw; + unsigned long flags; + + spin_lock_irqsave(&igc->free_timer_lock, flags); + + ptp_read_system_prets(sts); + ts->tv_nsec = rd32(IGC_SYSTIML_1); + ts->tv_sec = rd32(IGC_SYSTIMH_1); + ptp_read_system_postts(sts); + + spin_unlock_irqrestore(&igc->free_timer_lock, flags); + + return 0; } /** @@ -762,9 +1099,34 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) void igc_ptp_init(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct igc_tx_timestamp_request *tstamp; struct igc_hw *hw = &adapter->hw; int i; + tstamp = &adapter->tx_tstamp[0]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_0; + tstamp->regl = IGC_TXSTMPL_0; + tstamp->regh = IGC_TXSTMPH_0; + tstamp->flags = 0; + + tstamp = &adapter->tx_tstamp[1]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_1; + tstamp->regl = IGC_TXSTMPL_1; + tstamp->regh = IGC_TXSTMPH_1; + tstamp->flags = IGC_TX_FLAGS_TSTAMP_1; + + tstamp = &adapter->tx_tstamp[2]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_2; + tstamp->regl = IGC_TXSTMPL_2; + tstamp->regh = IGC_TXSTMPH_2; + tstamp->flags = IGC_TX_FLAGS_TSTAMP_2; + + tstamp = &adapter->tx_tstamp[3]; + tstamp->mask = IGC_TSYNCTXCTL_TXTT_3; + tstamp->regl = IGC_TXSTMPL_3; + tstamp->regh = IGC_TXSTMPH_3; + tstamp->flags = IGC_TX_FLAGS_TSTAMP_3; + switch (hw->mac.type) { case igc_i225: for (i = 0; i < IGC_N_SDP; i++) { @@ -780,22 +1142,33 @@ void igc_ptp_init(struct igc_adapter *adapter) adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225; adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225; adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225; + adapter->ptp_caps.getcyclesx64 = igc_ptp_getcyclesx64; adapter->ptp_caps.settime64 = igc_ptp_settime_i225; adapter->ptp_caps.enable = igc_ptp_feature_enable_i225; adapter->ptp_caps.pps = 1; adapter->ptp_caps.pin_config = adapter->sdp_config; adapter->ptp_caps.n_ext_ts = IGC_N_EXTTS; adapter->ptp_caps.n_per_out = IGC_N_PEROUT; + adapter->ptp_caps.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; adapter->ptp_caps.n_pins = IGC_N_SDP; adapter->ptp_caps.verify = igc_ptp_verify_pin; + + if (!igc_is_crosststamp_supported(adapter)) + break; + + adapter->ptp_caps.getcrosststamp = igc_ptp_getcrosststamp; break; default: adapter->ptp_clock = NULL; return; } + spin_lock_init(&adapter->ptp_tx_lock); + spin_lock_init(&adapter->free_timer_lock); spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); + mutex_init(&adapter->ptm_lock); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -808,6 +1181,7 @@ void igc_ptp_init(struct igc_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; netdev_err(netdev, "ptp_clock_register failed\n"); + mutex_destroy(&adapter->ptm_lock); } else if (adapter->ptp_clock) { netdev_info(netdev, "PHC added\n"); adapter->ptp_flags |= IGC_PTP_ENABLED; @@ -832,6 +1206,19 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter) igc_ptp_write_i225(adapter, &ts); } +static void igc_ptm_stop(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 ctrl; + + mutex_lock(&adapter->ptm_lock); + ctrl = rd32(IGC_PTM_CTRL); + ctrl &= ~IGC_PTM_CTRL_EN; + + wr32(IGC_PTM_CTRL, ctrl); + mutex_unlock(&adapter->ptm_lock); +} + /** * igc_ptp_suspend - Disable PTP work items and prepare for suspend * @adapter: Board private structure @@ -844,12 +1231,12 @@ void igc_ptp_suspend(struct igc_adapter *adapter) if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) return; - cancel_work_sync(&adapter->ptp_tx_work); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + igc_ptp_clear_tx_tstamp(adapter); - igc_ptp_time_save(adapter); + if (pci_device_is_present(adapter->pdev)) { + igc_ptp_time_save(adapter); + igc_ptm_stop(adapter); + } } /** @@ -860,13 +1247,18 @@ void igc_ptp_suspend(struct igc_adapter *adapter) **/ void igc_ptp_stop(struct igc_adapter *adapter) { + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + igc_ptp_suspend(adapter); + adapter->ptp_flags &= ~IGC_PTP_ENABLED; if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); netdev_info(adapter->netdev, "PHC removed\n"); adapter->ptp_flags &= ~IGC_PTP_ENABLED; } + mutex_destroy(&adapter->ptm_lock); } /** @@ -878,21 +1270,59 @@ void igc_ptp_stop(struct igc_adapter *adapter) void igc_ptp_reset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + u32 cycle_ctrl, ctrl, stat; unsigned long flags; + u32 timadj; + + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + mutex_lock(&adapter->ptm_lock); + spin_lock_irqsave(&adapter->tmreg_lock, flags); switch (adapter->hw.mac.type) { case igc_i225: + timadj = rd32(IGC_TIMADJ); + timadj |= IGC_TIMADJ_ADJUST_METH; + wr32(IGC_TIMADJ, timadj); + wr32(IGC_TSAUXC, 0x0); wr32(IGC_TSSDP, 0x0); wr32(IGC_TSIM, IGC_TSICR_INTERRUPTS | (adapter->pps_sys_wrap_on ? IGC_TSICR_SYS_WRAP : 0)); wr32(IGC_IMS, IGC_IMS_TS); + + if (!igc_is_crosststamp_supported(adapter)) + break; + + wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT); + wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT); + + cycle_ctrl = IGC_PTM_CYCLE_CTRL_CYC_TIME(IGC_PTM_CYC_TIME_DEFAULT); + + wr32(IGC_PTM_CYCLE_CTRL, cycle_ctrl); + + ctrl = IGC_PTM_CTRL_EN | + IGC_PTM_CTRL_START_NOW | + IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) | + IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT); + + wr32(IGC_PTM_CTRL, ctrl); + + /* Force the first cycle to run. */ + igc_ptm_trigger(hw); + + if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat, + stat, IGC_PTM_STAT_SLEEP, + IGC_PTM_STAT_TIMEOUT)) + netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); + + igc_ptm_reset(hw); break; default: /* No work to do. */ @@ -909,5 +1339,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) out: spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptm_lock); + wrfl(); } diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 0f82990567d9..f343c6bfc6be 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -12,6 +12,7 @@ #define IGC_MDIC 0x00020 /* MDI Control - RW */ #define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ #define IGC_VET 0x00038 /* VLAN Ether Type - RW */ +#define IGC_LEDCTL 0x00E00 /* LED Control - RW */ #define IGC_I225_PHPM 0x00E14 /* I225 PHY Power Management */ #define IGC_GPHY_VERSION 0x0001E /* I225 gPHY Firmware Version */ @@ -59,14 +60,14 @@ #define IGC_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ #define IGC_GPIE 0x01514 /* General Purpose Intr Enable - RW */ -/* MSI-X Table Register Descriptions */ -#define IGC_PBACL 0x05B68 /* MSIx PBA Clear - R/W 1 to clear */ - /* RSS registers */ #define IGC_MRQC 0x05818 /* Multiple Receive Control - RW */ /* Filtering Registers */ #define IGC_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ +#define IGC_FHFT(_n) (0x09000 + (256 * (_n))) /* Flexible Host Filter */ +#define IGC_FHFT_EXT(_n) (0x09A00 + (256 * (_n))) /* Flexible Host Filter Extended */ +#define IGC_FHFTSL 0x05804 /* Flex Filter indirect table select */ /* ETQF register bit definitions */ #define IGC_ETQF_FILTER_ENABLE BIT(26) @@ -75,6 +76,19 @@ #define IGC_ETQF_QUEUE_MASK 0x00070000 #define IGC_ETQF_ETYPE_MASK 0x0000FFFF +/* FHFT register bit definitions */ +#define IGC_FHFT_LENGTH_MASK GENMASK(7, 0) +#define IGC_FHFT_QUEUE_SHIFT 8 +#define IGC_FHFT_QUEUE_MASK GENMASK(10, 8) +#define IGC_FHFT_PRIO_SHIFT 16 +#define IGC_FHFT_PRIO_MASK GENMASK(18, 16) +#define IGC_FHFT_IMM_INT BIT(24) +#define IGC_FHFT_DROP BIT(25) + +/* FHFTSL register bit definitions */ +#define IGC_FHFTSL_FTSL_SHIFT 0 +#define IGC_FHFTSL_FTSL_MASK GENMASK(1, 0) + /* Redirection Table - RW Array */ #define IGC_RETA(_i) (0x05C00 + ((_i) * 4)) /* RSS Random Key - RW Array */ @@ -208,9 +222,26 @@ #define IGC_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ +/* Time sync registers - preemption statistics */ +#define IGC_PRMPTDRCNT 0x04284 /* Good RX Preempted Packets */ +#define IGC_PRMEVNTTCNT 0x04298 /* TX Preemption event counter */ +#define IGC_PRMEVNTRCNT 0x0429C /* RX Preemption event counter */ + + /* Preemption Exception Counter */ + #define IGC_PRMEXCPRCNT 0x42A0 +/* Received out of order packets with SMD-C */ +#define IGC_PRMEXCPRCNT_OOO_SMDC 0x000000FF +/* Received out of order packets with SMD-C and wrong Frame CNT */ +#define IGC_PRMEXCPRCNT_OOO_FRAME_CNT 0x0000FF00 +/* Received out of order packets with SMD-C and wrong Frag CNT */ +#define IGC_PRMEXCPRCNT_OOO_FRAG_CNT 0x00FF0000 +/* Received packets with SMD-S and wrong Frag CNT and Frame CNT */ +#define IGC_PRMEXCPRCNT_MISS_FRAME_FRAG_CNT 0xFF000000 + /* Transmit Scheduling Registers */ #define IGC_TQAVCTRL 0x3570 #define IGC_TXQCTL(_n) (0x3344 + 0x4 * (_n)) +#define IGC_GTXOFFSET 0x3310 #define IGC_BASET_L 0x3314 #define IGC_BASET_H 0x3318 #define IGC_QBVCYCLET 0x331C @@ -220,15 +251,60 @@ #define IGC_ENDQT(_n) (0x3334 + 0x4 * (_n)) #define IGC_DTXMXPKTSZ 0x355C +#define IGC_TQAVCC(_n) (0x3004 + ((_n) * 0x40)) +#define IGC_TQAVHC(_n) (0x300C + ((_n) * 0x40)) + +#define IGC_TXARB 0x3354 /* Tx Arbitration Control TxARB - RW */ + /* System Time Registers */ #define IGC_SYSTIML 0x0B600 /* System time register Low - RO */ #define IGC_SYSTIMH 0x0B604 /* System time register High - RO */ #define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */ #define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define IGC_SYSTIML_1 0x0B688 /* System time register Low - RO (timer 1) */ +#define IGC_SYSTIMH_1 0x0B68C /* System time register High - RO (timer 1) */ +#define IGC_SYSTIMR_1 0x0B684 /* System time register Residue (timer 1) */ +#define IGC_TIMINCA_1 0x0B690 /* Increment attributes register - RW (timer 1) */ + +/* TX Timestamp Low */ +#define IGC_TXSTMPL_0 0x0B618 +#define IGC_TXSTMPL_1 0x0B698 +#define IGC_TXSTMPL_2 0x0B6B8 +#define IGC_TXSTMPL_3 0x0B6D8 + +/* TX Timestamp High */ +#define IGC_TXSTMPH_0 0x0B61C +#define IGC_TXSTMPH_1 0x0B69C +#define IGC_TXSTMPH_2 0x0B6BC +#define IGC_TXSTMPH_3 0x0B6DC + #define IGC_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ #define IGC_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ +#define IGC_TIMADJ 0x0B60C /* Time Adjustment Offset Register */ + +/* PCIe Registers */ +#define IGC_PTM_CTRL 0x12540 /* PTM Control */ +#define IGC_PTM_STAT 0x12544 /* PTM Status */ +#define IGC_PTM_CYCLE_CTRL 0x1254C /* PTM Cycle Control */ + +/* PTM Time registers */ +#define IGC_PTM_T1_TIM0_L 0x12558 /* T1 on Timer 0 Low */ +#define IGC_PTM_T1_TIM0_H 0x1255C /* T1 on Timer 0 High */ + +#define IGC_PTM_CURR_T2_L 0x1258C /* Current T2 Low */ +#define IGC_PTM_CURR_T2_H 0x12590 /* Current T2 High */ +#define IGC_PTM_PREV_T2_L 0x12584 /* Previous T2 Low */ +#define IGC_PTM_PREV_T2_H 0x12588 /* Previous T2 High */ +#define IGC_PTM_PREV_T4M1 0x12578 /* T4 Minus T1 on previous PTM Cycle */ +#define IGC_PTM_CURR_T4M1 0x1257C /* T4 Minus T1 on this PTM Cycle */ +#define IGC_PTM_PREV_T3M2 0x12580 /* T3 Minus T2 on previous PTM Cycle */ +#define IGC_PTM_TDELAY 0x12594 /* PTM PCIe Link Delay */ + +#define IGC_PCIE_DIG_DELAY 0x12550 /* PCIe Digital Delay */ +#define IGC_PCIE_PHY_DELAY 0x12554 /* PCIe PHY Delay */ + /* Management registers */ #define IGC_MANC 0x05820 /* Management Control - RW */ @@ -240,6 +316,7 @@ #define IGC_WUFC 0x05808 /* Wakeup Filter Control - RW */ #define IGC_WUS 0x05810 /* Wakeup Status - R/W1C */ #define IGC_WUPL 0x05900 /* Wakeup Packet Length - RW */ +#define IGC_WUFC_EXT 0x0580C /* Wakeup Filter Control Register Extended - RW */ /* Wake Up packet memory */ #define IGC_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) @@ -249,9 +326,18 @@ #define IGC_IPCNFG 0x0E38 /* Internal PHY Configuration */ #define IGC_EEE_SU 0x0E34 /* EEE Setup */ +/* MULTI GBT AN Control Register - reg. 7.32 */ +#define IGC_ANEG_MULTIGBT_AN_CTRL 0x0020 + +/* EEE ANeg Advertisement Register - reg 7.60 and reg 7.62 */ +#define IGC_ANEG_EEE_AB1 0x003c +#define IGC_ANEG_EEE_AB2 0x003e +/* EEE ANeg Link-Partner Advertisement Register - reg 7.61 and reg 7.63 */ +#define IGC_ANEG_EEE_LP_AB1 0x003d +#define IGC_ANEG_EEE_LP_AB2 0x003f + /* LTR registers */ #define IGC_LTRC 0x01A0 /* Latency Tolerance Reporting Control */ -#define IGC_DMACR 0x02508 /* DMA Coalescing Control Register */ #define IGC_LTRMINV 0x5BB0 /* LTR Minimum Value */ #define IGC_LTRMAXV 0x5BB4 /* LTR Maximum Value */ @@ -263,7 +349,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg); #define wr32(reg, val) \ do { \ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ - writel((val), &hw_addr[(reg)]); \ + if (!IGC_REMOVED(hw_addr)) \ + writel((val), &hw_addr[(reg)]); \ } while (0) #define rd32(reg) (igc_rd32(hw, reg)) @@ -275,4 +362,6 @@ do { \ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) +#define IGC_REMOVED(h) unlikely(!(h)) + #endif diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 174103c4bea6..8a110145bfee 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -2,8 +2,206 @@ /* Copyright (c) 2019 Intel Corporation */ #include "igc.h" +#include "igc_base.h" +#include "igc_hw.h" #include "igc_tsn.h" +#define MIN_MULTPLIER_TX_MIN_FRAG 0 +#define MAX_MULTPLIER_TX_MIN_FRAG 3 +/* Frag size is based on the Section 8.12.2 of the SW User Manual */ +#define TX_MIN_FRAG_SIZE 64 +#define TX_MAX_FRAG_SIZE (TX_MIN_FRAG_SIZE * \ + (MAX_MULTPLIER_TX_MIN_FRAG + 1)) + +enum tx_queue { + TX_QUEUE_0 = 0, + TX_QUEUE_1, + TX_QUEUE_2, + TX_QUEUE_3, +}; + +DEFINE_STATIC_KEY_FALSE(igc_fpe_enabled); + +static int igc_fpe_init_smd_frame(struct igc_ring *ring, + struct igc_tx_buffer *buffer, + struct sk_buff *skb) +{ + dma_addr_t dma = dma_map_single(ring->dev, skb->data, skb->len, + DMA_TO_DEVICE); + + if (dma_mapping_error(ring->dev, dma)) { + netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); + return -ENOMEM; + } + + buffer->skb = skb; + buffer->protocol = 0; + buffer->bytecount = skb->len; + buffer->gso_segs = 1; + buffer->time_stamp = jiffies; + dma_unmap_len_set(buffer, len, skb->len); + dma_unmap_addr_set(buffer, dma, dma); + + return 0; +} + +static int igc_fpe_init_tx_descriptor(struct igc_ring *ring, + struct sk_buff *skb, + enum igc_txd_popts_type type) +{ + u32 cmd_type, olinfo_status = 0; + struct igc_tx_buffer *buffer; + union igc_adv_tx_desc *desc; + int err; + + if (!igc_desc_unused(ring)) + return -EBUSY; + + buffer = &ring->tx_buffer_info[ring->next_to_use]; + err = igc_fpe_init_smd_frame(ring, buffer, skb); + if (err) + return err; + + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | + buffer->bytecount; + + olinfo_status |= FIELD_PREP(IGC_ADVTXD_PAYLEN_MASK, buffer->bytecount); + + switch (type) { + case SMD_V: + case SMD_R: + olinfo_status |= FIELD_PREP(IGC_TXD_POPTS_SMD_MASK, type); + break; + } + + desc = IGC_TX_DESC(ring, ring->next_to_use); + desc->read.cmd_type_len = cpu_to_le32(cmd_type); + desc->read.olinfo_status = cpu_to_le32(olinfo_status); + desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma)); + + netdev_tx_sent_queue(txring_txq(ring), skb->len); + + buffer->next_to_watch = desc; + ring->next_to_use = (ring->next_to_use + 1) % ring->count; + + return 0; +} + +static int igc_fpe_xmit_smd_frame(struct igc_adapter *adapter, + enum igc_txd_popts_type type) +{ + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + struct sk_buff *skb; + int err; + + ring = igc_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + skb = alloc_skb(SMD_FRAME_SIZE, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb_put_zero(skb, SMD_FRAME_SIZE); + + __netif_tx_lock(nq, cpu); + + err = igc_fpe_init_tx_descriptor(ring, skb, type); + igc_flush_tx_descriptors(ring); + + __netif_tx_unlock(nq); + + return err; +} + +static void igc_fpe_configure_tx(struct ethtool_mmsv *mmsv, bool tx_enable) +{ + struct igc_fpe_t *fpe = container_of(mmsv, struct igc_fpe_t, mmsv); + struct igc_adapter *adapter; + + adapter = container_of(fpe, struct igc_adapter, fpe); + adapter->fpe.tx_enabled = tx_enable; + + /* Update config since tx_enabled affects preemptible queue configuration */ + igc_tsn_offload_apply(adapter); +} + +static void igc_fpe_send_mpacket(struct ethtool_mmsv *mmsv, + enum ethtool_mpacket type) +{ + struct igc_fpe_t *fpe = container_of(mmsv, struct igc_fpe_t, mmsv); + struct igc_adapter *adapter; + int err; + + adapter = container_of(fpe, struct igc_adapter, fpe); + + if (type == ETHTOOL_MPACKET_VERIFY) { + err = igc_fpe_xmit_smd_frame(adapter, SMD_V); + if (err && net_ratelimit()) + netdev_err(adapter->netdev, "Error sending SMD-V\n"); + } else if (type == ETHTOOL_MPACKET_RESPONSE) { + err = igc_fpe_xmit_smd_frame(adapter, SMD_R); + if (err && net_ratelimit()) + netdev_err(adapter->netdev, "Error sending SMD-R frame\n"); + } +} + +static const struct ethtool_mmsv_ops igc_mmsv_ops = { + .configure_tx = igc_fpe_configure_tx, + .send_mpacket = igc_fpe_send_mpacket, +}; + +void igc_fpe_init(struct igc_adapter *adapter) +{ + adapter->fpe.tx_min_frag_size = TX_MIN_FRAG_SIZE; + adapter->fpe.tx_enabled = false; + ethtool_mmsv_init(&adapter->fpe.mmsv, adapter->netdev, &igc_mmsv_ops); +} + +void igc_fpe_clear_preempt_queue(struct igc_adapter *adapter) +{ + for (int i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + tx_ring->preemptible = false; + } +} + +static u32 igc_fpe_map_preempt_tc_to_queue(const struct igc_adapter *adapter, + unsigned long preemptible_tcs) +{ + struct net_device *dev = adapter->netdev; + u32 i, queue = 0; + + for (i = 0; i < dev->num_tc; i++) { + u32 offset, count; + + if (!(preemptible_tcs & BIT(i))) + continue; + + offset = dev->tc_to_txq[i].offset; + count = dev->tc_to_txq[i].count; + queue |= GENMASK(offset + count - 1, offset); + } + + return queue; +} + +void igc_fpe_save_preempt_queue(struct igc_adapter *adapter, + const struct tc_mqprio_qopt_offload *mqprio) +{ + u32 preemptible_queue = igc_fpe_map_preempt_tc_to_queue(adapter, + mqprio->preemptible_tcs); + + for (int i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + tx_ring->preemptible = !!(preemptible_queue & BIT(i)); + } +} + static bool is_any_launchtime(struct igc_adapter *adapter) { int i; @@ -18,8 +216,143 @@ static bool is_any_launchtime(struct igc_adapter *adapter) return false; } +static bool is_cbs_enabled(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + if (ring->cbs_enable) + return true; + } + + return false; +} + +static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) +{ + unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; + + + if (adapter->taprio_offload_enable || is_any_launchtime(adapter) || + adapter->strict_priority_enable) + new_flags |= IGC_FLAG_TSN_QBV_ENABLED; + + if (is_cbs_enabled(adapter)) + new_flags |= IGC_FLAG_TSN_QAV_ENABLED; + + if (adapter->fpe.mmsv.pmac_enabled) + new_flags |= IGC_FLAG_TSN_PREEMPT_ENABLED; + + return new_flags; +} + +static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN); +} + +void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u16 txoffset; + + if (!igc_tsn_is_tx_mode_in_tsn(adapter)) + return; + + switch (adapter->link_speed) { + case SPEED_10: + txoffset = IGC_TXOFFSET_SPEED_10; + break; + case SPEED_100: + txoffset = IGC_TXOFFSET_SPEED_100; + break; + case SPEED_1000: + txoffset = IGC_TXOFFSET_SPEED_1000; + break; + case SPEED_2500: + txoffset = IGC_TXOFFSET_SPEED_2500; + break; + default: + txoffset = 0; + break; + } + + wr32(IGC_GTXOFFSET, txoffset); +} + +static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl; + + retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK; + wr32(IGC_RETX_CTL, retxctl); +} + +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + adapter->taprio_offload_enable; +} + +static void igc_tsn_tx_arb(struct igc_adapter *adapter, bool reverse_prio) +{ + struct igc_hw *hw = &adapter->hw; + u32 txarb; + + txarb = rd32(IGC_TXARB); + + txarb &= ~(IGC_TXARB_TXQ_PRIO_0_MASK | + IGC_TXARB_TXQ_PRIO_1_MASK | + IGC_TXARB_TXQ_PRIO_2_MASK | + IGC_TXARB_TXQ_PRIO_3_MASK); + + if (reverse_prio) { + txarb |= IGC_TXARB_TXQ_PRIO_0(TX_QUEUE_3); + txarb |= IGC_TXARB_TXQ_PRIO_1(TX_QUEUE_2); + txarb |= IGC_TXARB_TXQ_PRIO_2(TX_QUEUE_1); + txarb |= IGC_TXARB_TXQ_PRIO_3(TX_QUEUE_0); + } else { + txarb |= IGC_TXARB_TXQ_PRIO_0(TX_QUEUE_0); + txarb |= IGC_TXARB_TXQ_PRIO_1(TX_QUEUE_1); + txarb |= IGC_TXARB_TXQ_PRIO_2(TX_QUEUE_2); + txarb |= IGC_TXARB_TXQ_PRIO_3(TX_QUEUE_3); + } + + wr32(IGC_TXARB, txarb); +} + +/** + * igc_tsn_set_rxpbsize - Set the receive packet buffer size + * @adapter: Pointer to the igc_adapter structure + * @rxpbs_exp_bmc_be: Value to set the receive packet buffer size, including + * express buffer, BMC buffer, and Best Effort buffer + * + * The IGC_RXPBS register value may include allocations for the Express buffer, + * BMC buffer, Best Effort buffer, and the timestamp descriptor buffer + * (IGC_RXPBS_CFG_TS_EN). + */ +static void igc_tsn_set_rxpbsize(struct igc_adapter *adapter, + u32 rxpbs_exp_bmc_be) +{ + struct igc_hw *hw = &adapter->hw; + u32 rxpbs = rd32(IGC_RXPBS); + + rxpbs &= ~(IGC_RXPBSIZE_EXP_MASK | IGC_BMC2OSPBSIZE_MASK | + IGC_RXPBSIZE_BE_MASK); + rxpbs |= rxpbs_exp_bmc_be; + + wr32(IGC_RXPBS, rxpbs); +} + /* Returns the TSN specific registers to their default values after - * TSN offloading is disabled. + * the adapter is reset. */ static int igc_tsn_disable_offload(struct igc_adapter *adapter) { @@ -27,76 +360,139 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) u32 tqavctrl; int i; - if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED)) - return 0; + wr32(IGC_GTXOFFSET, 0); + wr32(IGC_TXPBS, IGC_TXPBSIZE_DEFAULT); + wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); - adapter->cycle_time = 0; + igc_tsn_set_rxpbsize(adapter, IGC_RXPBSIZE_EXP_BMC_DEFAULT); - wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); - wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + if (igc_is_device_id_i226(hw)) + igc_tsn_restore_retx_default(adapter); tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | - IGC_TQAVCTRL_ENHANCED_QAV); + IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS | + IGC_TQAVCTRL_PREEMPT_ENA | IGC_TQAVCTRL_MIN_FRAG_MASK); + wr32(IGC_TQAVCTRL, tqavctrl); for (i = 0; i < adapter->num_tx_queues; i++) { - struct igc_ring *ring = adapter->tx_ring[i]; - - ring->start_time = 0; - ring->end_time = 0; - ring->launchtime_enable = false; + int reg_idx = adapter->tx_ring[i]->reg_idx; + u32 txdctl; wr32(IGC_TXQCTL(i), 0); wr32(IGC_STQT(i), 0); wr32(IGC_ENDQT(i), NSEC_PER_SEC); + + txdctl = rd32(IGC_TXDCTL(reg_idx)); + txdctl &= ~IGC_TXDCTL_PRIORITY_HIGH; + wr32(IGC_TXDCTL(reg_idx), txdctl); } - wr32(IGC_QBVCYCLET_S, NSEC_PER_SEC); + wr32(IGC_QBVCYCLET_S, 0); wr32(IGC_QBVCYCLET, NSEC_PER_SEC); + /* Restore the default Tx arbitration: Priority 0 has the highest + * priority and is assigned to queue 0 and so on and so forth. + */ + igc_tsn_tx_arb(adapter, false); + adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED; return 0; } +/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes + * to 88 Bytes by setting RETX_CTL register using the recommendation from: + * a) Ethernet Controller I225/I226 Specification Update Rev 2.1 + * Item 9: TSN: Packet Transmission Might Cross the Qbv Window + * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control + */ +static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl, watermark; + + retxctl = rd32(IGC_RETX_CTL); + watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK; + /* Set QBVFULLTH value using watermark and set QBVFULLEN */ + retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) | + IGC_RETX_CTL_QBVFULLEN; + wr32(IGC_RETX_CTL, retxctl); +} + +static u8 igc_fpe_get_frag_size_mult(const struct igc_fpe_t *fpe) +{ + u8 mult = (fpe->tx_min_frag_size / TX_MIN_FRAG_SIZE) - 1; + + return clamp_t(u8, mult, MIN_MULTPLIER_TX_MIN_FRAG, + MAX_MULTPLIER_TX_MIN_FRAG); +} + +u32 igc_fpe_get_supported_frag_size(u32 frag_size) +{ + static const u32 supported_sizes[] = { 64, 128, 192, 256 }; + + /* Find the smallest supported size that is >= frag_size */ + for (int i = 0; i < ARRAY_SIZE(supported_sizes); i++) { + if (frag_size <= supported_sizes[i]) + return supported_sizes[i]; + } + + /* Should not happen */ + return TX_MAX_FRAG_SIZE; +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle; ktime_t base_time, systim; + u32 frag_size_mult; int i; - if (adapter->flags & IGC_FLAG_TSN_QBV_ENABLED) - return 0; - - cycle = adapter->cycle_time; - base_time = adapter->base_time; - wr32(IGC_TSAUXC, 0); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); - tqavctrl = rd32(IGC_TQAVCTRL); - tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; - wr32(IGC_TQAVCTRL, tqavctrl); + igc_tsn_set_rxpbsize(adapter, IGC_RXPBSIZE_EXP_BMC_BE_TSN); - wr32(IGC_QBVCYCLET_S, cycle); - wr32(IGC_QBVCYCLET, cycle); + if (igc_is_device_id_i226(hw)) + igc_tsn_set_retx_qbvfullthreshold(adapter); + + if (adapter->strict_priority_enable || + adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO) + igc_tsn_tx_arb(adapter, true); for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; + u32 txdctl = rd32(IGC_TXDCTL(ring->reg_idx)); u32 txqctl = 0; + u16 cbs_value; + u32 tqavcc; wr32(IGC_STQT(i), ring->start_time); wr32(IGC_ENDQT(i), ring->end_time); - if (adapter->base_time) { - /* If we have a base_time we are in "taprio" + if (adapter->taprio_offload_enable) { + /* If taprio_offload_enable is set we are in "taprio" * mode and we need to be strict about the * cycles: only transmit a packet if it can be * completed during that cycle. + * + * If taprio_offload_enable is NOT true when + * enabling TSN offload, the cycle should have + * no external effects, but is only used internally + * to adapt the base time register after a second + * has passed. + * + * Enabling strict mode in this case would + * unnecessarily prevent the transmission of + * certain packets (i.e. at the boundary of a + * second) and thus interfere with the launchtime + * feature that promises transmission at a + * certain point in time. */ txqctl |= IGC_TXQCTL_STRICT_CYCLE | IGC_TXQCTL_STRICT_END; @@ -105,53 +501,222 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) if (ring->launchtime_enable) txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT; + if (!adapter->fpe.tx_enabled) { + /* fpe inactive: clear both flags */ + txqctl &= ~IGC_TXQCTL_PREEMPTIBLE; + txdctl &= ~IGC_TXDCTL_PRIORITY_HIGH; + } else if (ring->preemptible) { + /* fpe active + preemptible: enable preemptible queue + set low priority */ + txqctl |= IGC_TXQCTL_PREEMPTIBLE; + txdctl &= ~IGC_TXDCTL_PRIORITY_HIGH; + } else { + /* fpe active + express: enable express queue + set high priority */ + txqctl &= ~IGC_TXQCTL_PREEMPTIBLE; + txdctl |= IGC_TXDCTL_PRIORITY_HIGH; + } + + wr32(IGC_TXDCTL(ring->reg_idx), txdctl); + + /* Skip configuring CBS for Q2 and Q3 */ + if (i > 1) + goto skip_cbs; + + if (ring->cbs_enable) { + if (i == 0) + txqctl |= IGC_TXQCTL_QAV_SEL_CBS0; + else + txqctl |= IGC_TXQCTL_QAV_SEL_CBS1; + + /* According to i225 datasheet section 7.5.2.7, we + * should set the 'idleSlope' field from TQAVCC + * register following the equation: + * + * value = link-speed 0x7736 * BW * 0.2 + * ---------- * ----------------- (E1) + * 100Mbps 2.5 + * + * Note that 'link-speed' is in Mbps. + * + * 'BW' is the percentage bandwidth out of full + * link speed which can be found with the + * following equation. Note that idleSlope here + * is the parameter from this function + * which is in kbps. + * + * BW = idleSlope + * ----------------- (E2) + * link-speed * 1000 + * + * That said, we can come up with a generic + * equation to calculate the value we should set + * it TQAVCC register by replacing 'BW' in E1 by E2. + * The resulting equation is: + * + * value = link-speed * 0x7736 * idleSlope * 0.2 + * ------------------------------------- (E3) + * 100 * 2.5 * link-speed * 1000 + * + * 'link-speed' is present in both sides of the + * fraction so it is canceled out. The final + * equation is the following: + * + * value = idleSlope * 61036 + * ----------------- (E4) + * 2500000 + * + * NOTE: For i225, given the above, we can see + * that idleslope is represented in + * 40.959433 kbps units by the value at + * the TQAVCC register (2.5Gbps / 61036), + * which reduces the granularity for + * idleslope increments. + * + * In i225 controller, the sendSlope and loCredit + * parameters from CBS are not configurable + * by software so we don't do any + * 'controller configuration' in respect to + * these parameters. + */ + cbs_value = DIV_ROUND_UP_ULL(ring->idleslope + * 61036ULL, 2500000); + + tqavcc = rd32(IGC_TQAVCC(i)); + tqavcc &= ~IGC_TQAVCC_IDLESLOPE_MASK; + tqavcc |= cbs_value | IGC_TQAVCC_KEEP_CREDITS; + wr32(IGC_TQAVCC(i), tqavcc); + + wr32(IGC_TQAVHC(i), + 0x80000000 + ring->hicredit * 0x7736); + } else { + /* Disable any CBS for the queue */ + txqctl &= ~(IGC_TXQCTL_QAV_SEL_MASK); + + /* Set idleSlope to zero. */ + tqavcc = rd32(IGC_TQAVCC(i)); + tqavcc &= ~(IGC_TQAVCC_IDLESLOPE_MASK | + IGC_TQAVCC_KEEP_CREDITS); + wr32(IGC_TQAVCC(i), tqavcc); + + /* Set hiCredit to zero. */ + wr32(IGC_TQAVHC(i), 0); + } +skip_cbs: wr32(IGC_TXQCTL(i), txqctl); } + tqavctrl = rd32(IGC_TQAVCTRL) & ~(IGC_TQAVCTRL_FUTSCDDIS | + IGC_TQAVCTRL_PREEMPT_ENA | IGC_TQAVCTRL_MIN_FRAG_MASK); + tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + + if (adapter->fpe.mmsv.pmac_enabled) + tqavctrl |= IGC_TQAVCTRL_PREEMPT_ENA; + + frag_size_mult = igc_fpe_get_frag_size_mult(&adapter->fpe); + tqavctrl |= FIELD_PREP(IGC_TQAVCTRL_MIN_FRAG_MASK, frag_size_mult); + + adapter->qbv_count++; + + cycle = adapter->cycle_time; + base_time = adapter->base_time; + nsec = rd32(IGC_SYSTIML); sec = rd32(IGC_SYSTIMH); systim = ktime_set(sec, nsec); - if (ktime_compare(systim, base_time) > 0) { - s64 n; + s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); - n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); + } else { + if (igc_is_device_id_i226(hw)) { + ktime_t adjust_time, expires_time; + + /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit + * has to be configured before the cycle time and base time. + * Tx won't hang if a GCL is already running, + * so in this case we don't need to set FutScdDis. + */ + if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) + tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + + nsec = rd32(IGC_SYSTIML); + sec = rd32(IGC_SYSTIMH); + systim = ktime_set(sec, nsec); + + adjust_time = adapter->base_time; + expires_time = ktime_sub_ns(adjust_time, systim); + hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL); + } } - baset_h = div_s64_rem(base_time, NSEC_PER_SEC, &baset_l); + wr32(IGC_TQAVCTRL, tqavctrl); + + wr32(IGC_QBVCYCLET_S, cycle); + wr32(IGC_QBVCYCLET, cycle); + baset_h = div_s64_rem(base_time, NSEC_PER_SEC, &baset_l); wr32(IGC_BASET_H, baset_h); - wr32(IGC_BASET_L, baset_l); - adapter->flags |= IGC_FLAG_TSN_QBV_ENABLED; + /* In i226, Future base time is only supported when FutScdDis bit + * is enabled and only active for re-configuration. + * In this case, initialize the base time with zero to create + * "re-configuration" scenario then only set the desired base time. + */ + if (tqavctrl & IGC_TQAVCTRL_FUTSCDDIS) + wr32(IGC_BASET_L, 0); + wr32(IGC_BASET_L, baset_l); return 0; } -int igc_tsn_offload_apply(struct igc_adapter *adapter) +int igc_tsn_reset(struct igc_adapter *adapter) { - bool is_any_enabled = adapter->base_time || is_any_launchtime(adapter); + unsigned int new_flags; + int err = 0; + + if (adapter->fpe.mmsv.pmac_enabled) { + err = igc_enable_empty_addr_recv(adapter); + if (err && net_ratelimit()) + netdev_err(adapter->netdev, "Error adding empty address to MAC filter\n"); + } else { + igc_disable_empty_addr_recv(adapter); + } - if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED) && !is_any_enabled) - return 0; + new_flags = igc_tsn_new_flags(adapter); + + if (!(new_flags & IGC_FLAG_TSN_ANY_ENABLED)) + return igc_tsn_disable_offload(adapter); + + err = igc_tsn_enable_offload(adapter); + if (err < 0) + return err; - if (!is_any_enabled) { - int err = igc_tsn_disable_offload(adapter); + adapter->flags = new_flags; - if (err < 0) - return err; + return err; +} - /* The BASET registers aren't cleared when writing - * into them, force a reset if the interface is - * running. - */ - if (netif_running(adapter->netdev)) - schedule_work(&adapter->reset_task); +static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter) +{ + bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) & + IGC_FLAG_TSN_ANY_ENABLED); + return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) || + (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter)); +} + +int igc_tsn_offload_apply(struct igc_adapter *adapter) +{ + /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change + * from legacy->tsn or tsn->legacy, then reset adapter is needed. + */ + if (netif_running(adapter->netdev) && + igc_tsn_will_tx_mode_change(adapter)) { + schedule_work(&adapter->reset_task); return 0; } - return igc_tsn_enable_offload(adapter); + igc_tsn_reset(adapter); + + return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index f76bc86ddccd..a95b893459d7 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -4,6 +4,66 @@ #ifndef _IGC_TSN_H_ #define _IGC_TSN_H_ +#include <net/pkt_sched.h> + +#define IGC_RX_MIN_FRAG_SIZE 60 +#define SMD_FRAME_SIZE 60 + +enum igc_txd_popts_type { + SMD_V = 0x01, + SMD_R = 0x02, +}; + +DECLARE_STATIC_KEY_FALSE(igc_fpe_enabled); + +void igc_fpe_init(struct igc_adapter *adapter); +void igc_fpe_clear_preempt_queue(struct igc_adapter *adapter); +void igc_fpe_save_preempt_queue(struct igc_adapter *adapter, + const struct tc_mqprio_qopt_offload *mqprio); +u32 igc_fpe_get_supported_frag_size(u32 frag_size); int igc_tsn_offload_apply(struct igc_adapter *adapter); +int igc_tsn_reset(struct igc_adapter *adapter); +void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); + +static inline bool igc_fpe_is_pmac_enabled(struct igc_adapter *adapter) +{ + return static_branch_unlikely(&igc_fpe_enabled) && + adapter->fpe.mmsv.pmac_enabled; +} + +static inline bool igc_fpe_handle_mpacket(struct igc_adapter *adapter, + union igc_adv_rx_desc *rx_desc, + unsigned int size, void *pktbuf) +{ + u32 status_error = le32_to_cpu(rx_desc->wb.upper.status_error); + int smd; + + smd = FIELD_GET(IGC_RXDADV_STAT_SMD_TYPE_MASK, status_error); + if (smd != IGC_RXD_STAT_SMD_TYPE_V && smd != IGC_RXD_STAT_SMD_TYPE_R) + return false; + + if (size == SMD_FRAME_SIZE && mem_is_zero(pktbuf, SMD_FRAME_SIZE)) { + struct ethtool_mmsv *mmsv = &adapter->fpe.mmsv; + enum ethtool_mmsv_event event; + + if (smd == IGC_RXD_STAT_SMD_TYPE_V) + event = ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET; + else + event = ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET; + + ethtool_mmsv_event_handle(mmsv, event); + } + + return true; +} + +static inline bool igc_fpe_transmitted_smd_v(union igc_adv_tx_desc *tx_desc) +{ + u32 olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status); + u8 smd = FIELD_GET(IGC_TXD_POPTS_SMD_MASK, olinfo_status); + + return smd == SMD_V; +} #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c index a8cf5374be47..9eb47b4beb06 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020, Intel Corporation. */ +#include <linux/if_vlan.h> #include <net/xdp_sock_drv.h> #include "igc.h" @@ -12,6 +13,8 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, struct net_device *dev = adapter->netdev; bool if_running = netif_running(dev); struct bpf_prog *old_prog; + bool need_update; + unsigned int i; if (dev->mtu > ETH_DATA_LEN) { /* For now, the driver doesn't support XDP functionality with @@ -21,15 +24,31 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, return -EOPNOTSUPP; } - if (if_running) - igc_close(dev); + need_update = !!adapter->xdp_prog != !!prog; + if (if_running && need_update) { + for (i = 0; i < adapter->num_rx_queues; i++) { + igc_disable_rx_ring(adapter->rx_ring[i]); + igc_disable_tx_ring(adapter->tx_ring[i]); + napi_disable(&adapter->rx_ring[i]->q_vector->napi); + } + } old_prog = xchg(&adapter->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); - if (if_running) - igc_open(dev); + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + + if (if_running && need_update) { + for (i = 0; i < adapter->num_rx_queues; i++) { + napi_enable(&adapter->rx_ring[i]->q_vector->napi); + igc_enable_tx_ring(adapter->tx_ring[i]); + igc_enable_rx_ring(adapter->rx_ring[i]); + } + } return 0; } |
