Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r--  include/linux/netdevice.h  777
1 file changed, 552 insertions(+), 225 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb37817d6382..adb14db25798 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -40,9 +40,9 @@
#include <net/dcbnl.h>
#endif
#include <net/netprio_cgroup.h>
-
#include <linux/netdev_features.h>
#include <linux/neighbour.h>
+#include <linux/netdevice_xmit.h>
#include <uapi/linux/netdevice.h>
#include <uapi/linux/if_bonding.h>
#include <uapi/linux/pkt_cls.h>
@@ -52,6 +52,7 @@
#include <net/net_trackers.h>
#include <net/net_debug.h>
#include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
struct netpoll_info;
struct device;
@@ -59,9 +60,10 @@ struct ethtool_ops;
struct kernel_hwtstamp_config;
struct phy_device;
struct dsa_port;
-struct ip_tunnel_parm;
+struct ip_tunnel_parm_kern;
struct macsec_context;
struct macsec_ops;
+struct netdev_config;
struct netdev_name_node;
struct sd_flow_limit;
struct sfp_bus;
@@ -79,6 +81,9 @@ struct xdp_buff;
struct xdp_frame;
struct xdp_metadata_ops;
struct xdp_md;
+struct ethtool_netdev_state;
+struct phy_link_topology;
+struct hwtstamp_provider;
typedef u32 xdp_features_t;
@@ -335,11 +340,38 @@ struct gro_list {
};
/*
- * size of gro hash buckets, must less than bit number of
- * napi_struct::gro_bitmask
+ * size of gro hash buckets, must be <= the number of bits in
+ * gro_node::bitmask
*/
#define GRO_HASH_BUCKETS 8
+/**
+ * struct gro_node - structure to support Generic Receive Offload
+ * @bitmask: bitmask to indicate used buckets in @hash
+ * @hash: hashtable of pending aggregated skbs, separated by flows
+ * @rx_list: list of pending ``GRO_NORMAL`` skbs
+ * @rx_count: cached current length of @rx_list
+ * @cached_napi_id: napi_struct::napi_id cached for hotpath, 0 for standalone
+ */
+struct gro_node {
+ unsigned long bitmask;
+ struct gro_list hash[GRO_HASH_BUCKETS];
+ struct list_head rx_list;
+ u32 rx_count;
+ u32 cached_napi_id;
+};
+
+/*
+ * Structure for per-NAPI config
+ */
+struct napi_config {
+ u64 gro_flush_timeout;
+ u64 irq_suspend_timeout;
+ u32 defer_hard_irqs;
+ cpumask_t affinity_mask;
+ unsigned int napi_id;
+};
+
/*
* Structure for NAPI scheduling similar to tasklet but with weighting
*/
@@ -354,8 +386,7 @@ struct napi_struct {
unsigned long state;
int weight;
- int defer_hard_irqs_count;
- unsigned long gro_bitmask;
+ u32 defer_hard_irqs_count;
int (*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
/* CPU actively polling if netpoll is configured */
@@ -364,17 +395,23 @@ struct napi_struct {
/* CPU on which NAPI has been scheduled for processing */
int list_owner;
struct net_device *dev;
- struct gro_list gro_hash[GRO_HASH_BUCKETS];
struct sk_buff *skb;
- struct list_head rx_list; /* Pending GRO_NORMAL skbs */
- int rx_count; /* length of rx_list */
- unsigned int napi_id;
+ struct gro_node gro;
struct hrtimer timer;
+ /* all fields past this point are write-protected by netdev_lock */
struct task_struct *thread;
+ unsigned long gro_flush_timeout;
+ unsigned long irq_suspend_timeout;
+ u32 defer_hard_irqs;
/* control-path-only fields follow */
+ u32 napi_id;
struct list_head dev_list;
struct hlist_node napi_hash_node;
int irq;
+ struct irq_affinity_notify notify;
+ int napi_rmap_idx;
+ int index;
+ struct napi_config *config;
};
enum {
@@ -388,6 +425,7 @@ enum {
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
+ NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */
};
enum {
@@ -401,6 +439,7 @@ enum {
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
+ NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER),
};
enum gro_result {
@@ -491,7 +530,7 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n)
* is scheduled for example in the context of delayed timer
* that can be skipped if a NAPI is already scheduled.
*
- * Return True if NAPI is scheduled, False otherwise.
+ * Return: True if NAPI is scheduled, False otherwise.
*/
static inline bool napi_is_scheduled(struct napi_struct *n)
{
@@ -506,7 +545,7 @@ bool napi_schedule_prep(struct napi_struct *n);
*
* Schedule NAPI poll routine to be called if it is not already
* running.
- * Return true if we schedule a NAPI or false if not.
+ * Return: true if we schedule a NAPI or false if not.
* Refer to napi_schedule_prep() for additional reason on why
* a NAPI might not be scheduled.
*/
@@ -540,7 +579,7 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
* Mark NAPI processing as complete. Should only be called if poll budget
* has not been completely consumed.
* Prefer over napi_complete().
- * Return false if device should avoid rearming interrupts.
+ * Return: false if device should avoid rearming interrupts.
*/
bool napi_complete_done(struct napi_struct *n, int work_done);
@@ -551,16 +590,11 @@ static inline bool napi_complete(struct napi_struct *n)
int dev_set_threaded(struct net_device *dev, bool threaded);
-/**
- * napi_disable - prevent NAPI from scheduling
- * @n: NAPI context
- *
- * Stop NAPI from being scheduled on this context.
- * Waits till any outstanding processing completes.
- */
void napi_disable(struct napi_struct *n);
+void napi_disable_locked(struct napi_struct *n);
void napi_enable(struct napi_struct *n);
+void napi_enable_locked(struct napi_struct *n);
/**
* napi_synchronize - wait until NAPI is not running
@@ -642,9 +676,7 @@ struct netdev_queue {
struct Qdisc __rcu *qdisc_sleeping;
#ifdef CONFIG_SYSFS
struct kobject kobj;
-#endif
-#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
- int numa_node;
+ const struct attribute_group **groups;
#endif
unsigned long tx_maxrate;
/*
@@ -656,15 +688,16 @@ struct netdev_queue {
/* Subordinate device that the queue has been assigned to */
struct net_device *sb_dev;
#ifdef CONFIG_XDP_SOCKETS
+ /* "ops protected", see comment about net_device::lock */
struct xsk_buff_pool *pool;
#endif
- /* NAPI instance for the queue
- * Readers and writers must hold RTNL
- */
- struct napi_struct *napi;
+
/*
* write-mostly part
*/
+#ifdef CONFIG_BQL
+ struct dql dql;
+#endif
spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
int xmit_lock_owner;
/*
@@ -674,8 +707,16 @@ struct netdev_queue {
unsigned long state;
-#ifdef CONFIG_BQL
- struct dql dql;
+/*
+ * slow- / control-path part
+ */
+ /* NAPI instance for the queue
+ * "ops protected", see comment about net_device::lock
+ */
+ struct napi_struct *napi;
+
+#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+ int numa_node;
#endif
} ____cacheline_aligned_in_smp;
@@ -972,9 +1013,13 @@ struct netdev_bpf {
#ifdef CONFIG_XFRM_OFFLOAD
struct xfrmdev_ops {
- int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack);
- void (*xdo_dev_state_delete) (struct xfrm_state *x);
- void (*xdo_dev_state_free) (struct xfrm_state *x);
+ int (*xdo_dev_state_add)(struct net_device *dev,
+ struct xfrm_state *x,
+ struct netlink_ext_ack *extack);
+ void (*xdo_dev_state_delete)(struct net_device *dev,
+ struct xfrm_state *x);
+ void (*xdo_dev_state_free)(struct net_device *dev,
+ struct xfrm_state *x);
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
@@ -1064,8 +1109,8 @@ struct netdev_net_notifier {
*
* int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
* Old-style ioctl entry point. This is used internally by the
- * appletalk and ieee802154 subsystems but is no longer called by
- * the device ioctl handler.
+ * ieee802154 subsystem but is no longer called by the device
+ * ioctl handler.
*
* int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd);
* Used by the bonding driver for its device specific ioctls:
@@ -1225,12 +1270,17 @@ struct netdev_net_notifier {
* int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
* struct net_device *dev,
* const unsigned char *addr, u16 vid, u16 flags,
- * struct netlink_ext_ack *extack);
+ * bool *notified, struct netlink_ext_ack *extack);
* Adds an FDB entry to dev for addr.
+ * Callee shall set *notified to true if it sent any appropriate
+ * notification(s). Otherwise core will send a generic one.
* int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
* struct net_device *dev,
- * const unsigned char *addr, u16 vid)
- * Deletes the FDB entry from dev coresponding to addr.
+ * const unsigned char *addr, u16 vid,
+ * bool *notified, struct netlink_ext_ack *extack);
+ * Deletes the FDB entry from dev corresponding to addr.
+ * Callee shall set *notified to true if it sent any appropriate
+ * notification(s). Otherwise core will send a generic one.
* int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev,
* struct netlink_ext_ack *extack);
* int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
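
The *notified contract above is easiest to see in code. A minimal sketch of a conforming ndo_fdb_add, assuming hypothetical driver helpers foo_fdb_insert() and foo_fdb_notify():

#include <linux/netdevice.h>

static int foo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                       struct net_device *dev, const unsigned char *addr,
                       u16 vid, u16 flags, bool *notified,
                       struct netlink_ext_ack *extack)
{
        int err;

        err = foo_fdb_insert(dev, addr, vid);  /* hypothetical helper */
        if (err)
                return err;

        /* Driver sent its own RTM_NEWNEIGH; suppress the generic one. */
        foo_fdb_notify(dev, addr, vid);        /* hypothetical helper */
        *notified = true;
        return 0;
}
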
@@ -1327,7 +1377,7 @@ struct netdev_net_notifier {
* queue id bound to an AF_XDP socket. The flags field specifies if
* only RX, only Tx, or both should be woken up using the flags
* XDP_WAKEUP_RX and XDP_WAKEUP_TX.
- * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
+ * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm_kern *p,
* int cmd);
* Add, change, delete or get information on an IPv4 tunnel.
* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
@@ -1405,8 +1455,7 @@ struct net_device_ops {
__be16 proto, u16 vid);
#ifdef CONFIG_NET_POLL_CONTROLLER
void (*ndo_poll_controller)(struct net_device *dev);
- int (*ndo_netpoll_setup)(struct net_device *dev,
- struct netpoll_info *info);
+ int (*ndo_netpoll_setup)(struct net_device *dev);
void (*ndo_netpoll_cleanup)(struct net_device *dev);
#endif
int (*ndo_set_vf_mac)(struct net_device *dev,
@@ -1503,12 +1552,15 @@ struct net_device_ops {
const unsigned char *addr,
u16 vid,
u16 flags,
+ bool *notified,
struct netlink_ext_ack *extack);
int (*ndo_fdb_del)(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr,
- u16 vid, struct netlink_ext_ack *extack);
+ u16 vid,
+ bool *notified,
+ struct netlink_ext_ack *extack);
int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh,
struct net_device *dev,
struct netlink_ext_ack *extack);
@@ -1583,7 +1635,8 @@ struct net_device_ops {
int (*ndo_xsk_wakeup)(struct net_device *dev,
u32 queue_id, u32 flags);
int (*ndo_tunnel_ctl)(struct net_device *dev,
- struct ip_tunnel_parm *p, int cmd);
+ struct ip_tunnel_parm_kern *p,
+ int cmd);
struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
struct net_device_path *path);
@@ -1595,6 +1648,14 @@ struct net_device_ops {
int (*ndo_hwtstamp_set)(struct net_device *dev,
struct kernel_hwtstamp_config *kernel_config,
struct netlink_ext_ack *extack);
+
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+ /**
+ * @net_shaper_ops: Device shaping offload operations
+ * see include/net/net_shapers.h
+ */
+ const struct net_shaper_ops *net_shaper_ops;
+#endif
};
/**
@@ -1605,7 +1666,8 @@ struct net_device_ops {
* userspace; this means that the order of these flags can change
* during any kernel release.
*
- * You should have a pretty good reason to be extending these flags.
+ * You should add bitfield booleans after either net_device::priv_flags
+ * (hotpath) or ::threaded (slowpath) instead of extending these flags.
*
* @IFF_802_1Q_VLAN: 802.1Q VLAN device
* @IFF_EBRIDGE: Ethernet bridging device
@@ -1644,10 +1706,6 @@ struct net_device_ops {
* @IFF_NO_ADDRCONF: prevent ipv6 addrconf
* @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
* skb_headlen(skb) == 0 (data starts from frag0)
- * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN
- * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to
- * ndo_hwtstamp_set() for all timestamp requests regardless of source,
- * even if those aren't HWTSTAMP_SOURCE_NETDEV.
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1682,42 +1740,8 @@ enum netdev_priv_flags {
IFF_L3MDEV_RX_HANDLER = 1<<29,
IFF_NO_ADDRCONF = BIT_ULL(30),
IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
- IFF_CHANGE_PROTO_DOWN = BIT_ULL(32),
- IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33),
};
-#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
-#define IFF_EBRIDGE IFF_EBRIDGE
-#define IFF_BONDING IFF_BONDING
-#define IFF_ISATAP IFF_ISATAP
-#define IFF_WAN_HDLC IFF_WAN_HDLC
-#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE
-#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE
-#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL
-#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT
-#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT
-#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH
-#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING
-#define IFF_UNICAST_FLT IFF_UNICAST_FLT
-#define IFF_TEAM_PORT IFF_TEAM_PORT
-#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS
-#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE
-#define IFF_MACVLAN IFF_MACVLAN
-#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
-#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER
-#define IFF_NO_QUEUE IFF_NO_QUEUE
-#define IFF_OPENVSWITCH IFF_OPENVSWITCH
-#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE
-#define IFF_TEAM IFF_TEAM
-#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
-#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM
-#define IFF_MACSEC IFF_MACSEC
-#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
-#define IFF_FAILOVER IFF_FAILOVER
-#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
-#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
-#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
-
/* Specifies the type of the struct net_device::ml_priv pointer */
enum netdev_ml_priv_type {
ML_PRIV_NONE,
@@ -1747,6 +1771,13 @@ enum netdev_reg_state {
* data with strictly "high-level" data, and it has to know about
* almost every data structure used in the INET module.
*
+ * @priv_flags: flags invisible to userspace defined as bits, see
+ * enum netdev_priv_flags for the definitions
+ * @lltx: device supports lockless Tx. Deprecated for real HW
+ * drivers. Mainly used by logical interfaces, such as
+ * bonding and tunnels
+ * @netmem_tx: device supports netmem_tx.
+ *
* @name: This is the first field of the "visible" part of this structure
* (i.e. as seen by users in the "Space.c" file). It is the name
* of the interface.
@@ -1796,7 +1827,6 @@ enum netdev_reg_state {
* @wireless_handlers: List of functions to handle Wireless Extensions,
* instead of ioctl,
* see <net/iw_handler.h> for details.
- * @wireless_data: Instance data managed by the core of wireless extensions
*
* @netdev_ops: Includes several pointers to callbacks,
* if one wants to override the ndo_*() functions
@@ -1813,10 +1843,9 @@ enum netdev_reg_state {
*
* @flags: Interface flags (a la BSD)
* @xdp_features: XDP capability supported by the device
- * @priv_flags: Like 'flags' but invisible to userspace,
- * see if.h for the definitions
* @gflags: Global flags ( kept as legacy )
- * @padded: How much padding added by alloc_netdev()
+ * @priv_len: Size of the ->priv flexible array
+ * @priv: Flexible array containing private data
* @operstate: RFC2863 operstate
* @link_mode: Mapping policy to operstate
* @if_port: Selectable AUI, TP, ...
@@ -1882,9 +1911,6 @@ enum netdev_reg_state {
* allocated at register_netdev() time
* @real_num_rx_queues: Number of RX queues currently active in device
* @xdp_prog: XDP sockets filter program pointer
- * @gro_flush_timeout: timeout for GRO layer in NAPI
- * @napi_defer_hard_irqs: If not zero, provides a counter that would
- * allow to avoid NIC hard IRQ, on busy queues.
*
* @rx_handler: handler for received packets
* @rx_handler_data: XXX: need comments on this one
@@ -1925,13 +1951,11 @@ enum netdev_reg_state {
*
* @reg_state: Register/unregister state machine
* @dismantle: Device is going to be freed
- * @rtnl_link_state: This enum represents the phases of creating
- * a new link
- *
* @needs_free_netdev: Should unregister perform free_netdev?
* @priv_destructor: Called from unregister
* @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside
+ * protected by @lock
*
* @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type
@@ -1956,6 +1980,7 @@ enum netdev_reg_state {
* @sysfs_rx_queue_group: Space for optional per-rx queue attributes
* @rtnl_link_ops: Rtnl_link_ops
* @stat_ops: Optional ops for queue-aware statistics
+ * @queue_mgmt_ops: Optional ops for queue management
*
* @gso_max_size: Maximum size of generic segmentation offload
* @tso_max_size: Device (as in HW) limit on the max TSO request size
@@ -1973,6 +1998,7 @@ enum netdev_reg_state {
* @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp
*
* @priomap: XXX: need comments on this one
+ * @link_topo: Physical link topology tracking attached PHYs
* @phydev: Physical device may attach itself
* for hardware timestamping
* @sfp_bus: attached &struct sfp_bus structure.
@@ -1983,10 +2009,25 @@ enum netdev_reg_state {
* switch driver and used to set the phys state of the
* switch port.
*
- * @wol_enabled: Wake-on-LAN is enabled
- *
* @threaded: napi threaded mode is enabled
*
+ * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ
+ * affinity. Set by netif_set_affinity_auto(); the driver
+ * must then create a persistent NAPI via
+ * netif_napi_add_config() and finally bind the NAPI to an
+ * IRQ (via netif_napi_set_irq()).
+ *
+ * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
+ * Set by calling netif_enable_cpu_rmap().
+ *
+ * @see_all_hwtstamp_requests: device wants to see calls to
+ * ndo_hwtstamp_set() for all timestamp requests
+ * regardless of source, even if those aren't
+ * HWTSTAMP_SOURCE_NETDEV
+ * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN
+ * @netns_immutable: interface can't change network namespaces
+ * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes
+ *
* @net_notifier_list: List of per-net netdev notifier block
* that follow this device when it is moved
* to another network namespace.
@@ -1996,6 +2037,7 @@ enum netdev_reg_state {
* @udp_tunnel_nic_info: static structure describing the UDP tunnel
* offload capabilities of the device
* @udp_tunnel_nic: UDP tunnel offload state
+ * @ethtool: ethtool related state
* @xdp_state: stores info on attached XDP BPF programs
*
* @nested_level: Used as a parameter of spin_lock_nested() of
@@ -2024,6 +2066,17 @@ enum netdev_reg_state {
* @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem,
* where the clock is recovered.
*
+ * @max_pacing_offload_horizon: max EDT offload horizon in nsec.
+ * @napi_config: An array of napi_config structures containing per-NAPI
+ * settings.
+ * @gro_flush_timeout: timeout for GRO layer in NAPI
+ * @napi_defer_hard_irqs: if not zero, provides a counter that allows
+ * avoiding NIC hard IRQs on busy queues.
+ *
+ * @neighbours: List heads pointing to this device's neighbours'
+ * dev_list, one per address-family.
+ * @hwprov: Tracks which PTP performs hardware packet time stamping.
+ *
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
@@ -2036,7 +2089,11 @@ struct net_device {
/* TX read-mostly hotpath */
__cacheline_group_begin(net_device_read_tx);
- unsigned long long priv_flags;
+ struct_group(priv_flags_fast,
+ unsigned long priv_flags:32;
+ unsigned long lltx:1;
+ unsigned long netmem_tx:1;
+ );
const struct net_device_ops *netdev_ops;
const struct header_ops *header_ops;
struct netdev_queue *_tx;
@@ -2086,8 +2143,6 @@ struct net_device {
int ifindex;
unsigned int real_num_rx_queues;
struct netdev_rx_queue *_rx;
- unsigned long gro_flush_timeout;
- int napi_defer_hard_irqs;
unsigned int gro_max_size;
unsigned int gro_ipv4_max_size;
rx_handler_func_t __rcu *rx_handler;
@@ -2162,7 +2217,6 @@ struct net_device {
#ifdef CONFIG_WIRELESS_EXT
const struct iw_handler_def *wireless_handlers;
- struct iw_public_data *wireless_data;
#endif
const struct ethtool_ops *ethtool_ops;
#ifdef CONFIG_NET_L3_MASTER_DEV
@@ -2196,10 +2250,10 @@ struct net_device {
unsigned short neigh_priv_len;
unsigned short dev_id;
unsigned short dev_port;
- unsigned short padded;
+ int irq;
+ u32 priv_len;
spinlock_t addr_list_lock;
- int irq;
struct netdev_hw_addr_list uc;
struct netdev_hw_addr_list mc;
@@ -2221,6 +2275,9 @@ struct net_device {
/* Protocol-specific pointers */
struct in_device __rcu *ip_ptr;
+ /** @fib_nh_head: nexthops associated with this netdev */
+ struct hlist_head fib_nh_head;
+
#if IS_ENABLED(CONFIG_VLAN_8021Q)
struct vlan_info __rcu *vlan_info;
#endif
@@ -2234,7 +2291,7 @@ struct net_device {
void *atalk_ptr;
#endif
#if IS_ENABLED(CONFIG_AX25)
- void *ax25_ptr;
+ struct ax25_dev __rcu *ax25_ptr;
#endif
#if IS_ENABLED(CONFIG_CFG80211)
struct wireless_dev *ieee80211_ptr;
@@ -2307,10 +2364,10 @@ struct net_device {
bool dismantle;
- enum {
- RTNL_LINK_INITIALIZED,
- RTNL_LINK_INITIALIZING,
- } rtnl_link_state:16;
+ /** @moving_ns: device is changing netns, protected by @lock */
+ bool moving_ns;
+ /** @rtnl_link_initializing: Device being created, suppress events */
+ bool rtnl_link_initializing;
bool needs_free_netdev;
void (*priv_destructor)(struct net_device *dev);
@@ -2338,6 +2395,8 @@ struct net_device {
const struct netdev_stat_ops *stat_ops;
+ const struct netdev_queue_mgmt_ops *queue_mgmt_ops;
+
/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SEGS 65535u
#define GSO_LEGACY_MAX_SIZE 65536u
@@ -2363,12 +2422,20 @@ struct net_device {
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
struct netprio_map __rcu *priomap;
#endif
+ struct phy_link_topology *link_topo;
struct phy_device *phydev;
struct sfp_bus *sfp_bus;
struct lock_class_key *qdisc_tx_busylock;
bool proto_down;
- unsigned wol_enabled:1;
- unsigned threaded:1;
+ bool threaded;
+ bool irq_affinity_auto;
+ bool rx_cpu_rmap_auto;
+
+ /* priv_flags_slow, ungrouped to save space */
+ unsigned long see_all_hwtstamp_requests:1;
+ unsigned long change_proto_down:1;
+ unsigned long netns_immutable:1;
+ unsigned long fcoe_mtu:1;
struct list_head net_notifier_list;
@@ -2379,6 +2446,16 @@ struct net_device {
const struct udp_tunnel_nic_info *udp_tunnel_nic_info;
struct udp_tunnel_nic *udp_tunnel_nic;
+ /** @cfg: net_device queue-related configuration */
+ struct netdev_config *cfg;
+ /**
+ * @cfg_pending: same as @cfg, but while the device is being actively
+ * reconfigured it also includes configuration changes
+ * requested by the user that may still be rejected.
+ */
+ struct netdev_config *cfg_pending;
+ struct ethtool_netdev_state *ethtool;
+
/* protected by rtnl_lock */
struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE];
@@ -2397,7 +2474,85 @@ struct net_device {
/** @page_pools: page pools created for this netdevice */
struct hlist_head page_pools;
#endif
-};
+
+ /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
+ struct dim_irq_moder *irq_moder;
+
+ u64 max_pacing_offload_horizon;
+ struct napi_config *napi_config;
+ unsigned long gro_flush_timeout;
+ u32 napi_defer_hard_irqs;
+
+ /**
+ * @up: copy of @state's IFF_UP, but safe to read with just @lock.
+ * May report false negatives while the device is being opened
+ * or closed (@lock does not protect .ndo_open or .ndo_close).
+ */
+ bool up;
+
+ /**
+ * @request_ops_lock: request the core to run all @netdev_ops and
+ * @ethtool_ops under the @lock.
+ */
+ bool request_ops_lock;
+
+ /**
+ * @lock: netdev-scope lock, protects a small selection of fields.
+ * Should always be taken using netdev_lock() / netdev_unlock() helpers.
+ * Drivers are free to use it for other protection.
+ *
+ * For the drivers that implement shaper or queue API, the scope
+ * of this lock is expanded to cover most ndo/queue/ethtool/sysfs
+ * operations. Drivers may opt-in to this behavior by setting
+ * @request_ops_lock.
+ *
+ * @lock protection mixes with rtnl_lock in multiple ways, fields are
+ * either:
+ *
+ * - simply protected by the instance @lock;
+ *
+ * - double protected - writers hold both locks, readers hold either;
+ *
+ * - ops protected - protected by the lock held around the NDOs
+ * and other callbacks, that is the instance lock on devices for
+ * which netdev_need_ops_lock() returns true, otherwise by rtnl_lock;
+ *
+ * - double ops protected - always protected by rtnl_lock but for
+ * devices for which netdev_need_ops_lock() returns true - also
+ * the instance lock.
+ *
+ * Simply protects:
+ * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list,
+ * @net_shaper_hierarchy, @reg_state, @threaded
+ *
+ * Double protects:
+ * @up, @moving_ns, @nd_net, @xdp_features
+ *
+ * Double ops protects:
+ * @real_num_rx_queues, @real_num_tx_queues
+ *
+ * Also protects some fields in:
+ * struct napi_struct, struct netdev_queue, struct netdev_rx_queue
+ *
+ * Ordering: take after rtnl_lock.
+ */
+ struct mutex lock;
+
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+ /**
+ * @net_shaper_hierarchy: data tracking the current shaper status
+ * see include/net/net_shapers.h
+ */
+ struct net_shaper_hierarchy *net_shaper_hierarchy;
+#endif
+
+ struct hlist_head neighbours[NEIGH_NR_TABLES];
+
+ struct hwtstamp_provider __rcu *hwprov;
+
+ u8 priv[] ____cacheline_aligned
+ __counted_by(priv_len);
+} ____cacheline_aligned;
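
With IFF_CHANGE_PROTO_DOWN and IFF_SEE_ALL_HWTSTAMP_REQUESTS gone from priv_flags, setup code now sets the replacement bitfield booleans directly. A sketch with a hypothetical foo_setup():

#include <linux/etherdevice.h>

static void foo_setup(struct net_device *dev)
{
        ether_setup(dev);
        dev->lltx = true;               /* logical device: lockless Tx */
        dev->change_proto_down = true;  /* was IFF_CHANGE_PROTO_DOWN */
}
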
#define to_net_dev(d) container_of(d, struct net_device, dev)
/*
@@ -2500,21 +2655,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
f(dev, &dev->_tx[i], arg);
}
-#define netdev_lockdep_set_classes(dev) \
-{ \
- static struct lock_class_key qdisc_tx_busylock_key; \
- static struct lock_class_key qdisc_xmit_lock_key; \
- static struct lock_class_key dev_addr_list_lock_key; \
- unsigned int i; \
- \
- (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
- lockdep_set_class(&(dev)->addr_list_lock, \
- &dev_addr_list_lock_key); \
- for (i = 0; i < (dev)->num_tx_queues; i++) \
- lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \
- &qdisc_xmit_lock_key); \
-}
-
u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
@@ -2574,6 +2714,12 @@ struct net *dev_net(const struct net_device *dev)
}
static inline
+struct net *dev_net_rcu(const struct net_device *dev)
+{
+ return read_pnet_rcu(&dev->nd_net);
+}
+
+static inline
void dev_net_set(struct net_device *dev, struct net *net)
{
write_pnet(&dev->nd_net, net);
@@ -2587,7 +2733,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
*/
static inline void *netdev_priv(const struct net_device *dev)
{
- return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
+ return (void *)dev->priv;
}
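
netdev_priv() now resolves to the @priv flexible array instead of pointer arithmetic past the struct; the driver-facing pattern is unchanged. A sketch with a hypothetical foo_priv:

#include <linux/etherdevice.h>

struct foo_priv {
        u32 msg_enable;
};

static struct net_device *foo_alloc(void)
{
        struct net_device *dev;
        struct foo_priv *priv;

        dev = alloc_etherdev(sizeof(struct foo_priv));
        if (!dev)
                return NULL;

        priv = netdev_priv(dev);        /* now simply dev->priv */
        priv->msg_enable = 0;
        return dev;
}
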
/* Set the sysfs physical device reference for the network logical device
@@ -2605,9 +2751,24 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
enum netdev_queue_type type,
struct napi_struct *napi);
+static inline void netdev_lock(struct net_device *dev)
+{
+ mutex_lock(&dev->lock);
+}
+
+static inline void netdev_unlock(struct net_device *dev)
+{
+ mutex_unlock(&dev->lock);
+}
+/* Additional netdev_lock()-related helpers are in net/netdev_lock.h */
+
+void netif_napi_set_irq_locked(struct napi_struct *napi, int irq);
+
static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
{
- napi->irq = irq;
+ netdev_lock(napi->dev);
+ netif_napi_set_irq_locked(napi, irq);
+ netdev_unlock(napi->dev);
}
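
A sketch of the helpers in use, reading a field that the @lock kernel-doc in struct net_device lists as "simply protected" (here @threaded):

static bool foo_napi_threaded(struct net_device *dev)
{
        bool threaded;

        netdev_lock(dev);
        threaded = dev->threaded;       /* "simply protected" by @lock */
        netdev_unlock(dev);

        return threaded;
}
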
/* Default NAPI poll() weight
@@ -2615,8 +2776,19 @@ static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
*/
#define NAPI_POLL_WEIGHT 64
-void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int), int weight);
+void netif_napi_add_weight_locked(struct net_device *dev,
+ struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int),
+ int weight);
+
+static inline void
+netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
+{
+ netdev_lock(dev);
+ netif_napi_add_weight_locked(dev, napi, poll, weight);
+ netdev_unlock(dev);
+}
/**
* netif_napi_add() - initialize a NAPI context
@@ -2635,6 +2807,13 @@ netif_napi_add(struct net_device *dev, struct napi_struct *napi,
}
static inline void
+netif_napi_add_locked(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int))
+{
+ netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT);
+}
+
+static inline void
netif_napi_add_tx_weight(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
@@ -2644,6 +2823,31 @@ netif_napi_add_tx_weight(struct net_device *dev,
netif_napi_add_weight(dev, napi, poll, weight);
}
+static inline void
+netif_napi_add_config_locked(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int index)
+{
+ napi->index = index;
+ napi->config = &dev->napi_config[index];
+ netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT);
+}
+
+/**
+ * netif_napi_add_config - initialize a NAPI context with persistent config
+ * @dev: network device
+ * @napi: NAPI context
+ * @poll: polling function
+ * @index: the NAPI index
+ */
+static inline void
+netif_napi_add_config(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int index)
+{
+ netdev_lock(dev);
+ netif_napi_add_config_locked(dev, napi, poll, index);
+ netdev_unlock(dev);
+}
+
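
Tying the pieces together, a hypothetical driver (foo_*) with persistent per-queue NAPI state might register and wire up its NAPIs as below; @index selects the dev->napi_config[] slot whose settings survive NAPI teardown:

struct foo_ring {
        struct napi_struct napi;
        int irq;
        /* hypothetical per-ring state */
};

static int foo_poll(struct napi_struct *napi, int budget);

static void foo_add_napis(struct net_device *dev, struct foo_ring *rings,
                          unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; i++) {
                netif_napi_add_config(dev, &rings[i].napi, foo_poll, i);
                netif_napi_set_irq(&rings[i].napi, rings[i].irq);
        }
}
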
/**
* netif_napi_add_tx() - initialize a NAPI context to be used for Tx only
* @dev: network device
@@ -2661,6 +2865,8 @@ static inline void netif_napi_add_tx(struct net_device *dev,
netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
}
+void __netif_napi_del_locked(struct napi_struct *napi);
+
/**
* __netif_napi_del - remove a NAPI context
* @napi: NAPI context
@@ -2669,7 +2875,18 @@ static inline void netif_napi_add_tx(struct net_device *dev,
* containing @napi. Drivers might want to call this helper to combine
* all the needed RCU grace periods into a single one.
*/
-void __netif_napi_del(struct napi_struct *napi);
+static inline void __netif_napi_del(struct napi_struct *napi)
+{
+ netdev_lock(napi->dev);
+ __netif_napi_del_locked(napi);
+ netdev_unlock(napi->dev);
+}
+
+static inline void netif_napi_del_locked(struct napi_struct *napi)
+{
+ __netif_napi_del_locked(napi);
+ synchronize_net();
+}
/**
* netif_napi_del - remove a NAPI context
@@ -2683,6 +2900,9 @@ static inline void netif_napi_del(struct napi_struct *napi)
synchronize_net();
}
+int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs);
+void netif_set_affinity_auto(struct net_device *dev);
+
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
bool ignore_outgoing;
@@ -2727,12 +2947,12 @@ struct pcpu_sw_netstats {
} __aligned(4 * sizeof(u64));
struct pcpu_dstats {
- u64 rx_packets;
- u64 rx_bytes;
- u64 rx_drops;
- u64 tx_packets;
- u64 tx_bytes;
- u64 tx_drops;
+ u64_stats_t rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t tx_packets;
+ u64_stats_t tx_bytes;
+ u64_stats_t rx_drops;
+ u64_stats_t tx_drops;
struct u64_stats_sync syncp;
} __aligned(8 * sizeof(u64));
@@ -2776,6 +2996,46 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len)
u64_stats_update_end(&lstats->syncp);
}
+static inline void dev_dstats_rx_add(struct net_device *dev,
+ unsigned int len)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->rx_packets);
+ u64_stats_add(&dstats->rx_bytes, len);
+ u64_stats_update_end(&dstats->syncp);
+}
+
+static inline void dev_dstats_rx_dropped(struct net_device *dev)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->rx_drops);
+ u64_stats_update_end(&dstats->syncp);
+}
+
+static inline void dev_dstats_tx_add(struct net_device *dev,
+ unsigned int len)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->tx_packets);
+ u64_stats_add(&dstats->tx_bytes, len);
+ u64_stats_update_end(&dstats->syncp);
+}
+
+static inline void dev_dstats_tx_dropped(struct net_device *dev)
+{
+ struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_inc(&dstats->tx_drops);
+ u64_stats_update_end(&dstats->syncp);
+}
+
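
A usage sketch for the new dstats helpers, assuming the device was registered with pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS so that dev->dstats is valid:

static void foo_rx(struct net_device *dev, struct sk_buff *skb)
{
        unsigned int len = skb->len;    /* skb may be consumed below */

        if (netif_rx(skb) == NET_RX_SUCCESS)
                dev_dstats_rx_add(dev, len);
        else
                dev_dstats_rx_dropped(dev);
}
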
#define __netdev_alloc_pcpu_stats(type, gfp) \
({ \
typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
@@ -3012,12 +3272,13 @@ int call_netdevice_notifiers_info(unsigned long val,
#define for_each_netdev_continue_rcu(net, d) \
list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list)
#define for_each_netdev_in_bond_rcu(bond, slave) \
- for_each_netdev_rcu(&init_net, slave) \
+ for_each_netdev_rcu(dev_net_rcu(bond), slave) \
if (netdev_master_upper_dev_get_rcu(slave) == (bond))
#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list)
#define for_each_netdev_dump(net, d, ifindex) \
- xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex))
+ for (; (d = xa_find(&(net)->dev_by_index, &ifindex, \
+ ULONG_MAX, XA_PRESENT)); ifindex++)
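
The open-coded xa_find() form keeps @ifindex as the resume cursor across dump invocations. A sketch of a netlink dump using it, with a hypothetical foo_fill_one():

static int foo_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        unsigned long ifindex = cb->args[0];
        struct net_device *dev;
        int err = 0;

        rtnl_lock();
        for_each_netdev_dump(sock_net(skb->sk), dev, ifindex) {
                err = foo_fill_one(skb, dev);   /* hypothetical */
                if (err)
                        break;
        }
        rtnl_unlock();

        cb->args[0] = ifindex;          /* resume point for the next call */
        return err;
}
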
static inline struct net_device *next_net_device(struct net_device *dev)
{
@@ -3053,6 +3314,8 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
}
int netdev_boot_setup_check(struct net_device *dev);
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type,
+ const char *hwaddr);
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
const char *hwaddr);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@ -3073,15 +3336,16 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
struct net_device *__dev_get_by_name(struct net *net, const char *name);
bool netdev_name_in_use(struct net *net, const char *name);
int dev_alloc_name(struct net_device *dev, const char *name);
+int netif_open(struct net_device *dev, struct netlink_ext_ack *extack);
int dev_open(struct net_device *dev, struct netlink_ext_ack *extack);
+void netif_close(struct net_device *dev);
void dev_close(struct net_device *dev);
void dev_close_many(struct list_head *head, bool unlink);
+void netif_disable_lro(struct net_device *dev);
void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
-u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev);
int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev);
int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
@@ -3117,8 +3381,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
-void netdev_freemem(struct net_device *dev);
-void init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
@@ -3132,7 +3394,7 @@ struct net_device *netdev_get_by_index(struct net *net, int ifindex,
struct net_device *netdev_get_by_name(struct net *net, const char *name,
netdevice_tracker *tracker, gfp_t gfp);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
-struct net_device *dev_get_by_napi_id(unsigned int napi_id);
+void netdev_copy_name(struct net_device *dev, char *name);
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
@@ -3195,6 +3457,7 @@ static inline bool dev_has_header(const struct net_device *dev)
struct softnet_data {
struct list_head poll_list;
struct sk_buff_head process_queue;
+ local_lock_t process_queue_bh_lock;
/* stats */
unsigned int processed;
@@ -3203,6 +3466,7 @@ struct softnet_data {
struct softnet_data *rps_ipi_list;
#endif
+ unsigned int received_rps;
bool in_net_rx_action;
bool in_napi_threaded_poll;
@@ -3216,13 +3480,7 @@ struct softnet_data {
struct sk_buff_head xfrm_backlog;
#endif
/* written and read only by owning cpu: */
- struct {
- u16 recursion;
- u8 more;
-#ifdef CONFIG_NET_EGRESS
- u8 skip_txqueue;
-#endif
- } xmit;
+ struct netdev_xmit xmit;
#ifdef CONFIG_RPS
/* input_queue_head should be written by cpu owning this struct,
* and only read by other cpus. Worth using a cache line.
@@ -3235,11 +3493,11 @@ struct softnet_data {
unsigned int cpu;
unsigned int input_queue_tail;
#endif
- unsigned int received_rps;
- unsigned int dropped;
struct sk_buff_head input_pkt_queue;
struct napi_struct backlog;
+ atomic_t dropped ____cacheline_aligned_in_smp;
+
/* Another possibly contended cache line */
spinlock_t defer_lock ____cacheline_aligned_in_smp;
int defer_count;
@@ -3248,44 +3506,26 @@ struct softnet_data {
call_single_data_t defer_csd;
};
-static inline void input_queue_head_incr(struct softnet_data *sd)
-{
-#ifdef CONFIG_RPS
- sd->input_queue_head++;
-#endif
-}
-
-static inline void input_queue_tail_incr_save(struct softnet_data *sd,
- unsigned int *qtail)
-{
-#ifdef CONFIG_RPS
- *qtail = ++sd->input_queue_tail;
-#endif
-}
-
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+struct page_pool_bh {
+ struct page_pool *pool;
+ local_lock_t bh_lock;
+};
+DECLARE_PER_CPU(struct page_pool_bh, system_page_pool);
+
+#ifndef CONFIG_PREEMPT_RT
static inline int dev_recursion_level(void)
{
return this_cpu_read(softnet_data.xmit.recursion);
}
-
-#define XMIT_RECURSION_LIMIT 8
-static inline bool dev_xmit_recursion(void)
-{
- return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
- XMIT_RECURSION_LIMIT);
-}
-
-static inline void dev_xmit_recursion_inc(void)
+#else
+static inline int dev_recursion_level(void)
{
- __this_cpu_inc(softnet_data.xmit.recursion);
+ return current->net_xmit.recursion;
}
-static inline void dev_xmit_recursion_dec(void)
-{
- __this_cpu_dec(softnet_data.xmit.recursion);
-}
+#endif
void __netif_schedule(struct Qdisc *q);
void netif_schedule_queue(struct netdev_queue *txq);
@@ -3350,6 +3590,12 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
{
+ /* Paired with READ_ONCE() from dev_watchdog() */
+ WRITE_ONCE(dev_queue->trans_start, jiffies);
+
+ /* This barrier is paired with smp_mb() from dev_watchdog() */
+ smp_mb__before_atomic();
+
/* Must be an atomic op see netif_txq_try_stop() */
set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}
@@ -3476,6 +3722,12 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
if (likely(dql_avail(&dev_queue->dql) >= 0))
return;
+ /* Paired with READ_ONCE() from dev_watchdog() */
+ WRITE_ONCE(dev_queue->trans_start, jiffies);
+
+ /* This barrier is paired with smp_mb() from dev_watchdog() */
+ smp_mb__before_atomic();
+
set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
/*
@@ -3483,7 +3735,7 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
* because in netdev_tx_completed_queue we update the dql_completed
* before checking the XOFF flag.
*/
- smp_mb();
+ smp_mb__after_atomic();
/* check again in case another CPU has just made room avail */
if (unlikely(dql_avail(&dev_queue->dql) >= 0))
@@ -3553,7 +3805,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
dql_completed(&dev_queue->dql, bytes);
/*
- * Without the memory barrier there is a small possiblity that
+ * Without the memory barrier there is a small possibility that
* netdev_tx_sent_queue will miss the update and cause the queue to
* be stopped forever
*/
@@ -3592,6 +3844,17 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q)
}
/**
+ * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue
+ * @dev: network device
+ * @qid: stack index of the queue to reset
+ */
+static inline void netdev_tx_reset_subqueue(const struct net_device *dev,
+ u32 qid)
+{
+ netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid));
+}
+
+/**
* netdev_reset_queue - reset the packets and bytes count of a network device
* @dev_queue: network device
*
@@ -3600,7 +3863,7 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q)
*/
static inline void netdev_reset_queue(struct net_device *dev_queue)
{
- netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0));
+ netdev_tx_reset_subqueue(dev_queue, 0);
}
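
A sketch of the BQL flow these helpers implement for a single-queue driver; foo_hw_queue() and foo_tx_clean() are hypothetical:

static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

        netdev_tx_sent_queue(txq, skb->len);    /* may stop the queue */
        foo_hw_queue(dev, skb);                 /* hypothetical doorbell */
        return NETDEV_TX_OK;
}

static void foo_tx_clean(struct net_device *dev, unsigned int pkts,
                         unsigned int bytes)
{
        /* may restart the queue once enough bytes have completed */
        netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
}
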
/**
@@ -3738,7 +4001,7 @@ static inline bool netif_attr_test_mask(unsigned long j,
* @online_mask: bitmask for CPUs/Rx queues that are online
* @nr_bits: number of bits in the bitmask
*
- * Returns true if a CPU/Rx queue is online.
+ * Returns: true if a CPU/Rx queue is online.
*/
static inline bool netif_attr_test_online(unsigned long j,
const unsigned long *online_mask,
@@ -3758,7 +4021,8 @@ static inline bool netif_attr_test_online(unsigned long j,
* @srcp: the cpumask/Rx queue mask pointer
* @nr_bits: number of bits in the bitmask
*
- * Returns >= nr_bits if no further CPUs/Rx queues set.
+ * Returns: next (after n) CPU/Rx queue index in the mask;
+ * >= nr_bits if no further CPUs/Rx queues set.
*/
static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
unsigned int nr_bits)
@@ -3780,7 +4044,8 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
* @src2p: the second CPUs/Rx queues mask pointer
* @nr_bits: number of bits in the bitmask
*
- * Returns >= nr_bits if no further CPUs/Rx queues set in both.
+ * Returns: next (after n) CPU/Rx queue index set in both masks;
+ * >= nr_bits if no further CPUs/Rx queues set in both.
*/
static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
const unsigned long *src2p,
@@ -3827,17 +4092,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
}
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
-
-#ifdef CONFIG_SYSFS
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
-#else
-static inline int netif_set_real_num_rx_queues(struct net_device *dev,
- unsigned int rxqs)
-{
- dev->real_num_rx_queues = rxqs;
- return 0;
-}
-#endif
int netif_set_real_num_queues(struct net_device *dev,
unsigned int txq, unsigned int rxq);
@@ -3886,9 +4141,9 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
}
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog);
-void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
+ const struct bpf_prog *xdp_prog);
+void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog);
+int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb);
int netif_rx(struct sk_buff *skb);
int __netif_rx(struct sk_buff *skb);
@@ -3896,10 +4151,15 @@ int netif_receive_skb(struct sk_buff *skb);
int netif_receive_skb_core(struct sk_buff *skb);
void netif_receive_skb_list_internal(struct list_head *head);
void netif_receive_skb_list(struct list_head *head);
-gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
-void napi_gro_flush(struct napi_struct *napi, bool flush_old);
+gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb);
+
+static inline gro_result_t napi_gro_receive(struct napi_struct *napi,
+ struct sk_buff *skb)
+{
+ return gro_receive_skb(&napi->gro, skb);
+}
+
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
@@ -3924,37 +4184,39 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg);
int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
void __user *data, bool *need_copyout);
int dev_ifconf(struct net *net, struct ifconf __user *ifc);
+int dev_eth_ioctl(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd);
int generic_hwtstamp_get_lower(struct net_device *dev,
struct kernel_hwtstamp_config *kernel_cfg);
int generic_hwtstamp_set_lower(struct net_device *dev,
struct kernel_hwtstamp_config *kernel_cfg,
struct netlink_ext_ack *extack);
-int dev_set_hwtstamp_phylib(struct net_device *dev,
- struct kernel_hwtstamp_config *cfg,
- struct netlink_ext_ack *extack);
int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata);
unsigned int dev_get_flags(const struct net_device *);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
+int netif_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack);
int dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
+int netif_set_alias(struct net_device *dev, const char *alias, size_t len);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
- const char *pat, int new_ifindex);
-static inline
+ const char *pat, int new_ifindex,
+ struct netlink_ext_ack *extack);
int dev_change_net_namespace(struct net_device *dev, struct net *net,
- const char *pat)
-{
- return __dev_change_net_namespace(dev, net, pat, 0);
-}
+ const char *pat);
int __dev_set_mtu(struct net_device *, int);
+int netif_set_mtu(struct net_device *dev, int new_mtu);
int dev_set_mtu(struct net_device *, int);
int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
struct netlink_ext_ack *extack);
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
+ struct netlink_ext_ack *extack);
+int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
struct netlink_ext_ack *extack);
-int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
+int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss,
struct netlink_ext_ack *extack);
int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
int dev_get_port_parent_id(struct net_device *dev,
@@ -3967,8 +4229,13 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
u8 dev_xdp_prog_count(struct net_device *dev);
+int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf);
+int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf);
+u8 dev_xdp_sb_prog_count(struct net_device *dev);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
+u32 dev_get_min_mp_channel_count(const struct net_device *dev);
+
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb);
@@ -4031,7 +4298,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
return 0;
}
-bool dev_nit_active(struct net_device *dev);
+bool dev_nit_active_rcu(const struct net_device *dev);
+static inline bool dev_nit_active(const struct net_device *dev)
+{
+ bool ret;
+
+ rcu_read_lock();
+ ret = dev_nit_active_rcu(dev);
+ rcu_read_unlock();
+ return ret;
+}
+
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
static inline void __dev_put(struct net_device *dev)
@@ -4127,6 +4404,8 @@ static inline void dev_put(struct net_device *dev)
netdev_put(dev, NULL);
}
+DEFINE_FREE(dev_put, struct net_device *, if (_T) dev_put(_T))
+
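
DEFINE_FREE() registers dev_put as a cleanup class (see <linux/cleanup.h>), so scoped references release automatically. A sketch:

static int foo_check(struct net *net, int ifindex)
{
        struct net_device *dev __free(dev_put) =
                dev_get_by_index(net, ifindex);

        if (!dev)
                return -ENODEV;

        /* dev_put() runs automatically on every return path */
        return netif_running(dev) ? 0 : -ENETDOWN;
}
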
static inline void netdev_ref_replace(struct net_device *odev,
struct net_device *ndev,
netdevice_tracker *tracker,
@@ -4160,6 +4439,7 @@ void linkwatch_fire_event(struct net_device *dev);
* pending work list (if queued).
*/
void linkwatch_sync_dev(struct net_device *dev);
+void __linkwatch_sync_dev(struct net_device *dev);
/**
* netif_carrier_ok - test if carrier present
@@ -4174,7 +4454,7 @@ static inline bool netif_carrier_ok(const struct net_device *dev)
unsigned long dev_trans_start(struct net_device *dev);
-void __netdev_watchdog_up(struct net_device *dev);
+void netdev_watchdog_up(struct net_device *dev);
void netif_carrier_on(struct net_device *dev);
void netif_carrier_off(struct net_device *dev);
@@ -4419,9 +4699,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
/*
* txq->trans_start can be read locklessly from dev_watchdog()
*/
-static inline void txq_trans_update(struct netdev_queue *txq)
+static inline void txq_trans_update(const struct net_device *dev,
+ struct netdev_queue *txq)
{
- if (txq->xmit_lock_owner != -1)
+ if (!dev->lltx)
WRITE_ONCE(txq->trans_start, jiffies);
}
@@ -4464,7 +4745,7 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
}
#define HARD_TX_LOCK(dev, txq, cpu) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
+ if (!(dev)->lltx) { \
__netif_tx_lock(txq, cpu); \
} else { \
__netif_tx_acquire(txq); \
@@ -4472,12 +4753,12 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
}
#define HARD_TX_TRYLOCK(dev, txq) \
- (((dev->features & NETIF_F_LLTX) == 0) ? \
+ (!(dev)->lltx ? \
__netif_tx_trylock(txq) : \
__netif_tx_acquire(txq))
#define HARD_TX_UNLOCK(dev, txq) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
+ if (!(dev)->lltx) { \
__netif_tx_unlock(txq); \
} else { \
__netif_tx_release(txq); \
@@ -4545,6 +4826,9 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
void ether_setup(struct net_device *dev);
+/* Allocate dummy net_device */
+struct net_device *alloc_netdev_dummy(int sizeof_priv);
+
/* Support for loadable net-drivers */
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned char name_assign_type,
@@ -4565,6 +4849,9 @@ int devm_register_netdev(struct device *dev, struct net_device *ndev);
/* General hardware address lists handling functions */
int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr_list *from_list, int addr_len);
+int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ int addr_len);
void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr_list *from_list, int addr_len);
int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
@@ -4619,7 +4906,7 @@ void dev_uc_flush(struct net_device *dev);
void dev_uc_init(struct net_device *dev);
/**
- * __dev_uc_sync - Synchonize device's unicast list
+ * __dev_uc_sync - Synchronize device's unicast list
* @dev: device to sync
* @sync: function to call if address should be added
* @unsync: function to call if address should be removed
@@ -4663,7 +4950,7 @@ void dev_mc_flush(struct net_device *dev);
void dev_mc_init(struct net_device *dev);
/**
- * __dev_mc_sync - Synchonize device's multicast list
+ * __dev_mc_sync - Synchronize device's multicast list
* @dev: device to sync
* @sync: function to call if address should be added
* @unsync: function to call if address should be removed
@@ -4696,8 +4983,11 @@ static inline void __dev_mc_unsync(struct net_device *dev,
/* Functions used for secondary unicast and multicast support */
void dev_set_rx_mode(struct net_device *dev);
+int netif_set_promiscuity(struct net_device *dev, int inc);
int dev_set_promiscuity(struct net_device *dev, int inc);
+int netif_set_allmulti(struct net_device *dev, int inc, bool notify);
int dev_set_allmulti(struct net_device *dev, int inc);
+void netif_state_change(struct net_device *dev);
void netdev_state_change(struct net_device *dev);
void __netdev_notify_peers(struct net_device *dev);
void netdev_notify_peers(struct net_device *dev);
@@ -4895,18 +5185,35 @@ static inline ktime_t netdev_get_tstamp(struct net_device *dev,
return hwtstamps->hwtstamp;
}
-static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
- struct sk_buff *skb, struct net_device *dev,
- bool more)
+#ifndef CONFIG_PREEMPT_RT
+static inline void netdev_xmit_set_more(bool more)
{
__this_cpu_write(softnet_data.xmit.more, more);
- return ops->ndo_start_xmit(skb, dev);
}
static inline bool netdev_xmit_more(void)
{
return __this_cpu_read(softnet_data.xmit.more);
}
+#else
+static inline void netdev_xmit_set_more(bool more)
+{
+ current->net_xmit.more = more;
+}
+
+static inline bool netdev_xmit_more(void)
+{
+ return current->net_xmit.more;
+}
+#endif
+
+static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
+ struct sk_buff *skb, struct net_device *dev,
+ bool more)
+{
+ netdev_xmit_set_more(more);
+ return ops->ndo_start_xmit(skb, dev);
+}
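
Driver-side, the xmit_more contract is unchanged; only the backing store moved (per-CPU softnet data vs. task_struct on PREEMPT_RT). A sketch with hypothetical foo_* helpers:

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

        foo_queue_desc(dev, skb);               /* hypothetical */

        /* Batch doorbells: only ring when the stack stops feeding us. */
        if (!netdev_xmit_more() || netif_xmit_stopped(txq))
                foo_ring_doorbell(dev);         /* hypothetical */

        return NETDEV_TX_OK;
}
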
static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, bool more)
@@ -4916,7 +5223,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi
rc = __netdev_start_xmit(ops, skb, dev, more);
if (rc == NETDEV_TX_OK)
- txq_trans_update(txq);
+ txq_trans_update(dev, txq);
return rc;
}
@@ -4998,6 +5305,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_TCP_ACCECN !=
+ (NETIF_F_GSO_ACCECN >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
@@ -5021,6 +5330,24 @@ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs);
void netif_inherit_tso_max(struct net_device *to,
const struct net_device *from);
+static inline unsigned int
+netif_get_gro_max_size(const struct net_device *dev, const struct sk_buff *skb)
+{
+ /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
+ return skb->protocol == htons(ETH_P_IPV6) ?
+ READ_ONCE(dev->gro_max_size) :
+ READ_ONCE(dev->gro_ipv4_max_size);
+}
+
+static inline unsigned int
+netif_get_gso_max_size(const struct net_device *dev, const struct sk_buff *skb)
+{
+ /* pairs with WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
+ return skb->protocol == htons(ETH_P_IPV6) ?
+ READ_ONCE(dev->gso_max_size) :
+ READ_ONCE(dev->gso_ipv4_max_size);
+}
+
static inline bool netif_is_macsec(const struct net_device *dev)
{
return dev->priv_flags & IFF_MACSEC;