summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c109
-rw-r--r--net/core/devlink.c8
-rw-r--r--net/core/filter.c21
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/core/net-sysfs.c7
-rw-r--r--net/core/rtnetlink.c13
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sock_map.c7
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/core/utils.c20
10 files changed, 113 insertions, 80 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ad39c87b7fd..81befd0c2510 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5491,9 +5491,29 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch)
+ gro_normal_list(napi);
+}
+
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
-static int napi_gro_complete(struct sk_buff *skb)
+static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
__be16 type = skb->protocol;
@@ -5526,7 +5546,8 @@ static int napi_gro_complete(struct sk_buff *skb)
}
out:
- return netif_receive_skb_internal(skb);
+ gro_normal_one(napi, skb);
+ return NET_RX_SUCCESS;
}
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
@@ -5539,7 +5560,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
return;
skb_list_del_init(skb);
- napi_gro_complete(skb);
+ napi_gro_complete(napi, skb);
napi->gro_hash[index].count--;
}
@@ -5641,7 +5662,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
}
}
-static void gro_flush_oldest(struct list_head *head)
+static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
{
struct sk_buff *oldest;
@@ -5657,7 +5678,7 @@ static void gro_flush_oldest(struct list_head *head)
* SKB to the chain.
*/
skb_list_del_init(oldest);
- napi_gro_complete(oldest);
+ napi_gro_complete(napi, oldest);
}
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
@@ -5733,7 +5754,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (pp) {
skb_list_del_init(pp);
- napi_gro_complete(pp);
+ napi_gro_complete(napi, pp);
napi->gro_hash[hash].count--;
}
@@ -5744,7 +5765,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
goto normal;
if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
- gro_flush_oldest(gro_head);
+ gro_flush_oldest(napi, gro_head);
} else {
napi->gro_hash[hash].count++;
}
@@ -5802,26 +5823,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
@@ -6200,8 +6201,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
NAPIF_STATE_IN_BUSY_POLL)))
return false;
- gro_normal_list(n);
-
if (n->gro_bitmask) {
unsigned long timeout = 0;
@@ -6217,6 +6216,9 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
}
+
+ gro_normal_list(n);
+
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
local_irq_save(flags);
@@ -6548,8 +6550,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
goto out_unlock;
}
- gro_normal_list(n);
-
if (n->gro_bitmask) {
/* flush too old packets
* If HZ < 1000, flush all packets.
@@ -6557,6 +6557,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
napi_gro_flush(n, HZ >= 1000);
}
+ gro_normal_list(n);
+
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
*/
@@ -8194,6 +8196,22 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
}
EXPORT_SYMBOL(__dev_set_mtu);
+int dev_validate_mtu(struct net_device *dev, int new_mtu,
+ struct netlink_ext_ack *extack)
+{
+ /* MTU must be positive, and in range */
+ if (new_mtu < 0 || new_mtu < dev->min_mtu) {
+ NL_SET_ERR_MSG(extack, "mtu less than device minimum");
+ return -EINVAL;
+ }
+
+ if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
+ NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/**
* dev_set_mtu_ext - Change maximum transfer unit
* @dev: device
@@ -8210,16 +8228,9 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
if (new_mtu == dev->mtu)
return 0;
- /* MTU must be positive, and in range */
- if (new_mtu < 0 || new_mtu < dev->min_mtu) {
- NL_SET_ERR_MSG(extack, "mtu less than device minimum");
- return -EINVAL;
- }
-
- if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
- NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
- return -EINVAL;
- }
+ err = dev_validate_mtu(dev, new_mtu, extack);
+ if (err)
+ return err;
if (!netif_device_present(dev))
return -ENODEV;
@@ -9177,22 +9188,10 @@ static void netdev_unregister_lockdep_key(struct net_device *dev)
void netdev_update_lockdep_key(struct net_device *dev)
{
- struct netdev_queue *queue;
- int i;
-
- lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
lockdep_unregister_key(&dev->addr_list_lock_key);
-
- lockdep_register_key(&dev->qdisc_xmit_lock_key);
lockdep_register_key(&dev->addr_list_lock_key);
lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
- for (i = 0; i < dev->num_tx_queues; i++) {
- queue = netdev_get_tx_queue(dev, i);
-
- lockdep_set_class(&queue->_xmit_lock,
- &dev->qdisc_xmit_lock_key);
- }
}
EXPORT_SYMBOL(netdev_update_lockdep_key);
@@ -9314,8 +9313,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret)
+ if (ret) {
+ dev->reg_state = NETREG_UNREGISTERED;
goto err_uninit;
+ }
dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4c63c9a4c09e..f76219bf0c21 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -6406,7 +6406,7 @@ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA;
}
-#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 30)
+#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
{
@@ -7563,7 +7563,7 @@ void devlink_region_destroy(struct devlink_region *region)
EXPORT_SYMBOL_GPL(devlink_region_destroy);
/**
- * devlink_region_shapshot_id_get - get snapshot ID
+ * devlink_region_snapshot_id_get - get snapshot ID
*
* This callback should be called when adding a new snapshot,
* Driver should use the same id for multiple snapshots taken
@@ -7571,7 +7571,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy);
*
* @devlink: devlink
*/
-u32 devlink_region_shapshot_id_get(struct devlink *devlink)
+u32 devlink_region_snapshot_id_get(struct devlink *devlink)
{
u32 id;
@@ -7581,7 +7581,7 @@ u32 devlink_region_shapshot_id_get(struct devlink *devlink)
return id;
}
-EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
/**
* devlink_region_snapshot_create - create a new snapshot
diff --git a/net/core/filter.c b/net/core/filter.c
index c19dd0973e0c..538f6a735a19 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2055,6 +2055,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
+ skb->tstamp = 0;
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -2230,10 +2231,10 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
+ offset += len;
len = sk_msg_elem(msg, i)->length;
if (start < offset + len)
break;
- offset += len;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
@@ -2345,7 +2346,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
u32, len, u64, flags)
{
struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
- u32 new, i = 0, l, space, copy = 0, offset = 0;
+ u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
u8 *raw, *to, *from;
struct page *page;
@@ -2355,11 +2356,11 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
+ offset += l;
l = sk_msg_elem(msg, i)->length;
if (start < offset + l)
break;
- offset += l;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
@@ -2414,6 +2415,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
sg_unmark_end(psge);
+ sg_unmark_end(&rsge);
sk_msg_iter_next(msg, end);
}
@@ -2505,7 +2507,7 @@ static void sk_msg_shift_right(struct sk_msg *msg, int i)
BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
u32, len, u64, flags)
{
- u32 i = 0, l, space, offset = 0;
+ u32 i = 0, l = 0, space, offset = 0;
u64 last = start + len;
int pop;
@@ -2515,11 +2517,11 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
+ offset += l;
l = sk_msg_elem(msg, i)->length;
if (start < offset + l)
break;
- offset += l;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
@@ -5317,8 +5319,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (sk) {
sk = sk_to_full_sk(sk);
if (!sk_fullsock(sk)) {
- if (!sock_flag(sk, SOCK_RCU_FREE))
- sock_gen_put(sk);
+ sock_gen_put(sk);
return NULL;
}
}
@@ -5355,8 +5356,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (sk) {
sk = sk_to_full_sk(sk);
if (!sk_fullsock(sk)) {
- if (!sock_flag(sk, SOCK_RCU_FREE))
- sock_gen_put(sk);
+ sock_gen_put(sk);
return NULL;
}
}
@@ -5423,7 +5423,8 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
- if (!sock_flag(sk, SOCK_RCU_FREE))
+ /* Only full sockets have sk->sk_flags. */
+ if (!sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE))
sock_gen_put(sk);
return 0;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 652da6369037..789a73aa7bd8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -98,9 +98,6 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
- if (neigh->parms->neigh_cleanup)
- neigh->parms->neigh_cleanup(neigh);
-
trace_neigh_cleanup_and_release(neigh, 0);
__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
@@ -3293,6 +3290,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
+ (*pos)++;
return NULL;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5c4624298996..4c826b8bf9b1 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -919,14 +919,17 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Kobject_put later will trigger rx_queue_release call which
+ * decreases dev refcount: Take that reference here
+ */
+ dev_hold(queue->dev);
+
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
"rx-%u", index);
if (error)
goto err;
- dev_hold(queue->dev);
-
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02916f43bf63..d9001b5c48eb 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3048,8 +3048,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
dev->rtnl_link_ops = ops;
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
- if (tb[IFLA_MTU])
- dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+ if (tb[IFLA_MTU]) {
+ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+ int err;
+
+ err = dev_validate_mtu(dev, mtu, extack);
+ if (err) {
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+ dev->mtu = mtu;
+ }
if (tb[IFLA_ADDRESS]) {
memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
nla_len(tb[IFLA_ADDRESS]));
diff --git a/net/core/sock.c b/net/core/sock.c
index 043db3ce023e..8459ad579f73 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2916,7 +2916,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0UL;
sk->sk_pacing_rate = ~0UL;
- sk->sk_pacing_shift = 10;
+ WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
sk_rx_queue_clear(sk);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index eb114ee419b6..8998e356f423 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -241,8 +241,11 @@ static void sock_map_free(struct bpf_map *map)
struct sock *sk;
sk = xchg(psk, NULL);
- if (sk)
+ if (sk) {
+ lock_sock(sk);
sock_map_unref(sk, psk);
+ release_sock(sk);
+ }
}
raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();
@@ -862,7 +865,9 @@ static void sock_hash_free(struct bpf_map *map)
raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
+ lock_sock(elem->sk);
sock_map_unref(elem->sk, elem);
+ release_sock(elem->sk);
}
raw_spin_unlock_bh(&bucket->lock);
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index eb29e5adc84d..9f9e00ba3ad7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -288,6 +288,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
return ret;
}
+# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -298,6 +299,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
+# endif /* CONFIG_HAVE_EBPF_JIT */
static int
proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
diff --git a/net/core/utils.c b/net/core/utils.c
index 6b6e51db9f3b..1f31a39236d5 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -438,6 +438,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
+/**
+ * inet_proto_csum_replace16 - update layer 4 header checksum field
+ * @sum: Layer 4 header checksum field
+ * @skb: sk_buff for the packet
+ * @from: old IPv6 address
+ * @to: new IPv6 address
+ * @pseudohdr: True if layer 4 header checksum includes pseudoheader
+ *
+ * Update layer 4 header as per the update in IPv6 src/dst address.
+ *
+ * There is no need to update skb->csum in this function, because update in two
+ * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other
+ * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to
+ * update skb->csum, because update in 3 fields a.) IPv4 src/dst address,
+ * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as
+ * L4 Header checksum for skb->csum calculation.
+ */
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr)
@@ -449,9 +466,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL) {
*sum = csum_fold(csum_partial(diff, sizeof(diff),
~csum_unfold(*sum)));
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_partial(diff, sizeof(diff),
- ~skb->csum);
} else if (pseudohdr)
*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
csum_unfold(*sum)));