summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c25
-rw-r--r--net/core/dev_addr_lists.c6
-rw-r--r--net/core/net-procfs.c24
-rw-r--r--net/core/net-sysfs.c4
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c36
-rw-r--r--net/core/skmsg.c14
-rw-r--r--net/core/sock.c83
-rw-r--r--net/core/sock_destructor.h12
-rw-r--r--net/core/sysctl_net_core.c2
10 files changed, 136 insertions, 72 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 74fd402d26dd..eb3a366bf212 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3163,6 +3163,12 @@ static u16 skb_tx_hash(const struct net_device *dev,
qoffset = sb_dev->tc_to_txq[tc].offset;
qcount = sb_dev->tc_to_txq[tc].count;
+ if (unlikely(!qcount)) {
+ net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
+ sb_dev->name, qoffset, tc);
+ qoffset = 0;
+ qcount = dev->real_num_tx_queues;
+ }
}
if (skb_rx_queue_recorded(skb)) {
@@ -3906,7 +3912,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
skb->pkt_type = PACKET_LOOPBACK;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (skb->ip_summed == CHECKSUM_NONE)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
WARN_ON(!skb_dst(skb));
skb_dst_force(skb);
netif_rx_ni(skb);
@@ -6923,12 +6930,16 @@ EXPORT_SYMBOL(napi_disable);
*/
void napi_enable(struct napi_struct *n)
{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
- if (n->dev->threaded && n->thread)
- set_bit(NAPI_STATE_THREADED, &n->state);
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
+
+ new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
+ if (n->dev->threaded && n->thread)
+ new |= NAPIF_STATE_THREADED;
+ } while (cmpxchg(&n->state, val, new) != val);
}
EXPORT_SYMBOL(napi_enable);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 8c39283c26ae..f0cb38344126 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -50,6 +50,11 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
if (addr_len > MAX_ADDR_LEN)
return -EINVAL;
+ ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+ if (ha && !memcmp(addr, ha->addr, addr_len) &&
+ (!addr_type || addr_type == ha->type))
+ goto found_it;
+
while (*ins_point) {
int diff;
@@ -64,6 +69,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
} else if (diff > 0) {
ins_point = &parent->rb_right;
} else {
+found_it:
if (exclusive)
return -EEXIST;
if (global) {
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index eab5fc88a002..d8b9dbabd4a4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
- "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
+ seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+ "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
static int dev_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Interface| Receive "
- " | Transmit\n"
- " | bytes packets errs drop fifo frame "
- "compressed multicast| bytes packets errs "
- " drop fifo colls carrier compressed\n");
+ seq_puts(seq, "Inter-| Receive "
+ " | Transmit\n"
+ " face |bytes packets errs drop fifo frame "
+ "compressed multicast|bytes packets errs "
+ "drop fifo colls carrier compressed\n");
else
dev_seq_printf_stats(seq, v);
return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
struct packet_type *pt = v;
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Type Device Function\n");
+ seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-9s %ps\n",
+ seq_printf(seq, " %-8s %ps\n",
pt->dev ? pt->dev->name : "", pt->func);
}
@@ -327,14 +327,12 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct netdev_hw_addr *ha;
struct net_device *dev = v;
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
+ if (v == SEQ_START_TOKEN)
return 0;
- }
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
+ seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
dev->ifindex, dev->name,
ha->refcount, ha->global_use,
(int)dev->addr_len, ha->addr);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f6197774048b..b2e49eb7001d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1973,9 +1973,9 @@ int netdev_register_kobject(struct net_device *ndev)
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
const struct net *net_new)
{
+ kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
+ kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
struct device *dev = &ndev->dev;
- kuid_t old_uid, new_uid;
- kgid_t old_gid, new_gid;
int error;
net_ns_get_ownership(net_old, &old_uid, &old_gid);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 972c8cb303a5..8ccce85562a1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -5262,7 +5262,7 @@ nla_put_failure:
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
- size_t size = 0;
+ size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2170bea2c7de..fe9358437380 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,6 +80,7 @@
#include <linux/indirect_call_wrapper.h>
#include "datagram.h"
+#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
@@ -1804,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom);
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
{
int delta = headroom - skb_headroom(skb);
+ int osize = skb_end_offset(skb);
+ struct sock *sk = skb->sk;
if (WARN_ONCE(delta <= 0,
"%s is expecting an increase in the headroom", __func__))
return skb;
- /* pskb_expand_head() might crash, if skb is shared */
- if (skb_shared(skb)) {
+ delta = SKB_DATA_ALIGN(delta);
+ /* pskb_expand_head() might crash, if skb is shared. */
+ if (skb_shared(skb) || !is_skb_wmem(skb)) {
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
- if (likely(nskb)) {
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
- consume_skb(skb);
- } else {
- kfree_skb(skb);
- }
+ if (unlikely(!nskb))
+ goto fail;
+
+ if (sk)
+ skb_set_owner_w(nskb, sk);
+ consume_skb(skb);
skb = nskb;
}
- if (skb &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(skb);
- skb = NULL;
+ if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC))
+ goto fail;
+
+ if (sk && is_skb_wmem(skb)) {
+ delta = skb_end_offset(skb) - osize;
+ refcount_add(delta, &sk->sk_wmem_alloc);
+ skb->truesize += delta;
}
return skb;
+
+fail:
+ kfree_skb(skb);
+ return NULL;
}
EXPORT_SYMBOL(skb_expand_head);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..a86ef7e844f8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+bool sk_msg_is_readable(struct sock *sk)
+{
+ struct sk_psock *psock;
+ bool empty = true;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (likely(psock))
+ empty = list_empty(&psock->ingress_msg);
+ rcu_read_unlock();
+ return !empty;
+}
+EXPORT_SYMBOL_GPL(sk_msg_is_readable);
+
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..c1601f75ec4b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1376,6 +1376,16 @@ set_sndbuf:
}
EXPORT_SYMBOL(sock_setsockopt);
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+ const struct cred *cred;
+
+ spin_lock(&sk->sk_peer_lock);
+ cred = get_cred(sk->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ return cred;
+}
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
struct ucred *ucred)
@@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct ucred peercred;
if (len > sizeof(peercred))
len = sizeof(peercred);
+
+ spin_lock(&sk->sk_peer_lock);
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ spin_unlock(&sk->sk_peer_lock);
+
if (copy_to_user(optval, &peercred, len))
return -EFAULT;
goto lenout;
@@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_PEERGROUPS:
{
+ const struct cred *cred;
int ret, n;
- if (!sk->sk_peer_cred)
+ cred = sk_get_peer_cred(sk);
+ if (!cred)
return -ENODATA;
- n = sk->sk_peer_cred->group_info->ngroups;
+ n = cred->group_info->ngroups;
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
+ put_cred(cred);
return put_user(len, optlen) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
- ret = groups_to_user((gid_t __user *)optval,
- sk->sk_peer_cred->group_info);
+ ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+ put_cred(cred);
if (ret)
return ret;
goto lenout;
@@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head)
sk->sk_frag.page = NULL;
}
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+ put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
+
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
@@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
+ spin_lock_init(&sk->sk_peer_lock);
+
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -3179,17 +3199,15 @@ EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
@@ -3212,42 +3230,37 @@ void release_sock(struct sock *sk)
}
EXPORT_SYMBOL(release_sock);
-/**
- * lock_sock_fast - fast version of lock_sock
- * @sk: socket
- *
- * This version should be used for very small section, where process wont block
- * return false if fast path is taken:
- *
- * sk_lock.slock locked, owned = 0, BH disabled
- *
- * return true if slow path is taken:
- *
- * sk_lock.slock unlocked, owned = 1, BH enabled
- */
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (!sk->sk_lock.owned)
+ if (!sk->sk_lock.owned) {
/*
- * Note : We must disable BH
+ * Fast path return with bottom halves disabled and
+ * sock::sk_lock.slock held.
+ *
+ * The 'mutex' is not contended and holding
+ * sock::sk_lock.slock prevents all other lockers to
+ * proceed so the corresponding unlock_sock_fast() can
+ * avoid the slow path of release_sock() completely and
+ * just release slock.
+ *
+ * From a semantical POV this is equivalent to 'acquiring'
+ * the 'mutex', hence the corresponding lockdep
+ * mutex_release() has to happen in the fast path of
+ * unlock_sock_fast().
*/
return false;
+ }
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
__acquire(&sk->sk_lock.slock);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
return true;
}
-EXPORT_SYMBOL(lock_sock_fast);
+EXPORT_SYMBOL(__lock_sock_fast);
int sock_gettstamp(struct socket *sock, void __user *userstamp,
bool timeval, bool time32)
diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h
new file mode 100644
index 000000000000..2f396e6bfba5
--- /dev/null
+++ b/net/core/sock_destructor.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
+#define _NET_CORE_SOCK_DESTRUCTOR_H
+#include <net/tcp.h>
+
+static inline bool is_skb_wmem(const struct sk_buff *skb)
+{
+ return skb->destructor == sock_wfree ||
+ skb->destructor == __sock_wfree ||
+ (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
+}
+#endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..5f88526ad61c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
.extra1 = &long_one,
- .extra2 = &long_max,
+ .extra2 = &bpf_jit_limit_max,
},
#endif
{