summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/ax25/af_ax25.c 33
-rw-r--r-- net/core/flow_offload.c 6
-rw-r--r-- net/dccp/proto.c 33
-rw-r--r-- net/ipv4/inet_connection_sock.c 247
-rw-r--r-- net/ipv4/inet_hashtables.c 203
-rw-r--r-- net/ipv4/ip_gre.c 11
-rw-r--r-- net/ipv4/tcp.c 14
-rw-r--r-- net/ipv4/xfrm4_protocol.c 1
-rw-r--r-- net/ipv6/ip6_output.c 6
-rw-r--r-- net/ipv6/seg6_hmac.c 1
-rw-r--r-- net/ipv6/seg6_local.c 1
-rw-r--r-- net/l2tp/l2tp_ip6.c 5
-rw-r--r-- net/netfilter/nf_tables_api.c 54
-rw-r--r-- net/netfilter/nf_tables_offload.c 23
-rw-r--r-- net/netfilter/nft_nat.c 3
-rw-r--r-- net/openvswitch/actions.c 6
-rw-r--r-- net/openvswitch/conntrack.c 4
-rw-r--r-- net/sunrpc/xdr.c 37
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c 4
-rw-r--r-- net/tls/tls_main.c 8
-rw-r--r-- net/unix/af_unix.c 2
-rw-r--r-- net/xdp/xsk.c 5
-rw-r--r-- net/xdp/xsk_queue.h 8
23 files changed, 223 insertions, 492 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 95393bb2760b..4c7030ed8d33 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1661,9 +1661,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1675,10 +1678,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1725,6 +1747,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
out:
release_sock(sk);
+done:
return err;
}
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 73f68d4625f3..929f6379a279 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -595,3 +595,9 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count;
}
EXPORT_SYMBOL(flow_indr_dev_setup_offload);
+
+bool flow_indr_dev_exists(void)
+{
+ return !list_empty(&flow_block_indr_dev_list);
+}
+EXPORT_SYMBOL(flow_indr_dev_exists);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 2e78458900f2..eb8e128e43e8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1120,12 +1120,6 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
- dccp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("dccp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind2_bucket_cachep)
- goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1156,7 +1150,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind2_bucket_cachep;
+ goto out_free_bind_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1182,23 +1176,14 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
- dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
- if (!dccp_hashinfo.bhash2) {
- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
- goto out_free_dccp_bhash;
- }
-
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash2;
+ goto out_free_dccp_bhash;
rc = dccp_ackvec_init();
if (rc)
@@ -1222,38 +1207,30 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
-out_free_dccp_bhash2:
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
- dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
- dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
- int bhash_order = get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket));
-
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash,
+ get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c0b7e6c21360..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,32 +117,6 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-static bool use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- int addr_type;
-
- if (sk->sk_family == AF_INET6) {
- addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
- int port)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return ipv6_portaddr_hash(net, &nulladdr, port);
-#endif
- return ipv4_portaddr_hash(net, 0, port);
-}
-
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -156,71 +130,16 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
- bool reuseport_cb_ok, bool reuseport_ok)
-{
- int bound_dev_if2;
-
- if (sk == sk2)
- return false;
-
- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
-
- if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
- sk->sk_bound_dev_if == bound_dev_if2) {
- if (sk->sk_reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if (!relax || (!reuseport_ok && sk->sk_reuseport &&
- sk2->sk_reuseport && reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(sk_uid, sock_i_uid(sk2)))))
- return true;
- } else if (!reuseport_ok || !sk->sk_reuseport ||
- !sk2->sk_reuseport || !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(sk_uid, sock_i_uid(sk2)))) {
- return true;
- }
- }
- return false;
-}
-
-static bool check_bhash2_conflict(const struct sock *sk,
- struct inet_bind2_bucket *tb2, kuid_t sk_uid,
- bool relax, bool reuseport_cb_ok,
- bool reuseport_ok)
-{
- struct sock *sk2;
-
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
-
- if (bind_conflict_exist(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
- return false;
-}
-
-/* This should be called only when the corresponding inet_bind_bucket spinlock
- * is held
- */
-static int inet_csk_bind_conflict(const struct sock *sk, int port,
- struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, /* may be null */
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
bool relax, bool reuseport_ok)
{
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- kuid_t uid = sock_i_uid((struct sock *)sk);
- struct sock_reuseport *reuseport_cb;
- struct inet_bind2_hashbucket *head2;
- bool reuseport_cb_ok;
struct sock *sk2;
- struct net *net;
- int l3mdev;
- u32 hash;
+ bool reuseport_cb_ok;
+ bool reuse = sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport;
+ struct sock_reuseport *reuseport_cb;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -231,42 +150,40 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners and tb2->owners list belong
- * to the same net
+ * in tb->owners list belong to the same net - the
+ * one this bucket belongs to.
*/
- if (!use_bhash2_on_bind(sk)) {
- sk_for_each_bound(sk2, &tb->owners)
- if (bind_conflict_exist(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
+ sk_for_each_bound(sk2, &tb->owners) {
+ int bound_dev_if2;
- return false;
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ }
+ }
}
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- net = sock_net(sk);
-
- /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
- * INADDR_ANY (if ipv4) socket.
- */
- hash = get_bhash2_nulladdr_hash(sk, net, port);
- head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-
- l3mdev = inet_sk_bound_l3mdev(sk);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
- break;
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- return false;
+ return sk2 != NULL;
}
/*
@@ -274,20 +191,16 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
* inet_bind_hashbucket lock held.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
- struct inet_bind2_bucket **tb2_ret,
- struct inet_bind2_hashbucket **head2_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- struct inet_bind2_hashbucket *head2;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
- bool relax = false;
- int port = 0;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -326,12 +239,10 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
- if (!inet_csk_bind_conflict(sk, port, tb, tb2,
- relax, false))
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
@@ -361,8 +272,6 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
- *tb2_ret = tb2;
- *head2_ret = head2;
return head;
}
@@ -458,81 +367,54 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- bool bhash_created = false, bhash2_created = false;
- struct inet_bind2_bucket *tb2 = NULL;
- struct inet_bind2_hashbucket *head2;
- struct inet_bind_bucket *tb = NULL;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- int ret = 1, port = snum;
- bool found_port = false;
+ struct inet_bind_bucket *tb = NULL;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
+ head = inet_csk_find_open_port(sk, &tb, &port);
if (!head)
return ret;
- if (tb && tb2)
- goto success;
- found_port = true;
- } else {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev))
- break;
-
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
- }
-
- if (!tb) {
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
- head, port, l3mdev);
if (!tb)
- goto fail_unlock;
- bhash_created = true;
- }
-
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
- if (!tb2)
- goto fail_unlock;
- bhash2_created = true;
+ goto tb_not_found;
+ goto success;
}
-
- /* If we had to find an open port, we already checked for conflicts */
- if (!found_port && !hlist_empty(&tb->owners)) {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port)
+ goto tb_found;
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port, l3mdev);
+ if (!tb)
+ goto fail_unlock;
+tb_found:
+ if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
goto success;
- if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
}
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
- WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
- if (ret) {
- if (bhash_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
- }
spin_unlock_bh(&head->lock);
return ret;
}
@@ -1079,7 +961,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
- newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8de5e699b3f..b9d995b5ce24 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -81,41 +81,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
return tb;
}
-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
- struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk)
-{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
- if (tb) {
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
-#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-}
-
-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
-#endif
- return tb2->rcv_saddr == sk->sk_rcv_saddr;
-}
-
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
@@ -127,25 +92,12 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
-/* Caller must hold the lock for the corresponding hashbucket in the bhash table
- * with local BH disabled
- */
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
-{
- if (hlist_empty(&tb->owners)) {
- __hlist_del(&tb->node);
- kmem_cache_free(cachep, tb);
- }
-}
-
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum)
+ const unsigned short snum)
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
- inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -157,7 +109,6 @@ static void __inet_put_port(struct sock *sk)
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
@@ -166,13 +117,6 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- if (inet_csk(sk)->icsk_bind2_hash) {
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
- inet_csk(sk)->icsk_bind2_hash = NULL;
- inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
- }
spin_unlock(&head->lock);
}
@@ -189,19 +133,14 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
- struct inet_bind2_hashbucket *head_bhash2;
- bool created_inet_bind_bucket = false;
- struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- if (unlikely(!tb || !tb2)) {
+ if (unlikely(!tb)) {
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -214,45 +153,25 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev))
+ if (net_eq(ib_net(tb), sock_net(sk)) &&
+ tb->l3mdev == l3mdev && tb->port == port)
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- net, head, port, l3mdev);
+ sock_net(sk), head, port,
+ l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
- created_inet_bind_bucket = true;
}
inet_csk_update_fastreuse(tb, child);
-
- goto bhash2_find;
- } else if (!bind2_bucket_addr_match(tb2, child)) {
- l3mdev = inet_sk_bound_l3mdev(sk);
-
-bhash2_find:
- tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
- &head_bhash2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head_bhash2, port,
- l3mdev, child);
- if (!tb2)
- goto error;
- }
}
- inet_bind_hash(child, tb, tb2, port);
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
-
-error:
- if (created_inet_bind_bucket)
- inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
- spin_unlock(&head->lock);
- return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -756,76 +675,6 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
- struct net *net, unsigned short port,
- int l3mdev, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
-}
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net, const unsigned short port,
- int l3mdev, const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
-}
-
-static struct inet_bind2_hashbucket *
-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
- const struct net *net, unsigned short port)
-{
- u32 hash;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
- else
-#endif
- hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
- return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-}
-
-/* This should only be called when the spinlock for the socket's corresponding
- * bind_hashbucket is held
- */
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev, struct sock *sk,
- struct inet_bind2_hashbucket **head)
-{
- struct inet_bind2_bucket *bhash2 = NULL;
- struct inet_bind2_hashbucket *h;
-
- h = inet_bhashfn_portaddr(hinfo, sk, net, port);
- inet_bind_bucket_for_each(bhash2, &h->chain) {
- if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
- break;
- }
-
- if (head)
- *head = h;
-
- return bhash2;
-}
-
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
@@ -846,13 +695,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
- bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -909,7 +755,8 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -927,7 +774,6 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
- tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -943,17 +789,6 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* Find the corresponding tb2 bucket since we need to
- * add the socket to the bhash2 table as well
- */
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
- if (!tb2)
- goto error;
- }
-
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
@@ -963,7 +798,7 @@ ok:
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -975,12 +810,6 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
-
-error:
- if (tb_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
- return -ENOMEM;
}
/*
@@ -1026,10 +855,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
init_hashinfo_lhash2(h);
/* this one is used for source ports of outgoing connections */
- table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
- sizeof(*table_perturb), GFP_KERNEL);
- if (!table_perturb)
- panic("TCP: failed to alloc table_perturb");
+ table_perturb = alloc_large_system_hash("Table-perturb",
+ sizeof(*table_perturb),
+ INET_TABLE_PERTURB_SIZE,
+ 0, 0, NULL, NULL,
+ INET_TABLE_PERTURB_SIZE,
+ INET_TABLE_PERTURB_SIZE);
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7e474a85deaf..3b9cd487075a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -629,21 +629,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
- const int pull_len = tunnel->hlen + sizeof(struct iphdr);
-
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- if (pull_len > skb_transport_offset(skb))
- goto free_skb;
-
/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
* to gre header.
*/
- skb_pull(skb, pull_len);
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start(skb) < skb->data)
+ goto free_skb;
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9984d23a7f3e..028513d3e2a2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4604,12 +4604,6 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
- tcp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("tcp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC |
- SLAB_ACCOUNT,
- NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4632,9 +4626,8 @@ void __init tcp_init(void)
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind bhash tables",
- sizeof(struct inet_bind_hashbucket) +
- sizeof(struct inet_bind2_hashbucket),
+ alloc_large_system_hash("TCP bind",
+ sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4643,12 +4636,9 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
- tcp_hashinfo.bhash2 =
- (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 2fe5860c21d6..b146ce88c5d0 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -304,4 +304,3 @@ void __init xfrm4_protocol_init(void)
{
xfrm_input_register_afinfo(&xfrm4_input_afinfo);
}
-EXPORT_SYMBOL(xfrm4_protocol_init);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4081b12a01ff..77e3f5970ce4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1450,7 +1450,7 @@ static int __ip6_append_data(struct sock *sk,
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
unsigned int flags, struct ipcm6_cookie *ipc6)
{
struct sk_buff *skb, *skb_prev = NULL;
@@ -1798,7 +1798,7 @@ error:
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags)
{
@@ -1995,7 +1995,7 @@ EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct rt6_info *rt,
unsigned int flags, struct inet_cork_full *cork)
{
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 29bc4e7c3046..6de01185cc68 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -399,7 +399,6 @@ int __init seg6_hmac_init(void)
{
return seg6_hmac_init_algo();
}
-EXPORT_SYMBOL(seg6_hmac_init);
int __net_init seg6_hmac_net_init(struct net *net)
{
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 9fbe243a0e81..98a34287439c 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -218,6 +218,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
struct flowi6 fl6;
int dev_flags = 0;
+ memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
fl6.saddr = hdr->saddr;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index c6ff8bf9b55f..9dbd801ddb98 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -504,14 +504,15 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int transhdrlen = 4; /* zero session-id */
- int ulen = len + transhdrlen;
+ int ulen;
int err;
/* Rough check on arithmetic overflow,
* better check is made in ip6_append_data().
*/
- if (len > INT_MAX)
+ if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
+ ulen = len + transhdrlen;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 746be13438ef..51144fc66889 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -544,6 +544,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -1914,7 +1915,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
goto err_hook_dev;
}
hook->ops.dev = dev;
- hook->inactive = false;
return hook;
@@ -2166,7 +2166,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
chain->flags |= NFT_CHAIN_BASE | flags;
basechain->policy = NF_ACCEPT;
if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
+ !nft_chain_offload_support(basechain))
return -EOPNOTSUPP;
flow_block_init(&basechain->flow_block);
@@ -7332,7 +7332,7 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net,
nf_unregister_net_hook(net, &hook->ops);
if (release_netdev) {
list_del(&hook->list);
- kfree_rcu(hook);
+ kfree_rcu(hook, rcu);
}
}
}
@@ -7433,11 +7433,15 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
if (nla[NFTA_FLOWTABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
- if (flags & ~NFT_FLOWTABLE_MASK)
- return -EOPNOTSUPP;
+ if (flags & ~NFT_FLOWTABLE_MASK) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
- (flags & NFT_FLOWTABLE_HW_OFFLOAD))
- return -EOPNOTSUPP;
+ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
} else {
flags = flowtable->data.flags;
}
@@ -7618,6 +7622,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
{
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
+ LIST_HEAD(flowtable_del_list);
struct nft_hook *this, *hook;
struct nft_trans *trans;
int err;
@@ -7633,7 +7638,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
err = -ENOENT;
goto err_flowtable_del_hook;
}
- hook->inactive = true;
+ list_move(&hook->list, &flowtable_del_list);
}
trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
@@ -7646,6 +7651,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
nft_trans_flowtable(trans) = flowtable;
nft_trans_flowtable_update(trans) = true;
INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans));
nft_flowtable_hook_release(&flowtable_hook);
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -7653,13 +7659,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
return 0;
err_flowtable_del_hook:
- list_for_each_entry(this, &flowtable_hook.list, list) {
- hook = nft_hook_list_find(&flowtable->hook_list, this);
- if (!hook)
- break;
-
- hook->inactive = false;
- }
+ list_splice(&flowtable_del_list, &flowtable->hook_list);
nft_flowtable_hook_release(&flowtable_hook);
return err;
@@ -8329,6 +8329,9 @@ static void nft_commit_release(struct nft_trans *trans)
nf_tables_chain_destroy(&trans->ctx);
break;
case NFT_MSG_DELRULE:
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
@@ -8563,17 +8566,6 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
-static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
- struct list_head *hook_list)
-{
- struct nft_hook *hook, *next;
-
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- if (hook->inactive)
- list_move(&hook->list, hook_list);
- }
-}
-
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -8828,6 +8820,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_NEWRULE);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_DELRULE:
@@ -8918,8 +8913,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- nft_flowtable_hooks_del(nft_trans_flowtable(trans),
- &nft_trans_flowtable_hooks(trans));
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
@@ -9000,7 +8993,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- struct nft_hook *hook;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
@@ -9131,8 +9123,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
- hook->inactive = false;
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
} else {
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 2d36952b1392..910ef881c3b8 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -208,7 +208,7 @@ static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
return 0;
}
-int nft_chain_offload_priority(struct nft_base_chain *basechain)
+static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
{
if (basechain->ops.priority <= 0 ||
basechain->ops.priority > USHRT_MAX)
@@ -217,6 +217,27 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
+bool nft_chain_offload_support(const struct nft_base_chain *basechain)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+
+ if (nft_chain_offload_priority(basechain) < 0)
+ return false;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.pf != NFPROTO_NETDEV ||
+ hook->ops.hooknum != NF_NETDEV_INGRESS)
+ return false;
+
+ dev = hook->ops.dev;
+ if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
+ return false;
+ }
+
+ return true;
+}
+
static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
const struct nft_base_chain *basechain,
const struct nft_rule *rule,
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 4394df4bc99b..e5fd6995e4bf 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -335,7 +335,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr,
{
const struct nft_nat *priv = nft_expr_priv(expr);
- if (priv->family == nft_pf(pkt))
+ if (priv->family == nft_pf(pkt) ||
+ priv->family == NFPROTO_INET)
nft_nat_eval(expr, regs, pkt);
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 1b5d73079dc9..868db4669a29 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -373,6 +373,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
update_ip_l4_checksum(skb, nh, *addr, new_addr);
csum_replace4(&nh->check, *addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
*addr = new_addr;
}
@@ -420,6 +421,7 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
update_ipv6_checksum(skb, l4_proto, addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
memcpy(addr, new_addr, sizeof(__be32[4]));
}
@@ -660,6 +662,7 @@ static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
+ ovs_ct_clear(skb, NULL);
inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
@@ -699,6 +702,7 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
uh->dest = dst;
flow_key->tp.src = src;
flow_key->tp.dst = dst;
+ ovs_ct_clear(skb, NULL);
}
skb_clear_hash(skb);
@@ -761,6 +765,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
+
flow_key->tp.src = sh->source;
flow_key->tp.dst = sh->dest;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 4a947c13c813..4e70df91d0f2 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1342,7 +1342,9 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key, false);
+
+ if (key)
+ ovs_ct_fill_key(skb, key, false);
return 0;
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index df194cc07035..f87a2d8f23a7 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -919,7 +919,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
- * xdr_commit_encode - Ensure all data is written to buffer
+ * __xdr_commit_encode - Ensure all data is written to buffer
* @xdr: pointer to xdr_stream
*
* We handle encoding across page boundaries by giving the caller a
@@ -931,26 +931,29 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* required at the end of encoding, or any other time when the xdr_buf
* data might be read.
*/
-inline void xdr_commit_encode(struct xdr_stream *xdr)
+void __xdr_commit_encode(struct xdr_stream *xdr)
{
- int shift = xdr->scratch.iov_len;
+ size_t shift = xdr->scratch.iov_len;
void *page;
- if (shift == 0)
- return;
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
xdr_reset_scratch_buffer(xdr);
}
-EXPORT_SYMBOL_GPL(xdr_commit_encode);
+EXPORT_SYMBOL_GPL(__xdr_commit_encode);
-static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
- size_t nbytes)
+/*
+ * The buffer space to be reserved crosses the boundary between
+ * xdr->buf->head and xdr->buf->pages, or between two pages
+ * in xdr->buf->pages.
+ */
+static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ size_t nbytes)
{
- __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
+ void *p;
if (nbytes > PAGE_SIZE)
goto out_overflow; /* Bigger buffers require special handling */
@@ -964,6 +967,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag1bytes;
xdr->page_ptr++;
xdr->iov = NULL;
+
/*
* If the last encode didn't end exactly on a page boundary, the
* next one will straddle boundaries. Encode into the next
@@ -972,14 +976,19 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
* space at the end of the previous buffer:
*/
xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
- p = page_address(*xdr->page_ptr);
+
/*
- * Note this is where the next encode will start after we've
- * shifted this one back:
+ * xdr->p is where the next encode will start after
+ * xdr_commit_encode() has shifted this one back:
*/
- xdr->p = (void *)p + frag2bytes;
+ p = page_address(*xdr->page_ptr);
+ xdr->p = p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - nbytes >= PAGE_SIZE)
+ xdr->end = p + PAGE_SIZE;
+ else
+ xdr->end = p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5f0155fdefc7..11cf7c646644 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -478,10 +478,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int write_len;
u64 offset;
- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
- if (!seg)
+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
goto out_overflow;
+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
write_len = min(remaining, seg->rs_length - info->wi_seg_off);
if (!write_len)
goto out_overflow;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b91ddc110786..da176411c1b5 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -544,7 +544,7 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
rc = do_tls_getsockopt_conf(sk, optval, optlen,
optname == TLS_TX);
break;
- case TLS_TX_ZEROCOPY_SENDFILE:
+ case TLS_TX_ZEROCOPY_RO:
rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
break;
default:
@@ -731,7 +731,7 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
optname == TLS_TX);
release_sock(sk);
break;
- case TLS_TX_ZEROCOPY_SENDFILE:
+ case TLS_TX_ZEROCOPY_RO:
lock_sock(sk);
rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
release_sock(sk);
@@ -970,7 +970,7 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
goto nla_failure;
if (ctx->tx_conf == TLS_HW && ctx->zerocopy_sendfile) {
- err = nla_put_flag(skb, TLS_INFO_ZC_SENDFILE);
+ err = nla_put_flag(skb, TLS_INFO_ZC_RO_TX);
if (err)
goto nla_failure;
}
@@ -994,7 +994,7 @@ static size_t tls_get_info_size(const struct sock *sk)
nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */
nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */
nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */
- nla_total_size(0) + /* TLS_INFO_ZC_SENDFILE */
+ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */
0;
return size;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 654dcef7cfb3..2206e6f8902d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -490,7 +490,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
* -ECONNREFUSED. Otherwise, if we haven't queued any skbs
* to other and its full, we will hang waiting for POLLOUT.
*/
- if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
+ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
return 1;
if (connected)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index e0a4526ab66b..19ac872a6624 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -373,7 +373,8 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
goto out;
}
- nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries);
+ max_entries = xskq_cons_nb_entries(xs->tx, max_entries);
+ nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, max_entries);
if (!nb_pkts) {
xs->tx->queue_empty_descs++;
goto out;
@@ -389,7 +390,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
if (!nb_pkts)
goto out;
- xskq_cons_release_n(xs->tx, nb_pkts);
+ xskq_cons_release_n(xs->tx, max_entries);
__xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index a794410989cc..fb20bf7207cf 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -282,14 +282,6 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
return xskq_cons_read_desc(q, desc, pool);
}
-static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
- u32 max)
-{
- u32 entries = xskq_cons_nb_entries(q, max);
-
- return xskq_cons_read_desc_batch(q, pool, entries);
-}
-
/* To improve performance in the xskq_cons_release functions, only update local state here.
* Reflect this to global state when we get new entries from the ring in
* xskq_cons_get_entries() and whenever Rx or Tx processing are completed in the NAPI loop.