summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c2
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/9p/Kconfig1
-rw-r--r--net/9p/client.c49
-rw-r--r--net/9p/trans_virtio.c1
-rw-r--r--net/Kconfig3
-rw-r--r--net/appletalk/ddp.c19
-rw-r--r--net/appletalk/sysctl_net_atalk.c1
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/svc.c8
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/ax25/ax25_dev.c51
-rw-r--r--net/ax25/sysctl_net_ax25.c3
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/trace.h4
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/bluetooth/hci_conn.c150
-rw-r--r--net/bluetooth/hci_core.c173
-rw-r--r--net/bluetooth/hci_event.c242
-rw-r--r--net/bluetooth/hci_request.h4
-rw-r--r--net/bluetooth/hci_sock.c5
-rw-r--r--net/bluetooth/hci_sync.c207
-rw-r--r--net/bluetooth/iso.c155
-rw-r--r--net/bluetooth/l2cap_core.c162
-rw-r--r--net/bluetooth/l2cap_sock.c95
-rw-r--r--net/bluetooth/mgmt.c84
-rw-r--r--net/bluetooth/msft.c2
-rw-r--r--net/bluetooth/msft.h4
-rw-r--r--net/bluetooth/rfcomm/sock.c6
-rw-r--r--net/bluetooth/sco.c14
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c55
-rw-r--r--net/bpf/test_run.c8
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_forward.c9
-rw-r--r--net/bridge/br_mst.c16
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c95
-rw-r--r--net/core/dev.h1
-rw-r--r--net/core/dst_cache.c11
-rw-r--r--net/core/filter.c62
-rw-r--r--net/core/gro.c31
-rw-r--r--net/core/hotdata.c7
-rw-r--r--net/core/ieee8021q_helpers.c242
-rw-r--r--net/core/neighbour.c5
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/net_namespace.c13
-rw-r--r--net/core/netdev-genl.c25
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/page_pool.c80
-rw-r--r--net/core/rtnetlink.c153
-rw-r--r--net/core/scm.c1
-rw-r--r--net/core/skbuff.c80
-rw-r--r--net/core/skmsg.c5
-rw-r--r--net/core/sock.c6
-rw-r--r--net/core/sock_map.c263
-rw-r--r--net/core/sysctl_net_core.c20
-rw-r--r--net/dccp/ccids/ccid2.c1
-rw-r--r--net/dccp/ipv4.c10
-rw-r--r--net/dccp/ipv6.c10
-rw-r--r--net/dccp/minisocks.c3
-rw-r--r--net/dccp/sysctl.c2
-rw-r--r--net/dsa/dsa.c3
-rw-r--r--net/dsa/port.c139
-rw-r--r--net/dsa/trace.h34
-rw-r--r--net/dsa/user.c105
-rw-r--r--net/ethtool/netlink.c48
-rw-r--r--net/ethtool/netlink.h5
-rw-r--r--net/hsr/hsr_device.c65
-rw-r--r--net/hsr/hsr_device.h4
-rw-r--r--net/hsr/hsr_forward.c85
-rw-r--r--net/hsr/hsr_framereg.c52
-rw-r--r--net/hsr/hsr_framereg.h4
-rw-r--r--net/hsr/hsr_main.c2
-rw-r--r--net/hsr/hsr_main.h7
-rw-r--r--net/hsr/hsr_netlink.c30
-rw-r--r--net/hsr/hsr_slave.c1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c6
-rw-r--r--net/ieee802154/trace.h2
-rw-r--r--net/ipv4/af_inet.c62
-rw-r--r--net/ipv4/arp.c203
-rw-r--r--net/ipv4/bpf_tcp_ca.c6
-rw-r--r--net/ipv4/cipso_ipv4.c7
-rw-r--r--net/ipv4/devinet.c25
-rw-r--r--net/ipv4/esp4.c12
-rw-r--r--net/ipv4/icmp.c30
-rw-r--r--net/ipv4/inet_connection_sock.c7
-rw-r--r--net/ipv4/inet_hashtables.c3
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/ip_tunnel.c8
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c32
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c36
-rw-r--r--net/ipv4/tcp_bbr.c6
-rw-r--r--net/ipv4/tcp_cubic.c4
-rw-r--r--net/ipv4/tcp_dctcp.c17
-rw-r--r--net/ipv4/tcp_input.c9
-rw-r--r--net/ipv4/tcp_ipv4.c26
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/tcp_offload.c234
-rw-r--r--net/ipv4/tcp_output.c45
-rw-r--r--net/ipv4/tcp_timer.c9
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv4/udp_offload.c35
-rw-r--r--net/ipv4/xfrm4_input.c19
-rw-r--r--net/ipv4/xfrm4_policy.c3
-rw-r--r--net/ipv6/addrconf.c8
-rw-r--r--net/ipv6/anycast.c5
-rw-r--r--net/ipv6/esp6.c12
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/icmp.c9
-rw-r--r--net/ipv6/ila/ila_lwt.c4
-rw-r--r--net/ipv6/inet6_hashtables.c4
-rw-r--r--net/ipv6/ip6_offload.c17
-rw-r--r--net/ipv6/ip6_output.c22
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/reassembly.c4
-rw-r--r--net/ipv6/route.c38
-rw-r--r--net/ipv6/seg6.c5
-rw-r--r--net/ipv6/seg6_hmac.c42
-rw-r--r--net/ipv6/seg6_iptunnel.c11
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/tcp_ipv6.c25
-rw-r--r--net/ipv6/tcpv6_offload.c123
-rw-r--r--net/ipv6/udp.c14
-rw-r--r--net/ipv6/udp_offload.c3
-rw-r--r--net/ipv6/xfrm6_input.c26
-rw-r--r--net/ipv6/xfrm6_policy.c3
-rw-r--r--net/iucv/af_iucv.c4
-rw-r--r--net/iucv/iucv.c38
-rw-r--r--net/l2tp/l2tp_core.c81
-rw-r--r--net/l2tp/l2tp_eth.c3
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/llc/af_llc.c7
-rw-r--r--net/llc/sysctl_net_llc.c8
-rw-r--r--net/mac80211/cfg.c9
-rw-r--r--net/mac80211/he.c10
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/main.c10
-rw-r--r--net/mac80211/mesh.c1
-rw-r--r--net/mac80211/mesh_pathtbl.c13
-rw-r--r--net/mac80211/parse.c2
-rw-r--r--net/mac80211/scan.c14
-rw-r--r--net/mac80211/sta_info.c4
-rw-r--r--net/mac80211/trace.h2
-rw-r--r--net/mac80211/util.c2
-rw-r--r--net/mpls/af_mpls.c13
-rw-r--r--net/mpls/mpls_iptunnel.c4
-rw-r--r--net/mptcp/ctrl.c69
-rw-r--r--net/mptcp/mib.h2
-rw-r--r--net/mptcp/pm_netlink.c1
-rw-r--r--net/mptcp/pm_userspace.c1
-rw-r--r--net/mptcp/protocol.c24
-rw-r--r--net/mptcp/protocol.h44
-rw-r--r--net/mptcp/sched.c22
-rw-r--r--net/mptcp/sockopt.c68
-rw-r--r--net/mptcp/subflow.c61
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c36
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c16
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/netfilter/nf_conntrack_standalone.c6
-rw-r--r--net/netfilter/nf_flow_table_core.c8
-rw-r--r--net/netfilter/nf_flow_table_ip.c8
-rw-r--r--net/netfilter/nf_log.c3
-rw-r--r--net/netfilter/nf_tables_api.c16
-rw-r--r--net/netfilter/nft_chain_filter.c6
-rw-r--r--net/netfilter/nft_connlimit.c4
-rw-r--r--net/netfilter/nft_counter.c4
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_last.c4
-rw-r--r--net/netfilter/nft_limit.c14
-rw-r--r--net/netfilter/nft_quota.c4
-rw-r--r--net/netfilter/nft_rt.c4
-rw-r--r--net/netfilter/nft_set_pipapo.c258
-rw-r--r--net/netfilter/nft_set_pipapo.h2
-rw-r--r--net/netlabel/netlabel_kapi.c31
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_route.c19
-rw-r--r--net/netrom/sysctl_net_netrom.c1
-rw-r--r--net/nfc/llcp_sock.c4
-rw-r--r--net/nfc/nci/core.c17
-rw-r--r--net/nsh/nsh.c14
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/flow.c3
-rw-r--r--net/openvswitch/openvswitch_trace.h8
-rw-r--r--net/packet/af_packet.c3
-rw-r--r--net/phonet/pep.c12
-rw-r--r--net/phonet/pn_netlink.c19
-rw-r--r--net/phonet/socket.c7
-rw-r--r--net/phonet/sysctl.c1
-rw-r--r--net/qrtr/ns.c27
-rw-r--r--net/rds/ib_sysctl.c1
-rw-r--r--net/rds/sysctl.c1
-rw-r--r--net/rds/tcp.c1
-rw-r--r--net/rds/tcp_listen.c6
-rw-r--r--net/rose/af_rose.c6
-rw-r--r--net/rose/sysctl_net_rose.c1
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/call_object.c7
-rw-r--r--net/rxrpc/conn_object.c9
-rw-r--r--net/rxrpc/input.c49
-rw-r--r--net/rxrpc/insecure.c2
-rw-r--r--net/rxrpc/rxkad.c2
-rw-r--r--net/rxrpc/sysctl.c1
-rw-r--r--net/rxrpc/txbuf.c10
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_generic.c15
-rw-r--r--net/sched/sch_htb.c22
-rw-r--r--net/sched/sch_sfq.c13
-rw-r--r--net/sched/sch_teql.c4
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sctp/sm_statefuns.c1
-rw-r--r--net/sctp/socket.c15
-rw-r--r--net/sctp/sysctl.c10
-rw-r--r--net/smc/Kconfig13
-rw-r--r--net/smc/Makefile1
-rw-r--r--net/smc/af_smc.c34
-rw-r--r--net/smc/smc_cdc.c36
-rw-r--r--net/smc/smc_core.c61
-rw-r--r--net/smc/smc_core.h1
-rw-r--r--net/smc/smc_ib.c19
-rw-r--r--net/smc/smc_ism.c88
-rw-r--r--net/smc/smc_ism.h10
-rw-r--r--net/smc/smc_loopback.c427
-rw-r--r--net/smc/smc_loopback.h61
-rw-r--r--net/smc/smc_sysctl.c6
-rw-r--r--net/smc/smc_tracepoint.h4
-rw-r--r--net/socket.c17
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h6
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c10
-rw-r--r--net/sunrpc/clnt.c14
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svc_xprt.c168
-rw-r--r--net/sunrpc/sysctl.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c1
-rw-r--r--net/sunrpc/xprtrdma/transport.c1
-rw-r--r--net/sunrpc/xprtrdma/verbs.c6
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/switchdev/switchdev.c99
-rw-r--r--net/sysctl_net.c3
-rw-r--r--net/tipc/msg.c8
-rw-r--r--net/tipc/socket.c13
-rw-r--r--net/tipc/sysctl.c1
-rw-r--r--net/tipc/trace.h16
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--net/tls/tls_device_fallback.c2
-rw-r--r--net/tls/tls_main.c10
-rw-r--r--net/unix/af_unix.c51
-rw-r--r--net/unix/garbage.c37
-rw-r--r--net/unix/sysctl_net_unix.c1
-rw-r--r--net/vmw_vsock/af_vsock.c6
-rw-r--r--net/vmw_vsock/virtio_transport.c1
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/pmsr.c8
-rw-r--r--net/wireless/rdev-ops.h6
-rw-r--r--net/wireless/scan.c50
-rw-r--r--net/wireless/sysfs.c4
-rw-r--r--net/wireless/trace.h2
-rw-r--r--net/wireless/util.c7
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/sysctl_net_x25.c1
-rw-r--r--net/xdp/xsk_buff_pool.c29
-rw-r--r--net/xfrm/xfrm_compat.c7
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_input.c19
-rw-r--r--net/xfrm/xfrm_interface_core.c2
-rw-r--r--net/xfrm/xfrm_policy.c11
-rw-r--r--net/xfrm/xfrm_proc.c2
-rw-r--r--net/xfrm/xfrm_replay.c3
-rw-r--r--net/xfrm/xfrm_state.c8
-rw-r--r--net/xfrm/xfrm_sysctl.c5
-rw-r--r--net/xfrm/xfrm_user.c162
292 files changed, 4870 insertions, 2585 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f00158234505..9404dd551dfd 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
if (unlikely(!vhdr))
goto out;
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen;
+
type = vhdr->h_vlan_encapsulated_proto;
ptype = gro_find_receive_by_type(type);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 39876eff51d2..3efba4f857ac 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -149,7 +149,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
if (max_mtu < new_mtu)
return -ERANGE;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 00ebce9e5a65..bcdab9c23b40 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -5,6 +5,7 @@
menuconfig NET_9P
tristate "Plan 9 Resource Sharing Support (9P2000)"
+ select NETFS_SUPPORT
help
If you say Y here, you will get experimental support for
Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/net/9p/client.c b/net/9p/client.c
index f7e90b4769bb..00774656eeac 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -18,6 +18,7 @@
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
+#include <linux/netfs.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
@@ -1661,6 +1662,54 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
}
EXPORT_SYMBOL(p9_client_write);
+/**
+ * p9_client_write_subreq - issue a 9P TWRITE for a netfs write subrequest
+ * @subreq: write subrequest; the fid to write through is taken from the
+ *	    netfs_priv field of its parent request (subreq->rreq)
+ *
+ * Sends the not-yet-transferred remainder of @subreq: the write starts at
+ * subreq->start + subreq->transferred and covers
+ * subreq->len - subreq->transferred bytes, with the payload taken from
+ * subreq->io_iter.
+ *
+ * Nothing is returned.  Every exit path reports its outcome through
+ * netfs_write_subrequest_terminated(), passing either an error code or the
+ * byte count decoded from the RWRITE reply (clamped to the amount requested).
+ */
+void
+p9_client_write_subreq(struct netfs_io_subrequest *subreq)
+{
+	struct netfs_io_request *wreq = subreq->rreq;
+	struct p9_fid *fid = wreq->netfs_priv;
+	struct p9_client *clnt = fid->clnt;
+	struct p9_req_t *req;
+	unsigned long long start = subreq->start + subreq->transferred;
+	int written, len = subreq->len - subreq->transferred;
+	int err;
+
+	p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n",
+		 fid->fid, start, len);
+
+	/* Don't bother with zerocopy for small IO (<= 1024 bytes).
+	 *
+	 * NOTE(review): the zero-copy call is handed wreq->len (the length of
+	 * the whole parent request) while the count encoded in the message is
+	 * len (this subrequest's remainder) -- confirm that is intended.
+	 */
+	if (clnt->trans_mod->zc_request && len > 1024) {
+		req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter,
+				       0, wreq->len, P9_ZC_HDR_SZ, "dqd",
+				       fid->fid, start, len);
+	} else {
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
+				    start, len, &subreq->io_iter);
+	}
+	if (IS_ERR(req)) {
+		netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false);
+		return;
+	}
+
+	/* Decode the byte count carried in the reply. */
+	err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written);
+	if (err) {
+		trace_9p_protocol_dump(clnt, &req->rc);
+		p9_req_put(clnt, req);
+		netfs_write_subrequest_terminated(subreq, err, false);
+		return;
+	}
+
+	/* Never report more bytes written than were asked for. */
+	if (written > len) {
+		/* Both values are signed ints, so format both with %d. */
+		pr_err("bogus RWRITE count (%d > %d)\n", written, len);
+		written = len;
+	}
+
+	/* Log the count from the reply, not the count that was requested. */
+	p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
+
+	p9_req_put(clnt, req);
+	netfs_write_subrequest_terminated(subreq, written, false);
+}
+EXPORT_SYMBOL(p9_client_write_subreq);
+
struct p9_wstat *p9_client_stat(struct p9_fid *fid)
{
int err;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e305071eb7b8..0b8086f58ad5 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -781,7 +781,6 @@ static struct virtio_driver p9_virtio_drv = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = p9_virtio_probe,
.remove = p9_virtio_remove,
diff --git a/net/Kconfig b/net/Kconfig
index d5ab791f7afa..f0a8692496ff 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -452,6 +452,9 @@ config GRO_CELLS
config SOCK_VALIDATE_XMIT
bool
+config NET_IEEE8021Q_HELPERS
+ bool
+
config NET_SELFTESTS
def_tristate PHYLIB
depends on PHYLIB && INET
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 198f5ba2feae..b068651984fe 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -88,6 +88,7 @@ static inline void atalk_remove_socket(struct sock *sk)
static struct sock *atalk_search_socket(struct sockaddr_at *to,
struct atalk_iface *atif)
{
+ struct sock *def_socket = NULL;
struct sock *s;
read_lock_bh(&atalk_sockets_lock);
@@ -98,8 +99,20 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
continue;
if (to->sat_addr.s_net == ATADDR_ANYNET &&
- to->sat_addr.s_node == ATADDR_BCAST)
- goto found;
+ to->sat_addr.s_node == ATADDR_BCAST) {
+ if (atif->address.s_node == at->src_node &&
+ atif->address.s_net == at->src_net) {
+ /* This socket's address matches the address of the interface
+ * that received the packet -- use it
+ */
+ goto found;
+ }
+
+ /* Continue searching for a socket matching the interface address,
+ * but use this socket by default if no other one is found
+ */
+ def_socket = s;
+ }
if (to->sat_addr.s_net == at->src_net &&
(to->sat_addr.s_node == at->src_node ||
@@ -116,7 +129,7 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
goto found;
}
}
- s = NULL;
+ s = def_socket;
found:
read_unlock_bh(&atalk_sockets_lock);
return s;
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index d945b7c0176d..7aebfe903242 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -40,7 +40,6 @@ static struct ctl_table atalk_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
};
static struct ctl_table_header *atalk_table_header;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 362e8d25a79e..42b910cb4e8e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
if (rt->rt_gw_family == AF_INET)
daddr = &rt->rt_gw4;
else
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 36a814f1fbd1..f8137ae693b0 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -324,8 +324,8 @@ out:
return error;
}
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int svc_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
@@ -336,7 +336,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
lock_sock(sk);
- error = svc_create(sock_net(sk), newsock, 0, kern);
+ error = svc_create(sock_net(sk), newsock, 0, arg->kern);
if (error)
goto out;
@@ -355,7 +355,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
error = -sk->sk_err;
break;
}
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
error = -EAGAIN;
break;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 9169efb2f43a..8077cf2ee448 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1373,8 +1373,8 @@ out_release:
return err;
}
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int ax25_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
@@ -1409,7 +1409,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 282ec581c072..742d7c68e7e7 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -22,11 +22,12 @@
#include <net/sock.h>
#include <linux/uaccess.h>
#include <linux/fcntl.h>
+#include <linux/list.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-ax25_dev *ax25_dev_list;
+static LIST_HEAD(ax25_dev_list);
DEFINE_SPINLOCK(ax25_dev_lock);
ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
@@ -34,10 +35,11 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
ax25_dev *ax25_dev, *res = NULL;
spin_lock_bh(&ax25_dev_lock);
- for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
+ list_for_each_entry(ax25_dev, &ax25_dev_list, list)
if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
ax25_dev_hold(ax25_dev);
+ break;
}
spin_unlock_bh(&ax25_dev_lock);
@@ -59,7 +61,6 @@ void ax25_dev_device_up(struct net_device *dev)
}
refcount_set(&ax25_dev->refcount, 1);
- dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL);
ax25_dev->forward = NULL;
@@ -78,17 +79,19 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->values[AX25_VALUES_N2] = AX25_DEF_N2;
ax25_dev->values[AX25_VALUES_PACLEN] = AX25_DEF_PACLEN;
ax25_dev->values[AX25_VALUES_PROTOCOL] = AX25_DEF_PROTOCOL;
+
+#ifdef CONFIG_AX25_DAMA_SLAVE
ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT;
+#endif
#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
ax25_ds_setup_timer(ax25_dev);
#endif
spin_lock_bh(&ax25_dev_lock);
- ax25_dev->next = ax25_dev_list;
- ax25_dev_list = ax25_dev;
+ list_add(&ax25_dev->list, &ax25_dev_list);
+ dev->ax25_ptr = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -111,32 +114,19 @@ void ax25_dev_device_down(struct net_device *dev)
/*
* Remove any packet forwarding that points to this device.
*/
- for (s = ax25_dev_list; s != NULL; s = s->next)
+ list_for_each_entry(s, &ax25_dev_list, list)
if (s->forward == dev)
s->forward = NULL;
- if ((s = ax25_dev_list) == ax25_dev) {
- ax25_dev_list = s->next;
- goto unlock_put;
- }
-
- while (s != NULL && s->next != NULL) {
- if (s->next == ax25_dev) {
- s->next = ax25_dev->next;
- goto unlock_put;
+ list_for_each_entry(s, &ax25_dev_list, list) {
+ if (s == ax25_dev) {
+ list_del(&s->list);
+ break;
}
-
- s = s->next;
}
- spin_unlock_bh(&ax25_dev_lock);
- dev->ax25_ptr = NULL;
- ax25_dev_put(ax25_dev);
- return;
-unlock_put:
- spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
+ spin_unlock_bh(&ax25_dev_lock);
netdev_put(dev, &ax25_dev->dev_tracker);
ax25_dev_put(ax25_dev);
}
@@ -200,16 +190,13 @@ struct net_device *ax25_fwd_dev(struct net_device *dev)
*/
void __exit ax25_dev_free(void)
{
- ax25_dev *s, *ax25_dev;
+ ax25_dev *s, *n;
spin_lock_bh(&ax25_dev_lock);
- ax25_dev = ax25_dev_list;
- while (ax25_dev != NULL) {
- s = ax25_dev;
- netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker);
- ax25_dev = ax25_dev->next;
+ list_for_each_entry_safe(s, n, &ax25_dev_list, list) {
+ netdev_put(s->dev, &s->dev_tracker);
+ list_del(&s->list);
kfree(s);
}
- ax25_dev_list = NULL;
spin_unlock_bh(&ax25_dev_lock);
}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index e0128dc9def3..68753aa30334 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -141,8 +141,6 @@ static const struct ctl_table ax25_param_table[] = {
.extra2 = &max_ds_timeout
},
#endif
-
- { } /* that's all, folks! */
};
int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
@@ -155,6 +153,7 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
if (!table)
return -ENOMEM;
+ BUILD_BUG_ON(ARRAY_SIZE(ax25_param_table) != AX25_MAX_VALUES);
for (k = 0; k < AX25_MAX_VALUES; k++)
table[k].data = &ax25_dev->values[k];
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 89c51b3cf430..30ecbc2ef1fd 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -159,7 +159,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
bat_priv->mtu_set_by_user = new_mtu;
return 0;
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index 5dd52bc5cabb..6b816cf1a953 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -40,8 +40,8 @@ TRACE_EVENT(batadv_dbg,
),
TP_fast_assign(
- __assign_str(device, bat_priv->soft_iface->name);
- __assign_str(driver, KBUILD_MODNAME);
+ __assign_str(device);
+ __assign_str(driver);
__assign_vstr(msg, vaf->fmt, vaf->va);
),
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 27520a8a486f..50cfec8ccac4 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
struct in6_addr *daddr,
struct sk_buff *skb)
{
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int count = atomic_read(&dev->peer_count);
const struct in6_addr *nexthop;
struct lowpan_peer *peer;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 05346250f719..0c76dcde5361 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -241,13 +241,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec)
__u8 vnd_len, *vnd_data = NULL;
struct hci_op_configure_data_path *cmd = NULL;
+ /* Do not take below 2 checks as error since the 1st means user do not
+ * want to use HFP offload mode and the 2nd means the vendor controller
+ * do not need to send below HCI command for offload mode.
+ */
if (!codec->data_path || !hdev->get_codec_config_data)
return 0;
- /* Do not take me as error */
- if (!hdev->get_codec_config_data)
- return 0;
-
err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
&vnd_data);
if (err < 0)
@@ -664,11 +664,6 @@ static void le_conn_timeout(struct work_struct *work)
hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
}
-struct iso_cig_params {
- struct hci_cp_le_set_cig_params cp;
- struct hci_cis_params cis[0x1f];
-};
-
struct iso_list_data {
union {
u8 cig;
@@ -909,11 +904,37 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
{
struct hci_conn *conn;
+ switch (type) {
+ case ACL_LINK:
+ if (!hdev->acl_mtu)
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ case ISO_LINK:
+ if (hdev->iso_mtu)
+ /* Dedicated ISO Buffer exists */
+ break;
+ fallthrough;
+ case LE_LINK:
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return ERR_PTR(-ECONNREFUSED);
+ if (!hdev->le_mtu && hdev->acl_mtu < HCI_MIN_LE_MTU)
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ case SCO_LINK:
+ case ESCO_LINK:
+ if (!hdev->sco_pkts)
+ /* Controller does not support SCO or eSCO over HCI */
+ return ERR_PTR(-ECONNREFUSED);
+ break;
+ default:
+ return ERR_PTR(-ECONNREFUSED);
+ }
+
bt_dev_dbg(hdev, "dst %pMR handle 0x%4.4x", dst, handle);
conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
- return NULL;
+ return ERR_PTR(-ENOMEM);
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
@@ -944,10 +965,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
switch (type) {
case ACL_LINK:
conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
+ conn->mtu = hdev->acl_mtu;
break;
case LE_LINK:
/* conn->src should reflect the local identity address */
hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
+ conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu;
break;
case ISO_LINK:
/* conn->src should reflect the local identity address */
@@ -959,6 +982,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
else if (conn->role == HCI_ROLE_MASTER)
conn->cleanup = cis_cleanup;
+ conn->mtu = hdev->iso_mtu ? hdev->iso_mtu :
+ hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu;
break;
case SCO_LINK:
if (lmp_esco_capable(hdev))
@@ -966,9 +991,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
(hdev->esco_type & EDR_ESCO_MASK);
else
conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
+
+ conn->mtu = hdev->sco_mtu;
break;
case ESCO_LINK:
conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
+ conn->mtu = hdev->sco_mtu;
break;
}
@@ -1011,7 +1039,7 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
handle = hci_conn_hash_alloc_unset(hdev);
if (unlikely(handle < 0))
- return NULL;
+ return ERR_PTR(-ECONNREFUSED);
return hci_conn_add(hdev, type, dst, role, handle);
}
@@ -1140,8 +1168,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
list_for_each_entry(d, &hci_dev_list, list) {
if (!test_bit(HCI_UP, &d->flags) ||
- hci_dev_test_flag(d, HCI_USER_CHANNEL) ||
- d->dev_type != HCI_PRIMARY)
+ hci_dev_test_flag(d, HCI_USER_CHANNEL))
continue;
/* Simple routing:
@@ -1317,8 +1344,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
bacpy(&conn->dst, dst);
} else {
conn = hci_conn_add_unset(hdev, LE_LINK, dst, role);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
hci_conn_hold(conn);
conn->pending_sec_level = sec_level;
}
@@ -1494,8 +1521,8 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EADDRINUSE);
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
conn->state = BT_CONNECT;
@@ -1538,8 +1565,8 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
BT_DBG("requesting refresh of dst_addr");
conn = hci_conn_add_unset(hdev, LE_LINK, dst, HCI_ROLE_MASTER);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) {
hci_conn_del(conn);
@@ -1586,8 +1613,8 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
acl = hci_conn_add_unset(hdev, ACL_LINK, dst, HCI_ROLE_MASTER);
- if (!acl)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(acl))
+ return acl;
}
hci_conn_hold(acl);
@@ -1655,9 +1682,9 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
if (!sco) {
sco = hci_conn_add_unset(hdev, type, dst, HCI_ROLE_MASTER);
- if (!sco) {
+ if (IS_ERR(sco)) {
hci_conn_drop(acl);
- return ERR_PTR(-ENOMEM);
+ return sco;
}
}
@@ -1722,34 +1749,33 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
static int set_cig_params_sync(struct hci_dev *hdev, void *data)
{
+ DEFINE_FLEX(struct hci_cp_le_set_cig_params, pdu, cis, num_cis, 0x1f);
u8 cig_id = PTR_UINT(data);
struct hci_conn *conn;
struct bt_iso_qos *qos;
- struct iso_cig_params pdu;
+ u8 aux_num_cis = 0;
u8 cis_id;
conn = hci_conn_hash_lookup_cig(hdev, cig_id);
if (!conn)
return 0;
- memset(&pdu, 0, sizeof(pdu));
-
qos = &conn->iso_qos;
- pdu.cp.cig_id = cig_id;
- hci_cpu_to_le24(qos->ucast.out.interval, pdu.cp.c_interval);
- hci_cpu_to_le24(qos->ucast.in.interval, pdu.cp.p_interval);
- pdu.cp.sca = qos->ucast.sca;
- pdu.cp.packing = qos->ucast.packing;
- pdu.cp.framing = qos->ucast.framing;
- pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
- pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
+ pdu->cig_id = cig_id;
+ hci_cpu_to_le24(qos->ucast.out.interval, pdu->c_interval);
+ hci_cpu_to_le24(qos->ucast.in.interval, pdu->p_interval);
+ pdu->sca = qos->ucast.sca;
+ pdu->packing = qos->ucast.packing;
+ pdu->framing = qos->ucast.framing;
+ pdu->c_latency = cpu_to_le16(qos->ucast.out.latency);
+ pdu->p_latency = cpu_to_le16(qos->ucast.in.latency);
/* Reprogram all CIS(s) with the same CIG, valid range are:
* num_cis: 0x00 to 0x1F
* cis_id: 0x00 to 0xEF
*/
for (cis_id = 0x00; cis_id < 0xf0 &&
- pdu.cp.num_cis < ARRAY_SIZE(pdu.cis); cis_id++) {
+ aux_num_cis < pdu->num_cis; cis_id++) {
struct hci_cis_params *cis;
conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id);
@@ -1758,7 +1784,7 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
qos = &conn->iso_qos;
- cis = &pdu.cis[pdu.cp.num_cis++];
+ cis = &pdu->cis[aux_num_cis++];
cis->cis_id = cis_id;
cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu);
cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu);
@@ -1769,14 +1795,14 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data)
cis->c_rtn = qos->ucast.out.rtn;
cis->p_rtn = qos->ucast.in.rtn;
}
+ pdu->num_cis = aux_num_cis;
- if (!pdu.cp.num_cis)
+ if (!pdu->num_cis)
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS,
- sizeof(pdu.cp) +
- pdu.cp.num_cis * sizeof(pdu.cis[0]), &pdu,
- HCI_CMD_TIMEOUT);
+ struct_size(pdu, cis, pdu->num_cis),
+ pdu, HCI_CMD_TIMEOUT);
}
static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
@@ -1847,8 +1873,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
qos->ucast.cis);
if (!cis) {
cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
- if (!cis)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(cis))
+ return cis;
cis->cleanup = cis_cleanup;
cis->dst_type = dst_type;
cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET;
@@ -1983,14 +2009,8 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
struct bt_iso_io_qos *qos, __u8 phy)
{
/* Only set MTU if PHY is enabled */
- if (!qos->sdu && qos->phy) {
- if (hdev->iso_mtu > 0)
- qos->sdu = hdev->iso_mtu;
- else if (hdev->le_mtu > 0)
- qos->sdu = hdev->le_mtu;
- else
- qos->sdu = hdev->acl_mtu;
- }
+ if (!qos->sdu && qos->phy)
+ qos->sdu = conn->mtu;
/* Use the same PHY as ACL if set to any */
if (qos->phy == BT_ISO_PHY_ANY)
@@ -2071,8 +2091,8 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EBUSY);
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE);
- if (!conn)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(conn))
+ return conn;
conn->iso_qos = *qos;
conn->state = BT_LISTEN;
@@ -2109,13 +2129,10 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[])
{
- struct _packed {
- struct hci_cp_le_big_create_sync cp;
- __u8 bis[0x11];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
int err;
- if (num_bis < 0x01 || num_bis > sizeof(pdu.bis))
+ if (num_bis < 0x01 || num_bis > pdu->num_bis)
return -EINVAL;
err = qos_set_big(hdev, qos);
@@ -2125,18 +2142,17 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
if (hcon)
hcon->iso_qos.bcast.big = qos->bcast.big;
- memset(&pdu, 0, sizeof(pdu));
- pdu.cp.handle = qos->bcast.big;
- pdu.cp.sync_handle = cpu_to_le16(sync_handle);
- pdu.cp.encryption = qos->bcast.encryption;
- memcpy(pdu.cp.bcode, qos->bcast.bcode, sizeof(pdu.cp.bcode));
- pdu.cp.mse = qos->bcast.mse;
- pdu.cp.timeout = cpu_to_le16(qos->bcast.timeout);
- pdu.cp.num_bis = num_bis;
- memcpy(pdu.bis, bis, num_bis);
+ pdu->handle = qos->bcast.big;
+ pdu->sync_handle = cpu_to_le16(sync_handle);
+ pdu->encryption = qos->bcast.encryption;
+ memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode));
+ pdu->mse = qos->bcast.mse;
+ pdu->timeout = cpu_to_le16(qos->bcast.timeout);
+ pdu->num_bis = num_bis;
+ memcpy(pdu->bis, bis, num_bis);
return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
- sizeof(pdu.cp) + num_bis, &pdu);
+ struct_size(pdu, bis, num_bis), pdu);
}
static void create_big_complete(struct hci_dev *hdev, void *data, int err)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a7028d38c1f5..dd3b0f501018 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -149,8 +149,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
{
int old_state = hdev->discovery.state;
- BT_DBG("%s state %u -> %u", hdev->name, hdev->discovery.state, state);
-
if (old_state == state)
return;
@@ -166,6 +164,13 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
case DISCOVERY_STARTING:
break;
case DISCOVERY_FINDING:
+ /* If discovery was not started then it was initiated by the
+ * MGMT interface so no MGMT event shall be generated either
+ */
+ if (old_state != DISCOVERY_STARTING) {
+ hdev->discovery.state = old_state;
+ return;
+ }
mgmt_discovering(hdev, 1);
break;
case DISCOVERY_RESOLVING:
@@ -173,6 +178,8 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
case DISCOVERY_STOPPING:
break;
}
+
+ bt_dev_dbg(hdev, "state %u -> %u", old_state, state);
}
void hci_inquiry_cache_flush(struct hci_dev *hdev)
@@ -395,11 +402,6 @@ int hci_inquiry(void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_PRIMARY) {
- err = -EOPNOTSUPP;
- goto done;
- }
-
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
err = -EOPNOTSUPP;
goto done;
@@ -752,11 +754,6 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
goto done;
}
- if (hdev->dev_type != HCI_PRIMARY) {
- err = -EOPNOTSUPP;
- goto done;
- }
-
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
err = -EOPNOTSUPP;
goto done;
@@ -910,7 +907,7 @@ int hci_get_dev_info(void __user *arg)
strscpy(di.name, hdev->name, sizeof(di.name));
di.bdaddr = hdev->bdaddr;
- di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4);
+ di.type = (hdev->bus & 0x0f);
di.flags = flags;
di.pkt_type = hdev->pkt_type;
if (lmp_bredr_capable(hdev)) {
@@ -1026,8 +1023,7 @@ static void hci_power_on(struct work_struct *work)
*/
if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
- (hdev->dev_type == HCI_PRIMARY &&
- !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
+ (!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
hci_dev_do_close(hdev);
@@ -1769,6 +1765,15 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
adv->pending = true;
adv->instance = instance;
+
+ /* If the controller supports only one set and the instance is set to
+ * 1 then there is no option other than using handle 0x00.
+ */
+ if (hdev->le_num_of_adv_sets == 1 && instance == 1)
+ adv->handle = 0x00;
+ else
+ adv->handle = instance;
+
list_add(&adv->list, &hdev->adv_instances);
hdev->adv_instance_cnt++;
}
@@ -2523,16 +2528,16 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
hdev->le_adv_channel_map = 0x07;
hdev->le_adv_min_interval = 0x0800;
hdev->le_adv_max_interval = 0x0800;
- hdev->le_scan_interval = 0x0060;
- hdev->le_scan_window = 0x0030;
- hdev->le_scan_int_suspend = 0x0400;
- hdev->le_scan_window_suspend = 0x0012;
+ hdev->le_scan_interval = DISCOV_LE_SCAN_INT_FAST;
+ hdev->le_scan_window = DISCOV_LE_SCAN_WIN_FAST;
+ hdev->le_scan_int_suspend = DISCOV_LE_SCAN_INT_SLOW1;
+ hdev->le_scan_window_suspend = DISCOV_LE_SCAN_WIN_SLOW1;
hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT;
hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN;
- hdev->le_scan_int_adv_monitor = 0x0060;
- hdev->le_scan_window_adv_monitor = 0x0030;
- hdev->le_scan_int_connect = 0x0060;
- hdev->le_scan_window_connect = 0x0060;
+ hdev->le_scan_int_adv_monitor = DISCOV_LE_SCAN_INT_FAST;
+ hdev->le_scan_window_adv_monitor = DISCOV_LE_SCAN_WIN_FAST;
+ hdev->le_scan_int_connect = DISCOV_LE_SCAN_INT_CONN;
+ hdev->le_scan_window_connect = DISCOV_LE_SCAN_WIN_CONN;
hdev->le_conn_min_interval = 0x0018;
hdev->le_conn_max_interval = 0x0028;
hdev->le_conn_latency = 0x0000;
@@ -2549,7 +2554,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES;
hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION;
- hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT;
+ hdev->def_le_autoconnect_timeout = HCI_LE_CONN_TIMEOUT;
hdev->min_le_tx_power = HCI_TX_POWER_INVALID;
hdev->max_le_tx_power = HCI_TX_POWER_INVALID;
@@ -2635,21 +2640,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (!hdev->open || !hdev->close || !hdev->send)
return -EINVAL;
- /* Do not allow HCI_AMP devices to register at index 0,
- * so the index can be used as the AMP controller ID.
- */
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL);
- break;
- case HCI_AMP:
- id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1,
- GFP_KERNEL);
- break;
- default:
- return -EINVAL;
- }
-
+ id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL);
if (id < 0)
return id;
@@ -2701,12 +2692,10 @@ int hci_register_dev(struct hci_dev *hdev)
hci_dev_set_flag(hdev, HCI_SETUP);
hci_dev_set_flag(hdev, HCI_AUTO_OFF);
- if (hdev->dev_type == HCI_PRIMARY) {
- /* Assume BR/EDR support until proven otherwise (such as
- * through reading supported features during init.
- */
- hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
- }
+ /* Assume BR/EDR support until proven otherwise (such as
+ * through reading supported features during init).
+ */
+ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
write_lock(&hci_dev_list_lock);
list_add(&hdev->list, &hci_dev_list);
@@ -2768,8 +2757,6 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_unregister_suspend_notifier(hdev);
- msft_unregister(hdev);
-
hci_dev_do_close(hdev);
if (!test_bit(HCI_INIT, &hdev->flags) &&
@@ -2823,6 +2810,7 @@ void hci_release_dev(struct hci_dev *hdev)
hci_discovery_filter_clear(hdev);
hci_blocked_keys_clear(hdev);
hci_codec_list_clear(&hdev->local_codecs);
+ msft_release(hdev);
hci_dev_unlock(hdev);
ida_destroy(&hdev->unset_handle_ida);
@@ -3243,17 +3231,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- hci_add_acl_hdr(skb, conn->handle, flags);
- break;
- case HCI_AMP:
- hci_add_acl_hdr(skb, chan->handle, flags);
- break;
- default:
- bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
- return;
- }
+ hci_add_acl_hdr(skb, conn->handle, flags);
list = skb_shinfo(skb)->frag_list;
if (!list) {
@@ -3413,9 +3391,6 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote)
case ACL_LINK:
cnt = hdev->acl_cnt;
break;
- case AMP_LINK:
- cnt = hdev->block_cnt;
- break;
case SCO_LINK:
case ESCO_LINK:
cnt = hdev->sco_cnt;
@@ -3613,12 +3588,6 @@ static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type)
}
-static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb)
-{
- /* Calculate count of blocks used by this packet */
- return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len);
-}
-
static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type)
{
unsigned long last_tx;
@@ -3732,81 +3701,15 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
hci_prio_recalculate(hdev, ACL_LINK);
}
-static void hci_sched_acl_blk(struct hci_dev *hdev)
-{
- unsigned int cnt = hdev->block_cnt;
- struct hci_chan *chan;
- struct sk_buff *skb;
- int quote;
- u8 type;
-
- BT_DBG("%s", hdev->name);
-
- if (hdev->dev_type == HCI_AMP)
- type = AMP_LINK;
- else
- type = ACL_LINK;
-
- __check_timeout(hdev, cnt, type);
-
- while (hdev->block_cnt > 0 &&
- (chan = hci_chan_sent(hdev, type, &quote))) {
- u32 priority = (skb_peek(&chan->data_q))->priority;
- while (quote > 0 && (skb = skb_peek(&chan->data_q))) {
- int blocks;
-
- BT_DBG("chan %p skb %p len %d priority %u", chan, skb,
- skb->len, skb->priority);
-
- /* Stop if priority has changed */
- if (skb->priority < priority)
- break;
-
- skb = skb_dequeue(&chan->data_q);
-
- blocks = __get_blocks(hdev, skb);
- if (blocks > hdev->block_cnt)
- return;
-
- hci_conn_enter_active_mode(chan->conn,
- bt_cb(skb)->force_active);
-
- hci_send_frame(hdev, skb);
- hdev->acl_last_tx = jiffies;
-
- hdev->block_cnt -= blocks;
- quote -= blocks;
-
- chan->sent += blocks;
- chan->conn->sent += blocks;
- }
- }
-
- if (cnt != hdev->block_cnt)
- hci_prio_recalculate(hdev, type);
-}
-
static void hci_sched_acl(struct hci_dev *hdev)
{
BT_DBG("%s", hdev->name);
/* No ACL link over BR/EDR controller */
- if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY)
- return;
-
- /* No AMP link over AMP controller */
- if (!hci_conn_num(hdev, AMP_LINK) && hdev->dev_type == HCI_AMP)
+ if (!hci_conn_num(hdev, ACL_LINK))
return;
- switch (hdev->flow_ctl_mode) {
- case HCI_FLOW_CTL_MODE_PACKET_BASED:
- hci_sched_acl_pkt(hdev);
- break;
-
- case HCI_FLOW_CTL_MODE_BLOCK_BASED:
- hci_sched_acl_blk(hdev);
- break;
- }
+ hci_sched_acl_pkt(hdev);
}
static void hci_sched_le(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4a27e4a17a67..a487f9df8145 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -913,21 +913,6 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_read_flow_control_mode(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_flow_control_mode *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->flow_ctl_mode = rp->mode;
-
- return rp->status;
-}
-
static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -954,6 +939,9 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
BT_DBG("%s acl mtu %d:%d sco mtu %d:%d", hdev->name, hdev->acl_mtu,
hdev->acl_pkts, hdev->sco_mtu, hdev->sco_pkts);
+ if (!hdev->acl_mtu || !hdev->acl_pkts)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -1068,28 +1056,6 @@ static u8 hci_cc_write_page_scan_type(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_data_block_size *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->block_mtu = __le16_to_cpu(rp->max_acl_len);
- hdev->block_len = __le16_to_cpu(rp->block_len);
- hdev->num_blocks = __le16_to_cpu(rp->num_blocks);
-
- hdev->block_cnt = hdev->num_blocks;
-
- BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,
- hdev->block_cnt, hdev->block_len);
-
- return rp->status;
-}
-
static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -1124,30 +1090,6 @@ unlock:
return rp->status;
}
-static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_read_local_amp_info *rp = data;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- hdev->amp_status = rp->amp_status;
- hdev->amp_total_bw = __le32_to_cpu(rp->total_bw);
- hdev->amp_max_bw = __le32_to_cpu(rp->max_bw);
- hdev->amp_min_latency = __le32_to_cpu(rp->min_latency);
- hdev->amp_max_pdu = __le32_to_cpu(rp->max_pdu);
- hdev->amp_type = rp->amp_type;
- hdev->amp_pal_cap = __le16_to_cpu(rp->pal_cap);
- hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size);
- hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
- hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
-
- return rp->status;
-}
-
static u8 hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -1263,6 +1205,9 @@ static u8 hci_cc_le_read_buffer_size(struct hci_dev *hdev, void *data,
BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -1779,8 +1724,7 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable)
hci_dev_set_flag(hdev, HCI_LE_SCAN);
if (hdev->le_scan_type == LE_SCAN_ACTIVE)
clear_pending_adv_report(hdev);
- if (hci_dev_test_flag(hdev, HCI_MESH))
- hci_discovery_set_state(hdev, DISCOVERY_FINDING);
+ hci_discovery_set_state(hdev, DISCOVERY_FINDING);
break;
case LE_SCAN_DISABLE:
@@ -2342,8 +2286,8 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
if (!conn) {
conn = hci_conn_add_unset(hdev, ACL_LINK, &cp->bdaddr,
HCI_ROLE_MASTER);
- if (!conn)
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn))
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
}
}
@@ -3154,8 +3098,8 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
BDADDR_BREDR)) {
conn = hci_conn_add_unset(hdev, ev->link_type,
&ev->bdaddr, HCI_ROLE_SLAVE);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new conn");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
} else {
@@ -3343,8 +3287,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
if (!conn) {
conn = hci_conn_add_unset(hdev, ev->link_type, &ev->bdaddr,
HCI_ROLE_SLAVE);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
}
@@ -3821,6 +3765,9 @@ static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data,
BT_DBG("%s acl mtu %d:%d iso mtu %d:%d", hdev->name, hdev->acl_mtu,
hdev->acl_pkts, hdev->iso_mtu, hdev->iso_pkts);
+ if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU)
+ return HCI_ERROR_INVALID_PARAMETERS;
+
return rp->status;
}
@@ -4112,12 +4059,6 @@ static const struct hci_cc {
HCI_CC(HCI_OP_READ_PAGE_SCAN_TYPE, hci_cc_read_page_scan_type,
sizeof(struct hci_rp_read_page_scan_type)),
HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_TYPE, hci_cc_write_page_scan_type),
- HCI_CC(HCI_OP_READ_DATA_BLOCK_SIZE, hci_cc_read_data_block_size,
- sizeof(struct hci_rp_read_data_block_size)),
- HCI_CC(HCI_OP_READ_FLOW_CONTROL_MODE, hci_cc_read_flow_control_mode,
- sizeof(struct hci_rp_read_flow_control_mode)),
- HCI_CC(HCI_OP_READ_LOCAL_AMP_INFO, hci_cc_read_local_amp_info,
- sizeof(struct hci_rp_read_local_amp_info)),
HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock,
sizeof(struct hci_rp_read_clock)),
HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size,
@@ -4308,7 +4249,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
hci_dev_lock(hdev);
/* Remove connection if command failed */
- for (i = 0; cp->num_cis; cp->num_cis--, i++) {
+ for (i = 0; i < cp->num_cis; i++) {
struct hci_conn *conn;
u16 handle;
@@ -4324,6 +4265,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
hci_conn_del(conn);
}
}
+ cp->num_cis = 0;
if (pending)
hci_le_create_cis_pending(hdev);
@@ -4452,11 +4394,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
flex_array_size(ev, handles, ev->num)))
return;
- if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
- bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
- return;
- }
-
bt_dev_dbg(hdev, "num %d", ev->num);
for (i = 0; i < ev->num; i++) {
@@ -4524,78 +4461,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
queue_work(hdev->workqueue, &hdev->tx_work);
}
-static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
- __u16 handle)
-{
- struct hci_chan *chan;
-
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- return hci_conn_hash_lookup_handle(hdev, handle);
- case HCI_AMP:
- chan = hci_chan_lookup_handle(hdev, handle);
- if (chan)
- return chan->conn;
- break;
- default:
- bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
- break;
- }
-
- return NULL;
-}
-
-static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_ev_num_comp_blocks *ev = data;
- int i;
-
- if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_BLOCKS,
- flex_array_size(ev, handles, ev->num_hndl)))
- return;
-
- if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
- bt_dev_err(hdev, "wrong event for mode %d",
- hdev->flow_ctl_mode);
- return;
- }
-
- bt_dev_dbg(hdev, "num_blocks %d num_hndl %d", ev->num_blocks,
- ev->num_hndl);
-
- for (i = 0; i < ev->num_hndl; i++) {
- struct hci_comp_blocks_info *info = &ev->handles[i];
- struct hci_conn *conn = NULL;
- __u16 handle, block_count;
-
- handle = __le16_to_cpu(info->handle);
- block_count = __le16_to_cpu(info->blocks);
-
- conn = __hci_conn_lookup_handle(hdev, handle);
- if (!conn)
- continue;
-
- conn->sent -= block_count;
-
- switch (conn->type) {
- case ACL_LINK:
- case AMP_LINK:
- hdev->block_cnt += block_count;
- if (hdev->block_cnt > hdev->num_blocks)
- hdev->block_cnt = hdev->num_blocks;
- break;
-
- default:
- bt_dev_err(hdev, "unknown type %d conn %p",
- conn->type, conn);
- break;
- }
- }
-
- queue_work(hdev->workqueue, &hdev->tx_work);
-}
-
static void hci_mode_change_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -5768,8 +5633,8 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
goto unlock;
conn = hci_conn_add_unset(hdev, LE_LINK, bdaddr, role);
- if (!conn) {
- bt_dev_err(hdev, "no memory for new connection");
+ if (IS_ERR(conn)) {
+ bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn));
goto unlock;
}
@@ -6493,14 +6358,16 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
if (!(flags & HCI_PROTO_DEFER))
goto unlock;
- if (ev->status) {
- /* Add connection to indicate the failed PA sync event */
- pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
+ /* Add connection to indicate PA sync event */
+ pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
- if (!pa_sync)
- goto unlock;
+ if (IS_ERR(pa_sync))
+ goto unlock;
+
+ pa_sync->sync_handle = le16_to_cpu(ev->handle);
+ if (ev->status) {
set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags);
/* Notify iso layer */
@@ -6517,6 +6384,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_per_adv_report *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
+ struct hci_conn *pa_sync;
bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle));
@@ -6524,8 +6392,28 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
if (!(mask & HCI_LM_ACCEPT))
- hci_le_pa_term_sync(hdev, ev->sync_handle);
+ goto unlock;
+
+ if (!(flags & HCI_PROTO_DEFER))
+ goto unlock;
+
+ pa_sync = hci_conn_hash_lookup_pa_sync_handle
+ (hdev,
+ le16_to_cpu(ev->sync_handle));
+
+ if (!pa_sync)
+ goto unlock;
+
+ if (ev->data_status == LE_PA_DATA_COMPLETE &&
+ !test_and_set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags)) {
+ /* Notify iso layer */
+ hci_connect_cfm(pa_sync, 0);
+ /* Notify MGMT layer */
+ mgmt_device_connected(hdev, pa_sync, NULL, 0);
+ }
+
+unlock:
hci_dev_unlock(hdev);
}
@@ -6898,7 +6786,7 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data,
if (!cis) {
cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE,
cis_handle);
- if (!cis) {
+ if (IS_ERR(cis)) {
hci_le_reject_cis(hdev, ev->cis_handle);
goto unlock;
}
@@ -7007,7 +6895,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
if (!bis) {
bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
HCI_ROLE_SLAVE, handle);
- if (!bis)
+ if (IS_ERR(bis))
continue;
}
@@ -7037,6 +6925,8 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
u16 handle = le16_to_cpu(ev->bis[i]);
bis = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!bis)
+ continue;
set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags);
hci_connect_cfm(bis, ev->status);
@@ -7058,10 +6948,8 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
- if (!(mask & HCI_LM_ACCEPT)) {
- hci_le_pa_term_sync(hdev, ev->sync_handle);
+ if (!(mask & HCI_LM_ACCEPT))
goto unlock;
- }
if (!(flags & HCI_PROTO_DEFER))
goto unlock;
@@ -7070,24 +6958,11 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
(hdev,
le16_to_cpu(ev->sync_handle));
- if (pa_sync)
- goto unlock;
-
- /* Add connection to indicate the PA sync event */
- pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
-
if (!pa_sync)
goto unlock;
- pa_sync->sync_handle = le16_to_cpu(ev->sync_handle);
- set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags);
-
/* Notify iso layer */
- hci_connect_cfm(pa_sync, 0x00);
-
- /* Notify MGMT layer */
- mgmt_device_connected(hdev, pa_sync, NULL, 0);
+ hci_connect_cfm(pa_sync, 0);
unlock:
hci_dev_unlock(hdev);
@@ -7501,9 +7376,6 @@ static const struct hci_ev {
/* [0x3e = HCI_EV_LE_META] */
HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt,
sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE),
- /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */
- HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
- sizeof(struct hci_ev_num_comp_blocks)),
/* [0xff = HCI_EV_VENDOR] */
HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE),
};
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 0be75cf0efed..c91f2838f542 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -29,10 +29,6 @@
#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock)
#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
-#define HCI_REQ_DONE 0
-#define HCI_REQ_PEND 1
-#define HCI_REQ_CANCELED 2
-
struct hci_request {
struct hci_dev *hdev;
struct sk_buff_head cmd_q;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 703b84bd48d5..69c2ba1e843e 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -485,7 +485,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
return NULL;
ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
- ni->type = hdev->dev_type;
+ ni->type = 0x00; /* Old hdev->dev_type */
ni->bus = hdev->bus;
bacpy(&ni->bdaddr, &hdev->bdaddr);
memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name,
@@ -1007,9 +1007,6 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
return -EOPNOTSUPP;
- if (hdev->dev_type != HCI_PRIMARY)
- return -EOPNOTSUPP;
-
switch (cmd) {
case HCISETRAW:
if (!capable(CAP_NET_ADMIN))
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 4c707eb64e6f..16daa79b7981 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1043,11 +1043,10 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
struct hci_cp_ext_adv_set *set;
u8 data[sizeof(*cp) + sizeof(*set) * 1];
u8 size;
+ struct adv_info *adv = NULL;
/* If request specifies an instance that doesn't exist, fail */
if (instance > 0) {
- struct adv_info *adv;
-
adv = hci_find_adv_instance(hdev, instance);
if (!adv)
return -EINVAL;
@@ -1066,7 +1065,7 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp->num_of_sets = !!instance;
cp->enable = 0x00;
- set->handle = instance;
+ set->handle = adv ? adv->handle : instance;
size = sizeof(*cp) + sizeof(*set) * cp->num_of_sets;
@@ -1235,31 +1234,27 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_ext_scan_rsp_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_ext_scan_rsp_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
u8 len;
struct adv_info *adv = NULL;
int err;
- memset(&pdu, 0, sizeof(pdu));
-
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->scan_rsp_changed)
return 0;
}
- len = eir_create_scan_rsp(hdev, instance, pdu.data);
+ len = eir_create_scan_rsp(hdev, instance, pdu->data);
- pdu.cp.handle = instance;
- pdu.cp.length = len;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->length = len;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
if (err)
return err;
@@ -1267,7 +1262,7 @@ static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
if (adv) {
adv->scan_rsp_changed = false;
} else {
- memcpy(hdev->scan_rsp_data, pdu.data, len);
+ memcpy(hdev->scan_rsp_data, pdu->data, len);
hdev->scan_rsp_data_len = len;
}
@@ -1335,7 +1330,7 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance)
memset(set, 0, sizeof(*set));
- set->handle = instance;
+ set->handle = adv ? adv->handle : instance;
/* Set duration per instance since controller is responsible for
* scheduling it.
@@ -1411,29 +1406,25 @@ static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance,
static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_per_adv_data cp;
- u8 data[HCI_MAX_PER_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_per_adv_data, pdu, data, length,
+ HCI_MAX_PER_AD_LENGTH);
u8 len;
-
- memset(&pdu, 0, sizeof(pdu));
+ struct adv_info *adv = NULL;
if (instance) {
- struct adv_info *adv = hci_find_adv_instance(hdev, instance);
-
+ adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->periodic)
return 0;
}
- len = eir_create_per_adv_data(hdev, instance, pdu.data);
+ len = eir_create_per_adv_data(hdev, instance, pdu->data);
- pdu.cp.length = len;
- pdu.cp.handle = instance;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
}
@@ -1727,31 +1718,27 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
{
- struct {
- struct hci_cp_le_set_ext_adv_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
+ DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
u8 len;
struct adv_info *adv = NULL;
int err;
- memset(&pdu, 0, sizeof(pdu));
-
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
if (!adv || !adv->adv_data_changed)
return 0;
}
- len = eir_create_adv_data(hdev, instance, pdu.data);
+ len = eir_create_adv_data(hdev, instance, pdu->data);
- pdu.cp.length = len;
- pdu.cp.handle = instance;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
+ struct_size(pdu, data, len), pdu,
HCI_CMD_TIMEOUT);
if (err)
return err;
@@ -1760,7 +1747,7 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
if (adv) {
adv->adv_data_changed = false;
} else {
- memcpy(hdev->adv_data, pdu.data, len);
+ memcpy(hdev->adv_data, pdu->data, len);
hdev->adv_data_len = len;
}
@@ -3523,10 +3510,6 @@ static int hci_unconf_init_sync(struct hci_dev *hdev)
/* Read Local Supported Features. */
static int hci_read_local_features_sync(struct hci_dev *hdev)
{
- /* Not all AMP controllers support this command */
- if (hdev->dev_type == HCI_AMP && !(hdev->commands[14] & 0x20))
- return 0;
-
return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES,
0, NULL, HCI_CMD_TIMEOUT);
}
@@ -3561,51 +3544,6 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev)
return 0;
}
-/* Read Local AMP Info */
-static int hci_read_local_amp_info_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_AMP_INFO,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Data Blk size */
-static int hci_read_data_block_size_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_DATA_BLOCK_SIZE,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Flow Control Mode */
-static int hci_read_flow_control_mode_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_FLOW_CONTROL_MODE,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* Read Location Data */
-static int hci_read_location_data_sync(struct hci_dev *hdev)
-{
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCATION_DATA,
- 0, NULL, HCI_CMD_TIMEOUT);
-}
-
-/* AMP Controller init stage 1 command sequence */
-static const struct hci_init_stage amp_init1[] = {
- /* HCI_OP_READ_LOCAL_VERSION */
- HCI_INIT(hci_read_local_version_sync),
- /* HCI_OP_READ_LOCAL_COMMANDS */
- HCI_INIT(hci_read_local_cmds_sync),
- /* HCI_OP_READ_LOCAL_AMP_INFO */
- HCI_INIT(hci_read_local_amp_info_sync),
- /* HCI_OP_READ_DATA_BLOCK_SIZE */
- HCI_INIT(hci_read_data_block_size_sync),
- /* HCI_OP_READ_FLOW_CONTROL_MODE */
- HCI_INIT(hci_read_flow_control_mode_sync),
- /* HCI_OP_READ_LOCATION_DATA */
- HCI_INIT(hci_read_location_data_sync),
- {}
-};
-
static int hci_init1_sync(struct hci_dev *hdev)
{
int err;
@@ -3619,28 +3557,9 @@ static int hci_init1_sync(struct hci_dev *hdev)
return err;
}
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
- return hci_init_stage_sync(hdev, br_init1);
- case HCI_AMP:
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
- return hci_init_stage_sync(hdev, amp_init1);
- default:
- bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
- break;
- }
-
- return 0;
+ return hci_init_stage_sync(hdev, br_init1);
}
-/* AMP Controller init stage 2 command sequence */
-static const struct hci_init_stage amp_init2[] = {
- /* HCI_OP_READ_LOCAL_FEATURES */
- HCI_INIT(hci_read_local_features_sync),
- {}
-};
-
/* Read Buffer Size (ACL mtu, max pkt, etc.) */
static int hci_read_buffer_size_sync(struct hci_dev *hdev)
{
@@ -3898,9 +3817,6 @@ static int hci_init2_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
- if (hdev->dev_type == HCI_AMP)
- return hci_init_stage_sync(hdev, amp_init2);
-
err = hci_init_stage_sync(hdev, hci_init2);
if (err)
return err;
@@ -4728,13 +4644,6 @@ static int hci_init_sync(struct hci_dev *hdev)
if (err < 0)
return err;
- /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode
- * BR/EDR/LE type controllers. AMP controllers only need the
- * first two stages of init.
- */
- if (hdev->dev_type != HCI_PRIMARY)
- return 0;
-
err = hci_init3_sync(hdev);
if (err < 0)
return err;
@@ -4963,12 +4872,8 @@ int hci_dev_open_sync(struct hci_dev *hdev)
* In case of user channel usage, it is not important
* if a public address or static random address is
* available.
- *
- * This check is only valid for BR/EDR controllers
- * since AMP controllers do not have an address.
*/
if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hdev->dev_type == HCI_PRIMARY &&
!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY)) {
ret = -EADDRNOTAVAIL;
@@ -5003,8 +4908,7 @@ int hci_dev_open_sync(struct hci_dev *hdev)
!hci_dev_test_flag(hdev, HCI_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hci_dev_test_flag(hdev, HCI_MGMT) &&
- hdev->dev_type == HCI_PRIMARY) {
+ hci_dev_test_flag(hdev, HCI_MGMT)) {
ret = hci_powered_update_sync(hdev);
mgmt_power_on(hdev, ret);
}
@@ -5149,8 +5053,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
- if (!auto_off && hdev->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ if (!auto_off && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
hci_dev_test_flag(hdev, HCI_MGMT))
__mgmt_power_off(hdev);
@@ -5212,9 +5115,6 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hdev->flags &= BIT(HCI_RAW);
hci_dev_clear_volatile_flags(hdev);
- /* Controller radio is available but is currently powered down */
- hdev->amp_status = AMP_STATUS_POWERED_DOWN;
-
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
@@ -5251,8 +5151,7 @@ static int hci_power_on_sync(struct hci_dev *hdev)
*/
if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
- (hdev->dev_type == HCI_PRIMARY &&
- !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
+ (!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
hci_dev_close_sync(hdev);
@@ -5354,27 +5253,11 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
return 0;
}
-static int hci_disconnect_phy_link_sync(struct hci_dev *hdev, u16 handle,
- u8 reason)
-{
- struct hci_cp_disconn_phy_link cp;
-
- memset(&cp, 0, sizeof(cp));
- cp.phy_handle = HCI_PHY_HANDLE(handle);
- cp.reason = reason;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_DISCONN_PHY_LINK,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
-}
-
static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
struct hci_cp_disconnect cp;
- if (conn->type == AMP_LINK)
- return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
-
if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) {
/* This is a BIS connection, hci_conn_del will
* do the necessary cleanup.
@@ -6493,10 +6376,8 @@ done:
int hci_le_create_cis_sync(struct hci_dev *hdev)
{
- struct {
- struct hci_cp_le_create_cis cp;
- struct hci_cis cis[0x1f];
- } cmd;
+ DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f);
+ size_t aux_num_cis = 0;
struct hci_conn *conn;
u8 cig = BT_ISO_QOS_CIG_UNSET;
@@ -6523,8 +6404,6 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
* remains pending.
*/
- memset(&cmd, 0, sizeof(cmd));
-
hci_dev_lock(hdev);
rcu_read_lock();
@@ -6561,7 +6440,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
goto done;
list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+ struct hci_cis *cis = &cmd->cis[aux_num_cis];
if (hci_conn_check_create_cis(conn) ||
conn->iso_qos.ucast.cig != cig)
@@ -6570,25 +6449,25 @@ int hci_le_create_cis_sync(struct hci_dev *hdev)
set_bit(HCI_CONN_CREATE_CIS, &conn->flags);
cis->acl_handle = cpu_to_le16(conn->parent->handle);
cis->cis_handle = cpu_to_le16(conn->handle);
- cmd.cp.num_cis++;
+ aux_num_cis++;
- if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis))
+ if (aux_num_cis >= cmd->num_cis)
break;
}
+ cmd->num_cis = aux_num_cis;
done:
rcu_read_unlock();
hci_dev_unlock(hdev);
- if (!cmd.cp.num_cis)
+ if (!aux_num_cis)
return 0;
/* Wait for HCI_LE_CIS_Established */
return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS,
- sizeof(cmd.cp) + sizeof(cmd.cis[0]) *
- cmd.cp.num_cis, &cmd,
- HCI_EVT_LE_CIS_ESTABLISHED,
+ struct_size(cmd, cis, cmd->num_cis),
+ cmd, HCI_EVT_LE_CIS_ESTABLISHED,
conn->conn_timeout, NULL);
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index ef0cc80b4c0c..cc055b952ce6 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -54,7 +54,6 @@ static void iso_sock_kill(struct sock *sk);
enum {
BT_SK_BIG_SYNC,
BT_SK_PA_SYNC,
- BT_SK_PA_SYNC_TERM,
};
struct iso_pinfo {
@@ -81,12 +80,14 @@ static bool check_ucast_qos(struct bt_iso_qos *qos);
static bool check_bcast_qos(struct bt_iso_qos *qos);
static bool iso_match_sid(struct sock *sk, void *data);
static bool iso_match_sync_handle(struct sock *sk, void *data);
+static bool iso_match_sync_handle_pa_report(struct sock *sk, void *data);
static void iso_sock_disconn(struct sock *sk);
typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
-static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
- iso_sock_match_t match, void *data);
+static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
+ enum bt_sock_state state,
+ iso_sock_match_t match, void *data);
/* ---- ISO timers ---- */
#define ISO_CONN_TIMEOUT (HZ * 40)
@@ -196,21 +197,10 @@ static void iso_chan_del(struct sock *sk, int err)
sock_set_flag(sk, SOCK_ZAPPED);
}
-static bool iso_match_conn_sync_handle(struct sock *sk, void *data)
-{
- struct hci_conn *hcon = data;
-
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags))
- return false;
-
- return hcon->sync_handle == iso_pi(sk)->sync_handle;
-}
-
static void iso_conn_del(struct hci_conn *hcon, int err)
{
struct iso_conn *conn = hcon->iso_data;
struct sock *sk;
- struct sock *parent;
if (!conn)
return;
@@ -226,25 +216,6 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
if (sk) {
lock_sock(sk);
-
- /* While a PA sync hcon is in the process of closing,
- * mark parent socket with a flag, so that any residual
- * BIGInfo adv reports that arrive before PA sync is
- * terminated are not processed anymore.
- */
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_conn_sync_handle,
- hcon);
-
- if (parent) {
- set_bit(BT_SK_PA_SYNC_TERM,
- &iso_pi(parent)->flags);
- sock_put(parent);
- }
- }
-
iso_sock_clear_timer(sk);
iso_chan_del(sk, err);
release_sock(sk);
@@ -581,22 +552,23 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc,
return NULL;
}
-/* Find socket listening:
+/* Find socket in given state:
* source bdaddr (Unicast)
* destination bdaddr (Broadcast only)
* match func - pass NULL to ignore
* match func data - pass -1 to ignore
* Returns closest match.
*/
-static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
- iso_sock_match_t match, void *data)
+static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
+ enum bt_sock_state state,
+ iso_sock_match_t match, void *data)
{
struct sock *sk = NULL, *sk1 = NULL;
read_lock(&iso_sk_list.lock);
sk_for_each(sk, &iso_sk_list.head) {
- if (sk->sk_state != BT_LISTEN)
+ if (sk->sk_state != state)
continue;
/* Match Broadcast destination */
@@ -857,6 +829,7 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock,
iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
iso_pi(sk)->qos = default_qos;
+ iso_pi(sk)->sync_handle = -1;
bt_sock_link(&iso_sk_list, sk);
return sk;
@@ -904,7 +877,6 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type;
- iso_pi(sk)->sync_handle = -1;
if (sa->iso_bc->bc_sid > 0x0f)
return -EINVAL;
@@ -981,7 +953,8 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
/* Allow the user to bind a PA sync socket to a number
* of BISes to sync to.
*/
- if (sk->sk_state == BT_CONNECT2 &&
+ if ((sk->sk_state == BT_CONNECT2 ||
+ sk->sk_state == BT_CONNECTED) &&
test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
err = iso_sock_bind_pa_sk(sk, sa, addr_len);
goto done;
@@ -1186,7 +1159,7 @@ done:
}
static int iso_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
@@ -1195,7 +1168,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
@@ -1285,7 +1258,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
return -ENOTCONN;
}
- mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
+ mtu = iso_pi(sk)->conn->hcon->mtu;
release_sock(sk);
@@ -1393,6 +1366,16 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
release_sock(sk);
return 0;
+ case BT_CONNECTED:
+ if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ iso_conn_big_sync(sk);
+ sk->sk_state = BT_LISTEN;
+ release_sock(sk);
+ return 0;
+ }
+
+ release_sock(sk);
+ break;
case BT_CONNECT:
release_sock(sk);
return iso_connect_cis(sk);
@@ -1538,7 +1521,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
case BT_ISO_QOS:
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
- sk->sk_state != BT_CONNECT2) {
+ sk->sk_state != BT_CONNECT2 &&
+ (!test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags) ||
+ sk->sk_state != BT_CONNECTED)) {
err = -EINVAL;
break;
}
@@ -1759,7 +1744,7 @@ static void iso_conn_ready(struct iso_conn *conn)
struct sock *sk = conn->sk;
struct hci_ev_le_big_sync_estabilished *ev = NULL;
struct hci_ev_le_pa_sync_established *ev2 = NULL;
- struct hci_evt_le_big_info_adv_report *ev3 = NULL;
+ struct hci_ev_le_per_adv_report *ev3 = NULL;
struct hci_conn *hcon;
BT_DBG("conn %p", conn);
@@ -1777,32 +1762,37 @@ static void iso_conn_ready(struct iso_conn *conn)
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
/* Get reference to PA sync parent socket, if it exists */
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_pa_sync_flag, NULL);
+ parent = iso_get_sock(&hcon->src, &hcon->dst,
+ BT_LISTEN,
+ iso_match_pa_sync_flag,
+ NULL);
if (!parent && ev)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_big, ev);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_big, ev);
} else if (test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
ev2 = hci_recv_event_data(hcon->hdev,
HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev2)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_sid, ev2);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_sid, ev2);
} else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) {
ev3 = hci_recv_event_data(hcon->hdev,
- HCI_EVT_LE_BIG_INFO_ADV_REPORT);
+ HCI_EV_LE_PER_ADV_REPORT);
if (ev3)
- parent = iso_get_sock_listen(&hcon->src,
- &hcon->dst,
- iso_match_sync_handle, ev3);
+ parent = iso_get_sock(&hcon->src,
+ &hcon->dst,
+ BT_LISTEN,
+ iso_match_sync_handle_pa_report,
+ ev3);
}
if (!parent)
- parent = iso_get_sock_listen(&hcon->src,
- BDADDR_ANY, NULL, NULL);
+ parent = iso_get_sock(&hcon->src, BDADDR_ANY,
+ BT_LISTEN, NULL, NULL);
if (!parent)
return;
@@ -1839,7 +1829,6 @@ static void iso_conn_ready(struct iso_conn *conn)
if (ev3) {
iso_pi(sk)->qos = iso_pi(parent)->qos;
- iso_pi(sk)->qos.bcast.encryption = ev3->encryption;
hcon->iso_qos = iso_pi(sk)->qos;
iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis;
memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS);
@@ -1923,8 +1912,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
*/
ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev1) {
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid,
- ev1);
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN,
+ iso_match_sid, ev1);
if (sk && !ev1->status)
iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
@@ -1933,26 +1922,29 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT);
if (ev2) {
- /* Try to get PA sync listening socket, if it exists */
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_pa_sync_flag, NULL);
-
- if (!sk) {
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle, ev2);
-
- /* If PA Sync is in process of terminating,
- * do not handle any more BIGInfo adv reports.
- */
-
- if (sk && test_bit(BT_SK_PA_SYNC_TERM,
- &iso_pi(sk)->flags))
- return 0;
+ /* Check if BIGInfo report has already been handled */
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECTED,
+ iso_match_sync_handle, ev2);
+ if (sk) {
+ sock_put(sk);
+ sk = NULL;
+ goto done;
}
+ /* Try to get PA sync socket, if it exists */
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECT2,
+ iso_match_sync_handle, ev2);
+ if (!sk)
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr,
+ BT_LISTEN,
+ iso_match_sync_handle,
+ ev2);
+
if (sk) {
int err;
+ iso_pi(sk)->qos.bcast.encryption = ev2->encryption;
+
if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
iso_pi(sk)->bc_num_bis = ev2->num_bis;
@@ -1971,6 +1963,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
}
}
}
+
+ goto done;
}
ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
@@ -1979,8 +1973,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
u8 *base;
struct hci_conn *hcon;
- sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle_pa_report, ev3);
+ sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN,
+ iso_match_sync_handle_pa_report, ev3);
if (!sk)
goto done;
@@ -2029,7 +2023,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
hcon->le_per_adv_data_len = 0;
}
} else {
- sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
+ sk = iso_get_sock(&hdev->bdaddr, BDADDR_ANY,
+ BT_LISTEN, NULL, NULL);
}
done:
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 84fc70862d78..5b509b767557 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -415,6 +415,9 @@ static void l2cap_chan_timeout(struct work_struct *work)
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
+ if (!conn)
+ return;
+
mutex_lock(&conn->chan_lock);
/* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling
* this work. No need to call l2cap_chan_hold(chan) here again.
@@ -454,6 +457,9 @@ struct l2cap_chan *l2cap_chan_create(void)
/* Set default lock nesting level */
atomic_set(&chan->nesting, L2CAP_NESTING_NORMAL);
+ /* Available receive buffer space is initially unknown */
+ chan->rx_avail = -1;
+
write_lock(&chan_list_lock);
list_add(&chan->global_l, &chan_list);
write_unlock(&chan_list_lock);
@@ -535,6 +541,28 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
}
EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults);
+static __u16 l2cap_le_rx_credits(struct l2cap_chan *chan)
+{
+ size_t sdu_len = chan->sdu ? chan->sdu->len : 0;
+
+ if (chan->mps == 0)
+ return 0;
+
+ /* If we don't know the available space in the receiver buffer, give
+ * enough credits for a full packet.
+ */
+ if (chan->rx_avail == -1)
+ return (chan->imtu / chan->mps) + 1;
+
+ /* If we know how much space is available in the receive buffer, give
+ * out as many credits as would fill the buffer.
+ */
+ if (chan->rx_avail <= sdu_len)
+ return 0;
+
+ return DIV_ROUND_UP(chan->rx_avail - sdu_len, chan->mps);
+}
+
static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
{
chan->sdu = NULL;
@@ -543,8 +571,7 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits)
chan->tx_credits = tx_credits;
/* Derive MPS from connection MTU to stop HCI fragmentation */
chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE);
- /* Give enough credits for a full packet */
- chan->rx_credits = (chan->imtu / chan->mps) + 1;
+ chan->rx_credits = l2cap_le_rx_credits(chan);
skb_queue_head_init(&chan->tx_q);
}
@@ -556,7 +583,7 @@ static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits)
/* L2CAP implementations shall support a minimum MPS of 64 octets */
if (chan->mps < L2CAP_ECRED_MIN_MPS) {
chan->mps = L2CAP_ECRED_MIN_MPS;
- chan->rx_credits = (chan->imtu / chan->mps) + 1;
+ chan->rx_credits = l2cap_le_rx_credits(chan);
}
}
@@ -1257,7 +1284,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan)
struct l2cap_ecred_conn_data {
struct {
- struct l2cap_ecred_conn_req req;
+ struct l2cap_ecred_conn_req_hdr req;
__le16 scid[5];
} __packed pdu;
struct l2cap_chan *chan;
@@ -3737,7 +3764,7 @@ static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data)
struct l2cap_ecred_rsp_data {
struct {
- struct l2cap_ecred_conn_rsp rsp;
+ struct l2cap_ecred_conn_rsp_hdr rsp;
__le16 scid[L2CAP_ECRED_MAX_CID];
} __packed pdu;
int count;
@@ -3746,6 +3773,8 @@ struct l2cap_ecred_rsp_data {
static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
{
struct l2cap_ecred_rsp_data *rsp = data;
+ struct l2cap_ecred_conn_rsp *rsp_flex =
+ container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr);
if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags))
return;
@@ -3755,7 +3784,7 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
/* Include all channels pending with the same ident */
if (!rsp->pdu.rsp.result)
- rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid);
+ rsp_flex->dcid[rsp->count++] = cpu_to_le16(chan->scid);
else
l2cap_chan_del(chan, ECONNRESET);
}
@@ -3902,13 +3931,12 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn,
return 0;
}
-static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd,
- u8 *data, u8 rsp_code, u8 amp_id)
+static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd,
+ u8 *data, u8 rsp_code)
{
struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
struct l2cap_conn_rsp rsp;
- struct l2cap_chan *chan = NULL, *pchan;
+ struct l2cap_chan *chan = NULL, *pchan = NULL;
int result, status = L2CAP_CS_NO_INFO;
u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -3921,7 +3949,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
&conn->hcon->dst, ACL_LINK);
if (!pchan) {
result = L2CAP_CR_BAD_PSM;
- goto sendresp;
+ goto response;
}
mutex_lock(&conn->chan_lock);
@@ -3983,17 +4011,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
status = L2CAP_CS_AUTHOR_PEND;
chan->ops->defer(chan);
} else {
- /* Force pending result for AMP controllers.
- * The connection will succeed after the
- * physical link is up.
- */
- if (amp_id == AMP_ID_BREDR) {
- l2cap_state_change(chan, BT_CONFIG);
- result = L2CAP_CR_SUCCESS;
- } else {
- l2cap_state_change(chan, BT_CONNECT2);
- result = L2CAP_CR_PEND;
- }
+ l2cap_state_change(chan, BT_CONNECT2);
+ result = L2CAP_CR_PEND;
status = L2CAP_CS_NO_INFO;
}
} else {
@@ -4008,17 +4027,15 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
}
response:
- l2cap_chan_unlock(pchan);
- mutex_unlock(&conn->chan_lock);
- l2cap_chan_put(pchan);
-
-sendresp:
rsp.scid = cpu_to_le16(scid);
rsp.dcid = cpu_to_le16(dcid);
rsp.result = cpu_to_le16(result);
rsp.status = cpu_to_le16(status);
l2cap_send_cmd(conn, cmd->ident, rsp_code, sizeof(rsp), &rsp);
+ if (!pchan)
+ return;
+
if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) {
struct l2cap_info_req info;
info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
@@ -4041,7 +4058,9 @@ sendresp:
chan->num_conf_req++;
}
- return chan;
+ l2cap_chan_unlock(pchan);
+ mutex_unlock(&conn->chan_lock);
+ l2cap_chan_put(pchan);
}
static int l2cap_connect_req(struct l2cap_conn *conn,
@@ -4058,7 +4077,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
- l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0);
+ l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP);
return 0;
}
@@ -4994,10 +5013,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
u8 *data)
{
struct l2cap_ecred_conn_req *req = (void *) data;
- struct {
- struct l2cap_ecred_conn_rsp rsp;
- __le16 dcid[L2CAP_ECRED_MAX_CID];
- } __packed pdu;
+ DEFINE_RAW_FLEX(struct l2cap_ecred_conn_rsp, pdu, dcid, L2CAP_ECRED_MAX_CID);
struct l2cap_chan *chan, *pchan;
u16 mtu, mps;
__le16 psm;
@@ -5016,7 +5032,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
cmd_len -= sizeof(*req);
num_scid = cmd_len / sizeof(u16);
- if (num_scid > ARRAY_SIZE(pdu.dcid)) {
+ if (num_scid > L2CAP_ECRED_MAX_CID) {
result = L2CAP_CR_LE_INVALID_PARAMS;
goto response;
}
@@ -5045,7 +5061,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps);
- memset(&pdu, 0, sizeof(pdu));
+ memset(pdu, 0, sizeof(*pdu));
/* Check if we have socket listening on psm */
pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
@@ -5071,8 +5087,8 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
BT_DBG("scid[%d] 0x%4.4x", i, scid);
- pdu.dcid[i] = 0x0000;
- len += sizeof(*pdu.dcid);
+ pdu->dcid[i] = 0x0000;
+ len += sizeof(*pdu->dcid);
/* Check for valid dynamic CID range */
if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
@@ -5106,13 +5122,13 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
l2cap_ecred_init(chan, __le16_to_cpu(req->credits));
/* Init response */
- if (!pdu.rsp.credits) {
- pdu.rsp.mtu = cpu_to_le16(chan->imtu);
- pdu.rsp.mps = cpu_to_le16(chan->mps);
- pdu.rsp.credits = cpu_to_le16(chan->rx_credits);
+ if (!pdu->credits) {
+ pdu->mtu = cpu_to_le16(chan->imtu);
+ pdu->mps = cpu_to_le16(chan->mps);
+ pdu->credits = cpu_to_le16(chan->rx_credits);
}
- pdu.dcid[i] = cpu_to_le16(chan->scid);
+ pdu->dcid[i] = cpu_to_le16(chan->scid);
__set_chan_timer(chan, chan->ops->get_sndtimeo(chan));
@@ -5134,13 +5150,13 @@ unlock:
l2cap_chan_put(pchan);
response:
- pdu.rsp.result = cpu_to_le16(result);
+ pdu->result = cpu_to_le16(result);
if (defer)
return 0;
l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP,
- sizeof(pdu.rsp) + len, &pdu);
+ sizeof(*pdu) + len, pdu);
return 0;
}
@@ -6239,7 +6255,7 @@ static int l2cap_finish_move(struct l2cap_chan *chan)
BT_DBG("chan %p", chan);
chan->rx_state = L2CAP_RX_STATE_RECV;
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->mtu;
return l2cap_resegment(chan);
}
@@ -6306,7 +6322,7 @@ static int l2cap_rx_state_wait_f(struct l2cap_chan *chan,
*/
chan->next_tx_seq = control->reqseq;
chan->unacked_frames = 0;
- chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu;
+ chan->conn->mtu = chan->conn->hcon->mtu;
err = l2cap_resegment(chan);
@@ -6511,9 +6527,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
struct l2cap_le_credits pkt;
- u16 return_credits;
-
- return_credits = (chan->imtu / chan->mps) + 1;
+ u16 return_credits = l2cap_le_rx_credits(chan);
if (chan->rx_credits >= return_credits)
return;
@@ -6532,6 +6546,19 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt);
}
+void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail)
+{
+ if (chan->rx_avail == rx_avail)
+ return;
+
+ BT_DBG("chan %p has %zd bytes avail for rx", chan, rx_avail);
+
+ chan->rx_avail = rx_avail;
+
+ if (chan->state == BT_CONNECTED)
+ l2cap_chan_le_send_credits(chan);
+}
+
static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb)
{
int err;
@@ -6541,6 +6568,12 @@ static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb)
/* Wait recv to confirm reception before updating the credits */
err = chan->ops->recv(chan, skb);
+ if (err < 0 && chan->rx_avail != -1) {
+ BT_ERR("Queueing received LE L2CAP data failed");
+ l2cap_send_disconn_req(chan, ECONNRESET);
+ return err;
+ }
+
/* Update credits whenever an SDU is received */
l2cap_chan_le_send_credits(chan);
@@ -6563,7 +6596,8 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
}
chan->rx_credits--;
- BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits);
+ BT_DBG("chan %p: rx_credits %u -> %u",
+ chan, chan->rx_credits + 1, chan->rx_credits);
/* Update if remote had run out of credits, this should only happens
* if the remote is not using the entire MPS.
@@ -6846,18 +6880,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
- switch (hcon->type) {
- case LE_LINK:
- if (hcon->hdev->le_mtu) {
- conn->mtu = hcon->hdev->le_mtu;
- break;
- }
- fallthrough;
- default:
- conn->mtu = hcon->hdev->acl_mtu;
- break;
- }
-
+ conn->mtu = hcon->mtu;
conn->feat_mask = 0;
conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS;
@@ -7111,14 +7134,11 @@ EXPORT_SYMBOL_GPL(l2cap_chan_connect);
static void l2cap_ecred_reconfigure(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
- struct {
- struct l2cap_ecred_reconf_req req;
- __le16 scid;
- } pdu;
+ DEFINE_RAW_FLEX(struct l2cap_ecred_reconf_req, pdu, scid, 1);
- pdu.req.mtu = cpu_to_le16(chan->imtu);
- pdu.req.mps = cpu_to_le16(chan->mps);
- pdu.scid = cpu_to_le16(chan->scid);
+ pdu->mtu = cpu_to_le16(chan->imtu);
+ pdu->mps = cpu_to_le16(chan->mps);
+ pdu->scid[0] = cpu_to_le16(chan->scid);
chan->ident = l2cap_get_ident(conn);
@@ -7462,10 +7482,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
struct l2cap_conn *conn = hcon->l2cap_data;
int len;
- /* For AMP controller do not create l2cap conn */
- if (!conn && hcon->hdev->dev_type != HCI_PRIMARY)
- goto drop;
-
if (!conn)
conn = l2cap_conn_add(hcon);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 5cc83f906c12..6db60946c627 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -327,7 +327,7 @@ done:
}
static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
@@ -336,7 +336,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock_nested(sk, L2CAP_NESTING_PARENT);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
@@ -1131,6 +1131,34 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg,
return err;
}
+static void l2cap_publish_rx_avail(struct l2cap_chan *chan)
+{
+ struct sock *sk = chan->data;
+ ssize_t avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc);
+ int expected_skbs, skb_overhead;
+
+ if (avail <= 0) {
+ l2cap_chan_rx_avail(chan, 0);
+ return;
+ }
+
+ if (!chan->mps) {
+ l2cap_chan_rx_avail(chan, -1);
+ return;
+ }
+
+ /* Correct available memory by estimated sk_buff overhead.
+ * This is significant due to small transfer sizes. However, accept
+ * at least one full packet if receive space is non-zero.
+ */
+ expected_skbs = DIV_ROUND_UP(avail, chan->mps);
+ skb_overhead = expected_skbs * sizeof(struct sk_buff);
+ if (skb_overhead < avail)
+ l2cap_chan_rx_avail(chan, avail - skb_overhead);
+ else
+ l2cap_chan_rx_avail(chan, -1);
+}
+
static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
@@ -1167,28 +1195,33 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
else
err = bt_sock_recvmsg(sock, msg, len, flags);
- if (pi->chan->mode != L2CAP_MODE_ERTM)
+ if (pi->chan->mode != L2CAP_MODE_ERTM &&
+ pi->chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ pi->chan->mode != L2CAP_MODE_EXT_FLOWCTL)
return err;
- /* Attempt to put pending rx data in the socket buffer */
-
lock_sock(sk);
- if (!test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state))
- goto done;
+ l2cap_publish_rx_avail(pi->chan);
- if (pi->rx_busy_skb) {
- if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb))
- pi->rx_busy_skb = NULL;
- else
+ /* Attempt to put pending rx data in the socket buffer */
+ while (!list_empty(&pi->rx_busy)) {
+ struct l2cap_rx_busy *rx_busy =
+ list_first_entry(&pi->rx_busy,
+ struct l2cap_rx_busy,
+ list);
+ if (__sock_queue_rcv_skb(sk, rx_busy->skb) < 0)
goto done;
+ list_del(&rx_busy->list);
+ kfree(rx_busy);
}
/* Restore data flow when half of the receive buffer is
* available. This avoids resending large numbers of
* frames.
*/
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1)
+ if (test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state) &&
+ atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1)
l2cap_chan_busy(pi->chan, 0);
done:
@@ -1449,17 +1482,20 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
struct sock *sk = chan->data;
+ struct l2cap_pinfo *pi = l2cap_pi(sk);
int err;
lock_sock(sk);
- if (l2cap_pi(sk)->rx_busy_skb) {
+ if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
err = -ENOMEM;
goto done;
}
if (chan->mode != L2CAP_MODE_ERTM &&
- chan->mode != L2CAP_MODE_STREAMING) {
+ chan->mode != L2CAP_MODE_STREAMING &&
+ chan->mode != L2CAP_MODE_LE_FLOWCTL &&
+ chan->mode != L2CAP_MODE_EXT_FLOWCTL) {
/* Even if no filter is attached, we could potentially
* get errors from security modules, etc.
*/
@@ -1470,7 +1506,9 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
err = __sock_queue_rcv_skb(sk, skb);
- /* For ERTM, handle one skb that doesn't fit into the recv
+ l2cap_publish_rx_avail(chan);
+
+ /* For ERTM and LE, handle a skb that doesn't fit into the recv
* buffer. This is important to do because the data frames
* have already been acked, so the skb cannot be discarded.
*
@@ -1479,8 +1517,18 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
* acked and reassembled until there is buffer space
* available.
*/
- if (err < 0 && chan->mode == L2CAP_MODE_ERTM) {
- l2cap_pi(sk)->rx_busy_skb = skb;
+ if (err < 0 &&
+ (chan->mode == L2CAP_MODE_ERTM ||
+ chan->mode == L2CAP_MODE_LE_FLOWCTL ||
+ chan->mode == L2CAP_MODE_EXT_FLOWCTL)) {
+ struct l2cap_rx_busy *rx_busy =
+ kmalloc(sizeof(*rx_busy), GFP_KERNEL);
+ if (!rx_busy) {
+ err = -ENOMEM;
+ goto done;
+ }
+ rx_busy->skb = skb;
+ list_add_tail(&rx_busy->list, &pi->rx_busy);
l2cap_chan_busy(chan, 1);
err = 0;
}
@@ -1706,6 +1754,8 @@ static const struct l2cap_ops l2cap_chan_ops = {
static void l2cap_sock_destruct(struct sock *sk)
{
+ struct l2cap_rx_busy *rx_busy, *next;
+
BT_DBG("sk %p", sk);
if (l2cap_pi(sk)->chan) {
@@ -1713,9 +1763,10 @@ static void l2cap_sock_destruct(struct sock *sk)
l2cap_chan_put(l2cap_pi(sk)->chan);
}
- if (l2cap_pi(sk)->rx_busy_skb) {
- kfree_skb(l2cap_pi(sk)->rx_busy_skb);
- l2cap_pi(sk)->rx_busy_skb = NULL;
+ list_for_each_entry_safe(rx_busy, next, &l2cap_pi(sk)->rx_busy, list) {
+ kfree_skb(rx_busy->skb);
+ list_del(&rx_busy->list);
+ kfree(rx_busy);
}
skb_queue_purge(&sk->sk_receive_queue);
@@ -1799,6 +1850,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
chan->data = sk;
chan->ops = &l2cap_chan_ops;
+
+ l2cap_publish_rx_avail(chan);
}
static struct proto l2cap_proto = {
@@ -1820,6 +1873,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
sk->sk_destruct = l2cap_sock_destruct;
sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
+ INIT_LIST_HEAD(&l2cap_pi(sk)->rx_busy);
+
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 965f621ef865..80f220b7e19d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -443,8 +443,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ if (!hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -468,8 +467,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY &&
- !hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
+ if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
bt_dev_dbg(hdev, "Added hci%u", d->id);
}
@@ -503,8 +501,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY &&
- hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
count++;
}
@@ -528,8 +525,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY &&
- hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
rp->index[count++] = cpu_to_le16(d->id);
bt_dev_dbg(hdev, "Added hci%u", d->id);
}
@@ -561,10 +557,8 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
read_lock(&hci_dev_list_lock);
count = 0;
- list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP)
- count++;
- }
+ list_for_each_entry(d, &hci_dev_list, list)
+ count++;
rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC);
if (!rp) {
@@ -585,16 +579,10 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_PRIMARY) {
- if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
- rp->entry[count].type = 0x01;
- else
- rp->entry[count].type = 0x00;
- } else if (d->dev_type == HCI_AMP) {
- rp->entry[count].type = 0x02;
- } else {
- continue;
- }
+ if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
+ rp->entry[count].type = 0x01;
+ else
+ rp->entry[count].type = 0x00;
rp->entry[count].bus = d->bus;
rp->entry[count++].index = cpu_to_le16(d->id);
@@ -9331,23 +9319,14 @@ void mgmt_index_added(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev,
- NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
- ev.type = 0x01;
- } else {
- mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
- HCI_MGMT_INDEX_EVENTS);
- ev.type = 0x00;
- }
- break;
- case HCI_AMP:
- ev.type = 0x02;
- break;
- default:
- return;
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+ mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0,
+ HCI_MGMT_UNCONF_INDEX_EVENTS);
+ ev.type = 0x01;
+ } else {
+ mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
+ HCI_MGMT_INDEX_EVENTS);
+ ev.type = 0x00;
}
ev.bus = hdev->bus;
@@ -9364,25 +9343,16 @@ void mgmt_index_removed(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- switch (hdev->dev_type) {
- case HCI_PRIMARY:
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
+ mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
- if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev,
- NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
- ev.type = 0x01;
- } else {
- mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
- HCI_MGMT_INDEX_EVENTS);
- ev.type = 0x00;
- }
- break;
- case HCI_AMP:
- ev.type = 0x02;
- break;
- default:
- return;
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+ mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
+ HCI_MGMT_UNCONF_INDEX_EVENTS);
+ ev.type = 0x01;
+ } else {
+ mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
+ HCI_MGMT_INDEX_EVENTS);
+ ev.type = 0x00;
}
ev.bus = hdev->bus;
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 9612c5d1b13f..d039683d3bdd 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -769,7 +769,7 @@ void msft_register(struct hci_dev *hdev)
mutex_init(&msft->filter_lock);
}
-void msft_unregister(struct hci_dev *hdev)
+void msft_release(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index 2a63205b377b..fe538e9c91c0 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -14,7 +14,7 @@
bool msft_monitor_supported(struct hci_dev *hdev);
void msft_register(struct hci_dev *hdev);
-void msft_unregister(struct hci_dev *hdev);
+void msft_release(struct hci_dev *hdev);
void msft_do_open(struct hci_dev *hdev);
void msft_do_close(struct hci_dev *hdev);
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb);
@@ -35,7 +35,7 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev)
}
static inline void msft_register(struct hci_dev *hdev) {}
-static inline void msft_unregister(struct hci_dev *hdev) {}
+static inline void msft_release(struct hci_dev *hdev) {}
static inline void msft_do_open(struct hci_dev *hdev) {}
static inline void msft_do_close(struct hci_dev *hdev) {}
static inline void msft_vendor_evt(struct hci_dev *hdev, void *data,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 29aa07e9db9d..37d63d768afb 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -468,8 +468,8 @@ done:
return err;
}
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
@@ -483,7 +483,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
goto done;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 5d03c5440b06..a5ac160c592e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work)
struct sock *sk;
sco_conn_lock(conn);
+ if (!conn->hcon) {
+ sco_conn_unlock(conn);
+ return;
+ }
sk = conn->sk;
if (sk)
sock_hold(sk);
@@ -122,7 +126,6 @@ static void sco_sock_clear_timer(struct sock *sk)
/* ---- SCO connections ---- */
static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
{
- struct hci_dev *hdev = hcon->hdev;
struct sco_conn *conn = hcon->sco_data;
if (conn) {
@@ -140,9 +143,10 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
hcon->sco_data = conn;
conn->hcon = hcon;
+ conn->mtu = hcon->mtu;
- if (hdev->sco_mtu > 0)
- conn->mtu = hdev->sco_mtu;
+ if (hcon->mtu > 0)
+ conn->mtu = hcon->mtu;
else
conn->mtu = 60;
@@ -643,7 +647,7 @@ done:
}
static int sco_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
@@ -652,7 +656,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
BT_DBG("sk %p timeo %ld", sk, timeo);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 25b75844891a..891cdf61c65a 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -79,6 +79,51 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
args->args[3], args->args[4]);
}
+static const struct bpf_ctx_arg_aux *find_ctx_arg_info(struct bpf_prog_aux *aux, int offset)
+{
+ int i;
+
+ for (i = 0; i < aux->ctx_arg_info_size; i++)
+ if (aux->ctx_arg_info[i].offset == offset)
+ return &aux->ctx_arg_info[i];
+
+ return NULL;
+}
+
+/* There is only one check at the moment:
+ * - zero should not be passed for pointer parameters not marked as nullable.
+ */
+static int check_test_run_args(struct bpf_prog *prog, struct bpf_dummy_ops_test_args *args)
+{
+ const struct btf_type *func_proto = prog->aux->attach_func_proto;
+
+ for (u32 arg_no = 0; arg_no < btf_type_vlen(func_proto) ; ++arg_no) {
+ const struct btf_param *param = &btf_params(func_proto)[arg_no];
+ const struct bpf_ctx_arg_aux *info;
+ const struct btf_type *t;
+ int offset;
+
+ if (args->args[arg_no] != 0)
+ continue;
+
+ /* Program is validated already, so there is no need
+ * to check if t is NULL.
+ */
+ t = btf_type_skip_modifiers(bpf_dummy_ops_btf, param->type, NULL);
+ if (!btf_type_is_ptr(t))
+ continue;
+
+ offset = btf_ctx_arg_offset(bpf_dummy_ops_btf, func_proto, arg_no);
+ info = find_ctx_arg_info(prog->aux, offset);
+ if (info && (info->reg_type & PTR_MAYBE_NULL))
+ continue;
+
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
extern const struct bpf_link_ops bpf_struct_ops_link_lops;
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
@@ -87,7 +132,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_links *tlinks = NULL;
struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
@@ -109,6 +154,10 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(args))
return PTR_ERR(args);
+ err = check_test_run_args(prog, args);
+ if (err)
+ goto out;
+
tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
if (!tlinks) {
err = -ENOMEM;
@@ -232,7 +281,7 @@ static void bpf_dummy_unreg(void *kdata)
{
}
-static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+static int bpf_dummy_ops__test_1(struct bpf_dummy_ops_state *cb__nullable)
{
return 0;
}
@@ -249,7 +298,7 @@ static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
}
static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
- .test_1 = bpf_dummy_test_1,
+ .test_1 = bpf_dummy_ops__test_1,
.test_2 = bpf_dummy_test_2,
.test_sleepable = bpf_dummy_test_sleepable,
};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 61efeadaff8d..f6aad4ed2ab2 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -575,6 +575,13 @@ __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
return a + *b + c + d + (long)e + f + g;
}
+__bpf_kfunc int bpf_modify_return_test_tp(int nonce)
+{
+ trace_bpf_trigger_tp(nonce);
+
+ return nonce;
+}
+
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -622,6 +629,7 @@ __bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
BTF_ID_FLAGS(func, bpf_modify_return_test2)
+BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_test_modify_return_ids)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ab4d33e02014..fb1115857e49 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -27,6 +27,7 @@ EXPORT_SYMBOL_GPL(nf_br_ops);
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ enum skb_drop_reason reason = pskb_may_pull_reason(skb, ETH_HLEN);
struct net_bridge_mcast_port *pmctx_null = NULL;
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_mcast *brmctx = &br->multicast_ctx;
@@ -38,6 +39,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const unsigned char *dest;
u16 vid = 0;
+ if (unlikely(reason != SKB_NOT_DROPPED_YET)) {
+ kfree_skb_reason(skb, reason);
+ return NETDEV_TX_OK;
+ }
+
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
br_tc_skb_miss_set(skb, false);
@@ -197,7 +203,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
/* this flag will be cleared if the MTU was automatically adjusted */
br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7431f89e897b..d97064d460dc 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -258,6 +258,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
const unsigned char *src = eth_hdr(skb)->h_source;
+ struct sk_buff *nskb;
if (!should_deliver(p, skb))
return;
@@ -266,12 +267,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
if (skb->dev == p->dev && ether_addr_equal(src, addr))
return;
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb) {
+ __skb_push(skb, ETH_HLEN);
+ nskb = pskb_copy(skb, GFP_ATOMIC);
+ __skb_pull(skb, ETH_HLEN);
+ if (!nskb) {
DEV_STATS_INC(dev, tx_dropped);
return;
}
+ skb = nskb;
+ __skb_pull(skb, ETH_HLEN);
if (!is_broadcast_ether_addr(addr))
memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN);
diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c
index ee680adcee17..3c66141d34d6 100644
--- a/net/bridge/br_mst.c
+++ b/net/bridge/br_mst.c
@@ -78,7 +78,7 @@ static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_v
{
struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
- if (v->state == state)
+ if (br_vlan_get_state(v) == state)
return;
br_vlan_set_state(v, state);
@@ -100,11 +100,12 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
};
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
- int err;
+ int err = 0;
+ rcu_read_lock();
vg = nbp_vlan_group(p);
if (!vg)
- return 0;
+ goto out;
/* MSTI 0 (CST) state changes are notified via the regular
* SWITCHDEV_ATTR_ID_PORT_STP_STATE.
@@ -112,17 +113,20 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
if (msti) {
err = switchdev_port_attr_set(p->dev, &attr, extack);
if (err && err != -EOPNOTSUPP)
- return err;
+ goto out;
}
- list_for_each_entry(v, &vg->vlan_list, vlist) {
+ err = 0;
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
if (v->brvlan->msti != msti)
continue;
br_mst_vlan_set_state(p, v, state);
}
- return 0;
+out:
+ rcu_read_unlock();
+ return err;
}
static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7948a9e7542c..bf30c50b5689 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1226,7 +1226,6 @@ static struct ctl_table brnf_table[] = {
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
- { }
};
static inline void br_netfilter_sysctl_default(struct brnf_net *brnf)
diff --git a/net/core/Makefile b/net/core/Makefile
index 21d6fbc7e884..62be9aef2528 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
+obj-$(CONFIG_NET_IEEE8021Q_HELPERS) += ieee8021q_helpers.o
obj-$(CONFIG_NET_SELFTESTS) += selftests.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
diff --git a/net/core/dev.c b/net/core/dev.c
index e09aa3785c15..e1bb6d7856d9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -158,7 +158,6 @@
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
#include <net/rps.h>
-#include <linux/phy_link_topology_core.h>
#include "dev.h"
#include "net-sysfs.h"
@@ -940,6 +939,18 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
}
EXPORT_SYMBOL(dev_get_by_napi_id);
+static DEFINE_SEQLOCK(netdev_rename_lock);
+
+void netdev_copy_name(struct net_device *dev, char *name)
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&netdev_rename_lock);
+ strscpy(name, dev->name, IFNAMSIZ);
+ } while (read_seqretry(&netdev_rename_lock, seq));
+}
+
/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
@@ -951,7 +962,6 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
struct net_device *dev;
int ret;
- down_read(&devnet_rename_sem);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
@@ -960,12 +970,11 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
goto out;
}
- strcpy(name, dev->name);
+ netdev_copy_name(dev, name);
ret = 0;
out:
rcu_read_unlock();
- up_read(&devnet_rename_sem);
return ret;
}
@@ -1217,7 +1226,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
+ write_seqlock(&netdev_rename_lock);
err = dev_get_valid_name(net, dev, newname);
+ write_sequnlock(&netdev_rename_lock);
+
if (err < 0) {
up_write(&devnet_rename_sem);
return err;
@@ -1257,7 +1269,9 @@ rollback:
if (err >= 0) {
err = ret;
down_write(&devnet_rename_sem);
+ write_seqlock(&netdev_rename_lock);
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock(&netdev_rename_lock);
memcpy(oldname, newname, IFNAMSIZ);
WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
@@ -4450,7 +4464,6 @@ EXPORT_SYMBOL(__dev_direct_xmit);
*************************************************************************/
static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
-unsigned int sysctl_skb_defer_max __read_mostly = 64;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -6517,7 +6530,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
}
- dev->threaded = threaded;
+ WRITE_ONCE(dev->threaded, threaded);
/* Make sure kthread is created before THREADED bit
* is set.
@@ -6608,7 +6621,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
* threaded mode will not be enabled in napi_enable().
*/
if (dev->threaded && napi_kthread_create(napi))
- dev->threaded = 0;
+ dev->threaded = false;
netif_napi_set_irq(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight);
@@ -8530,27 +8543,29 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags;
+ unsigned int promiscuity, flags;
kuid_t uid;
kgid_t gid;
ASSERT_RTNL();
- dev->flags |= IFF_PROMISC;
- dev->promiscuity += inc;
- if (dev->promiscuity == 0) {
+ promiscuity = dev->promiscuity + inc;
+ if (promiscuity == 0) {
/*
* Avoid overflow.
* If inc causes overflow, untouch promisc and return error.
*/
- if (inc < 0)
- dev->flags &= ~IFF_PROMISC;
- else {
- dev->promiscuity -= inc;
+ if (unlikely(inc > 0)) {
netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
return -EOVERFLOW;
}
+ flags = old_flags & ~IFF_PROMISC;
+ } else {
+ flags = old_flags | IFF_PROMISC;
}
- if (dev->flags != old_flags) {
+ WRITE_ONCE(dev->promiscuity, promiscuity);
+ if (flags != old_flags) {
+ WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s promiscuous mode\n",
dev->flags & IFF_PROMISC ? "entered" : "left");
if (audit_enabled) {
@@ -8601,25 +8616,27 @@ EXPORT_SYMBOL(dev_set_promiscuity);
static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
+ unsigned int allmulti, flags;
ASSERT_RTNL();
- dev->flags |= IFF_ALLMULTI;
- dev->allmulti += inc;
- if (dev->allmulti == 0) {
+ allmulti = dev->allmulti + inc;
+ if (allmulti == 0) {
/*
* Avoid overflow.
* If inc causes overflow, untouch allmulti and return error.
*/
- if (inc < 0)
- dev->flags &= ~IFF_ALLMULTI;
- else {
- dev->allmulti -= inc;
+ if (unlikely(inc > 0)) {
netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
return -EOVERFLOW;
}
+ flags = old_flags & ~IFF_ALLMULTI;
+ } else {
+ flags = old_flags | IFF_ALLMULTI;
}
- if (dev->flags ^ old_flags) {
+ WRITE_ONCE(dev->allmulti, allmulti);
+ if (flags != old_flags) {
+ WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s allmulticast mode\n",
dev->flags & IFF_ALLMULTI ? "entered" : "left");
dev_change_rx_flags(dev, IFF_ALLMULTI);
@@ -8945,7 +8962,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
return -ERANGE;
if (new_len != orig_len) {
- dev->tx_queue_len = new_len;
+ WRITE_ONCE(dev->tx_queue_len, new_len);
res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
res = notifier_to_errno(res);
if (res)
@@ -8959,7 +8976,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
err_rollback:
netdev_err(dev, "refused to change device tx_queue_len\n");
- dev->tx_queue_len = orig_len;
+ WRITE_ONCE(dev->tx_queue_len, orig_len);
return res;
}
@@ -9205,7 +9222,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
netif_carrier_off(dev);
else
netif_carrier_on(dev);
- dev->proto_down = proto_down;
+ WRITE_ONCE(dev->proto_down, proto_down);
return 0;
}
@@ -9219,18 +9236,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
u32 value)
{
+ u32 proto_down_reason;
int b;
if (!mask) {
- dev->proto_down_reason = value;
+ proto_down_reason = value;
} else {
+ proto_down_reason = dev->proto_down_reason;
for_each_set_bit(b, &mask, 32) {
if (value & (1 << b))
- dev->proto_down_reason |= BIT(b);
+ proto_down_reason |= BIT(b);
else
- dev->proto_down_reason &= ~BIT(b);
+ proto_down_reason &= ~BIT(b);
}
}
+ WRITE_ONCE(dev->proto_down_reason, proto_down_reason);
}
struct bpf_xdp_link {
@@ -10566,8 +10586,9 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
rebroadcast_time = jiffies;
}
+ rcu_barrier();
+
if (!wait) {
- rcu_barrier();
wait = WAIT_REFS_MIN_MSECS;
} else {
msleep(wait);
@@ -10976,12 +10997,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
#ifdef CONFIG_NET_SCHED
hash_init(dev->qdisc_hash);
#endif
- dev->link_topo = phy_link_topo_create(dev);
- if (IS_ERR(dev->link_topo)) {
- dev->link_topo = NULL;
- goto free_all;
- }
-
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -11070,8 +11085,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
- phy_link_topo_destroy(dev->link_topo);
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_DUMMY) {
@@ -11403,8 +11416,12 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- if (new_name[0]) /* Rename the netdev to prepared name */
+ if (new_name[0]) {
+ /* Rename the netdev to prepared name */
+ write_seqlock(&netdev_rename_lock);
strscpy(dev->name, new_name, IFNAMSIZ);
+ write_sequnlock(&netdev_rename_lock);
+ }
/* Fixup kobjects */
dev_set_uevent_suppress(&dev->dev, 1);
diff --git a/net/core/dev.h b/net/core/dev.h
index 8572d2c8dc4a..b7b518bc2be5 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -36,7 +36,6 @@ int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
-extern unsigned int sysctl_skb_defer_max;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
extern int dev_weight_rx_bias;
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 0ccfd5fa5cb9..6a0482e676d3 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -47,7 +47,8 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
/* the cache already hold a dst reference; it can't go away */
dst_hold(dst);
- if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
+ if (unlikely(!time_after(idst->refresh_ts,
+ READ_ONCE(dst_cache->reset_ts)) ||
(dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
dst_cache_per_cpu_dst_set(idst, NULL, 0);
dst_release(dst);
@@ -83,7 +84,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
return NULL;
*saddr = idst->in_saddr.s_addr;
- return container_of(dst, struct rtable, dst);
+ return dst_rtable(dst);
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
@@ -111,8 +112,8 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
return;
idst = this_cpu_ptr(dst_cache->cache);
- dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
- rt6_get_cookie((struct rt6_info *)dst));
+ dst_cache_per_cpu_dst_set(idst, dst,
+ rt6_get_cookie(dst_rt6_info(dst)));
idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
@@ -170,7 +171,7 @@ void dst_cache_reset_now(struct dst_cache *dst_cache)
if (!dst_cache->cache)
return;
- dst_cache->reset_ts = jiffies;
+ dst_cache_reset(dst_cache);
for_each_possible_cpu(i) {
struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
struct dst_entry *dst = idst->dst;
diff --git a/net/core/filter.c b/net/core/filter.c
index 294670d3850d..2510464692af 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -87,6 +87,9 @@
#include "dev.h"
+/* Keep the struct bpf_fib_lookup small so that it fits into a cacheline */
+static_assert(sizeof(struct bpf_fib_lookup) == 64, "struct bpf_fib_lookup size check");
+
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -2215,7 +2218,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
rcu_read_lock();
if (!nh) {
dst = skb_dst(skb);
- nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+ nexthop = rt6_nexthop(dst_rt6_info(dst),
&ipv6_hdr(skb)->daddr);
} else {
nexthop = &nh->ipv6_nh;
@@ -2314,8 +2317,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
rcu_read_lock();
if (!nh) {
- struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = container_of(dst, struct rtable, dst);
+ struct rtable *rt = skb_rtable(skb);
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
} else if (nh->nh_family == AF_INET6) {
@@ -4360,10 +4362,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
struct bpf_map *map;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (unlikely(!xdpf)) {
@@ -4375,11 +4379,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
err = dev_map_enqueue_multi(xdpf, dev, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_enqueue(fwd, xdpf, dev);
}
@@ -4442,9 +4455,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog,
- void *fwd,
- enum bpf_map_type map_type, u32 map_id)
+ struct bpf_prog *xdp_prog, void *fwd,
+ enum bpf_map_type map_type, u32 map_id,
+ u32 flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map;
@@ -4454,11 +4467,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
}
@@ -4495,9 +4517,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
@@ -4517,7 +4541,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
return 0;
}
- return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
+ return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags);
err:
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
return err;
@@ -5886,7 +5910,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
- fl4.flowi4_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl4.flowi4_mark = params->mark;
+ else
+ fl4.flowi4_mark = 0;
fl4.flowi4_secid = 0;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_uid = sock_net_uid(net, NULL);
@@ -6029,7 +6056,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
strict);
} else {
- fl6.flowi6_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl6.flowi6_mark = params->mark;
+ else
+ fl6.flowi6_mark = 0;
fl6.flowi6_secid = 0;
fl6.flowi6_tun_key.tun_id = 0;
fl6.flowi6_uid = sock_net_uid(net, NULL);
@@ -6107,7 +6137,7 @@ set_fwd_params:
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
- BPF_FIB_LOOKUP_SRC)
+ BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
diff --git a/net/core/gro.c b/net/core/gro.c
index 2459ab697f7f..b3b43de1a650 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -231,6 +231,33 @@ done:
return 0;
}
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+
+ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
@@ -331,8 +358,6 @@ static void gro_list_prepare(const struct list_head *head,
list_for_each_entry(p, head, list) {
unsigned long diffs;
- NAPI_GRO_CB(p)->flush = 0;
-
if (hash != skb_get_hash_raw(p)) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
@@ -372,6 +397,7 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
const skb_frag_t *frag0;
unsigned int headlen;
+ NAPI_GRO_CB(skb)->network_offset = 0;
NAPI_GRO_CB(skb)->data_offset = 0;
headlen = skb_headlen(skb);
NAPI_GRO_CB(skb)->frag0 = skb->data;
@@ -471,7 +497,6 @@ found_ptype:
sizeof(u32))); /* Avoid slow unaligned acc */
*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->is_atomic = 1;
NAPI_GRO_CB(skb)->count = 1;
if (unlikely(skb_is_gso(skb))) {
NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index c8a7a451c18a..d0aaaaa556f2 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <net/hotdata.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/list.h>
-
+#include <net/hotdata.h>
+#include <net/proto_memory.h>
struct net_hotdata net_hotdata __cacheline_aligned = {
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
@@ -18,5 +18,8 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.max_backlog = 1000,
.dev_tx_weight = 64,
.dev_rx_weight = 64,
+ .sysctl_max_skb_frags = MAX_SKB_FRAGS,
+ .sysctl_skb_defer_max = 64,
+ .sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
};
EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c
new file mode 100644
index 000000000000..759a9b9f3f89
--- /dev/null
+++ b/net/core/ieee8021q_helpers.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+#include <linux/array_size.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <net/dscp.h>
+#include <net/ieee8021q.h>
+
+/* The following arrays map Traffic Types (TT) to traffic classes (TC) for
+ * different number of queues as shown in the example provided by
+ * IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and
+ * Table I-1 "Traffic type to traffic class mapping".
+ *
+ * Each array is indexed by an IEEE8021Q_TT_* value and holds the traffic class
+ * assigned to that traffic type. The 8-queue table gives every traffic type
+ * its own class; the tables for fewer queues assign one class to several
+ * traffic types, down to the 1-queue table where every entry is class 0.
+ */
+static const u8 ieee8021q_8queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2,
+ [IEEE8021Q_TT_CA] = 3,
+ [IEEE8021Q_TT_VI] = 4,
+ [IEEE8021Q_TT_VO] = 5,
+ [IEEE8021Q_TT_IC] = 6,
+ [IEEE8021Q_TT_NC] = 7,
+};
+
+static const u8 ieee8021q_7queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2,
+ [IEEE8021Q_TT_CA] = 3,
+ [IEEE8021Q_TT_VI] = 4, [IEEE8021Q_TT_VO] = 4,
+ [IEEE8021Q_TT_IC] = 5,
+ [IEEE8021Q_TT_NC] = 6,
+};
+
+static const u8 ieee8021q_6queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0,
+ [IEEE8021Q_TT_BE] = 1,
+ [IEEE8021Q_TT_EE] = 2, [IEEE8021Q_TT_CA] = 2,
+ [IEEE8021Q_TT_VI] = 3, [IEEE8021Q_TT_VO] = 3,
+ [IEEE8021Q_TT_IC] = 4,
+ [IEEE8021Q_TT_NC] = 5,
+};
+
+static const u8 ieee8021q_5queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1,
+ [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2,
+ [IEEE8021Q_TT_IC] = 3,
+ [IEEE8021Q_TT_NC] = 4,
+};
+
+static const u8 ieee8021q_4queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1,
+ [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2,
+ [IEEE8021Q_TT_IC] = 3, [IEEE8021Q_TT_NC] = 3,
+};
+
+static const u8 ieee8021q_3queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1,
+ [IEEE8021Q_TT_IC] = 2, [IEEE8021Q_TT_NC] = 2,
+};
+
+static const u8 ieee8021q_2queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1,
+ [IEEE8021Q_TT_IC] = 1, [IEEE8021Q_TT_NC] = 1,
+};
+
+static const u8 ieee8021q_1queue_tt_tc_map[] = {
+ [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0,
+ [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0,
+ [IEEE8021Q_TT_VI] = 0, [IEEE8021Q_TT_VO] = 0,
+ [IEEE8021Q_TT_IC] = 0, [IEEE8021Q_TT_NC] = 0,
+};
+
+/**
+ * ieee8021q_tt_to_tc - Map IEEE 802.1Q Traffic Type to Traffic Class
+ * @tt: IEEE 802.1Q Traffic Type
+ * @num_queues: Number of queues
+ *
+ * This function maps an IEEE 802.1Q Traffic Type to a Traffic Class (TC) based
+ * on the number of queues configured on the NIC. The mapping is based on the
+ * example provided by IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic
+ * class mapping" and Table I-1 "Traffic type to traffic class mapping".
+ *
+ * Only queue counts from 1 to 8 have a lookup table. Any other @num_queues, or
+ * a @tt outside the range [0, IEEE8021Q_TT_MAX), is reported with pr_err() and
+ * rejected with -EINVAL.
+ *
+ * Return: Traffic Class corresponding to the given Traffic Type or negative
+ * value in case of error.
+ */
+int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues)
+{
+ if (tt < 0 || tt >= IEEE8021Q_TT_MAX) {
+ pr_err("Requested Traffic Type (%d) is out of range (%d)\n", tt,
+ IEEE8021Q_TT_MAX);
+ return -EINVAL;
+ }
+
+ switch (num_queues) { /* one lookup table per supported queue count
+ * NOTE(review): compiletime_assert() breaks the build when its
+ * condition is false, so as written each check below only requires
+ * the table size to differ from IEEE8021Q_TT_MAX - 1. That does not
+ * pin the size to the number of traffic types used as index above;
+ * confirm whether "== IEEE8021Q_TT_MAX" was intended.
+ */
+ case 8:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_8queue_tt_tc_map != max - 1");
+ return ieee8021q_8queue_tt_tc_map[tt];
+ case 7:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_7queue_tt_tc_map != max - 1");
+
+ return ieee8021q_7queue_tt_tc_map[tt];
+ case 6:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_6queue_tt_tc_map != max - 1");
+
+ return ieee8021q_6queue_tt_tc_map[tt];
+ case 5:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_5queue_tt_tc_map != max - 1");
+
+ return ieee8021q_5queue_tt_tc_map[tt];
+ case 4:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_4queue_tt_tc_map != max - 1");
+
+ return ieee8021q_4queue_tt_tc_map[tt];
+ case 3:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_3queue_tt_tc_map != max - 1");
+
+ return ieee8021q_3queue_tt_tc_map[tt];
+ case 2:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_2queue_tt_tc_map != max - 1");
+
+ return ieee8021q_2queue_tt_tc_map[tt];
+ case 1:
+ compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) !=
+ IEEE8021Q_TT_MAX - 1,
+ "ieee8021q_1queue_tt_tc_map != max - 1");
+
+ return ieee8021q_1queue_tt_tc_map[tt];
+ }
+
+ pr_err("Invalid number of queues %d\n", num_queues); /* NOTE(review): num_queues is unsigned int; %u would match the type - confirm */
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(ieee8021q_tt_to_tc);
+
+/**
+ * ietf_dscp_to_ieee8021q_tt - Map IETF DSCP to IEEE 802.1Q Traffic Type
+ * @dscp: IETF DSCP value
+ *
+ * This function maps an IETF DSCP value to an IEEE 802.1Q Traffic Type (TT).
+ * Since there is no corresponding mapping between DSCP and IEEE 802.1Q Traffic
+ * Type, this function is inspired by the RFC8325 documentation which describes
+ * the mapping between DSCP and 802.11 User Priority (UP) values.
+ *
+ * DSCP values that are not listed explicitly in the switch below are mapped
+ * with SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT().
+ *
+ * Return: IEEE 802.1Q Traffic Type corresponding to the given DSCP value
+ */
+int ietf_dscp_to_ieee8021q_tt(u8 dscp)
+{
+ switch (dscp) {
+ case DSCP_CS0: /* shares the IEEE8021Q_TT_BE return with AF11-AF13 below */
+ /* Comment from RFC8325:
+ * [RFC4594], Section 4.8, recommends High-Throughput Data be marked
+ * AF1x (that is, AF11, AF12, and AF13, according to the rules defined
+ * in [RFC2475]).
+ *
+ * By default (as described in Section 2.3), High-Throughput Data will
+ * map to UP 1 and, thus, to the Background Access Category (AC_BK),
+ * which is contrary to the intent expressed in [RFC4594].
+ *
+ * Unfortunately, there really is no corresponding fit for the High-
+ * Throughput Data service class within the constrained 4 Access
+ * Category [IEEE.802.11-2016] model. If the High-Throughput Data
+ * service class is assigned to the Best Effort Access Category (AC_BE),
+ * then it would contend with Low-Latency Data (while [RFC4594]
+ * recommends a distinction in servicing between these service classes)
+ * as well as with the default service class; alternatively, if it is
+ * assigned to the Background Access Category (AC_BK), then it would
+ * receive a less-then-best-effort service and contend with Low-Priority
+ * Data (as discussed in Section 4.2.10).
+ *
+ * As such, since there is no directly corresponding fit for the High-
+ * Throughout Data service class within the [IEEE.802.11-2016] model, it
+ * is generally RECOMMENDED to map High-Throughput Data to UP 0, thereby
+ * admitting it to the Best Effort Access Category (AC_BE).
+ *
+ * Note: The above text is from RFC8325 which is describing the mapping
+ * between DSCP and 802.11 User Priority (UP) values. The mapping
+ * between UP and IEEE 802.1Q Traffic Type is not defined in the RFC but
+ * the 802.11 AC_BK and AC_BE are closely related to the IEEE 802.1Q
+ * Traffic Types BE and BK.
+ */
+ case DSCP_AF11:
+ case DSCP_AF12:
+ case DSCP_AF13:
+ return IEEE8021Q_TT_BE;
+ /* Comment from RFC8325:
+ * RFC3662 and RFC4594 both recommend Low-Priority Data be marked
+ * with DSCP CS1. The Low-Priority Data service class loosely
+ * corresponds to the [IEEE.802.11-2016] Background Access Category
+ */
+ case DSCP_CS1:
+ return IEEE8021Q_TT_BK;
+ case DSCP_CS2:
+ case DSCP_AF21:
+ case DSCP_AF22:
+ case DSCP_AF23:
+ return IEEE8021Q_TT_EE;
+ case DSCP_CS3:
+ case DSCP_AF31:
+ case DSCP_AF32:
+ case DSCP_AF33:
+ return IEEE8021Q_TT_CA;
+ case DSCP_CS4:
+ case DSCP_AF41:
+ case DSCP_AF42:
+ case DSCP_AF43:
+ return IEEE8021Q_TT_VI;
+ case DSCP_CS5:
+ case DSCP_EF:
+ case DSCP_VOICE_ADMIT:
+ return IEEE8021Q_TT_VO;
+ case DSCP_CS6:
+ return IEEE8021Q_TT_IC;
+ case DSCP_CS7:
+ return IEEE8021Q_TT_NC;
+ }
+
+ return SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT(dscp); /* fallback for every DSCP value not matched above */
+}
+EXPORT_SYMBOL_GPL(ietf_dscp_to_ieee8021q_tt);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index af270c202d9a..45fd88405b6b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3733,7 +3733,7 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
+ struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
@@ -3784,7 +3784,6 @@ static struct neigh_sysctl_table {
.extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
- {},
},
};
@@ -3812,8 +3811,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
- memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
- sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
} else {
struct neigh_table *tbl = p->tbl;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1f7f09e56771..4c27a360c294 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -605,13 +605,13 @@ static ssize_t threaded_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- if (!rtnl_trylock())
- return restart_syscall();
+ rcu_read_lock();
if (dev_isalive(netdev))
- ret = sysfs_emit(buf, fmt_dec, netdev->threaded);
+ ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded));
+
+ rcu_read_unlock();
- rtnl_unlock();
return ret;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2f5190aa2f15..4f7a61688d18 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -69,12 +69,15 @@ DEFINE_COOKIE(net_cookie);
static struct net_generic *net_alloc_generic(void)
{
+ unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
+ unsigned int generic_size;
struct net_generic *ng;
- unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+ generic_size = offsetof(struct net_generic, ptr[gen_ptrs]);
ng = kzalloc(generic_size, GFP_KERNEL);
if (ng)
- ng->s.len = max_gen_ptrs;
+ ng->s.len = gen_ptrs;
return ng;
}
@@ -1308,7 +1311,11 @@ static int register_pernet_operations(struct list_head *list,
if (error < 0)
return error;
*ops->id = error;
- max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
+ /* This does not require READ_ONCE as writers already hold
+ * pernet_ops_rwsem. But WRITE_ONCE is needed to protect
+ * net_alloc_generic.
+ */
+ WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1));
}
error = __register_pernet_operations(list, ops);
if (error) {
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index dd6510f2c652..1f6ae6379e0f 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -489,7 +489,17 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
{
if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
- netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail))
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
return -EMSGSIZE;
return 0;
}
@@ -498,7 +508,18 @@ static int
netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
{
if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
- netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes))
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
+ netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
return -EMSGSIZE;
return 0;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 543007f159f9..55bcacf67df3 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -316,7 +316,7 @@ static int netpoll_owner_active(struct net_device *dev)
struct napi_struct *napi;
list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
- if (napi->poll_owner == smp_processor_id())
+ if (READ_ONCE(napi->poll_owner) == smp_processor_id())
return 1;
}
return 0;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 273c24429bce..f4444b4e39e6 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -5,6 +5,7 @@
* Copyright (C) 2016 Red Hat, Inc.
*/
+#include <linux/error-injection.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -172,19 +173,29 @@ static void page_pool_producer_unlock(struct page_pool *pool,
spin_unlock_bh(&pool->ring.producer_lock);
}
+static void page_pool_struct_check(void)
+{ /* Layout assertions for struct page_pool: frag_users, frag_page and
+ * frag_offset must be members of the frag cacheline group, and the size
+ * of that group is checked against 4 * sizeof(long).
+ */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
+ CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long));
+}
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params,
int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
+ page_pool_struct_check();
+
memcpy(&pool->p, &params->fast, sizeof(pool->p));
memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
pool->cpuid = cpuid;
/* Validate only known flags were used */
- if (pool->p.flags & ~(PP_FLAG_ALL))
+ if (pool->slow.flags & ~PP_FLAG_ALL)
return -EINVAL;
if (pool->p.pool_size)
@@ -198,22 +209,26 @@ static int page_pool_init(struct page_pool *pool,
* DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
* which is the XDP_TX use-case.
*/
- if (pool->p.flags & PP_FLAG_DMA_MAP) {
+ if (pool->slow.flags & PP_FLAG_DMA_MAP) {
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
return -EINVAL;
+
+ pool->dma_map = true;
}
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
+ if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
/* In order to request DMA-sync-for-device the page
* needs to be mapped
*/
- if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
return -EINVAL;
if (!pool->p.max_len)
return -EINVAL;
+ pool->dma_sync = true;
+
/* pool->p.offset has to be set according to the address
* offset used by the DMA engine to start copying rx data
*/
@@ -222,7 +237,7 @@ static int page_pool_init(struct page_pool *pool,
pool->has_init_callback = !!pool->slow.init_callback;
#ifdef CONFIG_PAGE_POOL_STATS
- if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) {
+ if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) {
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
if (!pool->recycle_stats)
return -ENOMEM;
@@ -232,12 +247,13 @@ static int page_pool_init(struct page_pool *pool,
* (also percpu) page pool instance.
*/
pool->recycle_stats = &pp_system_recycle_stats;
+ pool->system = true;
}
#endif
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
- if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+ if (!pool->system)
free_percpu(pool->recycle_stats);
#endif
return -ENOMEM;
@@ -248,7 +264,7 @@ static int page_pool_init(struct page_pool *pool,
/* Driver calling page_pool_create() also call page_pool_destroy() */
refcount_set(&pool->user_cnt, 1);
- if (pool->p.flags & PP_FLAG_DMA_MAP)
+ if (pool->dma_map)
get_device(pool->p.dev);
return 0;
@@ -258,11 +274,11 @@ static void page_pool_uninit(struct page_pool *pool)
{
ptr_ring_cleanup(&pool->ring, NULL);
- if (pool->p.flags & PP_FLAG_DMA_MAP)
+ if (pool->dma_map)
put_device(pool->p.dev);
#ifdef CONFIG_PAGE_POOL_STATS
- if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+ if (!pool->system)
free_percpu(pool->recycle_stats);
#endif
}
@@ -383,16 +399,26 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
return page;
}
-static void page_pool_dma_sync_for_device(const struct page_pool *pool,
- const struct page *page,
- unsigned int dma_sync_size)
+static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
+ const struct page *page,
+ u32 dma_sync_size)
{
+#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
dma_addr_t dma_addr = page_pool_get_dma_addr(page);
dma_sync_size = min(dma_sync_size, pool->p.max_len);
- dma_sync_single_range_for_device(pool->p.dev, dma_addr,
- pool->p.offset, dma_sync_size,
- pool->p.dma_dir);
+ __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
+ dma_sync_size, pool->p.dma_dir);
+#endif
+}
+
+static __always_inline void
+page_pool_dma_sync_for_device(const struct page_pool *pool,
+ const struct page *page,
+ u32 dma_sync_size)
+{ /* Gate in front of __page_pool_dma_sync_for_device(): the sync is issued
+ * only when pool->dma_sync is set and dma_dev_need_sync() is true for
+ * pool->p.dev; otherwise nothing is done.
+ */
+ if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
+ __page_pool_dma_sync_for_device(pool, page, dma_sync_size);
+}
static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
@@ -414,8 +440,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
if (page_pool_set_dma_addr(page, dma))
goto unmap_failed;
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+ page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
return true;
@@ -460,8 +485,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
- if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
- unlikely(!page_pool_dma_map(pool, page))) {
+ if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) {
put_page(page);
return NULL;
}
@@ -481,8 +505,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
gfp_t gfp)
{
const int bulk = PP_ALLOC_CACHE_REFILL;
- unsigned int pp_flags = pool->p.flags;
unsigned int pp_order = pool->p.order;
+ bool dma_map = pool->dma_map;
struct page *page;
int i, nr_pages;
@@ -507,8 +531,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
for (i = 0; i < nr_pages; i++) {
page = pool->alloc.cache[i];
- if ((pp_flags & PP_FLAG_DMA_MAP) &&
- unlikely(!page_pool_dma_map(pool, page))) {
+ if (dma_map && unlikely(!page_pool_dma_map(pool, page))) {
put_page(page);
continue;
}
@@ -550,6 +573,7 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
+ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);
/* Calculate distance between two u32 values, valid if distance is below 2^(31)
* https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
@@ -580,7 +604,7 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
- if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ if (!pool->dma_map)
/* Always account for inflight pages, even if we didn't
* map them
*/
@@ -663,7 +687,7 @@ static bool __page_pool_page_can_be_recycled(const struct page *page)
}
/* If the page refcnt == 1, this will try to recycle the page.
- * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
+ * If pool->dma_sync is set, we'll try to sync the DMA area for
* the configured size min(dma_sync_size, pool->max_len).
* If the page refcnt != 1, then the page will be returned to memory
* subsystem.
@@ -686,9 +710,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
if (likely(__page_pool_page_can_be_recycled(page))) {
/* Read barrier done in page_ref_count / READ_ONCE */
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page,
- dma_sync_size);
+ page_pool_dma_sync_for_device(pool, page, dma_sync_size);
if (allow_direct && page_pool_recycle_in_cache(page, pool))
return NULL;
@@ -827,9 +849,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
return NULL;
if (__page_pool_page_can_be_recycled(page)) {
- if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
- page_pool_dma_sync_for_device(pool, page, -1);
-
+ page_pool_dma_sync_for_device(pool, page, -1);
return page;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 283e42f48af6..b86b0a87367d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1036,8 +1036,8 @@ static size_t rtnl_proto_down_size(const struct net_device *dev)
{
size_t size = nla_total_size(1);
- if (dev->proto_down_reason)
- size += nla_total_size(0) + nla_total_size(4);
+ /* Assume dev->proto_down_reason is not zero. */
+ size += nla_total_size(0) + nla_total_size(4);
return size;
}
@@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb,
static u32 rtnl_xdp_prog_skb(struct net_device *dev)
{
const struct bpf_prog *generic_xdp_prog;
+ u32 res = 0;
- ASSERT_RTNL();
+ rcu_read_lock();
+ generic_xdp_prog = rcu_dereference(dev->xdp_prog);
+ if (generic_xdp_prog)
+ res = generic_xdp_prog->aux->id;
+ rcu_read_unlock();
- generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
- if (!generic_xdp_prog)
- return 0;
- return generic_xdp_prog->aux->id;
+ return res;
}
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
@@ -1603,7 +1605,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
upper_dev = netdev_master_upper_dev_get_rcu(dev);
if (upper_dev)
- ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+ ret = nla_put_u32(skb, IFLA_MASTER,
+ READ_ONCE(upper_dev->ifindex));
rcu_read_unlock();
return ret;
@@ -1736,10 +1739,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb,
struct nlattr *pr;
u32 preason;
- if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down)))
goto nla_put_failure;
- preason = dev->proto_down_reason;
+ preason = READ_ONCE(dev->proto_down_reason);
if (!preason)
return 0;
@@ -1812,6 +1815,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
u32 event, int *new_nsid, int new_ifindex,
int tgt_netnsid, gfp_t gfp)
{
+ char devname[IFNAMSIZ];
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
struct Qdisc *qdisc;
@@ -1824,41 +1828,51 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
ifm = nlmsg_data(nlh);
ifm->ifi_family = AF_UNSPEC;
ifm->__ifi_pad = 0;
- ifm->ifi_type = dev->type;
- ifm->ifi_index = dev->ifindex;
+ ifm->ifi_type = READ_ONCE(dev->type);
+ ifm->ifi_index = READ_ONCE(dev->ifindex);
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
- qdisc = rtnl_dereference(dev->qdisc);
- if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
- nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
+ netdev_copy_name(dev, devname);
+ if (nla_put_string(skb, IFLA_IFNAME, devname))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
- nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
- nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
- nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
- nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
- nla_put_u32(skb, IFLA_GROUP, dev->group) ||
- nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
- nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) ||
- nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
- nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
- nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
- nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
- nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) ||
- nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) ||
- nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
- nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
+ netif_running(dev) ? READ_ONCE(dev->operstate) :
+ IF_OPER_DOWN) ||
+ nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) ||
+ nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+ nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) ||
+ nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) ||
+ nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) ||
+ nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) ||
+ nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) ||
+ nla_put_u32(skb, IFLA_NUM_TX_QUEUES,
+ READ_ONCE(dev->num_tx_queues)) ||
+ nla_put_u32(skb, IFLA_GSO_MAX_SEGS,
+ READ_ONCE(dev->gso_max_segs)) ||
+ nla_put_u32(skb, IFLA_GSO_MAX_SIZE,
+ READ_ONCE(dev->gso_max_size)) ||
+ nla_put_u32(skb, IFLA_GRO_MAX_SIZE,
+ READ_ONCE(dev->gro_max_size)) ||
+ nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE,
+ READ_ONCE(dev->gso_ipv4_max_size)) ||
+ nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE,
+ READ_ONCE(dev->gro_ipv4_max_size)) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SIZE,
+ READ_ONCE(dev->tso_max_size)) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
+ READ_ONCE(dev->tso_max_segs)) ||
#ifdef CONFIG_RPS
- nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
+ nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
+ READ_ONCE(dev->num_rx_queues)) ||
#endif
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
- (qdisc &&
- nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
@@ -1909,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
- if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
- goto nla_put_failure;
-
if (new_nsid &&
nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
goto nla_put_failure;
@@ -1924,6 +1935,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
rcu_read_lock();
+ if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC))
+ goto nla_put_failure_rcu;
+ qdisc = rcu_dereference(dev->qdisc);
+ if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id))
+ goto nla_put_failure_rcu;
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
goto nla_put_failure_rcu;
if (rtnl_fill_link_ifmap(skb, dev))
@@ -2530,7 +2546,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
- nla_len(attr) < NLA_HDRLEN) {
+ nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) {
return -EINVAL;
}
if (len >= MAX_VLAN_LIST_LEN)
@@ -5961,19 +5977,17 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
- int h, s_h, err, s_idx, s_idxattr, s_prividx;
struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
- struct hlist_head *head;
+ struct {
+ unsigned long ifindex;
+ int idxattr;
+ int prividx;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
- int idx = 0;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
- s_idxattr = cb->args[2];
- s_prividx = cb->args[3];
+ int err;
cb->seq = net->dev_base_seq;
@@ -5992,39 +6006,26 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- flags, &filters,
- &s_idxattr, &s_prividx,
- extack);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ flags, &filters,
+ &ctx->idxattr, &ctx->prividx,
+ extack);
+ /* If we ran out of room on the first message,
+ * we're in trouble.
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
- s_prividx = 0;
- s_idxattr = 0;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
+ if (err < 0)
+ break;
+ ctx->prividx = 0;
+ ctx->idxattr = 0;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
}
-out:
- cb->args[3] = s_prividx;
- cb->args[2] = s_idxattr;
- cb->args[1] = idx;
- cb->args[0] = h;
- return skb->len;
+ return err;
}
void rtnl_offload_xstats_notify(struct net_device *dev)
diff --git a/net/core/scm.c b/net/core/scm.c
index 5763f3320358..4f6a14babe5a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -91,6 +91,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fpl->user = NULL;
#if IS_ENABLED(CONFIG_UNIX)
fpl->inflight = false;
+ fpl->dead = false;
fpl->edges = NULL;
INIT_LIST_HEAD(&fpl->vertices);
#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0c8b82750000..466999a7515e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -109,9 +109,6 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#define SKB_SMALL_HEAD_HEADROOM \
SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
-int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
-EXPORT_SYMBOL(sysctl_max_skb_frags);
-
/* kcm_write_msgs() relies on casting paged frags to bio_vec to use
* iov_iter_bvec(). These static asserts ensure the cast is valid is long as the
* netmem is a page.
@@ -907,6 +904,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
+static bool is_pp_page(struct page *page)
+{ /* True when page->pp_magic, with its two lowest bits masked off, equals PP_SIGNATURE. */
+ return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+}
+
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom)
{
@@ -1028,6 +1030,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
return napi_pp_put_page(virt_to_page(data));
}
+/**
+ * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
+ * @skb: page pool aware skb
+ *
+ * Increase the fragment reference count (pp_ref_count) of a skb. This is
+ * intended to gain fragment references only for page pool aware skbs,
+ * i.e. when skb->pp_recycle is true, and not for fragments in a
+ * non-pp-recycling skb. It has a fallback to increase references on normal
+ * pages, as page pool aware skbs may also have normal page fragments.
+ */
+static int skb_pp_frag_ref(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo;
+ struct page *head_page;
+ int i;
+
+ if (!skb->pp_recycle)
+ return -EINVAL;
+
+ shinfo = skb_shinfo(skb);
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
+ if (likely(is_pp_page(head_page)))
+ page_pool_ref_page(head_page);
+ else
+ page_ref_inc(head_page);
+ }
+ return 0;
+}
+
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
@@ -2079,11 +2112,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
- int headerlen = skb_headroom(skb);
- unsigned int size = skb_end_offset(skb) + skb->data_len;
- struct sk_buff *n = __alloc_skb(size, gfp_mask,
- skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+ struct sk_buff *n;
+ unsigned int size;
+ int headerlen;
+ if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
+ return NULL;
+
+ headerlen = skb_headroom(skb);
+ size = skb_end_offset(skb) + skb->data_len;
+ n = __alloc_skb(size, gfp_mask,
+ skb_alloc_rx_flag(skb), NUMA_NO_NODE);
if (!n)
return NULL;
@@ -2411,12 +2450,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
- gfp_mask, skb_alloc_rx_flag(skb),
- NUMA_NO_NODE);
- int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
+ struct sk_buff *n;
+ int oldheadroom;
+ if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
+ return NULL;
+
+ oldheadroom = skb_headroom(skb);
+ n = __alloc_skb(newheadroom + skb->len + newtailroom,
+ gfp_mask, skb_alloc_rx_flag(skb),
+ NUMA_NO_NODE);
if (!n)
return NULL;
@@ -4152,7 +4196,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++;
} else {
- __skb_frag_ref(fragfrom, skb->pp_recycle);
+ __skb_frag_ref(fragfrom);
skb_frag_page_copy(fragto, fragfrom);
skb_frag_off_copy(fragto, fragfrom);
skb_frag_size_set(fragto, todo);
@@ -4802,7 +4846,7 @@ normal:
}
*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
- __skb_frag_ref(nskb_frag, nskb->pp_recycle);
+ __skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
if (pos < offset) {
@@ -5933,8 +5977,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < from_shinfo->nr_frags; i++)
- __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
+ if (skb_pp_frag_ref(from)) {
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
+ }
to->truesize += delta;
to->len += len;
@@ -6988,7 +7034,7 @@ nodefer: kfree_skb_napi_cache(skb);
DEBUG_NET_WARN_ON_ONCE(skb->destructor);
sd = &per_cpu(softnet_data, cpu);
- defer_max = READ_ONCE(sysctl_skb_defer_max);
+ defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
if (READ_ONCE(sd->defer_count) >= defer_max)
goto nodefer;
@@ -7040,7 +7086,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
ssize_t maxsize, gfp_t gfp)
{
- size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+ size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
struct page *pages[8], **ppages = pages;
ssize_t spliced = 0, ret = 0;
unsigned int i;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 4d75ef9d24bf..fd20aae30be2 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1226,11 +1226,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- if (psock) {
- read_lock_bh(&sk->sk_callback_lock);
+ if (psock)
sk_psock_data_ready(sk, psock);
- read_unlock_bh(&sk->sk_callback_lock);
- }
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index fe9195186c13..8629f9aecf91 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -127,6 +127,7 @@
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
@@ -283,7 +284,6 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
int sysctl_tstamp_allow_data __read_mostly = 1;
@@ -3241,8 +3241,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
}
EXPORT_SYMBOL(sock_no_socketpair);
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+int sock_no_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
return -EOPNOTSUPP;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8598466a3805..9402889840bf 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -24,8 +24,16 @@ struct bpf_stab {
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+/* This mutex is used to
+ * - protect race between prog/link attach/detach and link prog update, and
+ * - protect race between releasing and accessing map in bpf_link.
+ * A single global mutex lock is used since it is expected contention is low.
+ */
+static DEFINE_MUTEX(sockmap_mutex);
+
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which);
+ struct bpf_prog *old, struct bpf_link *link,
+ u32 which);
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
@@ -71,7 +79,9 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
+ mutex_unlock(&sockmap_mutex);
fdput(f);
return ret;
}
@@ -103,7 +113,9 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
goto put_prog;
}
- ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type);
+ mutex_unlock(&sockmap_mutex);
put_prog:
bpf_prog_put(prog);
put_map:
@@ -1460,55 +1472,84 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}
-static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
- u32 which)
+static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ struct bpf_link ***plink, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
+ struct bpf_prog **cur_pprog;
+ struct bpf_link **cur_plink;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- *pprog = &progs->msg_parser;
+ cur_pprog = &progs->msg_parser;
+ cur_plink = &progs->msg_parser_link;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- *pprog = &progs->stream_parser;
+ cur_pprog = &progs->stream_parser;
+ cur_plink = &progs->stream_parser_link;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- *pprog = &progs->stream_verdict;
+ cur_pprog = &progs->stream_verdict;
+ cur_plink = &progs->stream_verdict_link;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- *pprog = &progs->skb_verdict;
+ cur_pprog = &progs->skb_verdict;
+ cur_plink = &progs->skb_verdict_link;
break;
default:
return -EOPNOTSUPP;
}
+ *pprog = cur_pprog;
+ if (plink)
+ *plink = cur_plink;
return 0;
}
+/* Handle the following four cases:
+ * prog_attach: prog != NULL, old == NULL, link == NULL
+ * prog_detach: prog == NULL, old != NULL, link == NULL
+ * link_attach: prog != NULL, old == NULL, link != NULL
+ * link_detach: prog == NULL, old != NULL, link != NULL
+ */
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+ struct bpf_prog *old, struct bpf_link *link,
+ u32 which)
{
struct bpf_prog **pprog;
+ struct bpf_link **plink;
int ret;
- ret = sock_map_prog_lookup(map, &pprog, which);
+ ret = sock_map_prog_link_lookup(map, &pprog, &plink, which);
if (ret)
return ret;
- if (old)
- return psock_replace_prog(pprog, prog, old);
+ /* for prog_attach/prog_detach/link_attach, return error if a bpf_link
+ * exists for that prog.
+ */
+ if ((!link || prog) && *plink)
+ return -EBUSY;
- psock_set_prog(pprog, prog);
- return 0;
+ if (old) {
+ ret = psock_replace_prog(pprog, prog, old);
+ if (!ret)
+ *plink = NULL;
+ } else {
+ psock_set_prog(pprog, prog);
+ if (link)
+ *plink = link;
+ }
+
+ return ret;
}
int sock_map_bpf_prog_query(const union bpf_attr *attr,
@@ -1533,7 +1574,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
rcu_read_lock();
- ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type);
if (ret)
goto end;
@@ -1663,6 +1704,196 @@ void sock_map_close(struct sock *sk, long timeout)
}
EXPORT_SYMBOL_GPL(sock_map_close);
+struct sockmap_link {
+ struct bpf_link link;
+ struct bpf_map *map;
+ enum bpf_attach_type attach_type;
+};
+
+static void sock_map_link_release(struct bpf_link *link)
+{
+ struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+
+ mutex_lock(&sockmap_mutex);
+ if (!sockmap_link->map)
+ goto out;
+
+ WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link,
+ sockmap_link->attach_type));
+
+ bpf_map_put_with_uref(sockmap_link->map);
+ sockmap_link->map = NULL;
+out:
+ mutex_unlock(&sockmap_mutex);
+}
+
+static int sock_map_link_detach(struct bpf_link *link)
+{
+ sock_map_link_release(link);
+ return 0;
+}
+
+static void sock_map_link_dealloc(struct bpf_link *link)
+{
+ kfree(link);
+}
+
+/* Handle the following two cases:
+ * case 1: link != NULL, prog != NULL, old != NULL
+ * case 2: link != NULL, prog != NULL, old == NULL
+ */
+static int sock_map_link_update_prog(struct bpf_link *link,
+ struct bpf_prog *prog,
+ struct bpf_prog *old)
+{
+ const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+ struct bpf_prog **pprog, *old_link_prog;
+ struct bpf_link **plink;
+ int ret = 0;
+
+ mutex_lock(&sockmap_mutex);
+
+ /* If old prog is not NULL, ensure old prog is the same as link->prog. */
+ if (old && link->prog != old) {
+ ret = -EPERM;
+ goto out;
+ }
+ /* Ensure link->prog has the same type/attach_type as the new prog. */
+ if (link->prog->type != prog->type ||
+ link->prog->expected_attach_type != prog->expected_attach_type) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
+ sockmap_link->attach_type);
+ if (ret)
+ goto out;
+
+ /* return error if the stored bpf_link does not match the incoming bpf_link. */
+ if (link != *plink) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (old) {
+ ret = psock_replace_prog(pprog, prog, old);
+ if (ret)
+ goto out;
+ } else {
+ psock_set_prog(pprog, prog);
+ }
+
+ bpf_prog_inc(prog);
+ old_link_prog = xchg(&link->prog, prog);
+ bpf_prog_put(old_link_prog);
+
+out:
+ mutex_unlock(&sockmap_mutex);
+ return ret;
+}
+
+static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link)
+{
+ u32 map_id = 0;
+
+ mutex_lock(&sockmap_mutex);
+ if (sockmap_link->map)
+ map_id = sockmap_link->map->id;
+ mutex_unlock(&sockmap_mutex);
+ return map_id;
+}
+
+static int sock_map_link_fill_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+ u32 map_id = sock_map_link_get_map_id(sockmap_link);
+
+ info->sockmap.map_id = map_id;
+ info->sockmap.attach_type = sockmap_link->attach_type;
+ return 0;
+}
+
+static void sock_map_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+ u32 map_id = sock_map_link_get_map_id(sockmap_link);
+
+ seq_printf(seq, "map_id:\t%u\n", map_id);
+ seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type);
+}
+
+static const struct bpf_link_ops sock_map_link_ops = {
+ .release = sock_map_link_release,
+ .dealloc = sock_map_link_dealloc,
+ .detach = sock_map_link_detach,
+ .update_prog = sock_map_link_update_prog,
+ .fill_link_info = sock_map_link_fill_info,
+ .show_fdinfo = sock_map_link_show_fdinfo,
+};
+
+int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_link_primer link_primer;
+ struct sockmap_link *sockmap_link;
+ enum bpf_attach_type attach_type;
+ struct bpf_map *map;
+ int ret;
+
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ map = bpf_map_get_with_uref(attr->link_create.target_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP && map->map_type != BPF_MAP_TYPE_SOCKHASH) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ sockmap_link = kzalloc(sizeof(*sockmap_link), GFP_USER);
+ if (!sockmap_link) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ attach_type = attr->link_create.attach_type;
+ bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog);
+ sockmap_link->map = map;
+ sockmap_link->attach_type = attach_type;
+
+ ret = bpf_link_prime(&sockmap_link->link, &link_primer);
+ if (ret) {
+ kfree(sockmap_link);
+ goto out;
+ }
+
+ mutex_lock(&sockmap_mutex);
+ ret = sock_map_prog_update(map, prog, NULL, &sockmap_link->link, attach_type);
+ mutex_unlock(&sockmap_mutex);
+ if (ret) {
+ bpf_link_cleanup(&link_primer);
+ goto out;
+ }
+
+ /* Increase refcnt for the prog since when old prog is replaced with
+ * psock_replace_prog() and psock_set_prog() its refcnt will be decreased.
+ *
+ * Actually, we do not need to increase refcnt for the prog since bpf_link
+ * will hold a reference. But in order to have less complexity w.r.t.
+ * replacing/setting prog, let us increase the refcnt to make things simpler.
+ */
+ bpf_prog_inc(prog);
+
+ return bpf_link_settle(&link_primer);
+
+out:
+ bpf_map_put_with_uref(map);
+ return ret;
+}
+
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 903ab4a51c17..c9fb9ad87485 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
+#include <net/proto_memory.h>
#include <net/rps.h>
#include "dev.h"
@@ -415,7 +416,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "mem_pcpu_rsv",
- .data = &sysctl_mem_pcpu_rsv,
+ .data = &net_hotdata.sysctl_mem_pcpu_rsv,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -595,7 +596,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "max_skb_frags",
- .data = &sysctl_max_skb_frags,
+ .data = &net_hotdata.sysctl_max_skb_frags,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -654,13 +655,12 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "skb_defer_max",
- .data = &sysctl_skb_defer_max,
+ .data = &net_hotdata.sysctl_skb_defer_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
- { }
};
static struct ctl_table netns_core_table[] = {
@@ -697,7 +697,6 @@ static struct ctl_table netns_core_table[] = {
.extra2 = SYSCTL_ONE,
.proc_handler = proc_dou8vec_minmax,
},
- { }
};
static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
@@ -715,20 +714,21 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
static __net_init int sysctl_core_net_init(struct net *net)
{
- struct ctl_table *tbl, *tmp;
+ size_t table_size = ARRAY_SIZE(netns_core_table);
+ struct ctl_table *tbl;
tbl = netns_core_table;
if (!net_eq(net, &init_net)) {
+ int i;
tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
if (tbl == NULL)
goto err_dup;
- for (tmp = tbl; tmp->procname; tmp++)
- tmp->data += (char *)net - (char *)&init_net;
+ for (i = 0; i < table_size; ++i)
+ tbl[i].data += (char *)net - (char *)&init_net;
}
- net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl,
- ARRAY_SIZE(netns_core_table));
+ net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
if (net->core.sysctl_hdr == NULL)
goto err_reg;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 4d9823d6dced..d6b30700af67 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -353,6 +353,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
/**
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
* @sk: socket to perform estimator on
+ * @mrtt: measured RTT
*
* This code is almost identical with TCP's tcp_rtt_estimator(), since
* - it has a higher sampling frequency (recommended by RFC 1323),
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9fc9cea4c251..ff41bd6f99c3 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -24,6 +24,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/netns/generic.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -521,7 +522,8 @@ out:
return err;
}
-static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
int err;
const struct iphdr *rxiph;
@@ -706,7 +708,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
kfree_skb(skb);
return 0;
}
@@ -869,7 +871,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -909,7 +911,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v4_ctl_send_reset(sk, skb);
+ dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c8ca703dc331..85f4b8fdbe5e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
#include <net/secure_seq.h>
#include <net/netns/generic.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include "dccp.h"
#include "ipv6.h"
@@ -256,7 +257,8 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet_rsk(req)->pktopts);
}
-static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
+ enum sk_rst_reason reason)
{
const struct ipv6hdr *rxip6h;
struct sk_buff *skb;
@@ -656,7 +658,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
discard:
if (opt_skb != NULL)
__kfree_skb(opt_skb);
@@ -762,7 +764,7 @@ lookup:
if (nsk == sk) {
reqsk_put(req);
} else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
goto discard_and_relse;
} else {
sock_put(sk);
@@ -801,7 +803,7 @@ no_dccp_socket:
if (dh->dccph_type != DCCP_PKT_RESET) {
DCCP_SKB_CB(skb)->dccpd_reset_code =
DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v6_ctl_send_reset(sk, skb);
+ dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
}
discard_it:
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 64d805b27add..251a57cf5822 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -15,6 +15,7 @@
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
+#include <net/rstreason.h>
#include "ackvec.h"
#include "ccid.h"
@@ -202,7 +203,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
inet_csk_reqsk_queue_drop(sk, req);
out:
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index ee8d4f5afa72..3fc474d6e57d 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -90,8 +90,6 @@ static struct ctl_table dccp_default_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
-
- { }
};
static struct ctl_table_header *dccp_table_header;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2f347cd37316..12521a7d4048 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1511,8 +1511,7 @@ static int dsa_switch_probe(struct dsa_switch *ds)
ds->ops->phylink_mac_config ||
ds->ops->phylink_mac_finish ||
ds->ops->phylink_mac_link_down ||
- ds->ops->phylink_mac_link_up ||
- ds->ops->adjust_link)
+ ds->ops->phylink_mac_link_up)
return -EINVAL;
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c6febc3d96d9..9a249d4ac3a5 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1535,25 +1535,6 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
cpu_dp->tag_ops = tag_ops;
}
-static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
-{
- struct device_node *phy_dn;
- struct phy_device *phydev;
-
- phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0);
- if (!phy_dn)
- return NULL;
-
- phydev = of_phy_find_device(phy_dn);
- if (!phydev) {
- of_node_put(phy_dn);
- return ERR_PTR(-EPROBE_DEFER);
- }
-
- of_node_put(phy_dn);
- return phydev;
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
@@ -1616,17 +1597,10 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
phy_interface_t interface)
{
struct dsa_port *dp = dsa_phylink_to_port(config);
- struct phy_device *phydev = NULL;
struct dsa_switch *ds = dp->ds;
- if (dsa_port_is_user(dp))
- phydev = dp->user->phydev;
-
- if (!ds->ops->phylink_mac_link_down) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_down)
return;
- }
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
@@ -1641,11 +1615,8 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->phylink_mac_link_up) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_up)
return;
- }
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev,
speed, duplex, tx_pause, rx_pause);
@@ -1708,78 +1679,6 @@ void dsa_port_phylink_destroy(struct dsa_port *dp)
dp->pl = NULL;
}
-static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable)
-{
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- int err = 0;
-
- phydev = dsa_port_get_phy_device(dp);
- if (!phydev)
- return 0;
-
- if (IS_ERR(phydev))
- return PTR_ERR(phydev);
-
- if (enable) {
- err = genphy_resume(phydev);
- if (err < 0)
- goto err_put_dev;
-
- err = genphy_read_status(phydev);
- if (err < 0)
- goto err_put_dev;
- } else {
- err = genphy_suspend(phydev);
- if (err < 0)
- goto err_put_dev;
- }
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
-
-err_put_dev:
- put_device(&phydev->mdio.dev);
- return err;
-}
-
-static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp)
-{
- struct device_node *dn = dp->dn;
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- phy_interface_t mode;
- int err;
-
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
-
- phydev = of_phy_find_device(dn);
-
- err = of_get_phy_mode(dn, &mode);
- if (err)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
-
- genphy_read_status(phydev);
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- put_device(&phydev->mdio.dev);
-
- return 0;
-}
-
static int dsa_shared_port_phylink_register(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
@@ -1983,44 +1882,28 @@ int dsa_shared_port_link_register_of(struct dsa_port *dp)
dsa_switches_apply_workarounds))
return -EINVAL;
- if (!ds->ops->adjust_link) {
- if (missing_link_description) {
- dev_warn(ds->dev,
- "Skipping phylink registration for %s port %d\n",
- dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
- } else {
- dsa_shared_port_link_down(dp);
+ if (missing_link_description) {
+ dev_warn(ds->dev,
+ "Skipping phylink registration for %s port %d\n",
+ dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ } else {
+ dsa_shared_port_link_down(dp);
- return dsa_shared_port_phylink_register(dp);
- }
- return 0;
+ return dsa_shared_port_phylink_register(dp);
}
- dev_warn(ds->dev,
- "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
-
- if (of_phy_is_fixed_link(dp->dn))
- return dsa_shared_port_fixed_link_register_of(dp);
- else
- return dsa_shared_port_setup_phy_of(dp, true);
+ return 0;
}
void dsa_shared_port_link_unregister_of(struct dsa_port *dp)
{
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->adjust_link && dp->pl) {
+ if (dp->pl) {
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
dsa_port_phylink_destroy(dp);
return;
}
-
- if (of_phy_is_fixed_link(dp->dn))
- of_phy_deregister_fixed_link(dp->dn);
- else
- dsa_shared_port_setup_phy_of(dp, false);
}
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr,
diff --git a/net/dsa/trace.h b/net/dsa/trace.h
index 567f29a39707..83f3e5f78491 100644
--- a/net/dsa/trace.h
+++ b/net/dsa/trace.h
@@ -39,8 +39,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_hw,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -98,8 +98,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_refcount,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -157,8 +157,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
@@ -199,7 +199,7 @@ TRACE_EVENT(dsa_lag_fdb_add_hw,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -227,7 +227,7 @@ TRACE_EVENT(dsa_lag_fdb_add_bump,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -255,7 +255,7 @@ TRACE_EVENT(dsa_lag_fdb_del_hw,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -283,7 +283,7 @@ TRACE_EVENT(dsa_lag_fdb_del_drop,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -310,7 +310,7 @@ TRACE_EVENT(dsa_lag_fdb_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, lag_dev->name);
+ __assign_str(dev);
ether_addr_copy(__entry->addr, addr);
__entry->vid = vid;
dsa_db_print(db, __entry->db_buf);
@@ -338,8 +338,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_hw,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
__entry->flags = vlan->flags;
@@ -383,8 +383,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_refcount,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
__entry->flags = vlan->flags;
@@ -426,8 +426,8 @@ TRACE_EVENT(dsa_vlan_del_not_found,
),
TP_fast_assign(
- __assign_str(dev, dev_name(dp->ds->dev));
- __assign_str(kind, dsa_port_kind(dp));
+ __assign_str(dev);
+ __assign_str(kind);
__entry->port = dp->index;
__entry->vid = vlan->vid;
),
diff --git a/net/dsa/user.c b/net/dsa/user.c
index c94b868855aa..867c5fe9a4da 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -2120,7 +2120,7 @@ int dsa_user_change_mtu(struct net_device *dev, int new_mtu)
if (err)
goto out_port_failed;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
dsa_bridge_mtu_normalization(dp);
@@ -2137,6 +2137,32 @@ out_conduit_failed:
}
static int __maybe_unused
+dsa_user_dcbnl_set_apptrust(struct net_device *dev, u8 *sel, int nsel)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_set_apptrust)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_set_apptrust(ds, port, sel, nsel);
+}
+
+static int __maybe_unused
+dsa_user_dcbnl_get_apptrust(struct net_device *dev, u8 *sel, int *nsel)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_get_apptrust)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_get_apptrust(ds, port, sel, nsel);
+}
+
+static int __maybe_unused
dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
{
struct dsa_port *dp = dsa_user_to_port(dev);
@@ -2163,6 +2189,58 @@ dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
return 0;
}
+/* Update the DSCP prio entries on all user ports of the switch in case
+ * the switch supports global DSCP prio instead of per port DSCP prios.
+ */
+static int dsa_user_dcbnl_ieee_global_dscp_setdel(struct net_device *dev,
+ struct dcb_app *app, bool del)
+{
+ int (*setdel)(struct net_device *dev, struct dcb_app *app);
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
+ int err, restore_err;
+
+ if (del)
+ setdel = dcb_ieee_delapp;
+ else
+ setdel = dcb_ieee_setapp;
+
+ dsa_switch_for_each_user_port(other_dp, ds) {
+ struct net_device *user = other_dp->user;
+
+ if (!user || user == dev)
+ continue;
+
+ err = setdel(user, app);
+ if (err)
+ goto err_try_to_restore;
+ }
+
+ return 0;
+
+err_try_to_restore:
+
+ /* Revert logic to restore previous state of app entries */
+ if (!del)
+ setdel = dcb_ieee_delapp;
+ else
+ setdel = dcb_ieee_setapp;
+
+ dsa_switch_for_each_user_port_continue_reverse(other_dp, ds) {
+ struct net_device *user = other_dp->user;
+
+ if (!user || user == dev)
+ continue;
+
+ restore_err = setdel(user, app);
+ if (restore_err)
+ netdev_err(user, "Failed to restore DSCP prio entry configuration\n");
+ }
+
+ return err;
+}
+
static int __maybe_unused
dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
{
@@ -2194,6 +2272,17 @@ dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
return err;
}
+ if (!ds->dscp_prio_mapping_is_global)
+ return 0;
+
+ err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, false);
+ if (err) {
+ if (ds->ops->port_del_dscp_prio)
+ ds->ops->port_del_dscp_prio(ds, port, dscp, new_prio);
+ dcb_ieee_delapp(dev, app);
+ return err;
+ }
+
return 0;
}
@@ -2264,6 +2353,18 @@ dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app)
return err;
}
+ if (!ds->dscp_prio_mapping_is_global)
+ return 0;
+
+ err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, true);
+ if (err) {
+ if (ds->ops->port_add_dscp_prio)
+ ds->ops->port_add_dscp_prio(ds, port, dscp,
+ app->priority);
+ dcb_ieee_setapp(dev, app);
+ return err;
+ }
+
return 0;
}
@@ -2376,6 +2477,8 @@ static const struct ethtool_ops dsa_user_ethtool_ops = {
static const struct dcbnl_rtnl_ops __maybe_unused dsa_user_dcbnl_ops = {
.ieee_setapp = dsa_user_dcbnl_ieee_setapp,
.ieee_delapp = dsa_user_dcbnl_ieee_delapp,
+ .dcbnl_setapptrust = dsa_user_dcbnl_set_apptrust,
+ .dcbnl_getapptrust = dsa_user_dcbnl_get_apptrust,
};
static void dsa_user_get_stats64(struct net_device *dev,
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 563e94e0cbd8..bd04f28d5cf4 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -4,7 +4,6 @@
#include <linux/ethtool_netlink.h>
#include <linux/pm_runtime.h>
#include "netlink.h"
-#include <linux/phy_link_topology.h>
static struct genl_family ethtool_genl_family;
@@ -31,24 +30,6 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
-const struct nla_policy ethnl_header_policy_phy[] = {
- [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
- [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
- .len = ALTIFNAMSIZ - 1 },
- [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
- ETHTOOL_FLAGS_BASIC),
- [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
-};
-
-const struct nla_policy ethnl_header_policy_phy_stats[] = {
- [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
- [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
- .len = ALTIFNAMSIZ - 1 },
- [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
- ETHTOOL_FLAGS_STATS),
- [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
-};
-
int ethnl_ops_begin(struct net_device *dev)
{
int ret;
@@ -108,9 +89,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
const struct nlattr *header, struct net *net,
struct netlink_ext_ack *extack, bool require_dev)
{
- struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)];
+ struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)];
const struct nlattr *devname_attr;
- struct phy_device *phydev = NULL;
struct net_device *dev = NULL;
u32 flags = 0;
int ret;
@@ -124,7 +104,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
/* No validation here, command policy should have a nested policy set
* for the header, therefore validation should have already been done.
*/
- ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header,
+ ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header,
NULL, extack);
if (ret < 0)
return ret;
@@ -165,30 +145,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
return -EINVAL;
}
- if (dev) {
- if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
- struct nlattr *phy_id;
-
- phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX];
- phydev = phy_link_topo_get_phy(dev->link_topo,
- nla_get_u32(phy_id));
- if (!phydev) {
- NL_SET_BAD_ATTR(extack, phy_id);
- return -ENODEV;
- }
- } else {
- /* If we need a PHY but no phy index is specified, fallback
- * to dev->phydev
- */
- phydev = dev->phydev;
- }
- } else if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
- NL_SET_ERR_MSG_ATTR(extack, header,
- "can't target a PHY without a netdev");
- return -EINVAL;
- }
-
- req_info->phydev = phydev;
req_info->dev = dev;
req_info->flags = flags;
return 0;
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index d57a890b5d9e..9a333a8d04c1 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -250,7 +250,6 @@ static inline unsigned int ethnl_reply_header_size(void)
* @dev: network device the request is for (may be null)
* @dev_tracker: refcount tracker for @dev reference
* @flags: request flags common for all request types
- * @phydev: phy_device connected to @dev this request is for (may be null)
*
* This is a common base for request specific structures holding data from
* parsed userspace request. These always embed struct ethnl_req_info at
@@ -260,7 +259,6 @@ struct ethnl_req_info {
struct net_device *dev;
netdevice_tracker dev_tracker;
u32 flags;
- struct phy_device *phydev;
};
static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
@@ -397,12 +395,9 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops;
extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
extern const struct ethnl_request_ops ethnl_mm_request_ops;
-extern const struct ethnl_request_ops ethnl_phy_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
-extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1];
-extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1];
extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1];
extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1];
extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1];
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e9d45133d641..e6904288d40d 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -61,39 +61,36 @@ static bool hsr_check_carrier(struct hsr_port *master)
return false;
}
-static void hsr_check_announce(struct net_device *hsr_dev,
- unsigned char old_operstate)
+static void hsr_check_announce(struct net_device *hsr_dev)
{
struct hsr_priv *hsr;
hsr = netdev_priv(hsr_dev);
-
- if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) {
- /* Went up */
- hsr->announce_count = 0;
- mod_timer(&hsr->announce_timer,
- jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+ if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) {
+ /* Enable announce timer and start sending supervisory frames */
+ if (!timer_pending(&hsr->announce_timer)) {
+ hsr->announce_count = 0;
+ mod_timer(&hsr->announce_timer, jiffies +
+ msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
+ }
+ } else {
+ /* Deactivate the announce timer */
+ timer_delete(&hsr->announce_timer);
}
-
- if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP)
- /* Went down */
- del_timer(&hsr->announce_timer);
}
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr)
{
struct hsr_port *master;
- unsigned char old_operstate;
bool has_carrier;
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
/* netif_stacked_transfer_operstate() cannot be used here since
* it doesn't set IF_OPER_LOWERLAYERDOWN (?)
*/
- old_operstate = READ_ONCE(master->dev->operstate);
has_carrier = hsr_check_carrier(master);
hsr_set_operstate(master, has_carrier);
- hsr_check_announce(master->dev, old_operstate);
+ hsr_check_announce(master->dev);
}
int hsr_get_max_mtu(struct hsr_priv *hsr)
@@ -123,7 +120,7 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
return -EINVAL;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -146,6 +143,9 @@ static int hsr_dev_open(struct net_device *dev)
case HSR_PT_SLAVE_B:
designation = "Slave B";
break;
+ case HSR_PT_INTERLINK:
+ designation = "Interlink";
+ break;
default:
designation = "Unknown";
}
@@ -285,6 +285,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
struct hsr_priv *hsr = master->hsr;
__u8 type = HSR_TLV_LIFE_CHECK;
struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tlv *hsr_stlv;
struct hsr_sup_tag *hsr_stag;
struct sk_buff *skb;
@@ -324,6 +325,16 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
+ if (hsr->redbox) {
+ hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv));
+ hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC;
+ hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload);
+
+ /* Payload: MacAddressRedBox */
+ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
+ ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox);
+ }
+
if (skb_put_padto(skb, ETH_ZLEN)) {
spin_unlock_bh(&hsr->seqnr_lock);
return;
@@ -405,6 +416,10 @@ void hsr_del_ports(struct hsr_priv *hsr)
if (port)
hsr_del_port(port);
+ port = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+ if (port)
+ hsr_del_port(port);
+
port = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
if (port)
hsr_del_port(port);
@@ -534,8 +549,8 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
};
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
- unsigned char multicast_spec, u8 protocol_version,
- struct netlink_ext_ack *extack)
+ struct net_device *interlink, unsigned char multicast_spec,
+ u8 protocol_version, struct netlink_ext_ack *extack)
{
bool unregister = false;
struct hsr_priv *hsr;
@@ -544,6 +559,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr = netdev_priv(hsr_dev);
INIT_LIST_HEAD(&hsr->ports);
INIT_LIST_HEAD(&hsr->node_db);
+ INIT_LIST_HEAD(&hsr->proxy_node_db);
spin_lock_init(&hsr->list_lock);
eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
@@ -569,9 +585,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
/* Overflow soon to find bugs easier: */
hsr->sequence_nr = HSR_SEQNR_START;
hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
+ hsr->interlink_sequence_nr = HSR_SEQNR_START;
timer_setup(&hsr->announce_timer, hsr_announce, 0);
timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
+ timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0);
ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
@@ -604,6 +622,17 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res)
goto err_unregister;
+ if (interlink) {
+ res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack);
+ if (res)
+ goto err_unregister;
+
+ hsr->redbox = true;
+ ether_addr_copy(hsr->macaddress_redbox, interlink->dev_addr);
+ mod_timer(&hsr->prune_proxy_timer,
+ jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD));
+ }
+
hsr_debugfs_init(hsr, hsr_dev);
mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index 9060c92168f9..655284095b78 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -16,8 +16,8 @@
void hsr_del_ports(struct hsr_priv *hsr);
void hsr_dev_setup(struct net_device *dev);
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
- unsigned char multicast_spec, u8 protocol_version,
- struct netlink_ext_ack *extack);
+ struct net_device *interlink, unsigned char multicast_spec,
+ u8 protocol_version, struct netlink_ext_ack *extack);
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
int hsr_get_max_mtu(struct hsr_priv *hsr);
#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 5d68cb181695..05a61b8286ec 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -377,6 +377,15 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port,
*/
ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr);
}
+
+ /* When HSR node is used as RedBox - the frame received from HSR ring
+ * requires source MAC address (SA) replacement to one which can be
+ * recognized by SAN devices (otherwise, frames are dropped by switch)
+ */
+ if (port->type == HSR_PT_INTERLINK)
+ ether_addr_copy(eth_hdr(skb)->h_source,
+ port->hsr->macaddress_redbox);
+
return dev_queue_xmit(skb);
}
@@ -390,9 +399,57 @@ bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
{
+ struct sk_buff *skb;
+
if (port->dev->features & NETIF_F_HW_HSR_FWD)
return prp_drop_frame(frame, port);
+ /* RedBox-specific frame dropping policies
+ *
+ * Do not send HSR supervision frames out of the interlink port (to SAN devices)
+ */
+ if (frame->is_supervision && port->type == HSR_PT_INTERLINK)
+ return true;
+
+ /* Do not forward to the other HSR port (A or B) unicast frames which
+ * are addressed to the interlink port (and are in the ProxyNodeTable).
+ */
+ skb = frame->skb_hsr;
+ if (skb && prp_drop_frame(frame, port) &&
+ is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+
+ /* Do not forward to port C (Interlink) unicast frames received on
+ * ports A and B if DA is in NodeTable.
+ */
+ if ((frame->port_rcv->type == HSR_PT_SLAVE_A ||
+ frame->port_rcv->type == HSR_PT_SLAVE_B) &&
+ port->type == HSR_PT_INTERLINK) {
+ skb = frame->skb_hsr;
+ if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+ }
+
+ /* Do not forward to ports A and B unicast frames received on the
+ * interlink port if they are addressed to one of the nodes registered
+ * in the ProxyNodeTable.
+ */
+ if ((port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) &&
+ frame->port_rcv->type == HSR_PT_INTERLINK) {
+ skb = frame->skb_std;
+ if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ hsr_is_node_in_db(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest)) {
+ return true;
+ }
+ }
+
return false;
}
@@ -448,13 +505,14 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
}
/* Check if frame is to be dropped. Eg. for PRP no forward
- * between ports.
+ * between ports, or sending HSR supervision to RedBox.
*/
if (hsr->proto_ops->drop_frame &&
hsr->proto_ops->drop_frame(frame, port))
continue;
- if (port->type != HSR_PT_MASTER)
+ if (port->type == HSR_PT_SLAVE_A ||
+ port->type == HSR_PT_SLAVE_B)
skb = hsr->proto_ops->create_tagged_frame(frame, port);
else
skb = hsr->proto_ops->get_untagged_frame(frame, port);
@@ -469,7 +527,9 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
hsr_deliver_master(skb, port->dev, frame->node_src);
} else {
if (!hsr_xmit(skb, port, frame))
- sent = true;
+ if (port->type == HSR_PT_SLAVE_A ||
+ port->type == HSR_PT_SLAVE_B)
+ sent = true;
}
}
}
@@ -503,10 +563,12 @@ static void handle_std_frame(struct sk_buff *skb,
frame->skb_prp = NULL;
frame->skb_std = skb;
- if (port->type != HSR_PT_MASTER) {
+ if (port->type != HSR_PT_MASTER)
frame->is_from_san = true;
- } else {
- /* Sequence nr for the master node */
+
+ if (port->type == HSR_PT_MASTER ||
+ port->type == HSR_PT_INTERLINK) {
+ /* Sequence nr for the master/interlink node */
lockdep_assert_held(&hsr->seqnr_lock);
frame->sequence_nr = hsr->sequence_nr;
hsr->sequence_nr++;
@@ -564,6 +626,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
{
struct hsr_priv *hsr = port->hsr;
struct hsr_vlan_ethhdr *vlan_hdr;
+ struct list_head *n_db;
struct ethhdr *ethhdr;
__be16 proto;
int ret;
@@ -574,9 +637,13 @@ static int fill_frame_info(struct hsr_frame_info *frame,
memset(frame, 0, sizeof(*frame));
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
- frame->is_supervision,
- port->type);
+
+ n_db = &hsr->node_db;
+ if (port->type == HSR_PT_INTERLINK)
+ n_db = &hsr->proxy_node_db;
+
+ frame->node_src = hsr_get_node(port, n_db, skb,
+ frame->is_supervision, port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 26329db09210..614df9649794 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -71,6 +71,14 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
return NULL;
}
+/* Return true if find_node_by_addr_A() finds a node for MAC address @addr in @node_db
+ */
+bool hsr_is_node_in_db(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN])
+{
+ return !!find_node_by_addr_A(node_db, addr);
+}
+
/* Helper for device init; the self_node is used in hsr_rcv() to recognize
* frames from self that's been looped over the HSR ring.
*/
@@ -223,6 +231,15 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
}
}
+ /* Check whether the node is already in the proxy node table */
+ list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
+ if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
+ if (hsr->proto_ops->update_san_info)
+ hsr->proto_ops->update_san_info(node, is_sup);
+ return node;
+ }
+ }
+
/* Everyone may create a node entry, connected node to a HSR/PRP
* device.
*/
@@ -418,6 +435,10 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
node_dst = find_node_by_addr_A(&port->hsr->node_db,
eth_hdr(skb)->h_dest);
+ if (!node_dst && port->hsr->redbox)
+ node_dst = find_node_by_addr_A(&port->hsr->proxy_node_db,
+ eth_hdr(skb)->h_dest);
+
if (!node_dst) {
if (port->hsr->prot_version != PRP_V1 && net_ratelimit())
netdev_err(skb->dev, "%s: Unknown node\n", __func__);
@@ -561,6 +582,37 @@ void hsr_prune_nodes(struct timer_list *t)
jiffies + msecs_to_jiffies(PRUNE_PERIOD));
}
+void hsr_prune_proxy_nodes(struct timer_list *t)
+{
+ struct hsr_priv *hsr = from_timer(hsr, t, prune_proxy_timer);
+ unsigned long timestamp;
+ struct hsr_node *node;
+ struct hsr_node *tmp;
+
+ spin_lock_bh(&hsr->list_lock);
+ list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) {
+ timestamp = node->time_in[HSR_PT_INTERLINK];
+
+ /* Prune entries whose interlink time_in stamp is older than HSR_PROXY_NODE_FORGET_TIME */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_PROXY_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+ if (!node->removed) {
+ list_del_rcu(&node->mac_list);
+ node->removed = true;
+ /* The entry is unlinked with list_del_rcu(), so defer freeing it via kfree_rcu(): */
+ kfree_rcu(node, rcu_head);
+ }
+ }
+ }
+
+ spin_unlock_bh(&hsr->list_lock);
+
+ /* Re-arm the timer so pruning runs again after PRUNE_PROXY_PERIOD ms */
+ mod_timer(&hsr->prune_proxy_timer,
+ jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD));
+}
+
void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index b23556251d62..7619e31c1d2d 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -46,6 +46,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr);
void hsr_prune_nodes(struct timer_list *t);
+void hsr_prune_proxy_nodes(struct timer_list *t);
int hsr_create_self_node(struct hsr_priv *hsr,
const unsigned char addr_a[ETH_ALEN],
@@ -67,6 +68,9 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
struct hsr_node *node);
void prp_update_san_info(struct hsr_node *node, bool is_sup);
+bool hsr_is_node_in_db(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN]);
+
struct hsr_node {
struct list_head mac_list;
/* Protect R/W access to seq_out */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 9756e657bab9..d7ae32473c41 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -96,7 +96,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
break; /* Handled in ndo_change_mtu() */
mtu_max = hsr_get_max_mtu(port->hsr);
master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
- master->dev->mtu = mtu_max;
+ WRITE_ONCE(master->dev->mtu, mtu_max);
break;
case NETDEV_UNREGISTER:
if (!is_hsr_master(dev)) {
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 18e01791ad79..23850b16d1ea 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -21,6 +21,7 @@
*/
#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */
#define HSR_NODE_FORGET_TIME 60000 /* ms */
+#define HSR_PROXY_NODE_FORGET_TIME 60000 /* ms */
#define HSR_ANNOUNCE_INTERVAL 100 /* ms */
#define HSR_ENTRY_FORGET_TIME 400 /* ms */
@@ -35,6 +36,7 @@
* HSR_NODE_FORGET_TIME?
*/
#define PRUNE_PERIOD 3000 /* ms */
+#define PRUNE_PROXY_PERIOD 3000 /* ms */
#define HSR_TLV_EOT 0 /* End of TLVs */
#define HSR_TLV_ANNOUNCE 22
#define HSR_TLV_LIFE_CHECK 23
@@ -192,11 +194,14 @@ struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
struct list_head node_db; /* Known HSR nodes */
+ struct list_head proxy_node_db; /* RedBox HSR proxy nodes */
struct hsr_self_node __rcu *self_node; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
struct timer_list prune_timer;
+ struct timer_list prune_proxy_timer;
int announce_count;
u16 sequence_nr;
+ u16 interlink_sequence_nr; /* Interlink port seq_nr */
u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
spinlock_t seqnr_lock; /* locking for sequence_nr */
@@ -209,6 +214,8 @@ struct hsr_priv {
* of lan_id
*/
bool fwd_offloaded; /* Forwarding offloaded to HW */
+ bool redbox; /* Device supports HSR RedBox */
+ unsigned char macaddress_redbox[ETH_ALEN];
unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16));
/* Align to u16 boundary to avoid unaligned access
* in ether_addr_equal
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 78fe40eb9f01..898f18c6da53 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = {
[IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN },
[IFLA_HSR_SEQ_NR] = { .type = NLA_U16 },
[IFLA_HSR_PROTOCOL] = { .type = NLA_U8 },
+ [IFLA_HSR_INTERLINK] = { .type = NLA_U32 },
};
/* Here, it seems a netdevice has already been allocated for us, and the
@@ -35,8 +36,8 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
enum hsr_version proto_version;
unsigned char multicast_spec;
u8 proto = HSR_PROTOCOL_HSR;
- struct net_device *link[2];
+ struct net_device *link[2], *interlink = NULL;
if (!data) {
NL_SET_ERR_MSG_MOD(extack, "No slave devices specified");
return -EINVAL;
@@ -67,6 +68,20 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
}
+ if (data[IFLA_HSR_INTERLINK])
+ interlink = __dev_get_by_index(src_net,
+ nla_get_u32(data[IFLA_HSR_INTERLINK]));
+
+ if (interlink && interlink == link[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave1 are the same");
+ return -EINVAL;
+ }
+
+ if (interlink && interlink == link[1]) {
+ NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave2 are the same");
+ return -EINVAL;
+ }
+
if (!data[IFLA_HSR_MULTICAST_SPEC])
multicast_spec = 0;
else
@@ -96,10 +111,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
}
}
- if (proto == HSR_PROTOCOL_PRP)
+ if (proto == HSR_PROTOCOL_PRP) {
proto_version = PRP_V1;
+ if (interlink) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Interlink only works with HSR");
+ return -EINVAL;
+ }
+ }
- return hsr_dev_finalize(dev, link, multicast_spec, proto_version, extack);
+ return hsr_dev_finalize(dev, link, interlink, multicast_spec,
+ proto_version, extack);
}
static void hsr_dellink(struct net_device *dev, struct list_head *head)
@@ -107,6 +129,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
struct hsr_priv *hsr = netdev_priv(dev);
del_timer_sync(&hsr->prune_timer);
+ del_timer_sync(&hsr->prune_proxy_timer);
del_timer_sync(&hsr->announce_timer);
hsr_debugfs_term(hsr);
@@ -114,6 +137,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
hsr_del_self_node(hsr);
hsr_del_nodes(&hsr->node_db);
+ hsr_del_nodes(&hsr->proxy_node_db);
unregister_netdevice_queue(dev, head);
}
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 1b6457f357bd..af6cf64a00e0 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -55,6 +55,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
protocol = eth_hdr(skb)->h_proto;
if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) &&
+ port->type != HSR_PT_INTERLINK &&
hsr->proto_ops->invalid_dan_ingress_frame &&
hsr->proto_ops->invalid_dan_ingress_frame(protocol))
goto finish_pass;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 2a983cf450da..56ef873828f4 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -338,7 +338,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
/* secret interval has been deprecated */
@@ -351,7 +350,6 @@ static struct ctl_table lowpan_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
@@ -370,10 +368,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
table_size = 0;
- }
}
table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 62aa6465253a..591ce0a16fc0 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -75,7 +75,7 @@ TRACE_EVENT(802154_rdev_add_virtual_intf,
),
TP_fast_assign(
WPAN_PHY_ASSIGN;
- __assign_str(vir_intf_name, name ? name : "<noname>");
+ __assign_str(vir_intf_name);
__entry->type = type;
__entry->extended_addr = extended_addr;
),
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a7cfeda28bb2..e03ba4a21c39 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -758,7 +758,9 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
sock_rps_record_flow(newsk);
WARN_ON(!((1 << newsk->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
- TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+ TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
+ TCPF_CLOSING | TCPF_CLOSE_WAIT |
+ TCPF_CLOSE)));
if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
@@ -771,16 +773,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
* Accept a pending connection. The TCP layer now gives BSD semantics.
*/
-int inet_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+int inet_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk1 = sock->sk, *sk2;
- int err = -EINVAL;
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
- sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern);
+ arg->err = -EINVAL;
+ sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg);
if (!sk2)
- return err;
+ return arg->err;
lock_sock(sk2);
__inet_accept(sock, newsock, sk2);
@@ -1307,8 +1309,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
int inet_sk_rebuild_header(struct sock *sk)
{
+ struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0));
struct inet_sock *inet = inet_sk(sk);
- struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
__be32 daddr;
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
@@ -1482,7 +1484,6 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
struct sk_buff *p;
unsigned int hlen;
unsigned int off;
- unsigned int id;
int flush = 1;
int proto;
@@ -1508,13 +1509,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
goto out;
NAPI_GRO_CB(skb)->proto = proto;
- id = ntohl(*(__be32 *)&iph->id);
- flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
- id >>= 16;
+ flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF));
list_for_each_entry(p, head, list) {
struct iphdr *iph2;
- u16 flush_id;
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -1531,48 +1529,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
-
- /* All fields must match except length and checksum. */
- NAPI_GRO_CB(p)->flush |=
- (iph->ttl ^ iph2->ttl) |
- (iph->tos ^ iph2->tos) |
- ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
-
- NAPI_GRO_CB(p)->flush |= flush;
-
- /* We need to store of the IP ID check to be included later
- * when we can verify that this packet does in fact belong
- * to a given flow.
- */
- flush_id = (u16)(id - ntohs(iph2->id));
-
- /* This bit of code makes it much easier for us to identify
- * the cases where we are doing atomic vs non-atomic IP ID
- * checks. Specifically an atomic check can return IP ID
- * values 0 - 0xFFFF, while a non-atomic check can only
- * return 0 or 0xFFFF.
- */
- if (!NAPI_GRO_CB(p)->is_atomic ||
- !(iph->frag_off & htons(IP_DF))) {
- flush_id ^= NAPI_GRO_CB(p)->count;
- flush_id = flush_id ? 0xFFFF : 0;
- }
-
- /* If the previous IP ID value was based on an atomic
- * datagram we can overwrite the value and ignore it.
- */
- if (NAPI_GRO_CB(skb)->is_atomic)
- NAPI_GRO_CB(p)->flush_id = flush_id;
- else
- NAPI_GRO_CB(p)->flush_id |= flush_id;
}
- NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
NAPI_GRO_CB(skb)->flush |= flush;
- skb_set_network_header(skb, off);
- /* The above will be needed by the transport layer if there is one
- * immediately following this IP hdr.
- */
+ NAPI_GRO_CB(skb)->inner_network_offset = off;
/* Note : No need to call skb_gro_postpull_rcsum() here,
* as we already checked checksum over ipv4 header was 0
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ab82ca104496..11c1519b3699 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1003,6 +1003,55 @@ out_of_mem:
* User level interface (ioctl)
*/
+static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r,
+ bool getarp)
+{
+ struct net_device *dev;
+
+ if (getarp)
+ dev = dev_get_by_name_rcu(net, r->arp_dev);
+ else
+ dev = __dev_get_by_name(net, r->arp_dev);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ /* A zero sa_family is defaulted to dev->type; known to be wrong, since ARPHRD_NETROM == 0 */
+ if (!r->arp_ha.sa_family)
+ r->arp_ha.sa_family = dev->type;
+
+ if ((r->arp_flags & ATF_COM) && r->arp_ha.sa_family != dev->type)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
+static struct net_device *arp_req_dev(struct net *net, struct arpreq *r)
+{
+ struct net_device *dev;
+ struct rtable *rt;
+ __be32 ip;
+
+ if (r->arp_dev[0])
+ return arp_req_dev_by_name(net, r, false);
+
+ if (r->arp_flags & ATF_PUBL)
+ return NULL;
+
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
+ rt = ip_route_output(net, ip, 0, 0, 0, RT_SCOPE_LINK);
+ if (IS_ERR(rt))
+ return ERR_CAST(rt);
+
+ dev = rt->dst.dev;
+ ip_rt_put(rt);
+
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
/*
* Set (create) an ARP cache entry.
*/
@@ -1023,11 +1072,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
static int arp_req_set_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask && mask != htonl(0xFFFFFFFF))
- return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
@@ -1035,6 +1081,8 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return -ENODEV;
}
if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1))
return -ENOBUFS;
return 0;
@@ -1043,30 +1091,20 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return arp_req_set_proxy(net, dev, 1);
}
-static int arp_req_set(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_set(struct net *net, struct arpreq *r)
{
- __be32 ip;
struct neighbour *neigh;
+ struct net_device *dev;
+ __be32 ip;
int err;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_set_public(net, r, dev);
- ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (r->arp_flags & ATF_PERM)
- r->arp_flags |= ATF_COM;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
- RT_SCOPE_LINK);
-
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
switch (dev->type) {
#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
@@ -1088,12 +1126,18 @@ static int arp_req_set(struct net *net, struct arpreq *r,
break;
}
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
if (!IS_ERR(neigh)) {
unsigned int state = NUD_STALE;
- if (r->arp_flags & ATF_PERM)
+
+ if (r->arp_flags & ATF_PERM) {
+ r->arp_flags |= ATF_COM;
state = NUD_PERMANENT;
+ }
+
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
@@ -1117,27 +1161,40 @@ static unsigned int arp_state_to_flags(struct neighbour *neigh)
* Get an ARP cache entry.
*/
-static int arp_req_get(struct arpreq *r, struct net_device *dev)
+static int arp_req_get(struct net *net, struct arpreq *r)
{
__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
- int err = -ENXIO;
+ struct net_device *dev;
+
+ if (!r->arp_dev[0])
+ return -ENODEV;
+
+ dev = arp_req_dev_by_name(net, r, true);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
neigh = neigh_lookup(&arp_tbl, &ip, dev);
- if (neigh) {
- if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha,
- min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
- r->arp_ha.sa_family = dev->type;
- strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
- err = 0;
- }
+ if (!neigh)
+ return -ENXIO;
+
+ if (READ_ONCE(neigh->nud_state) & NUD_NOARP) {
neigh_release(neigh);
+ return -ENXIO;
}
- return err;
+
+ read_lock_bh(&neigh->lock);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
+ r->arp_flags = arp_state_to_flags(neigh);
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+
+ r->arp_ha.sa_family = dev->type;
+ netdev_copy_name(dev, r->arp_dev);
+
+ return 0;
}
int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
@@ -1168,37 +1225,31 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
static int arp_req_delete_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask == htonl(0xFFFFFFFF))
- return pneigh_delete(&arp_tbl, net, &ip, dev);
+ if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (mask)
- return -EINVAL;
+ return pneigh_delete(&arp_tbl, net, &ip, dev);
+ }
return arp_req_set_proxy(net, dev, 0);
}
-static int arp_req_delete(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_delete(struct net *net, struct arpreq *r)
{
+ struct net_device *dev;
__be32 ip;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_delete_public(net, r, dev);
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
- RT_SCOPE_LINK);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
+
return arp_invalidate(dev, ip, true);
}
@@ -1208,9 +1259,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
- int err;
struct arpreq r;
- struct net_device *dev = NULL;
+ __be32 *netmask;
+ int err;
switch (cmd) {
case SIOCDARP:
@@ -1233,42 +1284,34 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!(r.arp_flags & ATF_PUBL) &&
(r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
return -EINVAL;
+
+ netmask = &((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr;
if (!(r.arp_flags & ATF_NETMASK))
- ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
- htonl(0xFFFFFFFFUL);
- rtnl_lock();
- if (r.arp_dev[0]) {
- err = -ENODEV;
- dev = __dev_get_by_name(net, r.arp_dev);
- if (!dev)
- goto out;
-
- /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
- if (!r.arp_ha.sa_family)
- r.arp_ha.sa_family = dev->type;
- err = -EINVAL;
- if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type)
- goto out;
- } else if (cmd == SIOCGARP) {
- err = -ENODEV;
- goto out;
- }
+ *netmask = htonl(0xFFFFFFFFUL);
+ else if (*netmask && *netmask != htonl(0xFFFFFFFFUL))
+ return -EINVAL;
switch (cmd) {
case SIOCDARP:
- err = arp_req_delete(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_delete(net, &r);
+ rtnl_unlock();
break;
case SIOCSARP:
- err = arp_req_set(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_set(net, &r);
+ rtnl_unlock();
break;
case SIOCGARP:
- err = arp_req_get(&r, dev);
+ rcu_read_lock();
+ err = arp_req_get(net, &r);
+ rcu_read_unlock();
+
+ if (!err && copy_to_user(arg, &r, sizeof(r)))
+ err = -EFAULT;
break;
}
-out:
- rtnl_unlock();
- if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
- err = -EFAULT;
+
return err;
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 7f518ea5f4ac..18227757ec0c 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -107,6 +107,9 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
case offsetof(struct tcp_sock, snd_cwnd_cnt):
end = offsetofend(struct tcp_sock, snd_cwnd_cnt);
break;
+ case offsetof(struct tcp_sock, snd_cwnd_stamp):
+ end = offsetofend(struct tcp_sock, snd_cwnd_stamp);
+ break;
case offsetof(struct tcp_sock, snd_ssthresh):
end = offsetofend(struct tcp_sock, snd_ssthresh);
break;
@@ -307,7 +310,8 @@ static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
return 0;
}
-static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag,
+ const struct rate_sample *rs)
{
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 8b17d83e5fde..dd6d46015058 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1815,6 +1815,7 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
* @sk: the socket
* @doi_def: the CIPSO DOI to use
* @secattr: the specific security attributes of the socket
+ * @sk_locked: true if caller holds the socket lock
*
* Description:
* Set the CIPSO option on the given socket using the DOI definition and
@@ -1826,7 +1827,8 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
*/
int cipso_v4_sock_setattr(struct sock *sk,
const struct cipso_v4_doi *doi_def,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
int ret_val = -EPERM;
unsigned char *buf = NULL;
@@ -1876,8 +1878,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
sk_inet = inet_sk(sk);
- old = rcu_dereference_protected(sk_inet->inet_opt,
- lockdep_sock_is_held(sk));
+ old = rcu_dereference_protected(sk_inet->inet_opt, sk_locked);
if (inet_test_bit(IS_ICSK, sk)) {
sk_conn = inet_csk(sk);
if (old)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7592f242336b..96accde527da 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -224,6 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(void)
static void inet_rcu_free_ifa(struct rcu_head *head)
{
struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
+
if (ifa->ifa_dev)
in_dev_put(ifa->ifa_dev);
kfree(ifa);
@@ -231,7 +232,11 @@ static void inet_rcu_free_ifa(struct rcu_head *head)
static void inet_free_ifa(struct in_ifaddr *ifa)
{
- call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
+ /* Our reference to ifa->ifa_dev must be freed ASAP
+ * to release the reference to the netdev the same way.
+ * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
+ */
+ call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
}
static void in_dev_free_rcu(struct rcu_head *head)
@@ -1683,6 +1688,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
struct nlmsghdr *nlh;
unsigned long tstamp;
u32 preferred, valid;
+ u32 flags;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
args->flags);
@@ -1692,7 +1698,13 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
ifm = nlmsg_data(nlh);
ifm->ifa_family = AF_INET;
ifm->ifa_prefixlen = ifa->ifa_prefixlen;
- ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
+
+ flags = READ_ONCE(ifa->ifa_flags);
+ /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
+ * The 32bit value is given in IFA_FLAGS attribute.
+ */
+ ifm->ifa_flags = (__u8)flags;
+
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
@@ -1701,7 +1713,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
goto nla_put_failure;
tstamp = READ_ONCE(ifa->ifa_tstamp);
- if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
+ if (!(flags & IFA_F_PERMANENT)) {
preferred = READ_ONCE(ifa->ifa_preferred_lft);
valid = READ_ONCE(ifa->ifa_valid_lft);
if (preferred != INFINITY_LIFE_TIME) {
@@ -1732,7 +1744,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
(ifa->ifa_proto &&
nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
- nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
+ nla_put_u32(skb, IFA_FLAGS, flags) ||
(ifa->ifa_rt_priority &&
nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
@@ -2515,7 +2527,7 @@ static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
+ struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
.devinet_vars = {
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -2578,7 +2590,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
if (!t)
goto out;
- for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+ for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
t->devinet_vars[i].extra1 = p;
t->devinet_vars[i].extra2 = net;
@@ -2652,7 +2664,6 @@ static struct ctl_table ctl_forward_entry[] = {
.extra1 = &ipv4_devconf,
.extra2 = &init_net,
},
- { },
};
#endif
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index dff04580318f..3968d3f98e08 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -348,7 +348,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
__be16 dport)
{
struct udphdr *uh;
- __be32 *udpdata32;
unsigned int len;
len = skb->len + esp->tailen - skb_transport_offset(skb);
@@ -363,12 +362,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
*skb_mac_header(skb) = IPPROTO_UDP;
- if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- return (struct ip_esp_hdr *)(udpdata32 + 2);
- }
-
return (struct ip_esp_hdr *)(uh + 1);
}
@@ -424,7 +417,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
case TCP_ENCAP_ESPINTCP:
@@ -776,7 +768,6 @@ int esp_input_done2(struct sk_buff *skb, int err)
source = th->source;
break;
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
break;
default:
@@ -1180,9 +1171,6 @@ static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
- break;
#ifdef CONFIG_INET_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 437e782b9663..ab6d0d98dbc3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,8 @@
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/icmp.h>
/*
* Build xmit assembly blocks
@@ -483,6 +485,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
+ struct dst_entry *dst, *dst2;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -508,16 +511,17 @@ static struct rtable *icmp_route_lookup(struct net *net,
/* No need to clone since we're just using its address. */
rt2 = rt;
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(fl4), NULL, 0);
- if (!IS_ERR(rt)) {
+ dst = xfrm_lookup(net, &rt->dst,
+ flowi4_to_flowi(fl4), NULL, 0);
+ rt = dst_rtable(dst);
+ if (!IS_ERR(dst)) {
if (rt != rt2)
return rt;
- } else if (PTR_ERR(rt) == -EPERM) {
+ } else if (PTR_ERR(dst) == -EPERM) {
rt = NULL;
- } else
+ } else {
return rt;
-
+ }
err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
@@ -551,19 +555,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
- flowi4_to_flowi(&fl4_dec), NULL,
- XFRM_LOOKUP_ICMP);
- if (!IS_ERR(rt2)) {
+ dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL,
+ XFRM_LOOKUP_ICMP);
+ rt2 = dst_rtable(dst2);
+ if (!IS_ERR(dst2)) {
dst_release(&rt->dst);
memcpy(fl4, &fl4_dec, sizeof(*fl4));
rt = rt2;
- } else if (PTR_ERR(rt2) == -EPERM) {
+ } else if (PTR_ERR(dst2) == -EPERM) {
if (rt)
dst_release(&rt->dst);
return rt2;
} else {
- err = PTR_ERR(rt2);
+ err = PTR_ERR(dst2);
goto relookup_failed;
}
return rt;
@@ -768,6 +772,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
if (!fl4.saddr)
fl4.saddr = htonl(INADDR_DUMMY);
+ trace_icmp_send(skb_in, type, code);
+
icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3b38610958ee..d81f74ce0f02 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -661,7 +661,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
/*
* This will accept the next outstanding connection.
*/
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
+struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
@@ -680,7 +680,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
/* Find already established connection */
if (reqsk_queue_empty(queue)) {
- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
@@ -692,6 +692,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
goto out_err;
}
req = reqsk_queue_remove(queue, sk);
+ arg->is_empty = reqsk_queue_empty(queue);
newsk = req->sk;
if (sk->sk_protocol == IPPROTO_TCP &&
@@ -745,7 +746,7 @@ out:
out_err:
newsk = NULL;
req = NULL;
- *err = error;
+ arg->err = error;
goto out;
}
EXPORT_SYMBOL(inet_csk_accept);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index cf88eca5f1b4..48d0d494185b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -565,7 +565,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 534b98a0744a..08e2c92e25ab 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -580,7 +580,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &dist_min,
},
- { }
};
/* secret interval has been deprecated */
@@ -593,7 +592,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip4_frags_ns_ctl_register(struct net *net)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c3af965dc407..ba205473522e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -793,7 +793,7 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
dev->needed_headroom += len;
if (set_mtu)
- dev->mtu = max_t(int, dev->mtu - len, 68);
+ WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
(test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 5e9c8156656a..d6fbcbd2358a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -616,7 +616,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
dst = skb_dst(skb);
if (curr_dst != dst) {
hint = ip_extract_route_hint(net, skb,
- ((struct rtable *)dst)->rt_type);
+ dst_rtable(dst)->rt_type);
/* dispatch old sublist */
if (!list_empty(&sublist))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1fe794967211..9500031a1f55 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
@@ -475,7 +475,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
goto packet_routed;
/* Make sure we can route this packet. */
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
if (!rt) {
__be32 daddr;
@@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk,
bool zc = false;
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
@@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
struct ip_options *opt = NULL;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
struct iphdr *iph;
u8 pmtudisc, ttl;
__be16 df = 0;
@@ -1473,7 +1473,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* by icmp_hdr(skb)->type.
*/
if (sk->sk_type == SOCK_RAW &&
- !inet_test_bit(HDRINCL, sk))
+ !(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH))
icmp_type = fl4->fl4_icmp_type;
else
icmp_type = icmp_hdr(skb)->type;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 177f40c3a8e8..bccef2fcf620 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -543,7 +543,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rt6_info *rt6;
__be32 daddr;
- rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
+ rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
NULL;
daddr = md ? dst : tunnel->parms.iph.daddr;
@@ -897,7 +897,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
- dev->mtu = mtu;
+ WRITE_ONCE(dev->mtu, mtu);
}
dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
@@ -1082,7 +1082,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
new_mtu = max_mtu;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
@@ -1120,7 +1120,7 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index b9062f4552ac..3ab908b74795 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -44,7 +44,7 @@ static int iptable_filter_table_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ipt_standard *)repl->entries)[1].target.verdict =
- forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+ forward ? -NF_ACCEPT - 1 : NF_DROP - 1;
err = ipt_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 914bc9c35cc7..6c4664c681ca 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -33,6 +33,7 @@
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <linux/bottom_half.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dcb11f22cbf2..4cb43401e0e0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -612,6 +612,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
+ fl4.fl4_icmp_type = 0;
+ fl4.fl4_icmp_code = 0;
+
if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f89ff2e5a05b..5fd54103174f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -819,7 +819,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
u32 mark = skb->mark;
__u8 tos = iph->tos;
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
@@ -827,7 +827,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct dst_entry *ret = dst;
if (rt) {
@@ -1044,7 +1044,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
bool confirm_neigh)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
@@ -1115,7 +1115,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
- rt = (struct rtable *)odst;
+ rt = dst_rtable(odst);
if (odst->obsolete && !odst->ops->check(odst, 0)) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
@@ -1124,7 +1124,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
new = true;
}
- __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
+ __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu);
if (!dst_check(&rt->dst, 0)) {
if (new)
@@ -1181,7 +1181,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
u32 cookie)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
/* All IPV4 dsts are created with ->obsolete set to the value
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
@@ -1516,10 +1516,8 @@ void rt_del_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
-
ip_dst_metrics_put(dst);
- rt_del_uncached_list(rt);
+ rt_del_uncached_list(dst_rtable(dst));
}
void rt_flush_dev(struct net_device *dev)
@@ -2820,7 +2818,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rtable *ort = (struct rtable *) dst_orig;
+ struct rtable *ort = dst_rtable(dst_orig);
struct rtable *rt;
rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
@@ -2865,9 +2863,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (flp4->flowi4_proto) {
flp4->flowi4_oif = rt->dst.dev->ifindex;
- rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = dst_rtable(xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0));
}
return rt;
@@ -3498,7 +3496,6 @@ static struct ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static const char ipv4_route_flush_procname[] = "flush";
@@ -3532,7 +3529,6 @@ static struct ctl_table ipv4_route_netns_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
static __net_init int sysctl_route_net_init(struct net *net)
@@ -3550,16 +3546,14 @@ static __net_init int sysctl_route_net_init(struct net *net)
/* Don't export non-whitelisted sysctls to unprivileged users */
if (net->user_ns != &init_user_ns) {
- if (tbl[0].procname != ipv4_route_flush_procname) {
- tbl[0].procname = NULL;
+ if (tbl[0].procname != ipv4_route_flush_procname)
table_size = 0;
- }
}
/* Update the variables to point into the current struct net
* except for the first element flush
*/
- for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++)
+ for (i = 1; i < table_size; i++)
tbl[i].data += (void *)net - (void *)&init_net;
}
tbl[0].extra1 = net;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ce5d19978a26..162a0a3b6ba5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -575,7 +575,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
- { }
};
static struct ctl_table ipv4_net_table[] = {
@@ -1502,11 +1501,11 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ONE,
},
- { }
};
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv4_net_table);
struct ctl_table *table;
table = ipv4_net_table;
@@ -1517,7 +1516,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+ for (i = 0; i < table_size; i++) {
if (table[i].data) {
/* Update the variables to point into
* the current struct net
@@ -1533,7 +1532,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
}
net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table,
- ARRAY_SIZE(ipv4_net_table));
+ table_size);
if (!net->ipv4.ipv4_hdr)
goto err_reg;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f23b97777ea5..681b54e1f3a6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -272,13 +272,16 @@
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
+#include <net/rstreason.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+#include <net/hotdata.h>
#include <net/rps.h>
/* Track pending CMSGs. */
@@ -1187,7 +1190,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -2716,7 +2719,7 @@ void tcp_shutdown(struct sock *sk, int how)
/* If we've already sent a FIN, or it's a closed state, skip this. */
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_SENT |
- TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
+ TCPF_CLOSE_WAIT)) {
/* Clear out any half completed packets. FIN if needed. */
if (tcp_close_state(sk))
tcp_send_fin(sk);
@@ -2750,7 +2753,15 @@ static bool tcp_too_many_orphans(int shift)
READ_ONCE(sysctl_tcp_max_orphans);
}
-bool tcp_check_oom(struct sock *sk, int shift)
+static bool tcp_out_of_memory(const struct sock *sk)
+{
+ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+ sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
+ return true;
+ return false;
+}
+
+bool tcp_check_oom(const struct sock *sk, int shift)
{
bool too_many_orphans, out_of_socket_memory;
@@ -2811,7 +2822,8 @@ void __tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, sk->sk_allocation);
+ tcp_send_active_reset(sk, sk->sk_allocation,
+ SK_RST_REASON_NOT_SPECIFIED);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2825,7 +2837,7 @@ void __tcp_close(struct sock *sk, long timeout)
* machine. State transitions:
*
* TCP_ESTABLISHED -> TCP_FIN_WAIT1
- * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
+ * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult)
* TCP_CLOSE_WAIT -> TCP_LAST_ACK
*
* are legal only when FIN has been sent (i.e. in window),
@@ -2885,7 +2897,8 @@ adjudge_to_death:
struct tcp_sock *tp = tcp_sk(sk);
if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@@ -2903,7 +2916,8 @@ adjudge_to_death:
if (sk->sk_state != TCP_CLOSE) {
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@@ -3007,7 +3021,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
- tcp_send_active_reset(sk, gfp_any());
+ tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED);
WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET);
@@ -4349,6 +4363,9 @@ zerocopy_rcv_out:
return err;
}
+ case TCP_IS_MPTCP:
+ val = 0;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -4564,7 +4581,8 @@ int tcp_abort(struct sock *sk, int err)
smp_wmb();
sk_error_report(sk);
if (tcp_need_reset(sk->sk_state))
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_done(sk);
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 05dc2d05bc7c..760941e55153 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1024,7 +1024,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
bbr_update_gains(sk);
}
-__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
@@ -1156,8 +1156,6 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
};
BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, bbr_init)
BTF_ID_FLAGS(func, bbr_main)
BTF_ID_FLAGS(func, bbr_sndbuf_expand)
@@ -1166,8 +1164,6 @@ BTF_ID_FLAGS(func, bbr_cwnd_event)
BTF_ID_FLAGS(func, bbr_ssthresh)
BTF_ID_FLAGS(func, bbr_min_tso_segs)
BTF_ID_FLAGS(func, bbr_set_state)
-#endif
-#endif
BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 44869ea089e3..5dbed91c6178 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -486,16 +486,12 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
};
BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, cubictcp_init)
BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
BTF_ID_FLAGS(func, cubictcp_cong_avoid)
BTF_ID_FLAGS(func, cubictcp_state)
BTF_ID_FLAGS(func, cubictcp_cwnd_event)
BTF_ID_FLAGS(func, cubictcp_acked)
-#endif
-#endif
BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e33fbe4933e4..8a45a4aea933 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -58,7 +58,18 @@ struct dctcp {
};
static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
-module_param(dctcp_shift_g, uint, 0644);
+
+static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp)
+{
+ return param_set_uint_minmax(val, kp, 0, 10);
+}
+
+static const struct kernel_param_ops dctcp_shift_g_ops = {
+ .set = dctcp_shift_g_set,
+ .get = param_get_uint,
+};
+
+module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644);
MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha");
static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
@@ -261,16 +272,12 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
};
BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
BTF_ID_FLAGS(func, dctcp_init)
BTF_ID_FLAGS(func, dctcp_update_alpha)
BTF_ID_FLAGS(func, dctcp_cwnd_event)
BTF_ID_FLAGS(func, dctcp_ssthresh)
BTF_ID_FLAGS(func, dctcp_cwnd_undo)
BTF_ID_FLAGS(func, dctcp_state)
-#endif
-#endif
BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids)
static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 384fa5e2f065..9c04a9c8be9d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,6 +72,7 @@
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
+#include <net/proto_memory.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
@@ -913,7 +914,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rtt_seq = tp->snd_nxt;
tp->mdev_max_us = tcp_rto_min_us(sk);
- tcp_bpf_rtt(sk);
+ tcp_bpf_rtt(sk, mrtt_us, srtt);
}
} else {
/* no previous measure. */
@@ -923,7 +924,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt;
- tcp_bpf_rtt(sk);
+ tcp_bpf_rtt(sk, mrtt_us, srtt);
}
tp->srtt_us = max(1U, srtt);
}
@@ -3541,7 +3542,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->cong_control) {
- icsk->icsk_ca_ops->cong_control(sk, rs);
+ icsk->icsk_ca_ops->cong_control(sk, ack, flag, rs);
return;
}
@@ -6768,6 +6769,8 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+ tcp_shutdown(sk, SEND_SHUTDOWN);
break;
case TCP_FIN_WAIT1: {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e06f0cd04f7e..30ef0c8f5e92 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -154,6 +155,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
if (tcptw->tw_ts_recent_stamp &&
(!twp || (reuse && time_after32(ktime_get_seconds(),
tcptw->tw_ts_recent_stamp)))) {
+ /* inet_twsk_hashdance() sets sk_refcnt after putting twsk
+ * and releasing the bucket lock.
+ */
+ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt)))
+ return 0;
+
/* In case of repair and re-using TIME-WAIT sockets we still
* want to be sure that it is safe as above but honor the
* sequence numbers and time stamps set as part of the repair
@@ -174,7 +181,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
}
- sock_hold(sktw);
+
return 1;
}
@@ -723,7 +730,8 @@ out:
* Exception: precedence violation. We do not implement it in any case.
*/
-static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -869,7 +877,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
- trace_tcp_send_reset(sk, skb);
+ trace_tcp_send_reset(sk, skb, reason);
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -1934,7 +1942,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- tcp_v4_send_reset(rsk, skb);
+ tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
discard:
kfree_skb_reason(skb, reason);
/* Be careful here. If this function gets more complicated and
@@ -2285,7 +2293,10 @@ lookup:
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
- tcp_v4_send_reset(nsk, skb);
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v4_send_reset(nsk, skb, rst_reason);
goto discard_and_relse;
}
sock_put(sk);
@@ -2364,7 +2375,7 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v4_send_reset(NULL, skb);
+ tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
@@ -2416,7 +2427,7 @@ do_time_wait:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v4_send_reset(sk, skb);
+ tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:;
@@ -2426,7 +2437,6 @@ do_time_wait:
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
};
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 146c061145b4..b93619b2384b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -22,6 +22,7 @@
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
@@ -878,7 +879,7 @@ embryonic_reset:
* avoid becoming vulnerable to outside attack aiming at
* resetting legit local connections.
*/
- req->rsk_ops->send_reset(sk, skb);
+ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_INVALID_SYN);
} else if (fastopen) { /* received a valid RST pkt */
reqsk_fastopen_remove(sk, req, true);
tcp_reset(sk, skb);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fab0973f995b..4b791e74529e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
}
}
+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+ __be32 *oldip, __be32 newip,
+ __be16 *oldport, __be16 newport)
+{
+ struct tcphdr *th;
+ struct iphdr *iph;
+
+ if (*oldip == newip && *oldport == newport)
+ return;
+
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
+ *oldport = newport;
+
+ csum_replace4(&iph->check, *oldip, newip);
+ *oldip = newip;
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct tcphdr *th;
+ const struct iphdr *iph;
+ struct sk_buff *seg;
+ struct tcphdr *th2;
+ struct iphdr *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ip_hdr(seg->next);
+
+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
+ iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->saddr, iph->saddr,
+ &th2->source, th->source);
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->daddr, iph->daddr,
+ &th2->dest, th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv4_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp4_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
@@ -178,63 +245,76 @@ out:
return segs;
}
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
{
- struct sk_buff *pp = NULL;
+ struct tcphdr *th2;
struct sk_buff *p;
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ th2 = tcp_hdr(p);
+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ return p;
+ }
+
+ return NULL;
+}
+
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
+{
+ unsigned int thlen, hlen, off;
struct tcphdr *th;
- struct tcphdr *th2;
- unsigned int len;
- unsigned int thlen;
- __be32 flags;
- unsigned int mss = 1;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int i;
off = skb_gro_offset(skb);
hlen = off + sizeof(*th);
th = skb_gro_header(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
thlen = th->doff * 4;
if (thlen < sizeof(*th))
- goto out;
+ return NULL;
hlen = off + thlen;
if (!skb_gro_may_pull(skb, hlen)) {
th = skb_gro_header_slow(skb, hlen, off);
if (unlikely(!th))
- goto out;
+ return NULL;
}
skb_gro_pull(skb, thlen);
- len = skb_gro_len(skb);
- flags = tcp_flag_word(th);
-
- list_for_each_entry(p, head, list) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
+ return th;
+}
- th2 = tcp_hdr(p);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ unsigned int thlen = th->doff * 4;
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
+ struct tcphdr *th2;
+ unsigned int len;
+ __be32 flags;
+ unsigned int mss = 1;
+ int flush = 1;
+ int i;
- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
+ len = skb_gro_len(skb);
+ flags = tcp_flag_word(th);
- goto found;
- }
- p = NULL;
- goto out_check_final;
+ p = tcp_gro_lookup(head, th);
+ if (!p)
+ goto out_check_final;
-found:
- /* Include the IP ID check below from the inner most IP hdr */
- flush = NAPI_GRO_CB(p)->flush;
- flush |= (__force int)(flags & TCP_FLAG_CWR);
+ th2 = tcp_hdr(p);
+ flush = (__force int)(flags & TCP_FLAG_CWR);
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
@@ -242,16 +322,7 @@ found:
flush |= *(u32 *)((u8 *)th + i) ^
*(u32 *)((u8 *)th2 + i);
- /* When we receive our second frame we can made a decision on if we
- * continue this flow as an atomic flow with a fixed ID or if we use
- * an incrementing ID.
- */
- if (NAPI_GRO_CB(p)->flush_id != 1 ||
- NAPI_GRO_CB(p)->count != 1 ||
- !NAPI_GRO_CB(p)->is_atomic)
- flush |= NAPI_GRO_CB(p)->flush_id;
- else
- NAPI_GRO_CB(p)->is_atomic = false;
+ flush |= gro_receive_network_flush(th, th2, p);
mss = skb_shinfo(p)->gso_size;
@@ -267,6 +338,18 @@ found:
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
flush |= skb_cmp_decrypted(p, skb);
+ if (unlikely(NAPI_GRO_CB(p)->is_flist)) {
+ flush |= (__force int)(flags ^ tcp_flag_word(th2));
+ flush |= skb->ip_summed != p->ip_summed;
+ flush |= skb->csum_level != p->csum_level;
+ flush |= NAPI_GRO_CB(p)->count >= 64;
+
+ if (flush || skb_gro_receive_list(p, skb))
+ mss = 1;
+
+ goto out_check_final;
+ }
+
if (flush || skb_gro_receive(p, skb)) {
mss = 1;
goto out_check_final;
@@ -288,7 +371,6 @@ out_check_final:
if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
pp = p;
-out:
NAPI_GRO_CB(skb)->flush |= (flush != 0);
return pp;
@@ -314,30 +396,80 @@ void tcp_gro_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_gro_complete);
+static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+ const struct iphdr *iph;
+ struct sk_buff *p;
+ struct sock *sk;
+ struct net *net;
+ int iif, sdif;
+
+ if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
+ iph = skb_gro_network_header(skb);
+ net = dev_net(skb->dev);
+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->saddr, th->source,
+ iph->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- inet_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ inet_gro_compute_pseudo))
+ goto flush;
+
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
- return tcp_gro_receive(head, skb);
+ tcp4_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 |
- (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID);
+ (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID);
tcp_gro_complete(skb);
return 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ce59e4499b66..95618d0e78e4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -39,6 +39,7 @@
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
@@ -231,7 +232,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
- (*rcv_wnd) = min_t(u32, space, U16_MAX);
+ (*rcv_wnd) = space;
if (init_rcv_wnd)
*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
@@ -2403,6 +2404,21 @@ commit:
return 0;
}
+/* tcp_mtu_probe() and tcp_grow_skb() can both eat an skb (src) if
+ * all its payload was moved to another one (dst).
+ * Make sure to transfer tcp_flags, eor, and tstamp.
+ */
+static void tcp_eat_one_skb(struct sock *sk,
+ struct sk_buff *dst,
+ struct sk_buff *src)
+{
+ TCP_SKB_CB(dst)->tcp_flags |= TCP_SKB_CB(src)->tcp_flags;
+ TCP_SKB_CB(dst)->eor = TCP_SKB_CB(src)->eor;
+ tcp_skb_collapse_tstamp(dst, src);
+ tcp_unlink_write_queue(src, sk);
+ tcp_wmem_free_skb(sk, src);
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
@@ -2508,16 +2524,7 @@ static int tcp_mtu_probe(struct sock *sk)
copy = min_t(int, skb->len, probe_size - len);
if (skb->len <= copy) {
- /* We've eaten all the data from this skb.
- * Throw it away. */
- TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
- /* If this is the last SKB we copy and eor is set
- * we need to propagate it to the new skb.
- */
- TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
- tcp_skb_collapse_tstamp(nskb, skb);
- tcp_unlink_write_queue(skb, sk);
- tcp_wmem_free_skb(sk, skb);
+ tcp_eat_one_skb(sk, nskb, skb);
} else {
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);
@@ -2705,11 +2712,10 @@ static void tcp_grow_skb(struct sock *sk, struct sk_buff *skb, int amount)
TCP_SKB_CB(next_skb)->seq += nlen;
if (!next_skb->len) {
+ /* In case FIN is set, we need to update end_seq */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
- TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
- tcp_unlink_write_queue(next_skb, sk);
- tcp_wmem_free_skb(sk, next_skb);
+
+ tcp_eat_one_skb(sk, skb, next_skb);
}
}
@@ -3595,7 +3601,9 @@ void tcp_send_fin(struct sock *sk)
return;
}
} else {
- skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER,
+ sk_gfp_mask(sk, GFP_ATOMIC |
+ __GFP_NOWARN));
if (unlikely(!skb))
return;
@@ -3615,7 +3623,8 @@ void tcp_send_fin(struct sock *sk)
* was unread data in the receive queue. This behavior is recommended
* by RFC 2525, section 2.17. -DaveM
*/
-void tcp_send_active_reset(struct sock *sk, gfp_t priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+ enum sk_rst_reason reason)
{
struct sk_buff *skb;
@@ -3640,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
- trace_tcp_send_reset(sk, NULL);
+ trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED);
}
/* Send a crossed SYN-ACK during socket establishment.
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 976db57b95d4..83fe7f62f7f1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <net/tcp.h>
+#include <net/rstreason.h>
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
@@ -127,7 +128,8 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
(!tp->snd_wnd && !tp->packets_out))
do_reset = true;
if (do_reset)
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@@ -768,7 +770,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
}
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
goto death;
}
@@ -795,7 +797,8 @@ static void tcp_keepalive_timer (struct timer_list *t)
icsk->icsk_probes_out > 0) ||
(user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
- tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_NOT_SPECIFIED);
tcp_write_err(sk);
goto out;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6e2446295089..189c9113fe9a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -543,7 +543,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct net *net = dev_net(skb->dev);
int iif, sdif;
@@ -1217,7 +1218,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (connected)
- rt = (struct rtable *)sk_dst_check(sk, 0);
+ rt = dst_rtable(sk_dst_check(sk, 0));
if (!rt) {
struct net *net = sock_net(sk);
@@ -2711,8 +2712,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP:
set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
- fallthrough;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
WRITE_ONCE(up->encap_rcv,
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 3498dd1d0694..59448a2dbf2c 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -433,33 +433,6 @@ out:
return segs;
}
-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-{
- if (unlikely(p->len + skb->len >= 65536))
- return -E2BIG;
-
- if (NAPI_GRO_CB(p)->last == p)
- skb_shinfo(p)->frag_list = skb;
- else
- NAPI_GRO_CB(p)->last->next = skb;
-
- skb_pull(skb, skb_gro_offset(skb));
-
- NAPI_GRO_CB(p)->last = skb;
- NAPI_GRO_CB(p)->count++;
- p->data_len += skb->len;
-
- /* sk ownership - if any - completely transferred to the aggregated packet */
- skb->destructor = NULL;
- skb->sk = NULL;
- p->truesize += skb->truesize;
- p->len += skb->len;
-
- NAPI_GRO_CB(skb)->same_flow = 1;
-
- return 0;
-}
-
#define UDP_GRO_CNT_MAX 64
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
@@ -471,6 +444,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *p;
unsigned int ulen;
int ret = 0;
+ int flush;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
@@ -504,13 +478,15 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return p;
}
+ flush = gro_receive_network_flush(uh, uh2, p);
+
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
* leading to excessive truesize values.
* On len mismatch merge the first packet shorter than gso_size,
* otherwise complete the GRO packet.
*/
- if (ulen > ntohs(uh2->len)) {
+ if (ulen > ntohs(uh2->len) || flush) {
pp = p;
} else {
if (NAPI_GRO_CB(skb)->is_flist) {
@@ -718,7 +694,8 @@ EXPORT_SYMBOL(udp_gro_complete);
INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index dae35101d189..a620618cc568 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -63,7 +63,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
ip_send_check(iph);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
@@ -113,19 +117,6 @@ static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull
/* Must be an IKE packet.. pass it through */
return 1;
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- /* Check if this is a keepalive packet. If so, eat it. */
- if (len == 1 && udpdata[0] == 0xff) {
- return -EINVAL;
- } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
- udpdata32[0] == 0 && udpdata32[1] == 0) {
-
- /* ESP Packet with Non-IKE marker */
- len = sizeof(struct udphdr) + 2 * sizeof(u32);
- } else
- /* Must be an IKE packet.. pass it through */
- return 1;
- break;
}
/* At this point we are sure that this is an ESPinUDP packet,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1dda59e0aeab..0294fef577fa 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif,
static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rtable *rt = (struct rtable *)xdst->route;
+ struct rtable *rt = dst_rtable(xdst->route);
const struct flowi4 *fl4 = &fl->u.ip4;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
@@ -152,7 +152,6 @@ static struct ctl_table xfrm4_policy_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static __net_init int xfrm4_net_sysctl_init(struct net *net)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9aa0900abfa1..5c424a0e7232 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -7184,14 +7184,12 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
- {
- /* sentinel */
- }
};
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct inet6_dev *idev, struct ipv6_devconf *p)
{
+ size_t table_size = ARRAY_SIZE(addrconf_sysctl);
int i, ifindex;
struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
@@ -7200,7 +7198,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
if (!table)
goto out;
- for (i = 0; table[i].data; i++) {
+ for (i = 0; i < table_size; i++) {
table[i].data += (char *)p - (char *)&ipv6_devconf;
/* If one of these is already set, then it is not safe to
* overwrite either of them: this makes proc_dointvec_minmax
@@ -7215,7 +7213,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
p->sysctl_header = register_net_sysctl_sz(net, path, table,
- ARRAY_SIZE(addrconf_sysctl));
+ table_size);
if (!p->sysctl_header)
goto free;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0f2506e35359..0627c4c18d1a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -252,9 +252,8 @@ static void aca_free_rcu(struct rcu_head *h)
static void aca_put(struct ifacaddr6 *ac)
{
- if (refcount_dec_and_test(&ac->aca_refcnt)) {
- call_rcu(&ac->rcu, aca_free_rcu);
- }
+ if (refcount_dec_and_test(&ac->aca_refcnt))
+ call_rcu_hurry(&ac->rcu, aca_free_rcu);
}
static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6bc0a84c8d05..34a9a5b9ed00 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -384,7 +384,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
__be16 dport)
{
struct udphdr *uh;
- __be32 *udpdata32;
unsigned int len;
len = skb->len + esp->tailen - skb_transport_offset(skb);
@@ -399,12 +398,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
*skb_mac_header(skb) = IPPROTO_UDP;
- if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- return (struct ip_esp_hdr *)(udpdata32 + 2);
- }
-
return (struct ip_esp_hdr *)(uh + 1);
}
@@ -460,7 +453,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
case TCP_ENCAP_ESPINTCP:
@@ -823,7 +815,6 @@ int esp6_input_done2(struct sk_buff *skb, int err)
source = th->source;
break;
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
break;
default:
@@ -1233,9 +1224,6 @@ static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
- break;
#ifdef CONFIG_INET6_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 52c04f0ac498..9e254de7462f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -233,8 +233,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
rt = pol_lookup_func(lookup,
net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
+ struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
+
+ if (!idev)
+ goto again;
err = fib6_rule_saddr(net, rule, flags, flp6,
- ip6_dst_idev(&rt->dst)->dev);
+ idev->dev);
if (err == -EAGAIN)
goto again;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1635da07285f..7b31674644ef 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
res = true;
} else {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
int tmo = net->ipv6.sysctl.icmpv6_time;
struct inet_peer *peer;
@@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
dst = ip6_route_output(net, sk, fl6);
if (!dst->error) {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct in6_addr prefsrc;
rt6_get_prefsrc(rt, &prefsrc);
@@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
+ &ipc6, &fl6, dst_rt6_info(dst),
MSG_DONTWAIT)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), &ipc6, &fl6,
- (struct rt6_info *)dst, MSG_DONTWAIT)) {
+ dst_rt6_info(dst), MSG_DONTWAIT)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
@@ -1206,7 +1206,6 @@ static struct ctl_table ipv6_icmp_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { },
};
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 8c1ce78956ba..0601bad79822 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel(
static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
- struct rt6_info *rt = (struct rt6_info *)orig_dst;
+ struct rt6_info *rt = dst_rt6_info(orig_dst);
struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
struct dst_entry *dst;
int err = -EINVAL;
@@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = orig_dst->dev->ifindex;
fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
+ fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst),
&ip6h->daddr);
dst = ip6_route_output(net, NULL, &fl6);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2e81383b663b..6db71bb1cd30 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,6 +21,7 @@
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/sock_reuseport.h>
+#include <net/tcp.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
@@ -289,7 +290,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b41e35af69ea..bd5aff97d8b1 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -67,7 +67,7 @@ static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
off += len;
}
- skb_gro_pull(skb, off - skb_network_offset(skb));
+ skb_gro_pull(skb, off - skb_gro_receive_network_offset(skb));
return proto;
}
@@ -236,7 +236,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if (unlikely(!iph))
goto out;
- skb_set_network_header(skb, off);
+ NAPI_GRO_CB(skb)->inner_network_offset = off;
flush += ntohs(iph->payload_len) != skb->len - hlen;
@@ -259,7 +259,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
NAPI_GRO_CB(skb)->proto = proto;
flush--;
- nlen = skb_network_header_len(skb);
+ nlen = skb_gro_offset(skb) - off;
list_for_each_entry(p, head, list) {
const struct ipv6hdr *iph2;
@@ -290,19 +290,8 @@ not_same_flow:
nlen - sizeof(struct ipv6hdr)))
goto not_same_flow;
}
- /* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
- (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
- NAPI_GRO_CB(p)->flush |= flush;
-
- /* If the previous IP ID value was based on an atomic
- * datagram we can overwrite the value and ignore it.
- */
- if (NAPI_GRO_CB(skb)->is_atomic)
- NAPI_GRO_CB(p)->flush_id = 0;
}
- NAPI_GRO_CB(skb)->is_atomic = true;
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_postpull_rcsum(skb, iph, nlen);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b9dd3a66e423..27d8725445e3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
rcu_read_lock();
- nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(IS_ERR_OR_NULL(neigh))) {
@@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) {
+ if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
@@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb)
* send a redirect.
*/
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY)
target = &rt->rt6i_gateway;
else
@@ -856,7 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
bool mono_delivery_time = skb->mono_delivery_time;
@@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
return NULL;
}
- rt = (struct rt6_info *)dst;
+ rt = dst_rt6_info(dst);
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
@@ -1118,7 +1118,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct rt6_info *rt;
*dst = ip6_route_output(net, sk, fl6);
- rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
+ rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
@@ -1159,7 +1159,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
- rt = (struct rt6_info *) *dst;
+ rt = dst_rt6_info(*dst);
rcu_read_lock();
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
@@ -1423,7 +1423,7 @@ static int __ip6_append_data(struct sock *sk,
int offset = 0;
bool zc = false;
u32 tskey = 0;
- struct rt6_info *rt = (struct rt6_info *)cork->dst;
+ struct rt6_info *rt = dst_rt6_info(cork->dst);
bool paged, hold_tskey, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
@@ -1877,7 +1877,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
- struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
+ struct rt6_info *rt = dst_rt6_info(cork->base.dst);
struct flowi6 *fl6 = &cork->fl.u.ip6;
unsigned char proto = fl6->flowi6_proto;
@@ -1933,7 +1933,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
u8 icmp6_type;
if (sk->sk_socket->type == SOCK_RAW &&
- !inet_test_bit(HDRINCL, sk))
+ !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
@@ -1949,7 +1949,7 @@ out:
int ip6_send_skb(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int err;
err = ip6_local_out(net, skb->sk, skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 57bb3b3ea0c5..9dee0c127955 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1746,7 +1746,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
return -EINVAL;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL(ip6_tnl_change_mtu);
@@ -2146,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip6_tnl_get_link_net);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 78344cf3867e..590737c27537 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -666,7 +666,8 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
dev->flags &= ~IFF_POINTOPOINT;
if (keep_mtu && dev->mtu) {
- dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu);
+ WRITE_ONCE(dev->mtu,
+ clamp(dev->mtu, dev->min_mtu, dev->max_mtu));
return;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cb0ee81a068a..dd342e6ecf3f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2273,7 +2273,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
int err;
struct mr_table *mrt;
struct mfc6_cache *cache;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ae134634c323..d914b23256ce 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1722,7 +1722,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
if (IS_ERR(dst))
return;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY) {
ND_PRINTK(2, warn,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index df785ebda0ca..e8992693e14a 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -43,7 +43,7 @@ static int ip6table_filter_table_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ip6t_standard *)repl->entries)[1].target.verdict =
- forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+ forward ? -NF_ACCEPT - 1 : NF_DROP - 1;
err = ip6t_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index ce8c14d8aff5..5e1b50c6a44d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -62,7 +62,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- { }
};
static int nf_ct_frag6_sysctl_register(struct net *net)
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ef2059c88955..88b3fcacd4f9 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
if (IS_ERR(dst))
return PTR_ERR(dst);
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0d896ca7b589..2eedf255600b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -598,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
struct ipv6hdr *iph;
struct sk_buff *skb;
int err;
- struct rt6_info *rt = (struct rt6_info *)*dstp;
+ struct rt6_info *rt = dst_rt6_info(*dstp);
int hlen = LL_RESERVED_SPACE(rt->dst.dev);
int tlen = rt->dst.dev->needed_tailroom;
@@ -917,7 +917,7 @@ back_from_confirm:
ipc6.opt = opt;
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
- len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
+ len, 0, &ipc6, &fl6, dst_rt6_info(dst),
msg->msg_flags);
if (err)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ee95cdcc8747..327caca64257 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -369,7 +369,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
* the source of the fragment, with the Pointer field set to zero.
*/
nexthdr = hdr->nexthdr;
- if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
+ if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
@@ -436,7 +436,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
/* secret interval has been deprecated */
@@ -449,7 +448,6 @@ static struct ctl_table ip6_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1f4b935a0e57..bbc2a0dd9314 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -226,7 +226,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
- const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+ const struct rt6_info *rt = dst_rt6_info(dst);
return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
dst->dev, skb, daddr);
@@ -234,8 +234,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
+ const struct rt6_info *rt = dst_rt6_info(dst);
struct net_device *dev = dst->dev;
- struct rt6_info *rt = (struct rt6_info *)dst;
daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
@@ -354,7 +354,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct fib6_info *from;
struct inet6_dev *idev;
@@ -373,7 +373,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
if (idev && idev->dev != blackhole_netdev) {
@@ -1288,7 +1288,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
- return (struct rt6_info *) dst;
+ return dst_rt6_info(dst);
dst_release(dst);
@@ -2647,7 +2647,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
rcu_read_lock();
dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
/* For dst cached in uncached_list, refcnt is already taken. */
if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
dst = &net->ipv6.ip6_null_entry->dst;
@@ -2661,7 +2661,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+ struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
@@ -2744,7 +2744,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
struct fib6_info *from;
struct rt6_info *rt;
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->sernum)
return rt6_is_valid(rt) ? dst : NULL;
@@ -2772,7 +2772,7 @@ EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
if (rt) {
if (rt->rt6i_flags & RTF_CACHE) {
@@ -2796,7 +2796,7 @@ static void ip6_link_failure(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
@@ -2852,7 +2852,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
bool confirm_neigh)
{
const struct in6_addr *daddr, *saddr;
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
* IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
@@ -4174,7 +4174,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_REJECT) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
return;
@@ -4445,7 +4445,7 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
+ .fc_metric = rtmsg->rtmsg_metric,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -4475,6 +4475,9 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
+ /* Only do the default setting of fc_metric in route adding */
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
break;
case SIOCDELRT:
@@ -5608,7 +5611,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags)
{
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
unsigned char nh_flags = 0;
@@ -6111,7 +6114,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->dst.error) {
err = rt->dst.error;
ip6_rt_put(rt);
@@ -6428,7 +6431,6 @@ static struct ctl_table ipv6_route_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
@@ -6452,10 +6454,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[1].procname = NULL;
}
return table;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 35508abd76f4..a31521e270f7 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -551,6 +551,8 @@ out_unregister_iptun:
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
out_unregister_genl:
+#endif
+#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC)
genl_unregister_family(&seg6_genl_family);
#endif
out_unregister_pernet:
@@ -564,8 +566,9 @@ void seg6_exit(void)
seg6_hmac_exit();
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_local_exit();
seg6_iptunnel_exit();
#endif
- unregister_pernet_subsys(&ip6_segments_ops);
genl_unregister_family(&seg6_genl_family);
+ unregister_pernet_subsys(&ip6_segments_ops);
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 861e0366f549..bbf5b84a70fc 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -356,6 +356,7 @@ static int seg6_hmac_init_algo(void)
struct crypto_shash *tfm;
struct shash_desc *shash;
int i, alg_count, cpu;
+ int ret = -ENOMEM;
alg_count = ARRAY_SIZE(hmac_algos);
@@ -366,12 +367,14 @@ static int seg6_hmac_init_algo(void)
algo = &hmac_algos[i];
algo->tfms = alloc_percpu(struct crypto_shash *);
if (!algo->tfms)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
tfm = crypto_alloc_shash(algo->name, 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto error_out;
+ }
p_tfm = per_cpu_ptr(algo->tfms, cpu);
*p_tfm = tfm;
}
@@ -383,18 +386,22 @@ static int seg6_hmac_init_algo(void)
algo->shashs = alloc_percpu(struct shash_desc *);
if (!algo->shashs)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
shash = kzalloc_node(shsize, GFP_KERNEL,
cpu_to_node(cpu));
if (!shash)
- return -ENOMEM;
+ goto error_out;
*per_cpu_ptr(algo->shashs, cpu) = shash;
}
}
return 0;
+
+error_out:
+ seg6_hmac_exit();
+ return ret;
}
int __init seg6_hmac_init(void)
@@ -412,22 +419,29 @@ int __net_init seg6_hmac_net_init(struct net *net)
void seg6_hmac_exit(void)
{
struct seg6_hmac_algo *algo = NULL;
+ struct crypto_shash *tfm;
+ struct shash_desc *shash;
int i, alg_count, cpu;
alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
- for_each_possible_cpu(cpu) {
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- shash = *per_cpu_ptr(algo->shashs, cpu);
- kfree(shash);
- tfm = *per_cpu_ptr(algo->tfms, cpu);
- crypto_free_shash(tfm);
+ if (algo->shashs) {
+ for_each_possible_cpu(cpu) {
+ shash = *per_cpu_ptr(algo->shashs, cpu);
+ kfree(shash);
+ }
+ free_percpu(algo->shashs);
+ }
+
+ if (algo->tfms) {
+ for_each_possible_cpu(cpu) {
+ tfm = *per_cpu_ptr(algo->tfms, cpu);
+ crypto_free_shash(tfm);
+ }
+ free_percpu(algo->tfms);
}
- free_percpu(algo->tfms);
- free_percpu(algo->shashs);
}
}
EXPORT_SYMBOL(seg6_hmac_exit);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 03b877ff4558..a75df2ec8db0 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -459,10 +459,8 @@ static int seg6_input_core(struct net *net, struct sock *sk,
int err;
err = seg6_do_srh(skb);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ if (unlikely(err))
+ goto drop;
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
@@ -486,7 +484,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
- return err;
+ goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
@@ -494,6 +492,9 @@ static int seg6_input_core(struct net *net, struct sock *sk,
skb_dst(skb)->dev, seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
+drop:
+ kfree_skb(skb);
+ return err;
}
static int seg6_input_nf(struct sk_buff *skb)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 75de55f907b0..c060285ff47f 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -213,7 +213,6 @@ static struct ctl_table ipv6_table_template[] = {
.proc_handler = proc_doulongvec_minmax,
.extra2 = &ioam6_id_wide_max,
},
- { }
};
static struct ctl_table ipv6_rotable[] = {
@@ -248,11 +247,11 @@ static struct ctl_table ipv6_rotable[] = {
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_NETLABEL */
- { }
};
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv6_table_template);
struct ctl_table *ipv6_table;
struct ctl_table *ipv6_route_table;
struct ctl_table *ipv6_icmp_table;
@@ -264,7 +263,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
if (!ipv6_table)
goto out;
/* Update the variables to point into the current struct net */
- for (i = 0; i < ARRAY_SIZE(ipv6_table_template) - 1; i++)
+ for (i = 0; i < table_size; i++)
ipv6_table[i].data += (void *)net - (void *)&init_net;
ipv6_route_table = ipv6_route_sysctl_init(net);
@@ -276,8 +275,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
goto out_ipv6_route_table;
net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6",
- ipv6_table,
- ARRAY_SIZE(ipv6_table_template));
+ ipv6_table, table_size);
if (!net->ipv6.sysctl.hdr)
goto out_ipv6_icmp_table;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bb7c3caf4f85..4c3605485b68 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -60,6 +60,7 @@
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -69,7 +70,8 @@
#include <trace/events/tcp.h>
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@@ -95,11 +97,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
rcu_assign_pointer(sk->sk_rx_dst, dst);
sk->sk_rx_dst_ifindex = skb->skb_iif;
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
}
@@ -1008,7 +1008,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
kfree_skb(buff);
}
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -1130,7 +1131,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
label = ip6_flowlabel(ipv6h);
}
- trace_tcp_send_reset(sk, skb);
+ trace_tcp_send_reset(sk, skb, reason);
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
ipv6_get_dsfield(ipv6h), label, priority, txhash,
@@ -1677,7 +1678,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
if (opt_skb)
__kfree_skb(opt_skb);
@@ -1862,7 +1863,10 @@ lookup:
} else {
drop_reason = tcp_child_process(sk, nsk, skb);
if (drop_reason) {
- tcp_v6_send_reset(nsk, skb);
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v6_send_reset(nsk, skb, rst_reason);
goto discard_and_relse;
}
sock_put(sk);
@@ -1939,7 +1943,7 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v6_send_reset(NULL, skb);
+ tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
@@ -1995,7 +1999,7 @@ do_time_wait:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:
@@ -2045,7 +2049,6 @@ void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor = tcp_twsk_destructor,
};
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 4b07d1e6c952..23971903e66d 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,31 +7,84 @@
*/
#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
+#include <net/inet6_hashtables.h>
#include <net/gro.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *hdr;
+ struct sk_buff *p;
+ struct sock *sk;
+ struct net *net;
+ int iif, sdif;
+
+ if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
+ hdr = skb_gro_network_header(skb);
+ net = dev_net(skb->dev);
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- ip6_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ ip6_gro_compute_pseudo))
+ goto flush;
- return tcp_gro_receive(head, skb);
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
+
+ tcp6_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
@@ -40,6 +93,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
return 0;
}
+static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
+ __be16 *oldport, __be16 newport)
+{
+ struct tcphdr *th;
+
+ if (*oldport == newport)
+ return;
+
+ th = tcp_hdr(seg);
+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
+ *oldport = newport;
+}
+
+static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct tcphdr *th;
+ const struct ipv6hdr *iph;
+ struct sk_buff *seg;
+ struct tcphdr *th2;
+ struct ipv6hdr *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ipv6_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ipv6_hdr(seg->next);
+
+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
+ ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
+ ipv6_addr_equal(&iph->daddr, &iph2->daddr))
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ipv6_hdr(seg);
+
+ iph2->saddr = iph->saddr;
+ iph2->daddr = iph->daddr;
+ __tcpv6_gso_segment_csum(seg, &th2->source, th->source);
+ __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv6_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -51,6 +159,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp6_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 085ee236d9a1..c81a07ac0463 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -285,7 +285,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct net *net = dev_net(skb->dev);
int iif, sdif;
@@ -910,11 +911,8 @@ start_lookup:
static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
- if (udp_sk_rx_dst_set(sk, dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
- }
+ if (udp_sk_rx_dst_set(sk, dst))
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
@@ -1585,7 +1583,7 @@ back_from_confirm:
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
- (struct rt6_info *)dst,
+ dst_rt6_info(dst),
msg->msg_flags, &cork);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
@@ -1612,7 +1610,7 @@ do_append_data:
ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
- &ipc6, fl6, (struct rt6_info *)dst,
+ &ipc6, fl6, dst_rt6_info(dst),
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index bbd347de00b4..b41152dd4246 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -164,7 +164,8 @@ flush:
INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index a17d783dc7c0..4abc5e9d6322 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -58,7 +58,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
@@ -109,19 +113,6 @@ static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull
/* Must be an IKE packet.. pass it through */
return 1;
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- /* Check if this is a keepalive packet. If so, eat it. */
- if (len == 1 && udpdata[0] == 0xff) {
- return -EINVAL;
- } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
- udpdata32[0] == 0 && udpdata32[1] == 0) {
-
- /* ESP Packet with Non-IKE marker */
- len = sizeof(struct udphdr) + 2 * sizeof(u32);
- } else
- /* Must be an IKE packet.. pass it through */
- return 1;
- break;
}
/* At this point we are sure that this is an ESPinUDP packet,
@@ -279,6 +270,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
if (!x)
continue;
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+
spin_lock(&x->lock);
if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4891012b692f..cc885d3aa9e5 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -70,7 +70,7 @@ static int xfrm6_get_saddr(struct net *net, int oif,
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rt6_info *rt = (struct rt6_info *)xdst->route;
+ struct rt6_info *rt = dst_rt6_info(xdst->route);
xdst->u.dst.dev = dev;
netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
@@ -184,7 +184,6 @@ static struct ctl_table xfrm6_policy_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static int __net_init xfrm6_net_sysctl_init(struct net *net)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c951bb9cc2e0..c3b0b610b0aa 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -795,7 +795,7 @@ done:
/* Accept a pending connection */
static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *nsk;
@@ -809,7 +809,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
goto done;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* Wait for an incoming connection */
add_wait_queue_exclusive(sk_sleep(sk), &wait);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 5e37a8ceebcb..b7bf34a5eb37 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -73,8 +73,42 @@ const struct bus_type iucv_bus = {
};
EXPORT_SYMBOL(iucv_bus);
-struct device *iucv_root;
-EXPORT_SYMBOL(iucv_root);
+static struct device *iucv_root;
+
+/* Release callback installed by iucv_alloc_device(): frees the
+ * struct device that was obtained from kzalloc().
+ */
+static void iucv_release_device(struct device *device)
+{
+	kfree(device);
+}
+
+/**
+ * iucv_alloc_device - allocate and pre-initialize a device on the iucv bus
+ * @attrs: attribute groups stored in dev->groups
+ * @driver: driver stored in dev->driver
+ * @priv: driver data attached with dev_set_drvdata()
+ * @fmt: printf-style format of the device name, followed by its arguments
+ *
+ * The device is only allocated and filled in here; this function does not
+ * register it.
+ *
+ * Returns the new device, or NULL if the allocation or naming failed.
+ */
+struct device *iucv_alloc_device(const struct attribute_group **attrs,
+				 struct device_driver *driver,
+				 void *priv, const char *fmt, ...)
+{
+	struct device *dev;
+	va_list vargs;
+	int rc;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		goto out_error;
+	va_start(vargs, fmt);
+	/* dev_set_name() is itself variadic and must not be handed a
+	 * va_list; use the va_list variant so that the caller's arguments
+	 * are actually formatted into the name.
+	 */
+	rc = kobject_set_name_vargs(&dev->kobj, fmt, vargs);
+	va_end(vargs);
+	if (rc)
+		goto out_error;
+	dev->bus = &iucv_bus;
+	dev->parent = iucv_root;
+	dev->driver = driver;
+	dev->groups = attrs;
+	dev->release = iucv_release_device;
+	dev_set_drvdata(dev, priv);
+	return dev;
+
+out_error:
+	/* kfree(NULL) is a no-op, so this also covers the failed kzalloc() */
+	kfree(dev);
+	return NULL;
+}
+EXPORT_SYMBOL(iucv_alloc_device);
static int iucv_available;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8d21ff25f160..88a34db265d8 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -794,6 +794,7 @@ static void l2tp_session_queue_purge(struct l2tp_session *session)
static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
{
struct l2tp_session *session = NULL;
+ struct l2tp_tunnel *orig_tunnel = tunnel;
unsigned char *ptr, *optr;
u16 hdrflags;
u32 tunnel_id, session_id;
@@ -819,13 +820,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Get L2TP header flags */
hdrflags = ntohs(*(__be16 *)ptr);
- /* Check protocol version */
+ /* Get protocol version */
version = hdrflags & L2TP_HDR_VER_MASK;
- if (version != tunnel->version) {
- pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
- tunnel->name, version, tunnel->version);
- goto invalid;
- }
/* Get length of L2TP packet */
length = skb->len;
@@ -837,7 +833,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Skip flags */
ptr += 2;
- if (tunnel->version == L2TP_HDR_VER_2) {
+ if (version == L2TP_HDR_VER_2) {
/* If length is present, skip it */
if (hdrflags & L2TP_HDRFLAG_L)
ptr += 2;
@@ -845,6 +841,20 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Extract tunnel and session ID */
tunnel_id = ntohs(*(__be16 *)ptr);
ptr += 2;
+
+ if (tunnel_id != tunnel->tunnel_id) {
+ /* We are receiving traffic for another tunnel, probably
+ * because we have several tunnels between the same
+ * IP/port quadruple, look it up.
+ */
+ struct l2tp_tunnel *alt_tunnel;
+
+ alt_tunnel = l2tp_tunnel_get(tunnel->l2tp_net, tunnel_id);
+ if (!alt_tunnel)
+ goto pass;
+ tunnel = alt_tunnel;
+ }
+
session_id = ntohs(*(__be16 *)ptr);
ptr += 2;
} else {
@@ -854,6 +864,13 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
ptr += 4;
}
+ /* Check protocol version */
+ if (version != tunnel->version) {
+ pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
+ tunnel->name, version, tunnel->version);
+ goto invalid;
+ }
+
/* Find the session context */
session = l2tp_tunnel_get_session(tunnel, session_id);
if (!session || !session->recv_skb) {
@@ -875,6 +892,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
+ if (tunnel != orig_tunnel)
+ l2tp_tunnel_dec_refcount(tunnel);
+
return 0;
invalid:
@@ -884,25 +904,26 @@ pass:
/* Put UDP header back */
__skb_push(skb, sizeof(struct udphdr));
+ if (tunnel != orig_tunnel)
+ l2tp_tunnel_dec_refcount(tunnel);
+
return 1;
}
-/* UDP encapsulation receive handler. See net/ipv4/udp.c.
- * Return codes:
- * 0 : success.
- * <0: error
- * >0: skb should be passed up to userspace as UDP.
+/* UDP encapsulation receive and error receive handlers.
+ * See net/ipv4/udp.c for details.
+ *
+ * Note that these functions are called from inside an
+ * RCU-protected region, but without the socket being locked.
+ *
+ * Hence we use rcu_dereference_sk_user_data to access the
+ * tunnel data structure rather than the usual l2tp_sk_to_tunnel
+ * accessor function.
*/
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
- /* Note that this is called from the encap_rcv hook inside an
- * RCU-protected region, but without the socket being locked.
- * Hence we use rcu_dereference_sk_user_data to access the
- * tunnel data structure rather the usual l2tp_sk_to_tunnel
- * accessor function.
- */
tunnel = rcu_dereference_sk_user_data(sk);
if (!tunnel)
goto pass_up;
@@ -919,6 +940,29 @@ pass_up:
}
EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
+/* UDP encap_err_rcv hook: record @err on the tunnel socket, report it, and
+ * hand the ICMP error to ip_icmp_error()/ipv6_icmp_error() when the socket
+ * has the matching RECVERR/RECVERR6 bit set. Sockets without a tunnel, or
+ * whose tunnel has a negative fd, are left untouched.
+ */
+static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err,
+				    __be16 port, u32 info, u8 *payload)
+{
+	struct l2tp_tunnel *tunnel = rcu_dereference_sk_user_data(sk);
+
+	if (!tunnel || tunnel->fd < 0)
+		return;
+
+	sk->sk_err = err;
+	sk_error_report(sk);
+
+	if (ip_hdr(skb)->version == IPVERSION) {
+		if (inet_test_bit(RECVERR, sk))
+			ip_icmp_error(sk, skb, err, port, info, payload);
+		return;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	/* anything that is not IPv4 is treated as IPv6 */
+	if (inet6_test_bit(RECVERR6, sk))
+		ipv6_icmp_error(sk, skb, err, port, info, payload);
+#endif
+}
+
+
/************************************************************************
* Transmit handling
***********************************************************************/
@@ -1493,6 +1537,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
.sk_user_data = tunnel,
.encap_type = UDP_ENCAP_L2TPINUDP,
.encap_rcv = l2tp_udp_encap_recv,
+ .encap_err_rcv = l2tp_udp_encap_err_recv,
.encap_destroy = l2tp_udp_encap_destroy,
};
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 39e487ccc468..8ba00ad433c2 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -127,6 +127,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
/* checksums verified by L2TP */
skb->ip_summed = CHECKSUM_NONE;
+ /* drop outer flow-hash */
+ skb_clear_hash(skb);
+
skb_dst_drop(skb);
nf_reset_ct(skb);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 970af3983d11..19c8cc5289d5 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &inet->cork.fl.u.ip4;
if (connected)
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
rcu_read_lock();
if (!rt) {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 7bf14cf9ffaa..8780ec64f376 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -630,7 +630,7 @@ back_from_confirm:
ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
- &fl6, (struct rt6_info *)dst,
+ &fl6, dst_rt6_info(dst),
msg->msg_flags);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index fde1140d899e..4eb52add7103 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -688,14 +688,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
* llc_ui_accept - accept a new incoming connection.
* @sock: Socket which connections arrive on.
* @newsock: Socket to move incoming connection to.
- * @flags: User specified operational flags.
- * @kern: If the socket is kernel internal
+ * @arg: User specified arguments
*
* Accept a new incoming connection.
* Returns 0 upon success, negative otherwise.
*/
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk, *newsk;
struct llc_sock *llc, *newllc;
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 8443a6d841b0..72e101135f8c 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -44,11 +44,6 @@ static struct ctl_table llc2_timeout_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
-};
-
-static struct ctl_table llc_station_table[] = {
- { },
};
static struct ctl_table_header *llc2_timeout_header;
@@ -56,8 +51,9 @@ static struct ctl_table_header *llc_station_header;
int __init llc_sysctl_init(void)
{
+ struct ctl_table empty[1] = {};
llc2_timeout_header = register_net_sysctl(&init_net, "net/llc/llc2/timeout", llc2_timeout_table);
- llc_station_header = register_net_sysctl(&init_net, "net/llc/station", llc_station_table);
+ llc_station_header = register_net_sysctl_sz(&init_net, "net/llc/station", empty, 0);
if (!llc2_timeout_header || !llc_station_header) {
llc_sysctl_exit();
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b08e5d7687e3..83ad6c9709fe 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2958,8 +2958,9 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
memcpy(sdata->vif.bss_conf.mcast_rate, rate,
sizeof(int) * NUM_NL80211_BANDS);
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_MCAST_RATE);
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_MCAST_RATE);
return 0;
}
@@ -4016,7 +4017,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
- link_data->csa_chanreq = chanreq;
+ link_data->csa_chanreq = chanreq;
link_conf->csa_active = true;
if (params->block_tx &&
@@ -4027,7 +4028,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
}
cfg80211_ch_switch_started_notify(sdata->dev,
- &link_data->csa_chanreq.oper, 0,
+ &link_data->csa_chanreq.oper, link_id,
params->count, params->block_tx);
if (changed) {
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 9f5ffdc9db28..ecbb042dd043 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
if (!he_spr_ie_elem)
return;
+
+ he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control;
data = he_spr_ie_elem->optional;
if (he_spr_ie_elem->he_sr_control &
IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
- data++;
+ he_obss_pd->non_srg_max_offset = *data++;
+
if (he_spr_ie_elem->he_sr_control &
IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
- he_obss_pd->max_offset = *data++;
he_obss_pd->min_offset = *data++;
+ he_obss_pd->max_offset = *data++;
+ memcpy(he_obss_pd->bss_color_bitmap, data, 8);
+ data += 8;
+ memcpy(he_obss_pd->partial_bssid_bitmap, data, 8);
he_obss_pd->enable = true;
}
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index eb62b7d4b4f7..3cedfdc9099b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1845,6 +1845,8 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
void ieee80211_configure_filter(struct ieee80211_local *local);
u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
+void ieee80211_handle_queued_frames(struct ieee80211_local *local);
+
u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local);
int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
u64 *cookie, gfp_t gfp);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4eaea0a9975b..1132dea0e290 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -423,9 +423,8 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
BSS_CHANGED_ERP_SLOT;
}
-static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+void ieee80211_handle_queued_frames(struct ieee80211_local *local)
{
- struct ieee80211_local *local = from_tasklet(local, t, tasklet);
struct sk_buff *skb;
while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -450,6 +449,13 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t)
}
}
+/* Tasklet entry point: recover the ieee80211_local that embeds @t and
+ * process its queued frames.
+ */
+static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+{
+	struct ieee80211_local *local;
+
+	local = from_tasklet(local, t, tasklet);
+	ieee80211_handle_queued_frames(local);
+}
+
static void ieee80211_restart_work(struct work_struct *work)
{
struct ieee80211_local *local =
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index cbc9b5e40cb3..6d4510221c98 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1776,6 +1776,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
ifmsh->last_preq = jiffies;
ifmsh->next_perr = jiffies;
ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
+ ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
/* Allocate all mesh structures when creating the first mesh interface. */
if (!mesh_allocated)
ieee80211s_init();
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a6b62169f084..c0a5c75cddcb 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1017,10 +1017,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
*/
void mesh_path_flush_pending(struct mesh_path *mpath)
{
+ struct ieee80211_sub_if_data *sdata = mpath->sdata;
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct mesh_preq_queue *preq, *tmp;
struct sk_buff *skb;
while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL)
mesh_path_discard_frame(mpath->sdata, skb);
+
+ spin_lock_bh(&ifmsh->mesh_preq_queue_lock);
+ list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) {
+ if (ether_addr_equal(mpath->dst, preq->dst)) {
+ list_del(&preq->list);
+ kfree(preq);
+ --ifmsh->preq_queue_len;
+ }
+ }
+ spin_unlock_bh(&ifmsh->mesh_preq_queue_lock);
}
/**
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 55e5497f8978..055a60e90979 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -111,7 +111,7 @@ ieee80211_parse_extension_element(u32 *crc,
if (params->mode < IEEE80211_CONN_MODE_HE)
break;
if (len >= sizeof(*elems->he_spr) &&
- len >= ieee80211_he_spr_size(data))
+ len >= ieee80211_he_spr_size(data) - 1)
elems->he_spr = data;
break;
case WLAN_EID_EXT_HE_6GHZ_CAPA:
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 3da1c5c45035..8ecc4b710b0e 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -744,15 +744,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_ies_bufsize *= n_bands;
}
- local->hw_scan_req = kmalloc(
- sizeof(*local->hw_scan_req) +
- req->n_channels * sizeof(req->channels[0]) +
- local->hw_scan_ies_bufsize, GFP_KERNEL);
+ local->hw_scan_req = kmalloc(struct_size(local->hw_scan_req,
+ req.channels,
+ req->n_channels) +
+ local->hw_scan_ies_bufsize,
+ GFP_KERNEL);
if (!local->hw_scan_req)
return -ENOMEM;
local->hw_scan_req->req.ssids = req->ssids;
local->hw_scan_req->req.n_ssids = req->n_ssids;
+ /* None of the channels are actually set
+ * up but let UBSAN know the boundaries.
+ */
+ local->hw_scan_req->req.n_channels = req->n_channels;
+
ies = (u8 *)local->hw_scan_req +
sizeof(*local->hw_scan_req) +
req->n_channels * sizeof(req->channels[0]);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index da5fdd6f5c85..aa22f09e6d14 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1724,7 +1724,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
skb_queue_head_init(&pending);
/* sync with ieee80211_tx_h_unicast_ps_buf */
- spin_lock(&sta->ps_lock);
+ spin_lock_bh(&sta->ps_lock);
/* Send all buffered frames to the station */
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
int count = skb_queue_len(&pending), tmp;
@@ -1753,7 +1753,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
*/
clear_sta_flag(sta, WLAN_STA_PSPOLL);
clear_sta_flag(sta, WLAN_STA_UAPSD);
- spin_unlock(&sta->ps_lock);
+ spin_unlock_bh(&sta->ps_lock);
atomic_dec(&ps->num_sta_ps);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 8e758b5074bd..b26aacfbc622 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -33,7 +33,7 @@
__string(vif_name, sdata->name)
#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
__entry->p2p = sdata->vif.p2p; \
- __assign_str(vif_name, sdata->name)
+ __assign_str(vif_name)
#define VIF_PR_FMT " vif:%s(%d%s)"
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0b893e958959..283bfc99417e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1567,6 +1567,8 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
void ieee80211_stop_device(struct ieee80211_local *local)
{
+ ieee80211_handle_queued_frames(local);
+
ieee80211_led_radio(local, false);
ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 5d2012d1cf4a..2dc7a908a6bb 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1377,13 +1377,13 @@ static const struct ctl_table mpls_dev_table[] = {
.proc_handler = mpls_conf_proc,
.data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
},
- { }
};
static int mpls_dev_sysctl_register(struct net_device *dev,
struct mpls_dev *mdev)
{
char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
+ size_t table_size = ARRAY_SIZE(mpls_dev_table);
struct net *net = dev_net(dev);
struct ctl_table *table;
int i;
@@ -1395,7 +1395,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
/* Table data contains only offsets relative to the base of
* the mdev at this point, so make them absolute.
*/
- for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) {
+ for (i = 0; i < table_size; i++) {
table[i].data = (char *)mdev + (uintptr_t)table[i].data;
table[i].extra1 = mdev;
table[i].extra2 = net;
@@ -1403,8 +1403,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
- mdev->sysctl = register_net_sysctl_sz(net, path, table,
- ARRAY_SIZE(mpls_dev_table));
+ mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size);
if (!mdev->sysctl)
goto free;
@@ -2653,11 +2652,11 @@ static const struct ctl_table mpls_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &ttl_max,
},
- { }
};
static int mpls_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(mpls_table);
struct ctl_table *table;
int i;
@@ -2673,11 +2672,11 @@ static int mpls_net_init(struct net *net)
/* Table data contains only offsets relative to the base of
* the mdev at this point, so make them absolute.
*/
- for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+ for (i = 0; i < table_size; i++)
table[i].data = (char *)net + (uintptr_t)table[i].data;
net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
- ARRAY_SIZE(mpls_table));
+ table_size);
if (net->mpls.ctl == NULL) {
kfree(table);
return -ENOMEM;
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 8fc790f2a01b..4385fd3b13be 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -81,7 +81,7 @@ static int mpls_xmit(struct sk_buff *skb)
ttl = net->mpls.default_ttl;
else
ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
+ rt = dst_rtable(dst);
} else if (dst->ops->family == AF_INET6) {
if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
ttl = tun_encap_info->default_ttl;
@@ -90,7 +90,7 @@ static int mpls_xmit(struct sk_buff *skb)
ttl = net->mpls.default_ttl;
else
ttl = ipv6_hdr(skb)->hop_limit;
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
} else {
goto drop;
}
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 8d661156ab8c..98b1dd498ff6 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -92,10 +92,65 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
- strcpy(pernet->scheduler, "default");
+ strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
}
#ifdef CONFIG_SYSCTL
+/* Store @name as the per-netns scheduler if mptcp_sched_find() knows it.
+ * Returns 0 on success, -ENOENT when no such scheduler is found.
+ */
+static int mptcp_set_scheduler(const struct net *net, const char *name)
+{
+	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
+	int ret = -ENOENT;
+
+	rcu_read_lock();
+	if (mptcp_sched_find(name)) {
+		strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX);
+		ret = 0;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* sysctl handler for "scheduler": reads and writes go through a local copy
+ * of the current name, and a successfully written value is only applied
+ * via mptcp_set_scheduler().
+ */
+static int proc_scheduler(struct ctl_table *ctl, int write,
+			  void *buffer, size_t *lenp, loff_t *ppos)
+{
+	const struct net *net = current->nsproxy->net_ns;
+	char name[MPTCP_SCHED_NAME_MAX];
+	struct ctl_table tmp = {
+		.data = name,
+		.maxlen = MPTCP_SCHED_NAME_MAX,
+	};
+	int err;
+
+	strscpy(name, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX);
+
+	err = proc_dostring(&tmp, write, buffer, lenp, ppos);
+	if (err || !write)
+		return err;
+
+	return mptcp_set_scheduler(net, name);
+}
+
+/* sysctl handler for "available_schedulers": formats the scheduler list
+ * into a temporary buffer and lets proc_dostring() operate on it.
+ */
+static int proc_available_schedulers(struct ctl_table *ctl,
+				     int write, void *buffer,
+				     size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp = { .maxlen = MPTCP_SCHED_BUF_MAX, };
+	char *list;
+	int err;
+
+	list = kmalloc(tmp.maxlen, GFP_USER);
+	if (!list)
+		return -ENOMEM;
+	tmp.data = list;
+
+	mptcp_get_available_schedulers(list, MPTCP_SCHED_BUF_MAX);
+	err = proc_dostring(&tmp, write, buffer, lenp, ppos);
+	kfree(list);
+
+	return err;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -148,7 +203,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.procname = "scheduler",
.maxlen = MPTCP_SCHED_NAME_MAX,
.mode = 0644,
- .proc_handler = proc_dostring,
+ .proc_handler = proc_scheduler,
+ },
+ {
+ .procname = "available_schedulers",
+ .maxlen = MPTCP_SCHED_BUF_MAX,
+ .mode = 0644,
+ .proc_handler = proc_available_schedulers,
},
{
.procname = "close_timeout",
@@ -156,7 +217,6 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {}
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -178,7 +238,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
table[6].data = &pernet->scheduler;
- table[7].data = &pernet->close_timeout;
+ /* table[7] is for available_schedulers which is read-only info */
+ table[8].data = &pernet->close_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index dd7fd1f246b5..2704afd0dfe4 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -1,5 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <net/inet_common.h>
+
enum linux_mptcp_mib_field {
MPTCP_MIB_NUM = 0,
MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5c17d39146ea..7f53e022e27e 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -14,6 +14,7 @@
#include "protocol.h"
#include "mib.h"
+#include "mptcp_pm_gen.h"
static int pm_nl_pernet_id;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 9f5d422d5ef6..f0a4590506c6 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -6,6 +6,7 @@
#include "protocol.h"
#include "mib.h"
+#include "mptcp_pm_gen.h"
void mptcp_free_local_addr_list(struct mptcp_sock *msk)
{
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f8bc34f0d973..7d44196ec5b6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -20,6 +20,7 @@
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
+#include <net/hotdata.h>
#include <net/xfrm.h>
#include <asm/ioctls.h>
#include "protocol.h"
@@ -1272,7 +1273,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (!can_coalesce && i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -2569,7 +2570,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
slow = lock_sock_fast(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
- tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(tcp_sk);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
unlock_sock_fast(tcp_sk, slow);
@@ -2813,7 +2814,8 @@ static void mptcp_ca_reset(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_assign_congestion_control(sk);
- strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+ strscpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name,
+ sizeof(mptcp_sk(sk)->ca_name));
/* no need to keep a reference to the ops, the name will suffice */
tcp_cleanup_congestion_control(sk);
@@ -3730,6 +3732,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
+
+ WRITE_ONCE(msk->write_seq, subflow->idsn);
+ WRITE_ONCE(msk->snd_nxt, subflow->idsn);
if (likely(!__mptcp_check_fallback(msk)))
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
@@ -3877,11 +3882,10 @@ unlock:
}
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *ssk, *newsk;
- int err;
pr_debug("msk=%p", msk);
@@ -3893,9 +3897,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
return -EINVAL;
pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, &err, kern);
+ newsk = inet_csk_accept(ssk, arg);
if (!newsk)
- return err;
+ return arg->err;
pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
@@ -3916,7 +3920,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- newsk->sk_kern_sock = kern;
+ newsk->sk_kern_sock = arg->kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk);
@@ -3945,7 +3949,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
}
} else {
tcpfallback:
- newsk->sk_kern_sock = kern;
+ newsk->sk_kern_sock = arg->kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk);
/* we are being invoked after accepting a non-mp-capable
@@ -4165,7 +4169,7 @@ int __init mptcp_proto_v6_init(void)
int err;
mptcp_v6_prot = mptcp_prot;
- strcpy(mptcp_v6_prot.name, "MPTCPv6");
+ strscpy(mptcp_v6_prot.name, "MPTCPv6", sizeof(mptcp_v6_prot.name));
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index fdfa843e2d88..7aa47e2dd52b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -12,8 +12,7 @@
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>
-
-#include "mptcp_pm_gen.h"
+#include <net/rstreason.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -311,6 +310,9 @@ struct mptcp_sock {
free_first:1,
rcvspace_init:1;
u32 notsent_lowat;
+ int keepalive_cnt;
+ int keepalive_idle;
+ int keepalive_intvl;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -581,6 +583,43 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
WRITE_ONCE(subflow->local_id, -1);
}
+/* Map an MPTCP_RST_* reset reason onto the matching
+ * SK_RST_REASON_MPTCP_RST_* value of enum sk_rst_reason.
+ */
+static inline enum sk_rst_reason
+sk_rst_convert_mptcp_reason(u32 reason)
+{
+	enum sk_rst_reason converted;
+
+	switch (reason) {
+	case MPTCP_RST_EUNSPEC:
+		converted = SK_RST_REASON_MPTCP_RST_EUNSPEC;
+		break;
+	case MPTCP_RST_EMPTCP:
+		converted = SK_RST_REASON_MPTCP_RST_EMPTCP;
+		break;
+	case MPTCP_RST_ERESOURCE:
+		converted = SK_RST_REASON_MPTCP_RST_ERESOURCE;
+		break;
+	case MPTCP_RST_EPROHIBIT:
+		converted = SK_RST_REASON_MPTCP_RST_EPROHIBIT;
+		break;
+	case MPTCP_RST_EWQ2BIG:
+		converted = SK_RST_REASON_MPTCP_RST_EWQ2BIG;
+		break;
+	case MPTCP_RST_EBADPERF:
+		converted = SK_RST_REASON_MPTCP_RST_EBADPERF;
+		break;
+	case MPTCP_RST_EMIDDLEBOX:
+		converted = SK_RST_REASON_MPTCP_RST_EMIDDLEBOX;
+		break;
+	default:
+		/* Not expected to happen; an unknown value is reported
+		 * as a generic error.
+		 */
+		converted = SK_RST_REASON_ERROR;
+		break;
+	}
+
+	return converted;
+}
+
+/* Send an active reset on subflow socket @sk, tagged with the subflow's
+ * stored reset_reason converted to enum sk_rst_reason.
+ */
+static inline void
+mptcp_send_active_reset_reason(struct sock *sk)
+{
+	tcp_send_active_reset(sk, GFP_ATOMIC,
+			      sk_rst_convert_mptcp_reason(mptcp_subflow_ctx(sk)->reset_reason));
+}
+
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
@@ -645,6 +684,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 4ab0693c069c..4a7fd0508ad2 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -51,6 +51,28 @@ struct mptcp_sched_ops *mptcp_sched_find(const char *name)
return ret;
}
+/* Build string with list of available scheduler values.
+ * Similar to tcp_get_available_congestion_control()
+ *
+ * @buf receives the space-separated scheduler names; formatting is bounded
+ * by @maxlen bytes. The result is always a valid string, even when the
+ * scheduler list is empty.
+ */
+void mptcp_get_available_schedulers(char *buf, size_t maxlen)
+{
+	struct mptcp_sched_ops *sched;
+	size_t offs = 0;
+
+	/* The caller may hand in an uninitialized buffer; without this the
+	 * buffer would be left untouched when no scheduler is listed.
+	 */
+	if (maxlen)
+		buf[0] = '\0';
+
+	rcu_read_lock();
+	spin_lock(&mptcp_sched_list_lock);
+	list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
+		/* names after the first one are separated by a space */
+		offs += snprintf(buf + offs, maxlen - offs,
+				 "%s%s",
+				 offs == 0 ? "" : " ", sched->name);
+
+		/* snprintf() reports the untruncated length: stop before
+		 * maxlen - offs can underflow on the next iteration.
+		 */
+		if (WARN_ON_ONCE(offs >= maxlen))
+			break;
+	}
+	spin_unlock(&mptcp_sched_list_lock);
+	rcu_read_unlock();
+}
+
int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
{
if (!sched->get_subflow)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 1fea43f5b6f3..f9a4fb17b5b7 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -181,8 +181,6 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
switch (optname) {
case SO_KEEPALIVE:
- mptcp_sol_socket_sync_intval(msk, optname, val);
- return 0;
case SO_DEBUG:
case SO_MARK:
case SO_PRIORITY:
@@ -618,12 +616,37 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
}
if (ret == 0)
- strcpy(msk->ca_name, name);
+ strscpy(msk->ca_name, name, sizeof(msk->ca_name));
release_sock(sk);
return ret;
}
+static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max,
+ int (*set_val)(struct sock *, int),
+ int *msk_val, int val)
+{ /* Apply @val to every subflow of @msk through the @set_val callback,
+   * holding each subflow's socket lock around the call. The first
+   * non-zero return value is kept as the result, but the remaining
+   * subflows are still visited. Only when no subflow reported an error
+   * is @val stored in *@msk_val and sockopt_seq_inc(msk) called.
+   * NOTE(review): @max is not referenced anywhere in this body.
+   */
+ struct mptcp_subflow_context *subflow;
+ int err = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int ret;
+
+ lock_sock(ssk);
+ ret = set_val(ssk, val);
+ err = err ? : ret; /* keep the first error seen, if any */
+ release_sock(ssk);
+ }
+
+ if (!err) { /* every subflow accepted the value */
+ *msk_val = val;
+ sockopt_seq_inc(msk);
+ }
+
+ return err;
+}
+
static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val)
{
struct mptcp_subflow_context *subflow;
@@ -820,6 +843,22 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_NODELAY:
ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val);
break;
+ case TCP_KEEPIDLE:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE,
+ &tcp_sock_set_keepidle_locked,
+ &msk->keepalive_idle, val);
+ break;
+ case TCP_KEEPINTVL:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL,
+ &tcp_sock_set_keepintvl,
+ &msk->keepalive_intvl, val);
+ break;
+ case TCP_KEEPCNT:
+ ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT,
+ &tcp_sock_set_keepcnt,
+ &msk->keepalive_cnt,
+ val);
+ break;
default:
ret = -ENOPROTOOPT;
}
@@ -960,6 +999,10 @@ static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, in
if (get_user(len, optlen))
return -EFAULT;
+ /* When used only to check if a fallback to TCP happened. */
+ if (len == 0)
+ return 0;
+
len = min_t(unsigned int, len, sizeof(struct mptcp_info));
mptcp_diag_fill_info(msk, &m_info);
@@ -1328,6 +1371,8 @@ static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
+ struct sock *sk = (void *)msk;
+
switch (optname) {
case TCP_ULP:
case TCP_CONGESTION:
@@ -1346,8 +1391,22 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->cork);
case TCP_NODELAY:
return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
+ case TCP_KEEPIDLE:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_idle ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ);
+ case TCP_KEEPINTVL:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_intvl ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ);
+ case TCP_KEEPCNT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ msk->keepalive_cnt ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes));
case TCP_NOTSENT_LOWAT:
return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
+ case TCP_IS_MPTCP:
+ return mptcp_put_int_option(msk, optval, optlen, 1);
}
return -EOPNOTSUPP;
}
@@ -1463,6 +1522,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
tcp_set_congestion_control(ssk, msk->ca_name, false, true);
__tcp_sock_set_cork(ssk, !!msk->cork);
__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
+ tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
+ tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
+ tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b94d1dca1094..612c38570a64 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -20,6 +20,7 @@
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
+
#include "protocol.h"
#include "mib.h"
@@ -286,6 +287,16 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);
+static enum sk_rst_reason mptcp_get_rst_reason(const struct sk_buff *skb)
+{ /* Translate the reset_reason stored in the struct mptcp_ext returned by
+   * mptcp_get_ext(skb) into an sk_rst_reason, using
+   * sk_rst_convert_mptcp_reason().
+   */
+ const struct mptcp_ext *mpext = mptcp_get_ext(skb);
+
+ if (!mpext) /* nothing to convert: report an unspecified reason */
+ return SK_RST_REASON_NOT_SPECIFIED;
+
+ return sk_rst_convert_mptcp_reason(mpext->reset_reason);
+}
+
static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
@@ -308,7 +319,8 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
dst_release(dst);
if (!req->syncookie)
- tcp_request_sock_ops.send_reset(sk, skb);
+ tcp_request_sock_ops.send_reset(sk, skb,
+ mptcp_get_rst_reason(skb));
return NULL;
}
@@ -376,7 +388,8 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
dst_release(dst);
if (!req->syncookie)
- tcp6_request_sock_ops.send_reset(sk, skb);
+ tcp6_request_sock_ops.send_reset(sk, skb,
+ mptcp_get_rst_reason(skb));
return NULL;
}
#endif
@@ -412,7 +425,7 @@ void mptcp_subflow_reset(struct sock *ssk)
/* must hold: tcp_done() could drop last reference on parent */
sock_hold(sk);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
tcp_done(ssk);
if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
mptcp_schedule_work(sk);
@@ -781,6 +794,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
bool fallback, fallback_is_fatal;
+ enum sk_rst_reason reason;
struct mptcp_sock *owner;
struct sock *child;
@@ -899,7 +913,7 @@ create_child:
}
/* check for expected invariant - should never trigger, just help
- * catching eariler subtle bugs
+ * catching earlier subtle bugs
*/
WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
(!mptcp_subflow_ctx(child) ||
@@ -911,7 +925,8 @@ dispose_child:
tcp_rsk(req)->drop_req = true;
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
- req->rsk_ops->send_reset(sk, skb);
+ reason = mptcp_get_rst_reason(skb);
+ req->rsk_ops->send_reset(sk, skb, reason);
/* The last child reference will be released by the caller */
return child;
@@ -1104,6 +1119,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
if (mpext->data_fin == 1) {
+ u64 data_fin_seq;
+
if (data_len == 1) {
bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
mpext->dsn64);
@@ -1116,26 +1133,26 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
*/
skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
- } else {
- if (updated)
- mptcp_schedule_work((struct sock *)msk);
-
- return MAPPING_DATA_FIN;
}
- } else {
- u64 data_fin_seq = mpext->data_seq + data_len - 1;
- /* If mpext->data_seq is a 32-bit value, data_fin_seq
- * must also be limited to 32 bits.
- */
- if (!mpext->dsn64)
- data_fin_seq &= GENMASK_ULL(31, 0);
+ if (updated)
+ mptcp_schedule_work((struct sock *)msk);
- mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
- pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
- data_fin_seq, mpext->dsn64);
+ return MAPPING_DATA_FIN;
}
+ data_fin_seq = mpext->data_seq + data_len - 1;
+
+ /* If mpext->data_seq is a 32-bit value, data_fin_seq must also
+ * be limited to 32 bits.
+ */
+ if (!mpext->dsn64)
+ data_fin_seq &= GENMASK_ULL(31, 0);
+
+ mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
+ pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
+ data_fin_seq, mpext->dsn64);
+
/* Adjust for DATA_FIN using 1 byte of sequence space */
data_len--;
}
@@ -1246,7 +1263,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
unsigned long fail_tout;
- /* greceful failure can happen only on the MPC subflow */
+ /* graceful failure can happen only on the MPC subflow */
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
return;
@@ -1348,7 +1365,7 @@ reset:
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ mptcp_send_active_reset_reason(ssk);
WRITE_ONCE(subflow->data_avail, false);
return false;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 143a341bbc0a..b6d0dcf3a5c3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -94,6 +94,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
int availmem;
+ int amemthresh;
int nomem;
int to_change = -1;
@@ -105,7 +106,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < ipvs->sysctl_amemthresh);
+ amemthresh = max(READ_ONCE(ipvs->sysctl_amemthresh), 0);
+ nomem = (availmem < amemthresh);
local_bh_disable();
@@ -145,9 +147,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 1:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
ipvs->sysctl_drop_packet = 2;
} else {
ipvs->drop_rate = 0;
@@ -155,9 +156,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 2:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
} else {
ipvs->drop_rate = 0;
ipvs->sysctl_drop_packet = 1;
@@ -2263,7 +2263,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec,
},
#endif
- { }
};
#endif
@@ -4270,6 +4269,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
struct ctl_table *tbl;
int idx, ret;
size_t ctl_table_size = ARRAY_SIZE(vs_vars);
+ bool unpriv = net->user_ns != &init_user_ns;
atomic_set(&ipvs->dropentry, 0);
spin_lock_init(&ipvs->dropentry_lock);
@@ -4284,12 +4284,6 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- tbl[0].procname = NULL;
- ctl_table_size = 0;
- }
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -4315,10 +4309,17 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_ports = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ports;
tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
+
ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+
ipvs->sysctl_sync_sock_size = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
+
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
@@ -4341,15 +4342,22 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+
ipvs->sysctl_run_estimation = 1;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_run_estimation;
ipvs->est_cpulist_valid = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_cpulist;
ipvs->sysctl_est_nice = IPVS_EST_NICE;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_nice;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 8ceec7a2fa8f..2423513d701d 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -123,7 +123,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -563,10 +562,8 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblc_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblc_ctl_table = vs_vars_table;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0fb64707213f..cdb1d4bf6761 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -294,7 +294,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -749,10 +748,8 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblcr_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 39b5fd6bbf65..3313bceb6cc9 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
(!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
(addr_type & IPV6_ADDR_LOOPBACK);
old_rt_is_local = __ip_vs_is_local_route6(
- (struct rt6_info *)skb_dst(skb));
+ dst_rt6_info(skb_dst(skb)));
} else
#endif
{
@@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rtable *) dest_dst->dst_cache;
+ rt = dst_rtable(dest_dst->dst_cache);
else {
dest_dst = ip_vs_dest_dst_alloc();
spin_lock_bh(&dest->dst_lock);
@@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rt6_info *) dest_dst->dst_cache;
+ rt = dst_rt6_info(dest_dst->dst_cache);
else {
u32 cookie;
@@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
cookie = rt6_get_cookie(rt);
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock_bh(&dest->dst_lock);
@@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
rt_mode);
if (!dst)
goto err_unreach;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
}
local = __ip_vs_is_local_route6(rt);
@@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_RDR);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
tdev = rt->dst.dev;
/*
@@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c63868666bd9..7ac20750c127 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1440,8 +1440,6 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
const struct nf_conntrack_l4proto *l4proto;
u8 protonum = nf_ct_protonum(ct);
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
- return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
@@ -2024,7 +2022,7 @@ repeat:
goto repeat;
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
+ if (ret == NF_DROP)
NF_CT_STAT_INC_ATOMIC(state->net, drop);
ret = -ret;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index e2db1f4ec2df..ebc4f733bb2e 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -525,7 +525,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
if (dccp_error(dh, skb, dataoff, state))
return -NF_ACCEPT;
@@ -533,7 +533,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
/* pull again, including possible 48 bit sequences and subtype header */
dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
type = dh->dccph_type;
if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 1020d67600a9..327b8059025d 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -62,7 +62,9 @@ static const u_int8_t noct_valid_new[] = {
[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
- [ICMPV6_MLD2_REPORT - 130] = 1
+ [ICMPV6_MLD2_REPORT - 130] = 1,
+ [ICMPV6_MRDISC_ADV - 130] = 1,
+ [ICMPV6_MRDISC_SOL - 130] = 1
};
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index bb9dea676ec1..74112e9c5dab 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -616,11 +616,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_LWTUNNEL,
#endif
- __NF_SYSCTL_CT_LAST_SYSCTL,
+ NF_SYSCTL_CT_LAST_SYSCTL,
};
-#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1)
-
static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
@@ -957,7 +955,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
},
#endif
- {}
};
static struct ctl_table nf_ct_netfilter_table[] = {
@@ -968,7 +965,6 @@ static struct ctl_table nf_ct_netfilter_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a0571339239c..5c1ff07eaee0 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc);
static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
- const struct rt6_info *rt;
-
- if (flow_tuple->l3proto == NFPROTO_IPV6) {
- rt = (const struct rt6_info *)flow_tuple->dst_cache;
- return rt6_get_cookie(rt);
- }
+ if (flow_tuple->l3proto == NFPROTO_IPV6)
+ return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));
return 0;
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 5383bed3d3e0..c2c005234dcd 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
@@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
@@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
@@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index efedd2f13ac7..769fd7680fac 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -395,7 +395,7 @@ static const struct seq_operations nflog_seq_ops = {
#ifdef CONFIG_SYSCTL
static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
-static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
+static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO];
static struct ctl_table_header *nf_log_sysctl_fhdr;
static struct ctl_table nf_log_sysctl_ftable[] = {
@@ -406,7 +406,6 @@ static struct ctl_table nf_log_sysctl_ftable[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static int nf_log_proc_dostring(struct ctl_table *table, int write,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 167074283ea9..be3b4c90d2ed 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3333,7 +3333,7 @@ err_expr_parse:
return ERR_PTR(err);
}
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp)
{
int err;
@@ -3341,7 +3341,7 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
return -EINVAL;
dst->ops = src->ops;
- err = src->ops->clone(dst, src);
+ err = src->ops->clone(dst, src, gfp);
if (err < 0)
return err;
@@ -6525,7 +6525,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
if (!expr)
goto err_expr;
- err = nft_expr_clone(expr, set->exprs[i]);
+ err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT);
if (err < 0) {
kfree(expr);
goto err_expr;
@@ -6564,7 +6564,7 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
for (i = 0; i < num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- err = nft_expr_clone(expr, expr_array[i]);
+ err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT);
if (err < 0)
goto err_elem_expr_setup;
@@ -7776,6 +7776,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (WARN_ON_ONCE(!type))
return -ENOENT;
+ if (!obj->ops->update)
+ return 0;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
@@ -9467,9 +9470,10 @@ static void nft_obj_commit_update(struct nft_trans *trans)
obj = nft_trans_obj(trans);
newobj = nft_trans_obj_newobj(trans);
- if (obj->ops->update)
- obj->ops->update(obj, newobj);
+ if (WARN_ON_ONCE(!obj->ops->update))
+ return;
+ obj->ops->update(obj, newobj);
nft_obj_destroy(&trans->ctx, newobj);
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index d170758a1eb5..7010541fcca6 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -325,9 +325,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_hook *hook, *found = NULL;
int n = 0;
- if (event != NETDEV_UNREGISTER)
- return;
-
list_for_each_entry(hook, &basechain->hook_list, list) {
if (hook->ops.dev == dev)
found = hook;
@@ -367,8 +364,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
.net = dev_net(dev),
};
- if (event != NETDEV_UNREGISTER &&
- event != NETDEV_CHANGENAME)
+ if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
nft_net = nft_pernet(ctx.net);
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index de9d1980df69..92b984fa8175 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -210,12 +210,12 @@ static void nft_connlimit_destroy(const struct nft_ctx *ctx,
nft_connlimit_do_destroy(ctx, priv);
}
-static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), gfp);
if (!priv_dst->list)
return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index dccc68a5135a..291ed2026367 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -226,7 +226,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
nft_counter_do_destroy(priv);
}
-static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
@@ -236,7 +236,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
nft_counter_fetch(priv, &total);
- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp);
if (cpu_stats == NULL)
return -ENOMEM;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index c09dba57354c..b4ada3ab2167 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -35,7 +35,7 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
for (i = 0; i < priv->num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- if (nft_expr_clone(expr, priv->expr_array[i]) < 0)
+ if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0)
return -1;
elem_expr->size += priv->expr_array[i]->ops->size;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 8e6d7eaf9dc8..de1b6066bfa8 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -102,12 +102,12 @@ static void nft_last_destroy(const struct nft_ctx *ctx,
kfree(priv->last);
}
-static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
struct nft_last_priv *priv_src = nft_expr_priv(src);
- priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp);
if (!priv_dst->last)
return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index cefa25e0dbb0..21d26b79b460 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -150,7 +150,7 @@ static void nft_limit_destroy(const struct nft_ctx *ctx,
}
static int nft_limit_clone(struct nft_limit_priv *priv_dst,
- const struct nft_limit_priv *priv_src)
+ const struct nft_limit_priv *priv_src, gfp_t gfp)
{
priv_dst->tokens_max = priv_src->tokens_max;
priv_dst->rate = priv_src->rate;
@@ -158,7 +158,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->burst = priv_src->burst;
priv_dst->invert = priv_src->invert;
- priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp);
if (!priv_dst->limit)
return -ENOMEM;
@@ -223,14 +223,15 @@ static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, &priv->limit);
}
-static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
priv_dst->cost = priv_src->cost;
- return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp);
}
static struct nft_expr_type nft_limit_type;
@@ -281,12 +282,13 @@ static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, priv);
}
-static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv *priv_src = nft_expr_priv(src);
- return nft_limit_clone(priv_dst, priv_src);
+ return nft_limit_clone(priv_dst, priv_src, gfp);
}
static const struct nft_expr_ops nft_limit_bytes_ops = {
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 3ba12a7471b0..9b2d7463d3d3 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -233,7 +233,7 @@ static void nft_quota_destroy(const struct nft_ctx *ctx,
return nft_quota_do_destroy(ctx, priv);
}
-static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_quota *priv_dst = nft_expr_priv(dst);
struct nft_quota *priv_src = nft_expr_priv(src);
@@ -241,7 +241,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
priv_dst->quota = priv_src->quota;
priv_dst->flags = priv_src->flags;
- priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), gfp);
if (!priv_dst->consumed)
return -ENOMEM;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 24d977138572..14d88394bcb7 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -73,14 +73,14 @@ void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ *dest = (__force u32)rt_nexthop(dst_rtable(dst),
ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
goto err;
- memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+ memcpy(dest, rt6_nexthop(dst_rt6_info(dst),
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 187138afac45..15a236bebb46 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -504,6 +504,7 @@ out:
* pipapo_get() - Get matching element reference given key data
* @net: Network namespace
* @set: nftables API set representation
+ * @m: storage containing active/existing elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: timestamp to check for expired elements
@@ -517,17 +518,15 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
+ const struct nft_pipapo_match *m,
const u8 *data, u8 genmask,
u64 tstamp, gfp_t gfp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
- struct nft_pipapo *priv = nft_set_priv(set);
unsigned long *res_map, *fill_map = NULL;
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
int i;
- m = priv->clone;
if (m->bsize_max == 0)
return ret;
@@ -612,9 +611,11 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m = rcu_dereference(priv->match);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
nft_genmask_cur(net), get_jiffies_64(),
GFP_ATOMIC);
if (IS_ERR(e))
@@ -1247,6 +1248,40 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return 0;
}
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
+static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old);
+
+/**
+ * pipapo_maybe_clone() - Build clone for pending data changes, if not existing
+ * @set: nftables API set representation
+ *
+ * Return: newly created or existing clone, if any. NULL on allocation failure
+ */
+static struct nft_pipapo_match *pipapo_maybe_clone(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m;
+
+ if (priv->clone)
+ return priv->clone;
+
+ m = rcu_dereference_protected(priv->match,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = pipapo_clone(m);
+
+ return priv->clone;
+}
+
/**
* nft_pipapo_insert() - Validate and insert ranged elements
* @net: Network namespace
@@ -1263,8 +1298,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
- struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
u64 tstamp = nft_net_tstamp(net);
@@ -1272,12 +1306,15 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
+ if (!m)
+ return -ENOMEM;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
end = (const u8 *)nft_set_ext_key_end(ext)->data;
else
end = start;
- dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL);
+ dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1299,7 +1336,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp,
+ dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp,
GFP_KERNEL);
}
@@ -1332,8 +1369,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
}
/* Insert */
- priv->dirty = true;
-
bsize_max = m->bsize_max;
nft_pipapo_for_each_field(f, i, m) {
@@ -1384,7 +1419,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
* pipapo_clone() - Clone matching data to create new working copy
* @old: Existing matching data
*
- * Return: copy of matching data passed as 'old', error pointer on failure
+ * Return: copy of matching data passed as 'old' or NULL.
*/
static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
{
@@ -1394,7 +1429,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
- return ERR_PTR(-ENOMEM);
+ return NULL;
new->field_count = old->field_count;
new->bsize_max = old->bsize_max;
@@ -1466,7 +1501,7 @@ out_scratch:
free_percpu(new->scratch);
kfree(new);
- return ERR_PTR(-ENOMEM);
+ return NULL;
}
/**
@@ -1698,8 +1733,6 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- priv->dirty = true;
-
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
return;
@@ -1777,57 +1810,30 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *old;
-
- if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ struct nft_pipapo_match *old;
- if (!priv->dirty)
+ if (!priv->clone)
return;
- new_clone = pipapo_clone(priv->clone);
- if (IS_ERR(new_clone))
- return;
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ pipapo_gc(set, priv->clone);
- priv->dirty = false;
+ old = rcu_replace_pointer(priv->match, priv->clone,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = NULL;
- old = rcu_access_pointer(priv->match);
- rcu_assign_pointer(priv->match, priv->clone);
if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
-
- priv->clone = new_clone;
-}
-
-static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
-{
-#ifdef CONFIG_PROVE_LOCKING
- const struct net *net = read_pnet(&set->net);
-
- return lockdep_is_held(&nft_pernet(net)->commit_mutex);
-#else
- return true;
-#endif
}
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *m;
-
- if (!priv->dirty)
- return;
-
- m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
- new_clone = pipapo_clone(m);
- if (IS_ERR(new_clone))
+ if (!priv->clone)
return;
-
- priv->dirty = false;
-
pipapo_free_match(priv->clone);
- priv->clone = new_clone;
+ priv->clone = NULL;
}
/**
@@ -1851,52 +1857,38 @@ static void nft_pipapo_activate(const struct net *net,
}
/**
- * pipapo_deactivate() - Check that element is in set, mark as inactive
+ * nft_pipapo_deactivate() - Search for element and make it inactive
* @net: Network namespace
* @set: nftables API set representation
- * @data: Input key data
- * @ext: nftables API extension pointer, used to check for end element
- *
- * This is a convenience function that can be called from both
- * nft_pipapo_deactivate() and nft_pipapo_flush(), as they are in fact the same
- * operation.
+ * @elem: nftables API element representation containing key data
*
* Return: deactivated element if found, NULL otherwise.
*/
-static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const u8 *data, const struct nft_set_ext *ext)
+static struct nft_elem_priv *
+nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net),
- nft_net_tstamp(net), GFP_KERNEL);
+ /* removal must occur on priv->clone, if we are low on memory
+ * we have no choice and must fail the removal request.
+ */
+ if (!m)
+ return NULL;
+
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
+ nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL);
if (IS_ERR(e))
return NULL;
nft_set_elem_change_active(net, set, &e->ext);
- return e;
-}
-
-/**
- * nft_pipapo_deactivate() - Call pipapo_deactivate() to make element inactive
- * @net: Network namespace
- * @set: nftables API set representation
- * @elem: nftables API element representation containing key data
- *
- * Return: deactivated element if found, NULL otherwise.
- */
-static struct nft_elem_priv *
-nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
-
- return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext);
+ return &e->priv;
}
/**
- * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
+ * nft_pipapo_flush() - make element inactive
* @net: Network namespace
* @set: nftables API set representation
* @elem_priv: nftables API element representation containing key data
@@ -2093,7 +2085,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
if (last && f->mt[rulemap[i].to].e == e) {
- priv->dirty = true;
pipapo_drop(m, rulemap);
return;
}
@@ -2106,35 +2097,23 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
}
/**
- * nft_pipapo_walk() - Walk over elements
+ * nft_pipapo_do_walk() - Walk over elements in m
* @ctx: nftables API context
* @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
* @iter: Iterator
*
* As elements are referenced in the mapping array for the last field, directly
* scan that array: there's no need to follow rule mappings from the first
- * field.
+ * field. @m is protected either by RCU read lock or by transaction mutex.
*/
-static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_iter *iter)
+static void nft_pipapo_do_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nft_pipapo_match *m,
+ struct nft_set_iter *iter)
{
- struct nft_pipapo *priv = nft_set_priv(set);
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
unsigned int i, r;
- WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
- iter->type != NFT_ITER_UPDATE);
-
- rcu_read_lock();
- if (iter->type == NFT_ITER_READ)
- m = rcu_dereference(priv->match);
- else
- m = priv->clone;
-
- if (unlikely(!m))
- goto out;
-
for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
;
@@ -2151,14 +2130,49 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
- goto out;
+ return;
cont:
iter->count++;
}
+}
-out:
- rcu_read_unlock();
+/**
+ * nft_pipapo_walk() - Walk over elements
+ * @ctx: nftables API context
+ * @set: nftables API set representation
+ * @iter: Iterator
+ *
+ * Test if destructive action is needed or not, clone active backend if needed
+ * and call the real function to work on the data.
+ */
+static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ const struct nft_pipapo_match *m;
+
+ switch (iter->type) {
+ case NFT_ITER_UPDATE:
+ m = pipapo_maybe_clone(set);
+ if (!m) {
+ iter->err = -ENOMEM;
+ return;
+ }
+
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ break;
+ case NFT_ITER_READ:
+ rcu_read_lock();
+ m = rcu_dereference(priv->match);
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ rcu_read_unlock();
+ break;
+ default:
+ iter->err = -EINVAL;
+ WARN_ON_ONCE(1);
+ break;
+ }
}
/**
@@ -2267,21 +2281,10 @@ static int nft_pipapo_init(const struct nft_set *set,
f->mt = NULL;
}
- /* Create an initial clone of matching data for next insertion */
- priv->clone = pipapo_clone(m);
- if (IS_ERR(priv->clone)) {
- err = PTR_ERR(priv->clone);
- goto out_free;
- }
-
- priv->dirty = false;
-
rcu_assign_pointer(priv->match, m);
return 0;
-out_free:
- free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2326,33 +2329,18 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- int cpu;
m = rcu_dereference_protected(priv->match, true);
- if (m) {
- rcu_barrier();
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(m, cpu);
- free_percpu(m->scratch);
- pipapo_free_fields(m);
- kfree(m);
- priv->match = NULL;
- }
if (priv->clone) {
- m = priv->clone;
-
- nft_set_pipapo_match_destroy(ctx, set, m);
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(priv->clone, cpu);
- free_percpu(priv->clone->scratch);
-
- pipapo_free_fields(priv->clone);
- kfree(priv->clone);
+ nft_set_pipapo_match_destroy(ctx, set, priv->clone);
+ pipapo_free_match(priv->clone);
priv->clone = NULL;
+ } else {
+ nft_set_pipapo_match_destroy(ctx, set, m);
}
+
+ pipapo_free_match(m);
}
/**
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 24cd1ff73f98..0d2e40e10f7f 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -155,14 +155,12 @@ struct nft_pipapo_match {
* @match: Currently in-use matching data
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
- * @dirty: Working copy has pending insertions or deletions
* @last_gc: Timestamp of last garbage collection run, jiffies
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
- bool dirty;
unsigned long last_gc;
};
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 1ba4f58e1d35..cd9160bbc919 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -965,6 +965,7 @@ int netlbl_enabled(void)
* @sk: the socket to label
* @family: protocol family
* @secattr: the security attributes
+ * @sk_locked: true if caller holds the socket lock
*
* Description:
* Attach the correct label to the given socket using the security attributes
@@ -977,7 +978,8 @@ int netlbl_enabled(void)
*/
int netlbl_sock_setattr(struct sock *sk,
u16 family,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
int ret_val;
struct netlbl_dom_map *dom_entry;
@@ -997,7 +999,7 @@ int netlbl_sock_setattr(struct sock *sk,
case NETLBL_NLTYPE_CIPSOV4:
ret_val = cipso_v4_sock_setattr(sk,
dom_entry->def.cipso,
- secattr);
+ secattr, sk_locked);
break;
case NETLBL_NLTYPE_UNLABELED:
ret_val = 0;
@@ -1091,6 +1093,28 @@ int netlbl_sock_getattr(struct sock *sk,
}
/**
+ * netlbl_sk_lock_check - Check if the socket lock has been acquired.
+ * @sk: the socket to be checked
+ *
+ * Return: true if socket @sk is locked or if lock debugging is disabled at
+ * runtime or compile-time; false otherwise
+ *
+ */
+#ifdef CONFIG_LOCKDEP
+bool netlbl_sk_lock_check(struct sock *sk)
+{
+ if (debug_locks)
+ return lockdep_sock_is_held(sk);
+ return true;
+}
+#else
+bool netlbl_sk_lock_check(struct sock *sk)
+{
+ return true;
+}
+#endif
+
+/**
* netlbl_conn_setattr - Label a connected socket using the correct protocol
* @sk: the socket to label
* @addr: the destination address
@@ -1126,7 +1150,8 @@ int netlbl_conn_setattr(struct sock *sk,
switch (entry->type) {
case NETLBL_NLTYPE_CIPSOV4:
ret_val = cipso_v4_sock_setattr(sk,
- entry->cipso, secattr);
+ entry->cipso, secattr,
+ netlbl_sk_lock_check(sk));
break;
case NETLBL_NLTYPE_UNLABELED:
/* just delete the protocols we support for right now
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 104a80b75477..6ee148f0e6d0 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -772,8 +772,8 @@ out_release:
return err;
}
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int nr_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
@@ -805,7 +805,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 70480869ad1c..bd2b17b219ae 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -285,22 +285,14 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
return 0;
}
-static inline void __nr_remove_node(struct nr_node *nr_node)
+static void nr_remove_node_locked(struct nr_node *nr_node)
{
+ lockdep_assert_held(&nr_node_list_lock);
+
hlist_del_init(&nr_node->node_node);
nr_node_put(nr_node);
}
-#define nr_remove_node_locked(__node) \
- __nr_remove_node(__node)
-
-static void nr_remove_node(struct nr_node *nr_node)
-{
- spin_lock_bh(&nr_node_list_lock);
- __nr_remove_node(nr_node);
- spin_unlock_bh(&nr_node_list_lock);
-}
-
static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh)
{
hlist_del_init(&nr_neigh->neigh_node);
@@ -339,6 +331,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
return -EINVAL;
}
+ spin_lock_bh(&nr_node_list_lock);
nr_node_lock(nr_node);
for (i = 0; i < nr_node->count; i++) {
if (nr_node->routes[i].neighbour == nr_neigh) {
@@ -352,7 +345,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
nr_node->count--;
if (nr_node->count == 0) {
- nr_remove_node(nr_node);
+ nr_remove_node_locked(nr_node);
} else {
switch (i) {
case 0:
@@ -367,12 +360,14 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
nr_node_put(nr_node);
}
nr_node_unlock(nr_node);
+ spin_unlock_bh(&nr_node_list_lock);
return 0;
}
}
nr_neigh_put(nr_neigh);
nr_node_unlock(nr_node);
+ spin_unlock_bh(&nr_node_list_lock);
nr_node_put(nr_node);
return -EINVAL;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 79fb2d3f477b..7dc0fa628f2e 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -140,7 +140,6 @@ static struct ctl_table nr_table[] = {
.extra1 = &min_reset,
.extra2 = &max_reset
},
- { }
};
int __init nr_register_sysctl(void)
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index d5344563e525..57a2f97004e1 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -447,7 +447,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
}
static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *new_sk;
@@ -463,7 +463,7 @@ static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
goto error;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
/* Wait for an incoming connection. */
add_wait_queue_exclusive(sk_sleep(sk), &wait);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 0d26c8ec9993..f456a5911e7d 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1463,6 +1463,19 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
ndev->ops->n_core_ops);
}
+static bool nci_valid_size(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE);
+ unsigned int hdr_size = NCI_CTRL_HDR_SIZE;
+
+ if (skb->len < hdr_size ||
+ !nci_plen(skb->data) ||
+ skb->len < hdr_size + nci_plen(skb->data)) {
+ return false;
+ }
+ return true;
+}
+
/* ---- NCI TX Data worker thread ---- */
static void nci_tx_work(struct work_struct *work)
@@ -1516,9 +1529,9 @@ static void nci_rx_work(struct work_struct *work)
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
- if (!nci_plen(skb->data)) {
+ if (!nci_valid_size(skb)) {
kfree_skb(skb);
- break;
+ continue;
}
/* Process frame */
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index f4a38bd6a7e0..bfb7758063f3 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -77,13 +77,15 @@ EXPORT_SYMBOL_GPL(nsh_pop);
static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ unsigned int outer_hlen, mac_len, nsh_len;
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
- unsigned int nsh_len, mac_len;
- __be16 proto;
+ __be16 outer_proto, proto;
skb_reset_network_header(skb);
+ outer_proto = skb->protocol;
+ outer_hlen = skb_mac_header_len(skb);
mac_len = skb->mac_len;
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
@@ -113,10 +115,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
}
for (skb = segs; skb; skb = skb->next) {
- skb->protocol = htons(ETH_P_NSH);
- __skb_push(skb, nsh_len);
- skb->mac_header = mac_offset;
- skb->network_header = skb->mac_header + mac_len;
+ skb->protocol = outer_proto;
+ __skb_push(skb, nsh_len + outer_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, outer_hlen);
skb->mac_len = mac_len;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6fcd7e2ca81f..964225580824 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -936,6 +936,12 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
pskb_trim(skb, ovs_mac_header_len(key));
}
+ /* Need to set the pkt_type to involve the routing layer. The
+ * packet movement through the OVS datapath doesn't generally
+ * use routing, but this is needed for tunnel cases.
+ */
+ skb->pkt_type = PACKET_OUTGOING;
+
if (likely(!mru ||
(skb->len <= mru + vport->dev->hard_header_len))) {
ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 33b21a0c0548..8a848ce72e29 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
*/
key->tp.src = htons(icmp->icmp6_type);
key->tp.dst = htons(icmp->icmp6_code);
- memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
struct nd_msg *nd;
int offset;
+ memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
+
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
index 3eb35d9eb700..74d75aaebef4 100644
--- a/net/openvswitch/openvswitch_trace.h
+++ b/net/openvswitch/openvswitch_trace.h
@@ -43,8 +43,8 @@ TRACE_EVENT(ovs_do_execute_action,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
@@ -113,8 +113,8 @@ TRACE_EVENT(ovs_dp_upcall,
TP_fast_assign(
__entry->dpaddr = dp;
- __assign_str(dp_name, ovs_dp_name(dp));
- __assign_str(dev_name, skb->dev->name);
+ __assign_str(dp_name);
+ __assign_str(dev_name);
__entry->skbaddr = skb;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8c6d3fbb4ed8..ea3ebc160e25 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2522,8 +2522,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
- if (!packet_read_pending(&po->tx_ring))
- complete(&po->skb_completion);
+ complete(&po->skb_completion);
}
sock_wfree(skb);
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 3dd5f52bc1b5..53a858478e22 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -759,8 +759,8 @@ static void pep_sock_close(struct sock *sk, long timeout)
sock_put(sk);
}
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
- bool kern)
+static struct sock *pep_sock_accept(struct sock *sk,
+ struct proto_accept_arg *arg)
{
struct pep_sock *pn = pep_sk(sk), *newpn;
struct sock *newsk = NULL;
@@ -772,8 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
u8 pipe_handle, enabled, n_sb;
u8 aligned = 0;
- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
- errp);
+ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &arg->err);
if (!skb)
return NULL;
@@ -836,7 +836,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
/* Create a new to-be-accepted sock */
newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
- kern);
+ arg->kern);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
@@ -878,7 +878,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
drop:
release_sock(sk);
kfree_skb(skb);
- *errp = err;
+ arg->err = err;
return newsk;
}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 59aebe296890..7008d402499d 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -178,7 +178,7 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
if (nla_put_u8(skb, RTA_DST, dst) ||
- nla_put_u32(skb, RTA_OIF, dev->ifindex))
+ nla_put_u32(skb, RTA_OIF, READ_ONCE(dev->ifindex)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) +
nla_total_size(1) + nla_total_size(4), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -263,6 +263,7 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ int err = 0;
u8 addr;
rcu_read_lock();
@@ -272,16 +273,16 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
continue;
- if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0)
- goto out;
+ err = fill_route(skb, dev, addr << 2,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE);
+ if (err < 0)
+ break;
}
-
-out:
rcu_read_unlock();
cb->args[0] = addr;
- return skb->len;
+ return err;
}
int __init phonet_netlink_register(void)
@@ -301,6 +302,6 @@ int __init phonet_netlink_register(void)
rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE,
route_doit, NULL, 0);
rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE,
- NULL, route_dumpit, 0);
+ NULL, route_dumpit, RTNL_FLAG_DUMP_UNLOCKED);
return 0;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1018340d89a7..5ce0b3ee5def 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -292,18 +292,17 @@ out:
}
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sock *newsk;
- int err;
if (unlikely(sk->sk_state != TCP_LISTEN))
return -EINVAL;
- newsk = sk->sk_prot->accept(sk, flags, &err, kern);
+ newsk = sk->sk_prot->accept(sk, arg);
if (!newsk)
- return err;
+ return arg->err;
lock_sock(newsk);
sock_graft(newsk, newsock);
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index 0d0bf41381c2..82fc22467a09 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -81,7 +81,6 @@ static struct ctl_table phonet_table[] = {
.mode = 0644,
.proc_handler = proc_local_port_range,
},
- { }
};
int __init phonet_sysctl_init(void)
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index abb0c70ffc8b..654a3cc0d347 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -725,6 +725,24 @@ int qrtr_ns_init(void)
if (ret < 0)
goto err_wq;
+ /* As the qrtr ns socket owner and creator is the same module, we have
+ * to decrease the qrtr module reference count to guarantee that it
+ * remains zero after the ns socket is created, otherwise, executing
+ * "rmmod" command is unable to make the qrtr module deleted after the
+ * qrtr module is inserted successfully.
+ *
+ * However, the reference count is increased twice in
+ * sock_create_kern(): one is to increase the reference count of owner
+ * of qrtr socket's proto_ops struct; another is to increment the
+ * reference count of owner of qrtr proto struct. Therefore, we must
+ * decrement the module reference count twice to ensure that it keeps
+ * zero after server's listening socket is created. Of course, we
+ * must bump the module reference count twice as well before the socket
+ * is closed.
+ */
+ module_put(qrtr_ns.sock->ops->owner);
+ module_put(qrtr_ns.sock->sk->sk_prot_creator->owner);
+
return 0;
err_wq:
@@ -739,6 +757,15 @@ void qrtr_ns_remove(void)
{
cancel_work_sync(&qrtr_ns.work);
destroy_workqueue(qrtr_ns.workqueue);
+
+ /* sock_release() expects the two references that were put during
+ * qrtr_ns_init(). This function is only called during module remove,
+ * so try_stop_module() has already set the refcnt to 0. Use
+ * __module_get() instead of try_module_get() to successfully take two
+ * references.
+ */
+ __module_get(qrtr_ns.sock->ops->owner);
+ __module_get(qrtr_ns.sock->sk->sk_prot_creator->owner);
sock_release(qrtr_ns.sock);
}
EXPORT_SYMBOL_GPL(qrtr_ns_remove);
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index e4e41b3afce7..2af678e71e3c 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -103,7 +103,6 @@ static struct ctl_table rds_ib_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
void rds_ib_sysctl_exit(void)
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index e381bbcd9cc1..025f518a4349 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -89,7 +89,6 @@ static struct ctl_table rds_sysctl_rds_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
void rds_sysctl_exit(void)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2dba7505b414..d8111ac83bb6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -86,7 +86,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
.proc_handler = rds_tcp_skbuf_handler,
.extra1 = &rds_tcp_min_rcvbuf,
},
- { }
};
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 05008ce5c421..d89bd8d0c354 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -105,6 +105,10 @@ int rds_tcp_accept_one(struct socket *sock)
int conn_state;
struct rds_conn_path *cp;
struct in6_addr *my_addr, *peer_addr;
+ struct proto_accept_arg arg = {
+ .flags = O_NONBLOCK,
+ .kern = true,
+ };
#if !IS_ENABLED(CONFIG_IPV6)
struct in6_addr saddr, daddr;
#endif
@@ -119,7 +123,7 @@ int rds_tcp_accept_one(struct socket *sock)
if (ret)
goto out;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
+ ret = sock->ops->accept(sock, new_sock, &arg);
if (ret < 0)
goto out;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ef81d019b20f..59050caab65c 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -919,8 +919,8 @@ out_release:
return err;
}
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int rose_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sk_buff *skb;
struct sock *newsk;
@@ -953,7 +953,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
if (skb)
break;
- if (flags & O_NONBLOCK) {
+ if (arg->flags & O_NONBLOCK) {
err = -EWOULDBLOCK;
break;
}
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index d391d7758f52..d801315b7083 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -112,7 +112,6 @@ static struct ctl_table rose_table[] = {
.extra1 = &min_window,
.extra2 = &max_window
},
- { }
};
void __init rose_register_sysctl(void)
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 08c0a32db8c7..08de24658f4f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -697,7 +697,7 @@ struct rxrpc_call {
* packets) rather than bytes.
*/
#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
-#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4)
+#define RXRPC_MIN_CWND 4
u8 cong_cwnd; /* Congestion window size */
u8 cong_extra; /* Extra to send for congestion management */
u8 cong_ssthresh; /* Slow-start threshold */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 01fa71e8b1f7..f9e983a12c14 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -174,12 +174,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
call->rx_winsize = rxrpc_rx_window_size;
call->tx_winsize = 16;
- if (RXRPC_TX_SMSS > 2190)
- call->cong_cwnd = 2;
- else if (RXRPC_TX_SMSS > 1095)
- call->cong_cwnd = 3;
- else
- call->cong_cwnd = 4;
+ call->cong_cwnd = RXRPC_MIN_CWND;
call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
call->rxnet = rxnet;
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 0af4642aeec4..1539d315afe7 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -119,18 +119,13 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo
switch (srx->transport.family) {
case AF_INET:
if (peer->srx.transport.sin.sin_port !=
- srx->transport.sin.sin_port ||
- peer->srx.transport.sin.sin_addr.s_addr !=
- srx->transport.sin.sin_addr.s_addr)
+ srx->transport.sin.sin_port)
goto not_found;
break;
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
if (peer->srx.transport.sin6.sin6_port !=
- srx->transport.sin6.sin6_port ||
- memcmp(&peer->srx.transport.sin6.sin6_addr,
- &srx->transport.sin6.sin6_addr,
- sizeof(struct in6_addr)) != 0)
+ srx->transport.sin6.sin6_port)
goto not_found;
break;
#endif
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 3dedb8c0618c..16d49a861dbb 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -9,6 +9,17 @@
#include "ar-internal.h"
+/* Override priority when generating ACKs for received DATA */
+static const u8 rxrpc_ack_priority[RXRPC_ACK__INVALID] = {
+ [RXRPC_ACK_IDLE] = 1,
+ [RXRPC_ACK_DELAY] = 2,
+ [RXRPC_ACK_REQUESTED] = 3,
+ [RXRPC_ACK_DUPLICATE] = 4,
+ [RXRPC_ACK_EXCEEDS_WINDOW] = 5,
+ [RXRPC_ACK_NOSPACE] = 6,
+ [RXRPC_ACK_OUT_OF_SEQUENCE] = 7,
+};
+
static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
enum rxrpc_abort_reason why)
{
@@ -365,7 +376,7 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
* Process a DATA packet.
*/
static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
- bool *_notify)
+ bool *_notify, rxrpc_serial_t *_ack_serial, int *_ack_reason)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
@@ -418,8 +429,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
- else
- call->ackr_nr_unacked++;
window++;
if (after(window, wtop)) {
@@ -497,12 +506,16 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
}
send_ack:
- if (ack_reason >= 0)
- rxrpc_send_ACK(call, ack_reason, serial,
- rxrpc_propose_ack_input_data);
- else
- rxrpc_propose_delay_ACK(call, serial,
- rxrpc_propose_ack_input_data);
+ if (ack_reason >= 0) {
+ if (rxrpc_ack_priority[ack_reason] > rxrpc_ack_priority[*_ack_reason]) {
+ *_ack_serial = serial;
+ *_ack_reason = ack_reason;
+ } else if (rxrpc_ack_priority[ack_reason] == rxrpc_ack_priority[*_ack_reason] &&
+ ack_reason == RXRPC_ACK_REQUESTED) {
+ *_ack_serial = serial;
+ *_ack_reason = ack_reason;
+ }
+ }
}
/*
@@ -513,9 +526,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
struct rxrpc_jumbo_header jhdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp;
struct sk_buff *jskb;
+ rxrpc_serial_t ack_serial = 0;
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = skb->len - offset;
bool notify = false;
+ int ack_reason = 0;
while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
if (len < RXRPC_JUMBO_SUBPKTLEN)
@@ -535,7 +550,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
jsp = rxrpc_skb(jskb);
jsp->offset = offset;
jsp->len = RXRPC_JUMBO_DATALEN;
- rxrpc_input_data_one(call, jskb, &notify);
+ rxrpc_input_data_one(call, jskb, &notify, &ack_serial, &ack_reason);
rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket);
sp->hdr.flags = jhdr.flags;
@@ -548,7 +563,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
sp->offset = offset;
sp->len = len;
- rxrpc_input_data_one(call, skb, &notify);
+ rxrpc_input_data_one(call, skb, &notify, &ack_serial, &ack_reason);
+
+ if (ack_reason > 0) {
+ rxrpc_send_ACK(call, ack_reason, ack_serial,
+ rxrpc_propose_ack_input_data);
+ } else {
+ call->ackr_nr_unacked++;
+ rxrpc_propose_delay_ACK(call, sp->hdr.serial,
+ rxrpc_propose_ack_input_data);
+ }
if (notify) {
trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
rxrpc_notify_socket(call);
@@ -685,9 +709,6 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb
call->tx_winsize = rwind;
}
- if (call->cong_ssthresh > rwind)
- call->cong_ssthresh = rwind;
-
mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU));
peer = call->peer;
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index f2701068ed9e..6716c021a532 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -19,7 +19,7 @@ static int none_init_connection_security(struct rxrpc_connection *conn,
*/
static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 0, gfp);
+ return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 1, gfp);
}
static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index f1a68270862d..48a1475e6b06 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -155,7 +155,7 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem
switch (call->conn->security_level) {
default:
space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- return rxrpc_alloc_data_txbuf(call, space, 0, gfp);
+ return rxrpc_alloc_data_txbuf(call, space, 1, gfp);
case RXRPC_SECURITY_AUTH:
shdr = sizeof(struct rxkad_level1_hdr);
break;
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index c9bedd0e2d86..9bf9a1f6e4cb 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -127,7 +127,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
- { }
};
int __init rxrpc_sysctl_init(void)
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index e0679658d9de..c3913d8a50d3 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -21,20 +21,20 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_
{
struct rxrpc_wire_header *whdr;
struct rxrpc_txbuf *txb;
- size_t total, hoff = 0;
+ size_t total, hoff;
void *buf;
txb = kmalloc(sizeof(*txb), gfp);
if (!txb)
return NULL;
- if (data_align)
- hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr);
+ hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr);
total = hoff + sizeof(*whdr) + data_size;
+ data_align = umax(data_align, L1_CACHE_BYTES);
mutex_lock(&call->conn->tx_data_alloc_lock);
- buf = __page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp,
- ~(data_align - 1) & ~(L1_CACHE_BYTES - 1));
+ buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp,
+ data_align);
mutex_unlock(&call->conn->tx_data_alloc_lock);
if (!buf) {
kfree(txb);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 60239378d43f..74afc210527d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* before again attaching a qdisc.
*/
if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
- dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+ WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
}
@@ -1389,6 +1389,7 @@ err_out4:
ops->destroy(sch);
qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
+ lockdep_unregister_key(&sch->root_lock_key);
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4a2c763e2d11..2a637a17061b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -506,19 +506,22 @@ static void dev_watchdog(struct timer_list *t)
unsigned int timedout_ms = 0;
unsigned int i;
unsigned long trans_start;
+ unsigned long oldest_start = jiffies;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
trans_start = READ_ONCE(txq->trans_start);
- if (netif_xmit_stopped(txq) &&
- time_after(jiffies, (trans_start +
- dev->watchdog_timeo))) {
+ if (!netif_xmit_stopped(txq))
+ continue;
+ if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
timedout_ms = jiffies_to_msecs(jiffies - trans_start);
atomic_long_inc(&txq->trans_timeout);
break;
}
+ if (time_after(oldest_start, trans_start))
+ oldest_start = trans_start;
}
if (unlikely(timedout_ms)) {
@@ -531,7 +534,7 @@ static void dev_watchdog(struct timer_list *t)
netif_unfreeze_queues(dev);
}
if (!mod_timer(&dev->watchdog_timer,
- round_jiffies(jiffies +
+ round_jiffies(oldest_start +
dev->watchdog_timeo)))
release = false;
}
@@ -945,7 +948,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
gnet_stats_basic_sync_init(&sch->bstats);
+ lockdep_register_key(&sch->root_lock_key);
spin_lock_init(&sch->q.lock);
+ lockdep_set_class(&sch->q.lock, &sch->root_lock_key);
if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
@@ -980,6 +985,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
return sch;
errout1:
+ lockdep_unregister_key(&sch->root_lock_key);
kfree(sch);
errout:
return ERR_PTR(err);
@@ -1068,6 +1074,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
if (ops->destroy)
ops->destroy(qdisc);
+ lockdep_unregister_key(&qdisc->root_lock_key);
module_put(ops->owner);
netdev_put(dev, &qdisc->dev_tracker);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 93e6fb56f3b5..ff3de37874e4 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1039,13 +1039,6 @@ static void htb_work_func(struct work_struct *work)
rcu_read_unlock();
}
-static void htb_set_lockdep_class_child(struct Qdisc *q)
-{
- static struct lock_class_key child_key;
-
- lockdep_set_class(qdisc_lock(q), &child_key);
-}
-
static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
@@ -1132,7 +1125,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
return -ENOMEM;
}
- htb_set_lockdep_class_child(qdisc);
q->direct_qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
@@ -1468,7 +1460,6 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
}
if (q->offload) {
- htb_set_lockdep_class_child(new);
/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
qdisc_refcount_inc(new);
old_q = htb_graft_helper(dev_queue, new);
@@ -1733,11 +1724,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
cl->parent->common.classid,
NULL);
- if (q->offload) {
- if (new_q)
- htb_set_lockdep_class_child(new_q);
+ if (q->offload)
htb_parent_to_leaf_offload(sch, dev_queue, new_q);
- }
}
sch_tree_lock(sch);
@@ -1947,13 +1935,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
classid, NULL);
if (q->offload) {
- if (new_q) {
- htb_set_lockdep_class_child(new_q);
- /* One ref for cl->leaf.q, the other for
- * dev_queue->qdisc.
- */
+ /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+ if (new_q)
qdisc_refcount_inc(new_q);
- }
old_q = htb_graft_helper(dev_queue, new_q);
/* No qdisc_put needed. */
WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e66f4afb920d..3b9245a3c767 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -608,6 +608,7 @@ static void sfq_perturbation(struct timer_list *t)
struct Qdisc *sch = q->sch;
spinlock_t *root_lock;
siphash_key_t nkey;
+ int period;
get_random_bytes(&nkey, sizeof(nkey));
rcu_read_lock();
@@ -618,8 +619,12 @@ static void sfq_perturbation(struct timer_list *t)
sfq_rehash(sch);
spin_unlock(root_lock);
- if (q->perturb_period)
- mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ /* q->perturb_period can change under us from
+ * sfq_change() and sfq_destroy().
+ */
+ period = READ_ONCE(q->perturb_period);
+ if (period)
+ mod_timer(&q->perturb_timer, jiffies + period);
rcu_read_unlock();
}
@@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
q->quantum = ctl->quantum;
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
}
- q->perturb_period = ctl->perturb_period * HZ;
+ WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
if (ctl->flows)
q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
@@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sch)
struct sfq_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- q->perturb_period = 0;
+ WRITE_ONCE(q->perturb_period, 0);
del_timer_sync(&q->perturb_timer);
sfq_free(q->ht);
sfq_free(q->slots);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 59304611dc00..8badec6d82a2 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);
- if (q->q.qlen < dev->tx_queue_len) {
+ if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
__skb_queue_tail(&q->q, skb);
return NET_XMIT_SUCCESS;
}
@@ -424,7 +424,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
} while ((q = NEXT_SLAVE(q)) != m->slaves);
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 24368f755ab1..f7b809c0d142 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -415,7 +415,7 @@ out:
if (!IS_ERR_OR_NULL(dst)) {
struct rt6_info *rt;
- rt = (struct rt6_info *)dst;
+ rt = dst_rt6_info(dst);
t->dst_cookie = rt6_get_cookie(rt);
pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e849f368ed91..5a7436a13b74 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -552,7 +552,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk,
struct flowi *fl)
{
union sctp_addr *saddr = &t->saddr;
- struct rtable *rt = (struct rtable *)t->dst;
+ struct rtable *rt = dst_rtable(t->dst);
if (rt) {
saddr->v4.sin_family = AF_INET;
@@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
skb_reset_inner_mac_header(skb);
skb_reset_inner_transport_header(skb);
skb_set_inner_ipproto(skb, IPPROTO_SCTP);
- udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr,
+ udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr,
fl4->daddr, dscp, ip4_dst_hoplimit(dst), df,
sctp_sk(sk)->udp_port, t->encap_port, false, false);
return 0;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 08fdf1251f46..5adf0c0a6c1a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -38,6 +38,7 @@
#include <linux/inet.h>
#include <linux/slab.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <net/inet_ecn.h>
#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e416b6d3d270..c009383369b2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4847,7 +4847,7 @@ static int sctp_disconnect(struct sock *sk, int flags)
* descriptor will be returned from accept() to represent the newly
* formed association.
*/
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
+static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg)
{
struct sctp_sock *sp;
struct sctp_endpoint *ep;
@@ -4871,7 +4871,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
goto out;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
error = sctp_wait_for_accept(sk, timeo);
if (error)
@@ -4882,7 +4882,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
*/
asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
- newsk = sp->pf->create_accept_sk(sk, asoc, kern);
+ newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern);
if (!newsk) {
error = -ENOMEM;
goto out;
@@ -4899,7 +4899,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
out:
release_sock(sk);
- *err = error;
+ arg->err = error;
return newsk;
}
@@ -7119,6 +7119,7 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
struct sctp_assoc_ids *ids;
+ size_t ids_size;
u32 num = 0;
if (sctp_style(sk, TCP))
@@ -7131,11 +7132,11 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
num++;
}
- if (len < sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num)
+ ids_size = struct_size(ids, gaids_assoc_id, num);
+ if (len < ids_size)
return -EINVAL;
- len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num;
-
+ len = ids_size;
ids = kmalloc(len, GFP_USER | __GFP_NOWARN);
if (unlikely(!ids))
return -ENOMEM;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 25bdf17c7262..61c6f3027e7f 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -80,8 +80,6 @@ static struct ctl_table sctp_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
-
- { /* sentinel */ }
};
/* The following index defines are used in sctp_sysctl_net_register().
@@ -384,8 +382,6 @@ static struct ctl_table sctp_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &pf_expose_max,
},
-
- { /* sentinel */ }
};
static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
@@ -597,6 +593,7 @@ static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
int sctp_sysctl_net_register(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(sctp_net_table);
struct ctl_table *table;
int i;
@@ -604,7 +601,7 @@ int sctp_sysctl_net_register(struct net *net)
if (!table)
return -ENOMEM;
- for (i = 0; table[i].data; i++)
+ for (i = 0; i < table_size; i++)
table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max;
@@ -613,8 +610,7 @@ int sctp_sysctl_net_register(struct net *net)
table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans;
net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp",
- table,
- ARRAY_SIZE(sctp_net_table));
+ table, table_size);
if (net->sctp.sysctl_header == NULL) {
kfree(table);
return -ENOMEM;
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 746be3996768..ba5e6a2dd2fd 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -20,3 +20,16 @@ config SMC_DIAG
smcss.
if unsure, say Y.
+
+config SMC_LO
+ bool "SMC intra-OS shortcut with loopback-ism"
+ depends on SMC
+ default n
+ help
+ SMC_LO enables the creation of an Emulated-ISM device named
+ loopback-ism in SMC and makes use of it for transferring data
+ when communication occurs within the same OS. This helps in
+ convenient testing of SMC-D since loopback-ism is independent
+ of architecture or hardware.
+
+ if unsure, say N.
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 875efcd126a2..2c510d543058 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
smc-y += smc_tracepoint.o
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
+smc-$(CONFIG_SMC_LO) += smc_loopback.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e8dcd28a554c..e50a286fd0fb 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -53,6 +53,7 @@
#include "smc_stats.h"
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
+#include "smc_loopback.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -1435,6 +1436,14 @@ static int smc_connect_ism(struct smc_sock *smc,
}
smc_conn_save_peer_info(smc, aclc);
+
+ if (smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd)) {
+ rc = smcd_buf_attach(smc);
+ if (rc) {
+ rc = SMC_CLC_DECL_MEM; /* try to fallback */
+ goto connect_abort;
+ }
+ }
smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -2539,6 +2548,14 @@ static void smc_listen_work(struct work_struct *work)
mutex_unlock(&smc_server_lgr_pending);
}
smc_conn_save_peer_info(new_smc, cclc);
+
+ if (ini->is_smcd &&
+ smc_ism_support_dmb_nocopy(new_smc->conn.lgr->smcd)) {
+ rc = smcd_buf_attach(new_smc);
+ if (rc)
+ goto out_decl;
+ }
+
smc_listen_out_connected(new_smc);
SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
goto out_free;
@@ -2672,7 +2689,7 @@ out:
}
static int smc_accept(struct socket *sock, struct socket *new_sock,
- int flags, bool kern)
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk, *nsk;
DECLARE_WAITQUEUE(wait, current);
@@ -2691,7 +2708,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
}
/* Wait for an incoming connection */
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
add_wait_queue_exclusive(sk_sleep(sk), &wait);
while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -2718,7 +2735,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
if (rc)
goto out;
- if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
+ if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) {
/* wait till data arrives on the socket */
timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
MSEC_PER_SEC);
@@ -3555,15 +3572,23 @@ static int __init smc_init(void)
goto out_sock;
}
+ rc = smc_loopback_init();
+ if (rc) {
+ pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc);
+ goto out_ib;
+ }
+
rc = tcp_register_ulp(&smc_ulp_ops);
if (rc) {
pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
- goto out_ib;
+ goto out_lo;
}
static_branch_enable(&tcp_have_smc);
return 0;
+out_lo:
+ smc_loopback_exit();
out_ib:
smc_ib_unregister_client();
out_sock:
@@ -3601,6 +3626,7 @@ static void __exit smc_exit(void)
tcp_unregister_ulp(&smc_ulp_ops);
sock_unregister(PF_SMC);
smc_core_exit();
+ smc_loopback_exit();
smc_ib_unregister_client();
smc_ism_exit();
destroy_workqueue(smc_close_wq);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 3c06625ceb20..619b3bab3824 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -18,6 +18,7 @@
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"
+#include "smc_ism.h"
/********************************** send *************************************/
@@ -255,6 +256,14 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
return rc;
smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+
+ if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
+ /* if local sndbuf shares the same memory region with
+ * peer DMB, then don't update the tx_curs_fin
+ * and sndbuf_space until peer has consumed the data.
+ */
+ return 0;
+
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -266,7 +275,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn)
smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn);
smc_tx_sndbuf_nonfull(smc);
- return rc;
+ return 0;
}
/********************************* receive ***********************************/
@@ -323,7 +332,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
{
union smc_host_cursor cons_old, prod_old;
struct smc_connection *conn = &smc->conn;
- int diff_cons, diff_prod;
+ int diff_cons, diff_prod, diff_tx;
smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn);
smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn);
@@ -339,6 +348,29 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
atomic_add(diff_cons, &conn->peer_rmbe_space);
/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
smp_mb__after_atomic();
+
+ /* if local sndbuf shares the same memory region with
+ * peer RMB, then update tx_curs_fin and sndbuf_space
+ * here since peer has already consumed the data.
+ */
+ if (conn->lgr->is_smcd &&
+ smc_ism_support_dmb_nocopy(conn->lgr->smcd)) {
+ /* Calculate consumed data and
+ * increment free send buffer space.
+ */
+ diff_tx = smc_curs_diff(conn->sndbuf_desc->len,
+ &conn->tx_curs_fin,
+ &conn->local_rx_ctrl.cons);
+ /* increase local sndbuf space and fin_curs */
+ smp_mb__before_atomic();
+ atomic_add(diff_tx, &conn->sndbuf_space);
+ /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
+ smp_mb__after_atomic();
+ smc_curs_copy(&conn->tx_curs_fin,
+ &conn->local_rx_ctrl.cons, conn);
+
+ smc_tx_sndbuf_nonfull(smc);
+ }
}
diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old,
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 9b84d5897aa5..fafdb97adfad 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1149,6 +1149,20 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
}
}
+static void smcd_buf_detach(struct smc_connection *conn)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+
+ if (!conn->sndbuf_desc)
+ return;
+
+ smc_ism_detach_dmb(smcd, peer_token);
+
+ kfree(conn->sndbuf_desc);
+ conn->sndbuf_desc = NULL;
+}
+
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
@@ -1192,6 +1206,8 @@ void smc_conn_free(struct smc_connection *conn)
if (lgr->is_smcd) {
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
+ if (smc_ism_support_dmb_nocopy(lgr->smcd))
+ smcd_buf_detach(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_wait_pend_tx_wr(conn);
@@ -1445,6 +1461,8 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
smc_sk_wake_ups(smc);
if (conn->lgr->is_smcd) {
smc_ism_unset_conn(conn);
+ if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
+ smcd_buf_detach(conn);
if (soft)
tasklet_kill(&conn->rx_tsklet);
else
@@ -2464,12 +2482,18 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
int rc;
/* create send buffer */
+ if (is_smcd &&
+ smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd))
+ goto create_rmb;
+
rc = __smc_buf_create(smc, is_smcd, false);
if (rc)
return rc;
+
+create_rmb:
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
- if (rc) {
+ if (rc && smc->conn.sndbuf_desc) {
down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list);
up_write(&smc->conn.lgr->sndbufs_lock);
@@ -2479,6 +2503,41 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
return rc;
}
+int smcd_buf_attach(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+ struct smc_buf_desc *buf_desc;
+ int rc;
+
+ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+ if (!buf_desc)
+ return -ENOMEM;
+
+ /* The ghost sndbuf_desc describes the same memory region as
+ * peer RMB. Its lifecycle is consistent with the connection's
+ * and it will be freed with the connections instead of the
+ * link group.
+ */
+ rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+ if (rc)
+ goto free;
+
+ smc->sk.sk_sndbuf = buf_desc->len;
+ buf_desc->cpu_addr =
+ (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+ buf_desc->len -= sizeof(struct smcd_cdc_msg);
+ conn->sndbuf_desc = buf_desc;
+ conn->sndbuf_desc->used = 1;
+ atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+ return 0;
+
+free:
+ kfree(buf_desc);
+ return rc;
+}
+
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 1f175376037b..d93cf51dbd7c 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -557,6 +557,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_sock *smc);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 97704a9e84c7..9297dc20bfe2 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -209,13 +209,18 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
- goto out;
- neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr);
- if (neigh) {
- memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
- *uses_gateway = rt->rt_uses_gateway;
- return 0;
- }
+ goto out_rt;
+ neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr);
+ if (!neigh)
+ goto out_rt;
+ memcpy(nexthop_mac, neigh->ha, ETH_ALEN);
+ *uses_gateway = rt->rt_uses_gateway;
+ neigh_release(neigh);
+ ip_rt_put(rt);
+ return 0;
+
+out_rt:
+ ip_rt_put(rt);
out:
return -ENOENT;
}
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index ac88de2a06a0..84f98e18c7db 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -91,6 +91,11 @@ bool smc_ism_is_v2_capable(void)
return smc_ism_v2_capable;
}
+void smc_ism_set_v2_capable(void)
+{
+ smc_ism_v2_capable = true;
+}
+
/* Set a connection using this DMBE. */
void smc_ism_set_conn(struct smc_connection *conn)
{
@@ -126,6 +131,8 @@ int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
if (!vlanid) /* No valid vlan id */
return -EINVAL;
+ if (!smcd->ops->add_vlan_id)
+ return -EOPNOTSUPP;
/* create new vlan entry, in case we need it */
new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
@@ -171,6 +178,8 @@ int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
if (!vlanid) /* No valid vlan id */
return -EINVAL;
+ if (!smcd->ops->del_vlan_id)
+ return -EOPNOTSUPP;
spin_lock_irqsave(&smcd->lock, flags);
list_for_each_entry(vlan, &smcd->vlan, list) {
@@ -222,7 +231,6 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
struct smc_buf_desc *dmb_desc)
{
-#if IS_ENABLED(CONFIG_ISM)
struct smcd_dmb dmb;
int rc;
@@ -231,7 +239,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
dmb.rgid = lgr->peer_gid.gid;
- rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client);
+ rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, lgr->smcd->client);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
dmb_desc->token = dmb.dmb_tok;
@@ -240,9 +248,46 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb_desc->len = dmb.dmb_len;
}
return rc;
-#else
- return 0;
-#endif
+}
+
+bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd)
+{
+ /* for now only loopback-ism supports
+ * merging sndbuf with peer DMB to avoid
+ * data copies between them.
+ */
+ return (smcd->ops->support_dmb_nocopy &&
+ smcd->ops->support_dmb_nocopy(smcd));
+}
+
+int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token,
+ struct smc_buf_desc *dmb_desc)
+{
+ struct smcd_dmb dmb;
+ int rc = 0;
+
+ if (!dev->ops->attach_dmb)
+ return -EINVAL;
+
+ memset(&dmb, 0, sizeof(dmb));
+ dmb.dmb_tok = token;
+ rc = dev->ops->attach_dmb(dev, &dmb);
+ if (!rc) {
+ dmb_desc->sba_idx = dmb.sba_idx;
+ dmb_desc->token = dmb.dmb_tok;
+ dmb_desc->cpu_addr = dmb.cpu_addr;
+ dmb_desc->dma_addr = dmb.dma_addr;
+ dmb_desc->len = dmb.dmb_len;
+ }
+ return rc;
+}
+
+int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token)
+{
+ if (!dev->ops->detach_dmb)
+ return -EINVAL;
+
+ return dev->ops->detach_dmb(dev, token);
}
static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
@@ -322,6 +367,8 @@ static void smc_nl_prep_smcd_dev(struct smcd_dev_list *dev_list,
list_for_each_entry(smcd, &dev_list->list, list) {
if (num < snum)
goto next;
+ if (smc_ism_is_loopback(smcd))
+ goto next;
if (smc_nl_handle_smcd_dev(smcd, skb, cb))
goto errout;
next:
@@ -372,7 +419,8 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id);
break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
- if (ev_info.code == ISM_EVENT_REQUEST) {
+ if (ev_info.code == ISM_EVENT_REQUEST &&
+ wrk->smcd->ops->signal_event) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
&peer_gid,
@@ -436,7 +484,7 @@ static struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
static void smcd_register_dev(struct ism_dev *ism)
{
const struct smcd_ops *ops = ism_get_smcd_ops();
- struct smcd_dev *smcd;
+ struct smcd_dev *smcd, *fentry;
if (!ops)
return;
@@ -446,20 +494,28 @@ static void smcd_register_dev(struct ism_dev *ism)
if (!smcd)
return;
smcd->priv = ism;
+ smcd->client = &smc_ism_client;
ism_set_priv(ism, &smc_ism_client, smcd);
if (smc_pnetid_by_dev_port(&ism->pdev->dev, 0, smcd->pnetid))
smc_pnetid_by_table_smcd(smcd);
+ if (smcd->ops->supports_v2())
+ smc_ism_set_v2_capable();
mutex_lock(&smcd_dev_list.mutex);
- if (list_empty(&smcd_dev_list.list)) {
- if (smcd->ops->supports_v2())
- smc_ism_v2_capable = true;
- }
- /* sort list: devices without pnetid before devices with pnetid */
- if (smcd->pnetid[0])
+ /* sort list:
+ * - devices without pnetid before devices with pnetid;
+ * - loopback-ism always at the very beginning;
+ */
+ if (!smcd->pnetid[0]) {
+ fentry = list_first_entry_or_null(&smcd_dev_list.list,
+ struct smcd_dev, list);
+ if (fentry && smc_ism_is_loopback(fentry))
+ list_add(&smcd->list, &fentry->list);
+ else
+ list_add(&smcd->list, &smcd_dev_list.list);
+ } else {
list_add_tail(&smcd->list, &smcd_dev_list.list);
- else
- list_add(&smcd->list, &smcd_dev_list.list);
+ }
mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
@@ -541,6 +597,8 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr)
if (lgr->peer_shutdown)
return 0;
+ if (!lgr->smcd->ops->signal_event)
+ return 0;
memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
ev_info.vlan_id = lgr->vlan_id;
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 165cd013404b..6763133dd8d0 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -48,10 +48,15 @@ int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
struct smc_buf_desc *dmb_desc);
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
+bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd);
+int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token,
+ struct smc_buf_desc *dmb_desc);
+int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token);
int smc_ism_signal_shutdown(struct smc_link_group *lgr);
void smc_ism_get_system_eid(u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
bool smc_ism_is_v2_capable(void);
+void smc_ism_set_v2_capable(void);
int smc_ism_init(void);
void smc_ism_exit(void);
int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
@@ -84,4 +89,9 @@ static inline bool smc_ism_is_emulated(struct smcd_dev *smcd)
return __smc_ism_is_emulated(chid);
}
+/* A device counts as loopback-ism when its get_chid callback reports
+ * the CHID 0xFFFF.
+ */
+static inline bool smc_ism_is_loopback(struct smcd_dev *smcd)
+{
+	u16 chid = smcd->ops->get_chid(smcd);
+
+	return chid == 0xFFFF;
+}
+
#endif
diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
new file mode 100644
index 000000000000..3c5f64ca4115
--- /dev/null
+++ b/net/smc/smc_loopback.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shared Memory Communications Direct over loopback-ism device.
+ *
+ * Functions for loopback-ism device.
+ *
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: Wen Gu <guwen@linux.alibaba.com>
+ * Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/types.h>
+#include <net/smc.h>
+
+#include "smc_cdc.h"
+#include "smc_ism.h"
+#include "smc_loopback.h"
+
+#define SMC_LO_V2_CAPABLE 0x1 /* loopback-ism acts as ISMv2 */
+#define SMC_LO_SUPPORT_NOCOPY 0x1 /* value returned by smc_lo_support_dmb_nocopy() */
+#define SMC_DMA_ADDR_INVALID (~(dma_addr_t)0) /* stored as dma_addr of every loopback DMB */
+
+static const char smc_lo_dev_name[] = "loopback-ism"; /* name given to the struct device */
+static struct smc_lo_dev *lo_dev; /* set by smc_lo_dev_probe() on success */
+
+/* Fill in the identifiers of the loopback device: the local GID is taken
+ * from a freshly generated UUID, the CHID is the reserved loopback CHID.
+ */
+static void smc_lo_generate_ids(struct smc_lo_dev *ldev)
+{
+	struct smcd_gid *gid = &ldev->local_gid;
+	const u8 *raw;
+	uuid_t uuid;
+
+	uuid_gen(&uuid);
+	raw = (const u8 *)&uuid;
+
+	/* leading UUID bytes become gid, the bytes after them gid_ext */
+	memcpy(&gid->gid, raw, sizeof(gid->gid));
+	memcpy(&gid->gid_ext, raw + sizeof(gid->gid), sizeof(gid->gid_ext));
+
+	ldev->chid = SMC_LO_RESERVED_CHID;
+}
+
+/* Check whether the remote GID @rgid is reachable through loopback-ism.
+ * @vid_valid and @vid are not evaluated.
+ *
+ * Returns 0 when @rgid equals the local GID, -ENETUNREACH otherwise or
+ * when the device has no private data.
+ */
+static int smc_lo_query_rgid(struct smcd_dev *smcd, struct smcd_gid *rgid,
+			     u32 vid_valid, u32 vid)
+{
+	struct smc_lo_dev *ldev = smcd->priv;
+
+	if (!ldev)
+		return -ENETUNREACH;
+
+	/* rgid should be the same as lgid */
+	if (rgid->gid == ldev->local_gid.gid &&
+	    rgid->gid_ext == ldev->local_gid.gid_ext)
+		return 0;
+
+	return -ENETUNREACH;
+}
+
+/* Register a new DMB of dmb->dmb_len bytes on the loopback device.
+ *
+ * A free slot in sba_idx_mask is claimed, a zeroed buffer is allocated
+ * and the DMB is inserted into the hash table under a random token that
+ * is not yet in use. On success the slot index, token, buffer address,
+ * dma address and length are written back to @dmb. @client_priv is not
+ * used.
+ *
+ * Returns 0 on success, -ENOSPC when all SMC_LO_MAX_DMBS slots are
+ * taken, -ENOMEM when an allocation fails.
+ */
+static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
+			       void *client_priv)
+{
+	struct smc_lo_dev *ldev = smcd->priv;
+	struct smc_lo_dmb_node *node, *cur;
+	int slot, rc;
+	bool clash;
+
+	/* check space for new dmb; test_and_set_bit() claims the slot
+	 * atomically, so a slot taken in the meantime is skipped.
+	 */
+	for_each_clear_bit(slot, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) {
+		if (!test_and_set_bit(slot, ldev->sba_idx_mask))
+			break;
+	}
+	if (slot == SMC_LO_MAX_DMBS)
+		return -ENOSPC;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node) {
+		rc = -ENOMEM;
+		goto err_bit;
+	}
+
+	node->sba_idx = slot;
+	node->len = dmb->dmb_len;
+	node->cpu_addr = kzalloc(node->len, GFP_KERNEL | __GFP_NOWARN |
+				 __GFP_NORETRY | __GFP_NOMEMALLOC);
+	if (!node->cpu_addr) {
+		rc = -ENOMEM;
+		goto err_node;
+	}
+	node->dma_addr = SMC_DMA_ADDR_INVALID;
+	refcount_set(&node->refcnt, 1);
+
+	/* add new dmb into hash table: draw random tokens until one is
+	 * found that no other dmb uses.
+	 */
+	do {
+		clash = false;
+		get_random_bytes(&node->token, sizeof(node->token));
+
+		write_lock_bh(&ldev->dmb_ht_lock);
+		hash_for_each_possible(ldev->dmb_ht, cur, list, node->token) {
+			if (cur->token == node->token) {
+				clash = true;
+				break;
+			}
+		}
+		if (!clash)
+			hash_add(ldev->dmb_ht, &node->list, node->token);
+		write_unlock_bh(&ldev->dmb_ht_lock);
+	} while (clash);
+	atomic_inc(&ldev->dmb_cnt);
+
+	/* report the new dmb to the caller */
+	dmb->sba_idx = node->sba_idx;
+	dmb->dmb_tok = node->token;
+	dmb->cpu_addr = node->cpu_addr;
+	dmb->dma_addr = node->dma_addr;
+	dmb->dmb_len = node->len;
+
+	return 0;
+
+err_node:
+	kfree(node);
+err_bit:
+	clear_bit(slot, ldev->sba_idx_mask);
+	return rc;
+}
+
+/* Tear down a DMB whose last reference has been dropped: unhash it,
+ * release its slot and memory, and wake up the waiter on ldev_release
+ * once no DMB is left on the device.
+ */
+static void __smc_lo_unregister_dmb(struct smc_lo_dev *ldev,
+				    struct smc_lo_dmb_node *dmb_node)
+{
+	u32 slot = dmb_node->sba_idx;
+	void *buf = dmb_node->cpu_addr;
+
+	/* remove dmb from hash table */
+	write_lock_bh(&ldev->dmb_ht_lock);
+	hash_del(&dmb_node->list);
+	write_unlock_bh(&ldev->dmb_ht_lock);
+
+	clear_bit(slot, ldev->sba_idx_mask);
+	kvfree(buf);
+	kfree(dmb_node);
+
+	/* smc_lo_dev_exit() waits for dmb_cnt to drop to zero */
+	if (atomic_dec_and_test(&ldev->dmb_cnt))
+		wake_up(&ldev->ldev_release);
+}
+
+/* Drop the registration reference of the DMB identified by
+ * dmb->dmb_tok; the DMB is destroyed when this was the last reference.
+ *
+ * Returns 0 on success, -EINVAL when no DMB with that token exists.
+ */
+static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+	struct smc_lo_dev *ldev = smcd->priv;
+	struct smc_lo_dmb_node *found = NULL;
+	struct smc_lo_dmb_node *cur;
+
+	/* find dmb from hash table */
+	read_lock_bh(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, cur, list, dmb->dmb_tok) {
+		if (cur->token == dmb->dmb_tok) {
+			found = cur;
+			break;
+		}
+	}
+	read_unlock_bh(&ldev->dmb_ht_lock);
+
+	if (!found)
+		return -EINVAL;
+
+	if (refcount_dec_and_test(&found->refcnt))
+		__smc_lo_unregister_dmb(ldev, found);
+	return 0;
+}
+
+/* support_dmb_nocopy callback: loopback-ism always reports
+ * SMC_LO_SUPPORT_NOCOPY, independent of @smcd.
+ */
+static int smc_lo_support_dmb_nocopy(struct smcd_dev *smcd)
+{
+	return SMC_LO_SUPPORT_NOCOPY;
+}
+
+/* Attach to the registered DMB identified by dmb->dmb_tok.
+ *
+ * On success a reference on the DMB is taken (dropped again by
+ * smc_lo_detach_dmb()) and the slot index, token, buffer address, dma
+ * address and length are written back to @dmb.
+ *
+ * Returns 0 on success, -EINVAL when the token is unknown or the DMB is
+ * already being unregistered.
+ */
+static int smc_lo_attach_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+	struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+	struct smc_lo_dev *ldev = smcd->priv;
+
+	/* Find dmb_node according to dmb->dmb_tok and take the reference
+	 * while dmb_ht_lock is still held. The caller owns no reference
+	 * yet, and __smc_lo_unregister_dmb() needs the write lock to
+	 * unhash a node before freeing it, so only under the lock is the
+	 * node guaranteed not to be freed underneath us.
+	 */
+	read_lock_bh(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+		if (tmp_node->token != dmb->dmb_tok)
+			continue;
+		/* a zero refcount means the dmb is being unregistered,
+		 * but has not been removed from the hash table.
+		 */
+		if (refcount_inc_not_zero(&tmp_node->refcnt))
+			dmb_node = tmp_node;
+		break;
+	}
+	read_unlock_bh(&ldev->dmb_ht_lock);
+
+	if (!dmb_node)
+		return -EINVAL;
+
+	/* provide dmb information */
+	dmb->sba_idx = dmb_node->sba_idx;
+	dmb->dmb_tok = dmb_node->token;
+	dmb->cpu_addr = dmb_node->cpu_addr;
+	dmb->dma_addr = dmb_node->dma_addr;
+	dmb->dmb_len = dmb_node->len;
+	return 0;
+}
+
+/* Drop one attachment reference of the DMB identified by @token; the
+ * DMB is destroyed when this was the last reference.
+ *
+ * Returns 0 on success, -EINVAL when no DMB with that token exists.
+ */
+static int smc_lo_detach_dmb(struct smcd_dev *smcd, u64 token)
+{
+	struct smc_lo_dev *ldev = smcd->priv;
+	struct smc_lo_dmb_node *found = NULL;
+	struct smc_lo_dmb_node *cur;
+
+	/* find dmb_node according to token */
+	read_lock_bh(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, cur, list, token) {
+		if (cur->token == token) {
+			found = cur;
+			break;
+		}
+	}
+	read_unlock_bh(&ldev->dmb_ht_lock);
+
+	if (!found)
+		return -EINVAL;
+
+	if (refcount_dec_and_test(&found->refcnt))
+		__smc_lo_unregister_dmb(ldev, found);
+	return 0;
+}
+
+/* Copy @size bytes from @data to @offset inside the DMB identified by
+ * @dmb_tok and schedule the receive tasklet of the connection bound to
+ * that DMB's slot.
+ *
+ * Nothing is done when @sf is false. @idx is not used.
+ *
+ * Returns 0 on success (or when @sf is false), -EINVAL when the token
+ * is unknown, -EPIPE when no connection is bound to the slot or the
+ * connection has been killed.
+ */
+static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
+			    unsigned int idx, bool sf, unsigned int offset,
+			    void *data, unsigned int size)
+{
+	struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
+	struct smc_lo_dev *ldev = smcd->priv;
+	struct smc_connection *conn;
+	u32 sba_idx;
+
+	if (!sf)
+		/* since sndbuf is merged with peer DMB, there is
+		 * no need to copy data from sndbuf to peer DMB.
+		 */
+		return 0;
+
+	read_lock_bh(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
+		if (tmp_node->token == dmb_tok) {
+			rmb_node = tmp_node;
+			break;
+		}
+	}
+	if (!rmb_node) {
+		read_unlock_bh(&ldev->dmb_ht_lock);
+		return -EINVAL;
+	}
+	memcpy((char *)rmb_node->cpu_addr + offset, data, size);
+	/* read everything needed from the node before the lock that keeps
+	 * it in the hash table is released.
+	 */
+	sba_idx = rmb_node->sba_idx;
+	read_unlock_bh(&ldev->dmb_ht_lock);
+
+	conn = smcd->conn[sba_idx];
+	if (!conn || conn->killed)
+		return -EPIPE;
+	tasklet_schedule(&conn->rx_tsklet);
+	return 0;
+}
+
+/* supports_v2 callback: loopback-ism always reports SMC_LO_V2_CAPABLE. */
+static int smc_lo_supports_v2(void)
+{
+	return SMC_LO_V2_CAPABLE;
+}
+
+/* get_local_gid callback: copy both halves of the device's local GID
+ * into @smcd_gid.
+ */
+static void smc_lo_get_local_gid(struct smcd_dev *smcd,
+				 struct smcd_gid *smcd_gid)
+{
+	const struct smc_lo_dev *ldev = smcd->priv;
+	const struct smcd_gid *lgid = &ldev->local_gid;
+
+	smcd_gid->gid = lgid->gid;
+	smcd_gid->gid_ext = lgid->gid_ext;
+}
+
+/* get_chid callback: return the CHID stored in the loopback device. */
+static u16 smc_lo_get_chid(struct smcd_dev *smcd)
+{
+	struct smc_lo_dev *ldev = smcd->priv;
+
+	return ldev->chid;
+}
+
+/* get_dev callback: return the struct device embedded in the loopback
+ * device.
+ */
+static struct device *smc_lo_get_dev(struct smcd_dev *smcd)
+{
+	struct smc_lo_dev *ldev = smcd->priv;
+
+	return &ldev->dev;
+}
+
+/* Device operations of loopback-ism. Callbacks set to NULL are not
+ * provided by this device.
+ */
+static const struct smcd_ops lo_ops = {
+	/* device identity */
+	.supports_v2 = smc_lo_supports_v2,
+	.get_local_gid = smc_lo_get_local_gid,
+	.get_chid = smc_lo_get_chid,
+	.get_dev = smc_lo_get_dev,
+	.query_remote_gid = smc_lo_query_rgid,
+
+	/* DMB handling and data transfer */
+	.register_dmb = smc_lo_register_dmb,
+	.unregister_dmb = smc_lo_unregister_dmb,
+	.support_dmb_nocopy = smc_lo_support_dmb_nocopy,
+	.attach_dmb = smc_lo_attach_dmb,
+	.detach_dmb = smc_lo_detach_dmb,
+	.move_data = smc_lo_move_data,
+
+	/* not provided */
+	.add_vlan_id = NULL,
+	.del_vlan_id = NULL,
+	.set_vlan_required = NULL,
+	.reset_vlan_required = NULL,
+	.signal_event = NULL,
+};
+
+/* Allocate and initialize an smcd_dev with room for @max_dmbs connection
+ * pointers and @ops as its operations.
+ *
+ * Returns the new device, or NULL when an allocation fails.
+ */
+static struct smcd_dev *smcd_lo_alloc_dev(const struct smcd_ops *ops,
+					  int max_dmbs)
+{
+	struct smcd_dev *smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+
+	if (!smcd)
+		return NULL;
+
+	smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
+			     GFP_KERNEL);
+	if (!smcd->conn) {
+		kfree(smcd);
+		return NULL;
+	}
+
+	smcd->ops = ops;
+
+	spin_lock_init(&smcd->lock);
+	spin_lock_init(&smcd->lgr_lock);
+	INIT_LIST_HEAD(&smcd->vlan);
+	INIT_LIST_HEAD(&smcd->lgr_list);
+	init_waitqueue_head(&smcd->lgrs_deleted);
+	return smcd;
+}
+
+/* Create the smcd_dev for @ldev, link the two together and put the new
+ * device at the head of smcd_dev_list.
+ *
+ * Returns 0 on success, -ENOMEM when the smcd_dev cannot be allocated.
+ */
+static int smcd_lo_register_dev(struct smc_lo_dev *ldev)
+{
+	struct smcd_dev *smcd = smcd_lo_alloc_dev(&lo_ops, SMC_LO_MAX_DMBS);
+
+	if (!smcd)
+		return -ENOMEM;
+
+	/* cross-link the loopback device and its smcd_dev */
+	smcd->priv = ldev;
+	ldev->smcd = smcd;
+	smc_ism_set_v2_capable();
+
+	mutex_lock(&smcd_dev_list.mutex);
+	list_add(&smcd->list, &smcd_dev_list.list);
+	mutex_unlock(&smcd_dev_list.mutex);
+
+	pr_warn_ratelimited("smc: adding smcd device %s\n",
+			    dev_name(&ldev->dev));
+	return 0;
+}
+
+/* Undo smcd_lo_register_dev(): mark the smcd_dev as going away,
+ * terminate everything still using it, take it off smcd_dev_list and
+ * free it.
+ */
+static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev)
+{
+	struct smcd_dev *smcd = ldev->smcd;
+
+	pr_warn_ratelimited("smc: removing smcd device %s\n",
+			    dev_name(&ldev->dev));
+
+	smcd->going_away = 1;
+	smc_smcd_terminate_all(smcd);
+
+	mutex_lock(&smcd_dev_list.mutex);
+	list_del_init(&smcd->list);
+	mutex_unlock(&smcd_dev_list.mutex);
+
+	kfree(smcd->conn);
+	kfree(smcd);
+}
+
+/* Initialize the bookkeeping of @ldev (wait queue, DMB counter, hash
+ * table and its lock, identifiers) and register the device.
+ *
+ * Returns the result of smcd_lo_register_dev().
+ */
+static int smc_lo_dev_init(struct smc_lo_dev *ldev)
+{
+	init_waitqueue_head(&ldev->ldev_release);
+	atomic_set(&ldev->dmb_cnt, 0);
+	hash_init(ldev->dmb_ht);
+	rwlock_init(&ldev->dmb_ht_lock);
+	smc_lo_generate_ids(ldev);
+
+	return smcd_lo_register_dev(ldev);
+}
+
+/* Unregister the device and block until every DMB has been released. */
+static void smc_lo_dev_exit(struct smc_lo_dev *ldev)
+{
+	smcd_lo_unregister_dev(ldev);
+
+	/* nothing to wait for when no DMB is left */
+	if (!atomic_read(&ldev->dmb_cnt))
+		return;
+
+	wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt));
+}
+
+/* release callback of the embedded struct device: free the enclosing
+ * smc_lo_dev.
+ */
+static void smc_lo_dev_release(struct device *dev)
+{
+	kfree(container_of(dev, struct smc_lo_dev, dev));
+}
+
+/* Allocate, name and initialize the single loopback-ism device and
+ * publish it in the global lo_dev.
+ *
+ * Returns 0 on success, -ENOMEM when the device cannot be allocated, or
+ * the error from dev_set_name() / smc_lo_dev_init(). After
+ * device_initialize() the device is released through put_device(),
+ * which frees it via smc_lo_dev_release().
+ */
+static int smc_lo_dev_probe(void)
+{
+	struct smc_lo_dev *ldev;
+	int ret;
+
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev)
+		return -ENOMEM;
+
+	ldev->dev.parent = NULL;
+	ldev->dev.release = smc_lo_dev_release;
+	device_initialize(&ldev->dev);
+
+	/* dev_set_name() takes a printf-style format and can fail: pass
+	 * the name as an argument, not as the format, and check the result.
+	 */
+	ret = dev_set_name(&ldev->dev, "%s", smc_lo_dev_name);
+	if (ret)
+		goto free_dev;
+
+	ret = smc_lo_dev_init(ldev);
+	if (ret)
+		goto free_dev;
+
+	lo_dev = ldev; /* global loopback device */
+	return 0;
+
+free_dev:
+	put_device(&ldev->dev);
+	return ret;
+}
+
+/* Shut down and release the global loopback device, if there is one.
+ * lo_dev is cleared afterwards so that it never points to freed memory
+ * and a repeated call is a no-op.
+ */
+static void smc_lo_dev_remove(void)
+{
+	struct smc_lo_dev *ldev = lo_dev;
+
+	if (!ldev)
+		return;
+
+	smc_lo_dev_exit(ldev);
+	lo_dev = NULL;
+	put_device(&ldev->dev); /* device_initialize in smc_lo_dev_probe */
+}
+
+/* Entry point: set up the loopback-ism device.
+ * Returns the result of smc_lo_dev_probe().
+ */
+int smc_loopback_init(void)
+{
+	int rc = smc_lo_dev_probe();
+
+	return rc;
+}
+
+/* Exit point: tear down the loopback-ism device if one was created. */
+void smc_loopback_exit(void)
+{
+	smc_lo_dev_remove();
+}
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
new file mode 100644
index 000000000000..6dd4292dae56
--- /dev/null
+++ b/net/smc/smc_loopback.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications Direct over loopback-ism device.
+ *
+ * SMC-D loopback-ism device structure definitions.
+ *
+ * Copyright (c) 2024, Alibaba Inc.
+ *
+ * Author: Wen Gu <guwen@linux.alibaba.com>
+ * Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#ifndef _SMC_LOOPBACK_H
+#define _SMC_LOOPBACK_H
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <net/smc.h>
+
+#if IS_ENABLED(CONFIG_SMC_LO)
+#define SMC_LO_MAX_DMBS 5000
+#define SMC_LO_DMBS_HASH_BITS 12
+#define SMC_LO_RESERVED_CHID 0xFFFF
+
+struct smc_lo_dmb_node { /* one DMB registered on the loopback device */
+ struct hlist_node list; /* entry in smc_lo_dev.dmb_ht, keyed by token */
+ u64 token; /* random identifier, unique within dmb_ht */
+ u32 len; /* size of the buffer at cpu_addr in bytes */
+ u32 sba_idx; /* bit claimed in smc_lo_dev.sba_idx_mask */
+ void *cpu_addr; /* zeroed buffer allocated at registration */
+ dma_addr_t dma_addr; /* set to SMC_DMA_ADDR_INVALID at registration */
+ refcount_t refcnt; /* 1 at registration, +1 per attach; freed at 0 */
+};
+
+struct smc_lo_dev { /* state of the loopback-ism device */
+ struct smcd_dev *smcd; /* smcd_dev created in smcd_lo_register_dev() */
+ struct device dev; /* embedded device; its release frees this struct */
+ u16 chid; /* set to SMC_LO_RESERVED_CHID */
+ struct smcd_gid local_gid; /* taken from a generated UUID */
+ atomic_t dmb_cnt; /* number of DMBs currently registered */
+ rwlock_t dmb_ht_lock; /* protects dmb_ht */
+ DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS); /* slots in use */
+ DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS); /* DMBs by token */
+ wait_queue_head_t ldev_release; /* woken when dmb_cnt drops to 0 */
+};
+
+int smc_loopback_init(void);
+void smc_loopback_exit(void);
+#else
+static inline int smc_loopback_init(void) /* stub used when CONFIG_SMC_LO is not enabled */
+{
+ return 0;
+}
+
+static inline void smc_loopback_exit(void) /* stub used when CONFIG_SMC_LO is not enabled */
+{
+}
+#endif
+
+#endif /* _SMC_LOOPBACK_H */
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 4e8baa2e7ea4..13f2bc092db1 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -90,11 +90,11 @@ static struct ctl_table smc_table[] = {
.extra1 = &conns_per_lgr_min,
.extra2 = &conns_per_lgr_max,
},
- { }
};
int __net_init smc_sysctl_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(smc_table);
struct ctl_table *table;
table = smc_table;
@@ -105,12 +105,12 @@ int __net_init smc_sysctl_net_init(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++)
+ for (i = 0; i < table_size; i++)
table[i].data += (void *)net - (void *)&init_net;
}
net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table,
- ARRAY_SIZE(smc_table));
+ table_size);
if (!net->smc.smc_hdr)
goto err_reg;
diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
index 9fc5e586d24a..a9a6e3c1113a 100644
--- a/net/smc/smc_tracepoint.h
+++ b/net/smc/smc_tracepoint.h
@@ -60,7 +60,7 @@ DECLARE_EVENT_CLASS(smc_msg_event,
__entry->smc = smc;
__entry->net_cookie = sock_net(sk)->net_cookie;
__entry->len = len;
- __assign_str(name, smc->conn.lnk->ibname);
+ __assign_str(name);
),
TP_printk("smc=%p net=%llu len=%zu dev=%s",
@@ -104,7 +104,7 @@ TRACE_EVENT(smcr_link_down,
__entry->lgr = lgr;
__entry->net_cookie = lgr->net->net_cookie;
__entry->state = lnk->state;
- __assign_str(name, lnk->ibname);
+ __assign_str(name);
__entry->location = location;
),
diff --git a/net/socket.c b/net/socket.c
index e5f3af49a8b6..e416920e9399 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,7 +88,7 @@
#include <linux/xattr.h>
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>
-#include <linux/io_uring.h>
+#include <linux/io_uring/net.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -1890,7 +1890,7 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return __sys_listen(fd, backlog);
}
-struct file *do_accept(struct file *file, unsigned file_flags,
+struct file *do_accept(struct file *file, struct proto_accept_arg *arg,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
@@ -1926,8 +1926,8 @@ struct file *do_accept(struct file *file, unsigned file_flags,
if (err)
goto out_fd;
- err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
- false);
+ arg->flags |= sock->file->f_flags;
+ err = ops->accept(sock, newsock, arg);
if (err < 0)
goto out_fd;
@@ -1953,6 +1953,7 @@ out_fd:
static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
+ struct proto_accept_arg arg = { };
struct file *newfile;
int newfd;
@@ -1966,7 +1967,7 @@ static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_s
if (unlikely(newfd < 0))
return newfd;
- newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
+ newfile = do_accept(file, &arg, upeer_sockaddr, upeer_addrlen,
flags);
if (IS_ERR(newfile)) {
put_unused_fd(newfd);
@@ -3580,6 +3581,10 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
const struct proto_ops *ops = READ_ONCE(sock->ops);
+ struct proto_accept_arg arg = {
+ .flags = flags,
+ .kern = true,
+ };
int err;
err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
@@ -3587,7 +3592,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
if (err < 0)
goto done;
- err = ops->accept(sock, *newsock, flags, true);
+ err = ops->accept(sock, *newsock, &arg);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
index c53b329092d4..4ebc1b7043d9 100644
--- a/net/sunrpc/auth_gss/auth_gss_internal.h
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -23,7 +23,7 @@ simple_get_bytes(const void *p, const void *end, void *res, size_t len)
}
static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+simple_get_netobj_noprof(const void *p, const void *end, struct xdr_netobj *dest)
{
const void *q;
unsigned int len;
@@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
if (len) {
- dest->data = kmemdup(p, len, GFP_KERNEL);
+ dest->data = kmemdup_noprof(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
} else
@@ -43,3 +43,5 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
dest->len = len;
return q;
}
+
+#define simple_get_netobj(...) alloc_hooks(simple_get_netobj_noprof(__VA_ARGS__))
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 24de94184700..96ab50eda9c2 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1033,17 +1033,11 @@ null_verifier:
static void gss_free_in_token_pages(struct gssp_in_token *in_token)
{
- u32 inlen;
int i;
i = 0;
- inlen = in_token->page_len;
- while (inlen) {
- if (in_token->pages[i])
- put_page(in_token->pages[i]);
- inlen -= inlen > PAGE_SIZE ? PAGE_SIZE : inlen;
- }
-
+ while (in_token->pages[i])
+ put_page(in_token->pages[i++]);
kfree(in_token->pages);
in_token->pages = NULL;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 28f3749f6dc6..cfd1b1bf7e35 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1071,6 +1071,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.authflavor = old->cl_auth->au_flavor,
.cred = old->cl_cred,
.stats = old->cl_stats,
+ .timeout = old->cl_timeout,
};
struct rpc_clnt *clnt;
int err;
@@ -2698,8 +2699,19 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
goto out_msg_denied;
error = rpcauth_checkverf(task, xdr);
- if (error)
+ if (error) {
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+ if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ rpcauth_invalcred(task);
+ if (!task->tk_cred_retry)
+ goto out_err;
+ task->tk_cred_retry--;
+ trace_rpc__stale_creds(task);
+ return -EKEYREJECTED;
+ }
goto out_verifier;
+ }
p = xdr_inline_decode(xdr, sizeof(*p));
if (!p)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b33e429336fb..2b4b1276d4e8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1265,8 +1265,6 @@ svc_generic_init_request(struct svc_rqst *rqstp,
if (rqstp->rq_proc >= versp->vs_nproc)
goto err_bad_proc;
rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc];
- if (!procp)
- goto err_bad_proc;
/* Initialize storage for argp and resp */
memset(rqstp->rq_argp, 0, procp->pc_argzero);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b4a85a227bd7..dd86d7f1e97e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -46,7 +46,6 @@ static LIST_HEAD(svc_xprt_class_list);
/* SMP locking strategy:
*
- * svc_pool->sp_lock protects most of the fields of that pool.
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
* The "service mutex" protects svc_serv->sv_nrthread.
@@ -211,51 +210,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
-static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
- struct svc_serv *serv,
- struct net *net,
- const int family,
- const unsigned short port,
- int flags)
-{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_ANY),
- .sin_port = htons(port),
- };
-#if IS_ENABLED(CONFIG_IPV6)
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = htons(port),
- };
-#endif
- struct svc_xprt *xprt;
- struct sockaddr *sap;
- size_t len;
-
- switch (family) {
- case PF_INET:
- sap = (struct sockaddr *)&sin;
- len = sizeof(sin);
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case PF_INET6:
- sap = (struct sockaddr *)&sin6;
- len = sizeof(sin6);
- break;
-#endif
- default:
- return ERR_PTR(-EAFNOSUPPORT);
- }
-
- xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
- if (IS_ERR(xprt))
- trace_svc_xprt_create_err(serv->sv_program->pg_name,
- xcl->xcl_name, sap, len, xprt);
- return xprt;
-}
-
/**
* svc_xprt_received - start next receiver thread
* @xprt: controlling transport
@@ -294,9 +248,8 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
}
static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
- struct net *net, const int family,
- const unsigned short port, int flags,
- const struct cred *cred)
+ struct net *net, struct sockaddr *sap,
+ size_t len, int flags, const struct cred *cred)
{
struct svc_xprt_class *xcl;
@@ -312,8 +265,11 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
+ newxprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(newxprt)) {
+ trace_svc_xprt_create_err(serv->sv_program->pg_name,
+ xcl->xcl_name, sap, len,
+ newxprt);
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
}
@@ -330,6 +286,48 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
}
/**
+ * svc_xprt_create_from_sa - Add a new listener to @serv from socket address
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @sap: socket address pointer
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * The address length is derived from @sap->sa_family. If the transport
+ * class is not known yet, loading module "svc<xprt_name>" is requested
+ * and the creation is retried once.
+ *
+ * Return local xprt port on success, %-EAFNOSUPPORT for an unsupported
+ * address family, or %-EPROTONOSUPPORT on failure
+ */
+int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name,
+			    struct net *net, struct sockaddr *sap,
+			    int flags, const struct cred *cred)
+{
+	size_t salen;
+	int ret;
+
+	switch (sap->sa_family) {
+	case AF_INET:
+		salen = sizeof(struct sockaddr_in);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		salen = sizeof(struct sockaddr_in6);
+		break;
+#endif
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	ret = _svc_xprt_create(serv, xprt_name, net, sap, salen, flags, cred);
+	if (ret != -EPROTONOSUPPORT)
+		return ret;
+
+	/* the transport class may live in a module that is not loaded */
+	request_module("svc%s", xprt_name);
+	return _svc_xprt_create(serv, xprt_name, net, sap, salen, flags, cred);
+}
+EXPORT_SYMBOL_GPL(svc_xprt_create_from_sa);
+
+/**
* svc_xprt_create - Add a new listener to @serv
* @serv: target RPC service
* @xprt_name: transport class name
@@ -339,23 +337,41 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
* @flags: SVC_SOCK flags
* @cred: credential to bind to this transport
*
- * Return values:
- * %0: New listener added successfully
- * %-EPROTONOSUPPORT: Requested transport type not supported
+ * Return local xprt port on success, %-EAFNOSUPPORT when @family is not
+ * supported, or %-EPROTONOSUPPORT when the requested transport type is
+ * not supported
*/
int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
{
- int err;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = htons(port),
+ };
+#if IS_ENABLED(CONFIG_IPV6)
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = htons(port),
+ };
+#endif
+ struct sockaddr *sap;
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
- if (err == -EPROTONOSUPPORT) {
- request_module("svc%s", xprt_name);
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+ switch (family) {
+ case PF_INET:
+ sap = (struct sockaddr *)&sin;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case PF_INET6:
+ sap = (struct sockaddr *)&sin6;
+ break;
+#endif
+ default:
+ return -EAFNOSUPPORT;
}
- return err;
+
+ return svc_xprt_create_from_sa(serv, xprt_name, net, sap, flags, cred);
}
EXPORT_SYMBOL_GPL(svc_xprt_create);
@@ -1260,6 +1276,40 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
}
/**
+ * svc_find_listener - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @net: owner net pointer
+ * @sa: sockaddr containing address
+ *
+ * Search the permanent sockets of @serv, under sv_lock, for the first
+ * transport that belongs to @net, has class name @xcl_name and whose
+ * local address and port match @sa.
+ *
+ * Return the matching transport instance with a reference taken via
+ * svc_xprt_get(), or NULL when there is no match.
+ */
+struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name,
+				   struct net *net, const struct sockaddr *sa)
+{
+	struct svc_xprt *match = NULL;
+	struct svc_xprt *cur;
+
+	spin_lock_bh(&serv->sv_lock);
+	list_for_each_entry(cur, &serv->sv_permsocks, xpt_list) {
+		if (cur->xpt_net == net &&
+		    !strcmp(cur->xpt_class->xcl_name, xcl_name) &&
+		    rpc_cmp_addr_port(sa, (struct sockaddr *)&cur->xpt_local)) {
+			match = cur;
+			svc_xprt_get(cur);
+			break;
+		}
+	}
+	spin_unlock_bh(&serv->sv_lock);
+	return match;
+}
+EXPORT_SYMBOL_GPL(svc_find_listener);
+
+/**
* svc_find_xprt - find an RPC transport instance
* @serv: pointer to svc_serv to search
* @xcl_name: C string containing transport's class name
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 93941ab12549..5f3170a1c9bb 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -160,7 +160,6 @@ static struct ctl_table debug_table[] = {
.mode = 0444,
.proc_handler = proc_do_xprt,
},
- { }
};
void
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index f86970733eb0..474f7a98fe9e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -209,7 +209,6 @@ static struct ctl_table svcrdma_parm_table[] = {
.extra1 = &zero,
.extra2 = &zero,
},
- { },
};
static void svc_rdma_proc_cleanup(void)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 29b0562d62e7..9a8ce5df83ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -137,7 +137,6 @@ static struct ctl_table xr_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
#endif
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4f8d7efa469f..432557a553e7 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -244,7 +244,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
pr_info("rpcrdma: removing device %s for %pISpc\n",
ep->re_id->device->name, sap);
- fallthrough;
+ switch (xchg(&ep->re_connect_status, -ENODEV)) {
+ case 0: goto wake_connect_worker;
+ case 1: goto disconnected;
+ }
+ return 0;
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
goto disconnected;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bb9b747d58a1..dfc353eea8ed 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -160,7 +160,6 @@ static struct ctl_table xs_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
};
/*
@@ -2664,6 +2663,7 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work)
.xprtsec = {
.policy = RPC_XPRTSEC_NONE,
},
+ .stats = upper_clnt->cl_stats,
};
unsigned int pflags = current->flags;
struct rpc_clnt *lower_clnt;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index c9189a970eec..6488ead9e464 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -244,6 +244,99 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
return 0;
}
+/**
+ * switchdev_obj_id_to_helpful_msg - log a descriptive error for a failed
+ *	switchdev object operation
+ * @dev: net device the message is logged against
+ * @obj_id: id of the switchdev object the operation was attempted on
+ * @err: error code of the failed operation; printed both symbolically
+ *	(via %pe and ERR_PTR()) and numerically
+ * @add: true if the failed operation was an add, false if it was a delete
+ *
+ * Emits a single netdev_err() message that names the action and the object,
+ * followed by a description of the likely impact and, for error codes with
+ * a known cause (currently only -ENOSPC), the reason for the failure.
+ */
+static void switchdev_obj_id_to_helpful_msg(struct net_device *dev,
+					    enum switchdev_obj_id obj_id,
+					    int err, bool add)
+{
+	const char *action = add ? "add" : "del";
+	const char *reason = "";	/* stays empty unless @err is recognised */
+	const char *problem;
+	const char *obj_str;
+
+	/* Map the object id to a short name and an impact description. */
+	switch (obj_id) {
+	case SWITCHDEV_OBJ_ID_UNDEFINED:
+		obj_str = "Undefined object";
+		problem = "Attempted operation is undefined, indicating a possible programming\n"
+			  "error.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		obj_str = "VLAN entry";
+		problem = "Failure in VLAN settings on this port might disrupt network\n"
+			  "segmentation or traffic isolation, affecting network partitioning.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		obj_str = "Port Multicast Database entry";
+		problem = "Failure in updating the port's Multicast Database could lead to\n"
+			  "multicast forwarding issues.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_HOST_MDB:
+		obj_str = "Host Multicast Database entry";
+		problem = "Failure in updating the host's Multicast Database may impact multicast\n"
+			  "group memberships or traffic delivery, affecting multicast\n"
+			  "communication.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_MRP:
+		obj_str = "Media Redundancy Protocol configuration for port";
+		problem = "Failure to set MRP ring ID on this port prevents communication with\n"
+			  "the specified redundancy ring, resulting in an inability to engage\n"
+			  "in MRP-based network operations.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_RING_TEST_MRP:
+		obj_str = "MRP Test Frame Operations for port";
+		problem = "Failure to generate/monitor MRP test frames may lead to inability to\n"
+			  "assess the ring's operational integrity and fault response, hindering\n"
+			  "proactive network management.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
+		obj_str = "MRP Ring Role Configuration";
+		problem = "Improper MRP ring role configuration may create conflicts in the ring,\n"
+			  "disrupting communication for all participants, or isolate the local\n"
+			  "system from the ring, hindering its ability to communicate with other\n"
+			  "participants.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_RING_STATE_MRP:
+		obj_str = "MRP Ring State Configuration";
+		problem = "Failure to correctly set the MRP ring state can result in network\n"
+			  "loops or leave segments without communication. In a Closed state,\n"
+			  "it maintains loop prevention by blocking one MRM port, while an Open\n"
+			  "state activates in response to failures, changing port states to\n"
+			  "preserve network connectivity.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_IN_TEST_MRP:
+		obj_str = "MRP_InTest Frame Generation Configuration";
+		problem = "Failure in managing MRP_InTest frame generation can misjudge the\n"
+			  "interconnection ring's state, leading to incorrect blocking or\n"
+			  "unblocking of the I/C port. This misconfiguration might result\n"
+			  "in unintended network loops or isolate critical network segments,\n"
+			  "compromising network integrity and reliability.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_IN_ROLE_MRP:
+		obj_str = "Interconnection Ring Role Configuration";
+		problem = "Incorrect assignment of interconnection ring roles\n"
+			  "(MIM/MIC) can impair the formation of the interconnection rings.\n";
+		break;
+	case SWITCHDEV_OBJ_ID_IN_STATE_MRP:
+		obj_str = "Interconnection Ring State Configuration";
+		problem = "Failure in updating the interconnection ring state can lead in\n"
+			  "case of Open state to incorrect blocking or unblocking of the\n"
+			  "I/C port, resulting in unintended network loops or isolation\n"
+			  "of critical network segments.\n";
+		break;
+	default:
+		obj_str = "Unknown object";
+		problem = "Indicating a possible programming error.\n";
+		break;
+	}
+
+	/* Add a cause only for error codes that have a well-understood one. */
+	switch (err) {
+	case -ENOSPC:
+		reason = "Current HW/SW setup lacks sufficient resources.\n";
+		break;
+	default:
+		/* No extra hint; the %pe output below has to suffice. */
+		break;
+	}
+
+	netdev_err(dev, "Failed to %s %s (object id=%d) with error: %pe (%d).\n%s%s\n",
+		   action, obj_str, obj_id, ERR_PTR(err), err, problem, reason);
+}
+
static void switchdev_port_obj_add_deferred(struct net_device *dev,
const void *data)
{
@@ -254,8 +347,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
dev, obj, NULL);
if (err && err != -EOPNOTSUPP)
- netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
- err, obj->id);
+ switchdev_obj_id_to_helpful_msg(dev, obj->id, err, true);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
@@ -304,8 +396,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
err = switchdev_port_obj_del_now(dev, obj);
if (err && err != -EOPNOTSUPP)
- netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
- err, obj->id);
+ switchdev_obj_id_to_helpful_msg(dev, obj->id, err, false);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 051ed5f6fc93..f5017012a049 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -40,7 +40,7 @@ static int is_seen(struct ctl_table_set *set)
/* Return standard mode bits for table entry. */
static int net_ctl_permissions(struct ctl_table_header *head,
- struct ctl_table *table)
+ const struct ctl_table *table)
{
struct net *net = container_of(head->set, struct net, sysctls);
@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head,
}
static void net_ctl_set_ownership(struct ctl_table_header *head,
- struct ctl_table *table,
kuid_t *uid, kgid_t *gid)
{
struct net *net = container_of(head->set, struct net, sysctls);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 5c9fd4791c4b..76284fc538eb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- *buf = NULL;
if (skb_has_frag_list(frag) && __skb_linearize(frag))
goto err;
+ *buf = NULL;
frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
@@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (!head)
goto err;
+ /* Either the input skb ownership is transferred to headskb
+ * or the input skb is freed, clear the reference to avoid
+ * bad access on error path.
+ */
+ *buf = NULL;
if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
kfree_skb_partial(frag, headstolen);
} else {
@@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
*headbuf = NULL;
return 1;
}
- *buf = NULL;
return 0;
err:
kfree_skb(*buf);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 798397b6811e..2d58ecae4e21 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -146,8 +146,6 @@ static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
- bool kern);
static void tipc_sk_timeout(struct timer_list *t);
static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
@@ -2711,13 +2709,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
* tipc_accept - wait for connection request
* @sock: listening socket
* @new_sock: new socket that is to be connected
- * @flags: file-related flags associated with socket
- * @kern: caused by kernel or by userspace?
+ * @arg: arguments for accept
*
* Return: 0 on success, errno otherwise
*/
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
- bool kern)
+static int tipc_accept(struct socket *sock, struct socket *new_sock,
+ struct proto_accept_arg *arg)
{
struct sock *new_sk, *sk = sock->sk;
struct tipc_sock *new_tsock;
@@ -2733,14 +2730,14 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
res = -EINVAL;
goto exit;
}
- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
res = tipc_wait_for_accept(sock, timeo);
if (res)
goto exit;
buf = skb_peek(&sk->sk_receive_queue);
- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
+ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern);
if (res)
goto exit;
security_sk_clone(sock->sk, new_sock->sk);
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 9fb65c988f7f..30d2e06e3d8c 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -91,7 +91,6 @@ static struct ctl_table tipc_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {}
};
int tipc_register_sysctl(void)
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 04af83f0500c..865142ed0ab4 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -145,7 +145,7 @@ DECLARE_EVENT_CLASS(tipc_skb_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
tipc_skb_dump(skb, more, __get_str(buf));
),
@@ -172,7 +172,7 @@ DECLARE_EVENT_CLASS(tipc_list_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
tipc_list_dump(list, more, __get_str(buf));
),
@@ -200,7 +200,7 @@ DECLARE_EVENT_CLASS(tipc_sk_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
__entry->portid = tipc_sock_get_portid(sk);
tipc_sk_dump(sk, dqueues, __get_str(buf));
if (skb)
@@ -254,7 +254,7 @@ DECLARE_EVENT_CLASS(tipc_link_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
tipc_link_dump(l, dqueues, __get_str(buf));
),
@@ -337,7 +337,7 @@ DECLARE_EVENT_CLASS(tipc_node_class,
),
TP_fast_assign(
- __assign_str(header, header);
+ __assign_str(header);
__entry->addr = tipc_node_get_addr(n);
tipc_node_dump(n, more, __get_str(buf));
),
@@ -374,7 +374,7 @@ DECLARE_EVENT_CLASS(tipc_fsm_class,
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
__entry->os = os;
__entry->ns = ns;
__entry->evt = evt;
@@ -409,8 +409,8 @@ TRACE_EVENT(tipc_l2_device_event,
),
TP_fast_assign(
- __assign_str(dev_name, dev->name);
- __assign_str(b_name, b->name);
+ __assign_str(dev_name);
+ __assign_str(b_name);
__entry->evt = evt;
__entry->b_up = test_bit(0, &b->up);
__entry->carrier = netif_carrier_ok(dev);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index f892b0903dba..b849a3d133a0 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -174,7 +174,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
local_bh_disable();
ndst = dst_cache_get(cache);
if (dst->proto == htons(ETH_P_IP)) {
- struct rtable *rt = (struct rtable *)ndst;
+ struct rtable *rt = dst_rtable(ndst);
if (!rt) {
struct flowi4 fl = {
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 9237dded4467..f9e3d3d90dcf 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -278,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
for (i = 0; remaining > 0; i++) {
skb_frag_t *frag = &record->frags[i];
- __skb_frag_ref(frag, false);
+ __skb_frag_ref(frag);
sg_set_page(sg_in + i, skb_frag_page(frag),
skb_frag_size(frag), skb_frag_off(frag));
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b4674f03d71a..90b7f253d363 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -816,9 +816,17 @@ struct tls_context *tls_ctx_create(struct sock *sk)
return NULL;
mutex_init(&ctx->tx_lock);
- rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
ctx->sk_proto = READ_ONCE(sk->sk_prot);
ctx->sk = sk;
+ /* Release semantics of rcu_assign_pointer() ensure that
+ * ctx->sk_proto is visible before changing sk->sk_prot in
+ * update_sk_prot(), and prevent reading an uninitialized value in
+ * tls_{getsockopt,setsockopt}. Note that we do not need a
+ * read barrier in tls_{getsockopt,setsockopt} as there is an
+ * address dependency between sk->sk_prot->{getsockopt,setsockopt}
+ * and ctx->sk_proto.
+ */
+ rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
return ctx;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dc1651541723..e4af6616e1df 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -755,7 +755,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int, bool);
+static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
@@ -1689,19 +1689,18 @@ static void unix_sock_inherit_flags(const struct socket *old,
set_bit(SOCK_PASSSEC, &new->flags);
}
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int unix_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
struct sock *tsk;
- int err;
- err = -EOPNOTSUPP;
+ arg->err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out;
- err = -EINVAL;
+ arg->err = -EINVAL;
if (sk->sk_state != TCP_LISTEN)
goto out;
@@ -1709,12 +1708,12 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
* so that no locks are necessary.
*/
- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
- &err);
+ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &arg->err);
if (!skb) {
/* This means receive shutdown. */
- if (err == 0)
- err = -EINVAL;
+ if (arg->err == 0)
+ arg->err = -EINVAL;
goto out;
}
@@ -1732,7 +1731,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
return 0;
out:
- return err;
+ return arg->err;
}
@@ -2171,13 +2170,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
maybe_add_creds(skb, sock, other);
skb_get(skb);
+ scm_stat_add(other, skb);
+
+ spin_lock(&other->sk_receive_queue.lock);
if (ousk->oob_skb)
consume_skb(ousk->oob_skb);
-
WRITE_ONCE(ousk->oob_skb, skb);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
- scm_stat_add(other, skb);
- skb_queue_tail(&other->sk_receive_queue, skb);
sk_send_sigurg(other);
unix_state_unlock(other);
other->sk_data_ready(other);
@@ -2224,7 +2225,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_err;
}
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto pipe_err;
while (sent < len) {
@@ -2568,8 +2569,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
mutex_lock(&u->iolock);
unix_state_lock(sk);
+ spin_lock(&sk->sk_receive_queue.lock);
if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
mutex_unlock(&u->iolock);
return -EINVAL;
@@ -2581,6 +2584,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
WRITE_ONCE(u->oob_skb, NULL);
else
skb_get(oob_skb);
+
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
chunk = state->recv_actor(oob_skb, 0, chunk, state);
@@ -2609,6 +2614,10 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
consume_skb(skb);
skb = NULL;
} else {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
if (skb == u->oob_skb) {
if (copied) {
skb = NULL;
@@ -2620,13 +2629,19 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
} else if (flags & MSG_PEEK) {
skb = NULL;
} else {
- skb_unlink(skb, &sk->sk_receive_queue);
+ __skb_unlink(skb, &sk->sk_receive_queue);
WRITE_ONCE(u->oob_skb, NULL);
- if (!WARN_ON_ONCE(skb_unref(skb)))
- kfree_skb(skb);
+ unlinked_skb = skb;
skb = skb_peek(&sk->sk_receive_queue);
}
}
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ if (unlinked_skb) {
+ WARN_ON_ONCE(skb_unref(unlinked_skb));
+ kfree_skb(unlinked_skb);
+ }
}
return skb;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d76450133e4f..dfe94a90ece4 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -158,13 +158,11 @@ static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
unix_update_graph(unix_edge_successor(edge));
}
-static bool gc_in_progress;
-
static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
struct unix_vertex *vertex = edge->predecessor->vertex;
- if (!gc_in_progress)
+ if (!fpl->dead)
unix_update_graph(unix_edge_successor(edge));
list_del(&edge->vertex_entry);
@@ -240,7 +238,7 @@ void unix_del_edges(struct scm_fp_list *fpl)
unix_del_edge(fpl, edge);
} while (i < fpl->count_unix);
- if (!gc_in_progress) {
+ if (!fpl->dead) {
receiver = fpl->edges[0].successor;
receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
}
@@ -344,6 +342,18 @@ enum unix_recv_queue_lock_class {
U_RECVQ_LOCK_EMBRYO,
};
+/*
+ * Move every skb queued on @u's receive queue onto @hitlist and forget
+ * @u's pending OOB skb, if there is one.
+ */
+static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
+{
+	struct sk_buff_head *recvq = &u->sk.sk_receive_queue;
+
+	skb_queue_splice_init(recvq, hitlist);
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+	if (!u->oob_skb)
+		return;
+
+	/*
+	 * Only the reference is dropped here, the skb is not freed.
+	 * NOTE(review): presumably the OOB skb also sits on the queue that
+	 * was just spliced (queue_oob() takes an extra skb_get() before
+	 * queueing it), so this must never be the last reference; hence the
+	 * warning if skb_unref() reports otherwise.
+	 */
+	WARN_ON_ONCE(skb_unref(u->oob_skb));
+	u->oob_skb = NULL;
+#endif
+}
+
static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
{
struct unix_vertex *vertex;
@@ -367,18 +377,11 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist
/* listener -> embryo order, the inversion never happens. */
spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO);
- skb_queue_splice_init(embryo_queue, hitlist);
+ unix_collect_queue(unix_sk(skb->sk), hitlist);
spin_unlock(&embryo_queue->lock);
}
} else {
- skb_queue_splice_init(queue, hitlist);
-
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
- }
-#endif
+ unix_collect_queue(u, hitlist);
}
spin_unlock(&queue->lock);
@@ -559,9 +562,12 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
}
+static bool gc_in_progress;
+
static void __unix_gc(struct work_struct *work)
{
struct sk_buff_head hitlist;
+ struct sk_buff *skb;
spin_lock(&unix_gc_lock);
@@ -579,6 +585,11 @@ static void __unix_gc(struct work_struct *work)
spin_unlock(&unix_gc_lock);
+ skb_queue_walk(&hitlist, skb) {
+ if (UNIXCB(skb).fp)
+ UNIXCB(skb).fp->dead = true;
+ }
+
__skb_queue_purge(&hitlist);
skip_gc:
WRITE_ONCE(gc_in_progress, false);
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 44996af61999..357b3e5f3847 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -19,7 +19,6 @@ static struct ctl_table unix_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- { }
};
int __net_init unix_sysctl_register(struct net *net)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 54ba7316f808..4b040285aa78 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1500,8 +1500,8 @@ out:
return err;
}
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int vsock_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *listener;
int err;
@@ -1528,7 +1528,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
/* Wait for children sockets to appear; these are the new sockets
* created upon connection establishment.
*/
- timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK);
+ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK);
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
while ((connected = vsock_dequeue_accept(listener)) == NULL &&
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ee5d306a96d0..43d405298857 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -859,7 +859,6 @@ static struct virtio_driver virtio_vsock_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_vsock_probe,
.remove = virtio_vsock_remove,
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 72074fd36df4..1d49cc8b6da1 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -25,7 +25,7 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(sort $(wildcard $(srctree)/$(src)/certs/*.hex))
+$(obj)/shipped-certs.c: $(sort $(wildcard $(src)/certs/*.hex))
@$(kecho) " GEN $@"
$(Q)(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3fb1b637352a..4b1f45e3070e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work)
if (wk) {
list_del_init(&wk->entry);
if (!list_empty(&rdev->wiphy_work_list))
- schedule_work(work);
+ queue_work(system_unbound_wq, work);
spin_unlock_irq(&rdev->wiphy_work_lock);
wk->func(&rdev->wiphy, wk);
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index e106dcea3977..c569c37da317 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.burst_period = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD])
out->ftm.burst_period =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
+ nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP];
if (out->ftm.asap && !capa->ftm.asap) {
@@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.num_bursts_exp = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP])
out->ftm.num_bursts_exp =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
if (capa->ftm.max_bursts_exponent >= 0 &&
out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) {
@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.burst_duration = 15;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION])
out->ftm.burst_duration =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
out->ftm.ftms_per_burst = 0;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST])
@@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
out->ftm.ftmr_retries = 3;
if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES])
out->ftm.ftmr_retries =
- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI];
if (out->ftm.request_lci && !capa->ftm.request_lci) {
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 43897a5269b6..755af47b88b9 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018, 2021-2023 Intel Corporation
+ * Copyright (C) 2018, 2021-2024 Intel Corporation
*/
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
@@ -458,6 +458,10 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev,
struct cfg80211_scan_request *request)
{
int ret;
+
+ if (WARN_ON_ONCE(!request->n_ssids && request->ssids))
+ return -EINVAL;
+
trace_rdev_scan(&rdev->wiphy, request);
ret = rdev->ops->scan(&rdev->wiphy, request);
trace_rdev_return_int(&rdev->wiphy, ret);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 127853877a0a..2f2a3163968a 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -812,6 +812,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
LIST_HEAD(coloc_ap_list);
bool need_scan_psc = true;
const struct ieee80211_sband_iftype_data *iftd;
+ size_t size, offs_ssids, offs_6ghz_params, offs_ies;
rdev_req->scan_6ghz = true;
@@ -877,10 +878,15 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
spin_unlock_bh(&rdev->bss_lock);
}
- request = kzalloc(struct_size(request, channels, n_channels) +
- sizeof(*request->scan_6ghz_params) * count +
- sizeof(*request->ssids) * rdev_req->n_ssids,
- GFP_KERNEL);
+ size = struct_size(request, channels, n_channels);
+ offs_ssids = size;
+ size += sizeof(*request->ssids) * rdev_req->n_ssids;
+ offs_6ghz_params = size;
+ size += sizeof(*request->scan_6ghz_params) * count;
+ offs_ies = size;
+ size += rdev_req->ie_len;
+
+ request = kzalloc(size, GFP_KERNEL);
if (!request) {
cfg80211_free_coloc_ap_list(&coloc_ap_list);
return -ENOMEM;
@@ -888,8 +894,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
*request = *rdev_req;
request->n_channels = 0;
- request->scan_6ghz_params =
- (void *)&request->channels[n_channels];
+ request->n_6ghz_params = 0;
+ if (rdev_req->n_ssids) {
+ /*
+ * Add the ssids from the parent scan request to the new
+ * scan request, so the driver would be able to use them
+ * in its probe requests to discover hidden APs on PSC
+ * channels.
+ */
+ request->ssids = (void *)request + offs_ssids;
+ memcpy(request->ssids, rdev_req->ssids,
+ sizeof(*request->ssids) * request->n_ssids);
+ }
+ request->scan_6ghz_params = (void *)request + offs_6ghz_params;
+
+ if (rdev_req->ie_len) {
+ void *ie = (void *)request + offs_ies;
+
+ memcpy(ie, rdev_req->ie, rdev_req->ie_len);
+ request->ie = ie;
+ }
/*
* PSC channels should not be scanned in case of direct scan with 1 SSID
@@ -978,17 +1002,8 @@ skip:
if (request->n_channels) {
struct cfg80211_scan_request *old = rdev->int_scan_req;
- rdev->int_scan_req = request;
- /*
- * Add the ssids from the parent scan request to the new scan
- * request, so the driver would be able to use them in its
- * probe requests to discover hidden APs on PSC channels.
- */
- request->ssids = (void *)&request->channels[request->n_channels];
- request->n_ssids = rdev_req->n_ssids;
- memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) *
- request->n_ssids);
+ rdev->int_scan_req = request;
/*
* If this scan follows a previous scan, save the scan start
@@ -2128,7 +2143,8 @@ static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen,
struct ieee80211_he_operation *he_oper;
tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
- if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
+ if (tmp && tmp->datalen >= sizeof(*he_oper) + 1 &&
+ tmp->datalen >= ieee80211_he_oper_size(tmp->data + 1)) {
const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
he_oper = (void *)&tmp->data[1];
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 565511a3f461..62f26618f674 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -5,7 +5,7 @@
*
* Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2021, 2023 Intel Corporation
+ * Copyright (C) 2020-2021, 2023-2024 Intel Corporation
*/
#include <linux/device.h>
@@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev)
if (rdev->wiphy.registered && rdev->ops->resume)
ret = rdev_resume(rdev);
rdev->suspended = false;
- schedule_work(&rdev->wiphy_work);
+ queue_work(system_unbound_wq, &rdev->wiphy_work);
wiphy_unlock(&rdev->wiphy);
if (ret)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 9bf987519811..87986170d1b1 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -372,7 +372,7 @@ TRACE_EVENT(rdev_add_virtual_intf,
),
TP_fast_assign(
WIPHY_ASSIGN;
- __assign_str(vir_intf_name, name ? name : "<noname>");
+ __assign_str(vir_intf_name);
__entry->type = type;
),
TP_printk(WIPHY_PR_FMT ", virtual intf name: %s, type: %d",
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 2bde8a354631..082c6f9c5416 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -2549,6 +2549,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
{
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
+ int ret;
wdev = dev->ieee80211_ptr;
if (!wdev)
@@ -2560,7 +2561,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
memset(sinfo, 0, sizeof(*sinfo));
- return rdev_get_station(rdev, dev, mac_addr, sinfo);
+ wiphy_lock(&rdev->wiphy);
+ ret = rdev_get_station(rdev, dev, mac_addr, sinfo);
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
EXPORT_SYMBOL(cfg80211_get_station);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d18d51412cc0..8dda4178497c 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -871,8 +871,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
return rc;
}
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int x25_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sock *newsk;
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index e9802afa43d0..643f50874dfe 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -71,7 +71,6 @@ static struct ctl_table x25_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
};
int __init x25_register_sysctl(void)
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index ce60ecd48a4d..c0e0204b9630 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -338,7 +338,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
dma_map->netdev = netdev;
dma_map->dev = dev;
- dma_map->dma_need_sync = false;
dma_map->dma_pages_cnt = nr_pages;
refcount_set(&dma_map->users, 1);
list_add(&dma_map->list, &umem->xsk_dma_list);
@@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
pool->dev = dma_map->dev;
pool->dma_pages_cnt = dma_map->dma_pages_cnt;
- pool->dma_need_sync = dma_map->dma_need_sync;
memcpy(pool->dma_pages, dma_map->dma_pages,
pool->dma_pages_cnt * sizeof(*pool->dma_pages));
@@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
__xp_dma_unmap(dma_map, attrs);
return -ENOMEM;
}
- if (dma_need_sync(dev, dma))
- dma_map->dma_need_sync = true;
dma_map->dma_pages[i] = dma;
}
@@ -557,11 +553,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data_meta = xskb->xdp.data;
xskb->xdp.flags = 0;
- if (pool->dma_need_sync) {
- dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
- pool->frame_len,
- DMA_BIDIRECTIONAL);
- }
+ if (pool->dev)
+ xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
+
return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);
@@ -633,7 +627,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
u32 nb_entries1 = 0, nb_entries2;
- if (unlikely(pool->dma_need_sync)) {
+ if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
struct xdp_buff *buff;
/* Slow path */
@@ -693,18 +687,3 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
(addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);
-
-void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
-{
- dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
- xskb->pool->frame_len, DMA_BIDIRECTIONAL);
-}
-EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
-
-void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
- size_t size)
-{
- dma_sync_single_range_for_device(pool->dev, dma, 0,
- size, DMA_BIDIRECTIONAL);
-}
-EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 655fe4ff8621..703d4172c7d7 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -98,6 +98,7 @@ static const int compat_msg_min[XFRM_NR_MSGTYPES] = {
};
static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
+ [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR },
[XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
@@ -129,6 +130,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
};
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -277,9 +279,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_SET_MARK_MASK:
case XFRMA_IF_ID:
case XFRMA_MTIMER_THRESH:
+ case XFRMA_SA_DIR:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -434,7 +437,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6346690d5c69..2455a76a1cff 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -253,6 +253,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
+ if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) ||
+ (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) {
+ NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction");
+ return -EINVAL;
+ }
+
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
/* We don't yet support UDP encapsulation and TFC padding. */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 161f535c8b94..d2ea18dcb0cb 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -389,11 +389,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ip_hdr(skb)->tot_len = htons(skb->len + ihl);
@@ -404,11 +408,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
@@ -466,6 +474,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
x = xfrm_input_state(skb);
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ goto drop;
+ }
+
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
if (x->km.state == XFRM_STATE_ACQ)
XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
@@ -571,6 +584,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ xfrm_state_put(x);
+ goto drop;
+ }
+
skb->mark = xfrm_smark_get(skb->mark, x);
sp->xvec[sp->len++] = x;
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 4df5c06e3ece..e50e4bf993fa 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -926,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- return xi->net;
+ return READ_ONCE(xi->net);
}
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6affe5cd85d8..475b904fe68b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2489,6 +2489,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
family, policy->if_id);
+ if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR);
+ xfrm_state_put(x);
+ error = -EINVAL;
+ goto fail;
+ }
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
@@ -2598,8 +2604,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
if (dst->ops->family == AF_INET6) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- path->path_cookie = rt6_get_cookie(rt);
+ path->path_cookie = rt6_get_cookie(dst_rt6_info(dst));
path->u.rt6.rt6i_nfheader_len = nfheader_len;
}
}
@@ -3593,6 +3598,8 @@ xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
return pol;
pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+ if (IS_ERR(pol))
+ pol = NULL;
}
return pol;
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 5f9bf8e5c933..eeb984be03a7 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -41,6 +41,8 @@ static const struct snmp_mib xfrm_mib_list[] = {
SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
+ SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR),
+ SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR),
SNMP_MIB_SENTINEL
};
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index ce56d659c55a..bc56c6305725 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -778,7 +778,8 @@ int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack)
}
if (x->props.flags & XFRM_STATE_ESN) {
- if (replay_esn->replay_window == 0) {
+ if (replay_esn->replay_window == 0 &&
+ (!x->dir || x->dir == XFRM_SA_DIR_IN)) {
NL_SET_ERR_MSG(extack, "ESN replay window must be > 0");
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0c306473a79d..649bb739df0d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1292,6 +1292,7 @@ found:
if (km_query(x, tmpl, pol) == 0) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ;
+ x->dir = XFRM_SA_DIR_OUT;
list_add(&x->km.all, &net->xfrm.state_all);
XFRM_STATE_INSERT(bydst, &x->bydst,
net->xfrm.state_bydst + h,
@@ -1744,6 +1745,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;
+ x->dir = orig->dir;
return x;
@@ -1864,8 +1866,14 @@ int xfrm_state_update(struct xfrm_state *x)
}
if (x1->km.state == XFRM_STATE_ACQ) {
+ if (x->dir && x1->dir != x->dir)
+ goto out;
+
__xfrm_state_insert(x);
x = NULL;
+ } else {
+ if (x1->dir != x->dir)
+ goto out;
}
err = 0;
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index e972930c292b..ca003e8a0376 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -38,7 +38,6 @@ static struct ctl_table xfrm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- {}
};
int __net_init xfrm_sysctl_init(struct net *net)
@@ -57,10 +56,8 @@ int __net_init xfrm_sysctl_init(struct net *net)
table[3].data = &net->xfrm.sysctl_acq_expires;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
table_size = 0;
- }
net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table,
table_size);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 810b520493f3..e83c687bd64e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -130,7 +130,7 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_a
}
static inline int verify_replay(struct xfrm_usersa_info *p,
- struct nlattr **attrs,
+ struct nlattr **attrs, u8 sa_dir,
struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
@@ -168,6 +168,30 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
return -EINVAL;
}
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ if (rs->replay_window) {
+ NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA");
+ return -EINVAL;
+ }
+ if (rs->seq || rs->seq_hi) {
+ NL_SET_ERR_MSG(extack,
+ "Replay seq and seq_hi should be 0 for output SA");
+ return -EINVAL;
+ }
+ if (rs->bmp_len) {
+ NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA");
+ return -EINVAL;
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_IN) {
+ if (rs->oseq || rs->oseq_hi) {
+ NL_SET_ERR_MSG(extack,
+ "Replay oseq and oseq_hi should be 0 for input SA");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -176,6 +200,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
struct netlink_ext_ack *extack)
{
int err;
+ u8 sa_dir = attrs[XFRMA_SA_DIR] ? nla_get_u8(attrs[XFRMA_SA_DIR]) : 0;
err = -EINVAL;
switch (p->family) {
@@ -334,7 +359,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
if ((err = verify_sec_ctx_len(attrs, extack)))
goto out;
- if ((err = verify_replay(p, attrs, extack)))
+ if ((err = verify_replay(p, attrs, sa_dir, extack)))
goto out;
err = -EINVAL;
@@ -358,6 +383,77 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
err = -EINVAL;
goto out;
}
+
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ NL_SET_ERR_MSG(extack,
+ "MTIMER_THRESH attribute should not be set on output SA");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_OUT) {
+ if (p->flags & XFRM_STATE_DECAP_DSCP) {
+ NL_SET_ERR_MSG(extack, "Flag DECAP_DSCP should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->flags & XFRM_STATE_ICMP) {
+ NL_SET_ERR_MSG(extack, "Flag ICMP should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->flags & XFRM_STATE_WILDRECV) {
+ NL_SET_ERR_MSG(extack, "Flag WILDRECV should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (p->replay_window) {
+ NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_REPLAY_VAL]) {
+ struct xfrm_replay_state *replay;
+
+ replay = nla_data(attrs[XFRMA_REPLAY_VAL]);
+
+ if (replay->seq || replay->bitmap) {
+ NL_SET_ERR_MSG(extack,
+ "Replay seq and bitmap should be 0 for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ if (sa_dir == XFRM_SA_DIR_IN) {
+ if (p->flags & XFRM_STATE_NOPMTUDISC) {
+ NL_SET_ERR_MSG(extack, "Flag NOPMTUDISC should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_SA_EXTRA_FLAGS]) {
+ u32 xflags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
+ if (xflags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) {
+ NL_SET_ERR_MSG(extack, "Flag DONT_ENCAP_DSCP should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (xflags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP) {
+ NL_SET_ERR_MSG(extack, "Flag OSEQ_MAY_WRAP should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ }
}
out:
@@ -734,6 +830,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
if (attrs[XFRMA_IF_ID])
x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ if (attrs[XFRMA_SA_DIR])
+ x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]);
+
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
@@ -1182,8 +1281,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (ret)
goto out;
}
- if (x->mapping_maxage)
+ if (x->mapping_maxage) {
ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
+ if (ret)
+ goto out;
+ }
+ if (x->dir)
+ ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
out:
return ret;
}
@@ -1618,6 +1722,9 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
goto out;
+ if (attrs[XFRMA_SA_DIR])
+ x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]);
+
resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
@@ -2402,7 +2509,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
+ nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur))
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(4) /* XFRM_AE_RTHR */
- + nla_total_size(4); /* XFRM_AE_ETHR */
+ + nla_total_size(4) /* XFRM_AE_ETHR */
+ + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */
}
static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -2459,6 +2567,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
goto out_cancel;
+ if (x->dir) {
+ err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+ if (err)
+ goto out_cancel;
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -3018,6 +3132,7 @@ EXPORT_SYMBOL_GPL(xfrm_msg_min);
#undef XMSGSIZE
const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
+ [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR },
[XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)},
[XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)},
[XFRMA_LASTUSED] = { .type = NLA_U64},
@@ -3049,6 +3164,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
+ [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3097,6 +3213,24 @@ static const struct xfrm_link {
[XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default },
};
+static int xfrm_reject_unused_attr(int type, struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
+{
+ if (attrs[XFRMA_SA_DIR]) {
+ switch (type) {
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_UPDSA:
+ case XFRM_MSG_ALLOCSPI:
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid attribute SA_DIR");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -3156,6 +3290,12 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto err;
+ if (!link->nla_pol || link->nla_pol == xfrma_policy) {
+ err = xfrm_reject_unused_attr((type + XFRM_MSG_BASE), attrs, extack);
+ if (err < 0)
+ goto err;
+ }
+
if (link->doit == NULL) {
err = -EINVAL;
goto err;
@@ -3189,8 +3329,9 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
static inline unsigned int xfrm_expire_msgsize(void)
{
- return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
- + nla_total_size(sizeof(struct xfrm_mark));
+ return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) +
+ nla_total_size(sizeof(struct xfrm_mark)) +
+ nla_total_size(sizeof_field(struct xfrm_state, dir));
}
static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -3217,6 +3358,12 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
if (err)
return err;
+ if (x->dir) {
+ err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+ if (err)
+ return err;
+ }
+
nlmsg_end(skb, nlh);
return 0;
}
@@ -3324,6 +3471,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
if (x->mapping_maxage)
l += nla_total_size(sizeof(x->mapping_maxage));
+ if (x->dir)
+ l += nla_total_size(sizeof(x->dir));
+
return l;
}