summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_fd.c28
-rw-r--r--net/9p/trans_xen.c9
-rw-r--r--net/bluetooth/6lowpan.c1
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/hci_codec.c19
-rw-r--r--net/bluetooth/hci_core.c8
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/hci_sync.c19
-rw-r--r--net/bluetooth/iso.c1
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c14
-rw-r--r--net/bpf/test_run.c19
-rw-r--r--net/bridge/br_mdb.c312
-rw-r--r--net/bridge/br_multicast.c2
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/can/af_can.c9
-rw-r--r--net/can/raw.c1
-rw-r--r--net/core/bpf_sk_storage.c3
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/devlink.c444
-rw-r--r--net/core/dst.c8
-rw-r--r--net/core/filter.c80
-rw-r--r--net/core/flow_dissector.c2
-rw-r--r--net/core/lwtunnel.c4
-rw-r--r--net/core/neighbour.c58
-rw-r--r--net/core/of_net.c5
-rw-r--r--net/core/skbuff.c70
-rw-r--r--net/core/skmsg.c9
-rw-r--r--net/core/sock.c9
-rw-r--r--net/core/sock_map.c2
-rw-r--r--net/dccp/ipv4.c23
-rw-r--r--net/dccp/ipv6.c24
-rw-r--r--net/dccp/proto.c3
-rw-r--r--net/dsa/Kconfig6
-rw-r--r--net/dsa/Makefile4
-rw-r--r--net/dsa/devlink.c391
-rw-r--r--net/dsa/devlink.h16
-rw-r--r--net/dsa/dsa.c1736
-rw-r--r--net/dsa/dsa.h40
-rw-r--r--net/dsa/dsa2.c1817
-rw-r--r--net/dsa/dsa_priv.h663
-rw-r--r--net/dsa/master.c10
-rw-r--r--net/dsa/master.h19
-rw-r--r--net/dsa/netlink.c3
-rw-r--r--net/dsa/netlink.h8
-rw-r--r--net/dsa/port.c6
-rw-r--r--net/dsa/port.h114
-rw-r--r--net/dsa/slave.c49
-rw-r--r--net/dsa/slave.h69
-rw-r--r--net/dsa/switch.c53
-rw-r--r--net/dsa/switch.h120
-rw-r--r--net/dsa/tag.c243
-rw-r--r--net/dsa/tag.h310
-rw-r--r--net/dsa/tag_8021q.c19
-rw-r--r--net/dsa/tag_8021q.h27
-rw-r--r--net/dsa/tag_ar9331.c2
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/dsa/tag_dsa.c2
-rw-r--r--net/dsa/tag_gswip.c2
-rw-r--r--net/dsa/tag_hellcreek.c5
-rw-r--r--net/dsa/tag_ksz.c6
-rw-r--r--net/dsa/tag_lan9303.c2
-rw-r--r--net/dsa/tag_mtk.c2
-rw-r--r--net/dsa/tag_none.c30
-rw-r--r--net/dsa/tag_ocelot.c3
-rw-r--r--net/dsa/tag_ocelot_8021q.c4
-rw-r--r--net/dsa/tag_qca.c2
-rw-r--r--net/dsa/tag_rtl4_a.c2
-rw-r--r--net/dsa/tag_rtl8_4.c2
-rw-r--r--net/dsa/tag_rzn1_a5psw.c2
-rw-r--r--net/dsa/tag_sja1105.c7
-rw-r--r--net/dsa/tag_trailer.c2
-rw-r--r--net/dsa/tag_xrs700x.c2
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ethtool/Makefile2
-rw-r--r--net/ethtool/common.c1
-rw-r--r--net/ethtool/ioctl.c3
-rw-r--r--net/ethtool/netlink.c7
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/rss.c153
-rw-r--r--net/hsr/hsr_debugfs.c40
-rw-r--r--net/hsr/hsr_device.c31
-rw-r--r--net/hsr/hsr_forward.c19
-rw-r--r--net/hsr/hsr_framereg.c265
-rw-r--r--net/hsr/hsr_framereg.h17
-rw-r--r--net/hsr/hsr_main.h15
-rw-r--r--net/hsr/hsr_netlink.c4
-rw-r--r--net/ieee802154/nl802154.c103
-rw-r--r--net/ieee802154/nl802154.h2
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/bpf_tcp_ca.c17
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c9
-rw-r--r--net/ipv4/fib_trie.c6
-rw-r--r--net/ipv4/inet_hashtables.c84
-rw-r--r--net/ipv4/ip_gre.c48
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c4
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv4/tcp.c8
-rw-r--r--net/ipv4/tcp_bpf.c19
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c117
-rw-r--r--net/ipv4/tcp_minisocks.c61
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv4/udp_offload.c3
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/ip6_output.c5
-rw-r--r--net/ipv6/tcp_ipv6.c41
-rw-r--r--net/ipv6/udp_offload.c3
-rw-r--r--net/ipv6/xfrm6_policy.c6
-rw-r--r--net/key/af_key.c40
-rw-r--r--net/l2tp/l2tp_core.c17
-rw-r--r--net/mac80211/airtime.c3
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rx.c38
-rw-r--r--net/mac80211/tx.c299
-rw-r--r--net/mac802154/iface.c16
-rw-r--r--net/mac802154/main.c2
-rw-r--r--net/mac802154/rx.c24
-rw-r--r--net/mac802154/trace.h25
-rw-r--r--net/mptcp/Makefile2
-rw-r--r--net/mptcp/fastopen.c73
-rw-r--r--net/mptcp/options.c25
-rw-r--r--net/mptcp/pm_netlink.c67
-rw-r--r--net/mptcp/protocol.c55
-rw-r--r--net/mptcp/protocol.h30
-rw-r--r--net/mptcp/sockopt.c11
-rw-r--r--net/mptcp/subflow.c111
-rw-r--r--net/ncsi/ncsi-cmd.c3
-rw-r--r--net/netfilter/Kconfig3
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c8
-rw-r--r--net/netfilter/nf_conntrack_bpf.c17
-rw-r--r--net/netfilter/nf_conntrack_core.c8
-rw-r--r--net/netfilter/nf_conntrack_netlink.c21
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_flow_table_offload.c10
-rw-r--r--net/netfilter/nf_nat_ovs.c135
-rw-r--r--net/netfilter/nf_tables_api.c6
-rw-r--r--net/netfilter/nft_ct.c6
-rw-r--r--net/netfilter/nft_set_pipapo.c5
-rw-r--r--net/netfilter/xt_connmark.c18
-rw-r--r--net/nfc/nci/core.c2
-rw-r--r--net/nfc/nci/data.c4
-rw-r--r--net/nfc/nci/ntf.c6
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/conntrack.c154
-rw-r--r--net/openvswitch/datapath.c41
-rw-r--r--net/openvswitch/vport.c50
-rw-r--r--net/openvswitch/vport.h16
-rw-r--r--net/packet/af_packet.c6
-rw-r--r--net/rxrpc/Kconfig7
-rw-r--r--net/rxrpc/Makefile4
-rw-r--r--net/rxrpc/af_rxrpc.c18
-rw-r--r--net/rxrpc/ar-internal.h212
-rw-r--r--net/rxrpc/call_accept.c191
-rw-r--r--net/rxrpc/call_event.c260
-rw-r--r--net/rxrpc/call_object.c318
-rw-r--r--net/rxrpc/conn_client.c173
-rw-r--r--net/rxrpc/conn_event.c128
-rw-r--r--net/rxrpc/conn_object.c309
-rw-r--r--net/rxrpc/conn_service.c29
-rw-r--r--net/rxrpc/input.c653
-rw-r--r--net/rxrpc/io_thread.c496
-rw-r--r--net/rxrpc/key.c16
-rw-r--r--net/rxrpc/local_event.c46
-rw-r--r--net/rxrpc/local_object.c167
-rw-r--r--net/rxrpc/net_ns.c2
-rw-r--r--net/rxrpc/output.c227
-rw-r--r--net/rxrpc/peer_event.c167
-rw-r--r--net/rxrpc/peer_object.c52
-rw-r--r--net/rxrpc/proc.c67
-rw-r--r--net/rxrpc/recvmsg.c88
-rw-r--r--net/rxrpc/rxkad.c63
-rw-r--r--net/rxrpc/rxperf.c619
-rw-r--r--net/rxrpc/security.c34
-rw-r--r--net/rxrpc/sendmsg.c105
-rw-r--r--net/rxrpc/server_key.c25
-rw-r--r--net/rxrpc/skbuff.c36
-rw-r--r--net/rxrpc/txbuf.c15
-rw-r--r--net/sched/Kconfig3
-rw-r--r--net/sched/act_api.c3
-rw-r--r--net/sched/act_bpf.c6
-rw-r--r--net/sched/act_connmark.c10
-rw-r--r--net/sched/act_csum.c6
-rw-r--r--net/sched/act_ct.c149
-rw-r--r--net/sched/act_ctinfo.c12
-rw-r--r--net/sched/act_gact.c6
-rw-r--r--net/sched/act_gate.c6
-rw-r--r--net/sched/act_ife.c6
-rw-r--r--net/sched/act_ipt.c6
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/act_mpls.c6
-rw-r--r--net/sched/act_nat.c7
-rw-r--r--net/sched/act_pedit.c6
-rw-r--r--net/sched/act_police.c6
-rw-r--r--net/sched/act_sample.c6
-rw-r--r--net/sched/act_simple.c6
-rw-r--r--net/sched/act_skbedit.c6
-rw-r--r--net/sched/act_skbmod.c6
-rw-r--r--net/sched/act_tunnel_key.c6
-rw-r--r--net/sched/act_vlan.c6
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_basic.c6
-rw-r--r--net/sched/cls_bpf.c6
-rw-r--r--net/sched/cls_cgroup.c6
-rw-r--r--net/sched/cls_flow.c6
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/cls_fw.c6
-rw-r--r--net/sched/cls_matchall.c6
-rw-r--r--net/sched/cls_route.c6
-rw-r--r--net/sched/cls_rsvp.c2
-rw-r--r--net/sched/cls_rsvp.h6
-rw-r--r--net/sched/cls_rsvp6.c2
-rw-r--r--net/sched/cls_tcindex.c7
-rw-r--r--net/sched/cls_u32.c6
-rw-r--r--net/sched/sch_api.c5
-rw-r--r--net/sctp/stream.c25
-rw-r--r--net/sctp/stream_sched.c37
-rw-r--r--net/sctp/stream_sched_prio.c32
-rw-r--r--net/sctp/stream_sched_rr.c5
-rw-r--r--net/sctp/sysctl.c73
-rw-r--r--net/tipc/crypto.c3
-rw-r--r--net/tipc/discover.c5
-rw-r--r--net/tipc/link.c4
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/tipc/topsrv.c20
-rw-r--r--net/tls/tls_sw.c6
-rw-r--r--net/unix/diag.c20
-rw-r--r--net/wireless/nl80211.c3
-rw-r--r--net/wireless/reg.c4
-rw-r--r--net/wireless/scan.c54
-rw-r--r--net/xdp/xskmap.c4
-rw-r--r--net/xfrm/Makefile8
-rw-r--r--net/xfrm/xfrm_device.c124
-rw-r--r--net/xfrm/xfrm_input.c1
-rw-r--r--net/xfrm/xfrm_interface_bpf.c115
-rw-r--r--net/xfrm/xfrm_interface_core.c (renamed from net/xfrm/xfrm_interface.c)14
-rw-r--r--net/xfrm/xfrm_output.c15
-rw-r--r--net/xfrm/xfrm_policy.c122
-rw-r--r--net/xfrm/xfrm_replay.c2
-rw-r--r--net/xfrm/xfrm_state.c212
-rw-r--r--net/xfrm/xfrm_user.c104
249 files changed, 8966 insertions, 6479 deletions
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 56a186768750..07db2f436d44 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -120,7 +120,7 @@ struct p9_conn {
struct list_head unsent_req_list;
struct p9_req_t *rreq;
struct p9_req_t *wreq;
- char tmp_buf[7];
+ char tmp_buf[P9_HDRSZ];
struct p9_fcall rc;
int wpos;
int wsize;
@@ -202,9 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
list_move(&req->req_list, &cancel_list);
+ req->status = REQ_STATUS_ERROR;
}
list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
list_move(&req->req_list, &cancel_list);
+ req->status = REQ_STATUS_ERROR;
}
spin_unlock(&m->req_lock);
@@ -291,7 +293,7 @@ static void p9_read_work(struct work_struct *work)
if (!m->rc.sdata) {
m->rc.sdata = m->tmp_buf;
m->rc.offset = 0;
- m->rc.capacity = 7; /* start by reading header */
+ m->rc.capacity = P9_HDRSZ; /* start by reading header */
}
clear_bit(Rpending, &m->wsched);
@@ -314,7 +316,7 @@ static void p9_read_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "got new header\n");
/* Header size */
- m->rc.size = 7;
+ m->rc.size = P9_HDRSZ;
err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
if (err) {
p9_debug(P9_DEBUG_ERROR,
@@ -322,14 +324,6 @@ static void p9_read_work(struct work_struct *work)
goto error;
}
- if (m->rc.size >= m->client->msize) {
- p9_debug(P9_DEBUG_ERROR,
- "requested packet size too big: %d\n",
- m->rc.size);
- err = -EIO;
- goto error;
- }
-
p9_debug(P9_DEBUG_TRANS,
"mux %p pkt: size: %d bytes tag: %d\n",
m, m->rc.size, m->rc.tag);
@@ -342,6 +336,14 @@ static void p9_read_work(struct work_struct *work)
goto error;
}
+ if (m->rc.size > m->rreq->rc.capacity) {
+ p9_debug(P9_DEBUG_ERROR,
+ "requested packet size too big: %d for tag %d with capacity %zd\n",
+ m->rc.size, m->rc.tag, m->rreq->rc.capacity);
+ err = -EIO;
+ goto error;
+ }
+
if (!m->rreq->rc.sdata) {
p9_debug(P9_DEBUG_ERROR,
"No recv fcall for tag %d (req %p), disconnecting!\n",
@@ -860,8 +862,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
struct file *file;
p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
- if (!p)
+ if (!p) {
+ sock_release(csocket);
return -ENOMEM;
+ }
csocket->sk->sk_allocation = GFP_NOIO;
file = sock_alloc_file(csocket, 0, NULL);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index b15c64128c3e..aaa5fd364691 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -208,6 +208,14 @@ static void p9_xen_response(struct work_struct *work)
continue;
}
+ if (h.size > req->rc.capacity) {
+ dev_warn(&priv->dev->dev,
+ "requested packet size too big: %d for tag %d with capacity %zd\n",
+ h.size, h.tag, req->rc.capacity);
+ req->status = REQ_STATUS_ERROR;
+ goto recv_error;
+ }
+
memcpy(&req->rc, &h, sizeof(h));
req->rc.offset = 0;
@@ -217,6 +225,7 @@ static void p9_xen_response(struct work_struct *work)
masked_prod, &masked_cons,
XEN_9PFS_RING_SIZE(ring));
+recv_error:
virt_mb();
cons += h.size;
ring->intf->in_cons = cons;
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 215af9b3b589..c57d643afb10 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -972,6 +972,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
if (!hcon)
return -ENOENT;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index dc65974f5adb..1c3c7ff5c3c6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -737,7 +737,7 @@ static int __init bt_init(void)
err = bt_sysfs_init();
if (err < 0)
- return err;
+ goto cleanup_led;
err = sock_register(&bt_sock_family_ops);
if (err)
@@ -773,6 +773,8 @@ unregister_socket:
sock_unregister(PF_BLUETOOTH);
cleanup_sysfs:
bt_sysfs_cleanup();
+cleanup_led:
+ bt_leds_cleanup();
return err;
}
diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c
index 38201532f58e..3cc135bb1d30 100644
--- a/net/bluetooth/hci_codec.c
+++ b/net/bluetooth/hci_codec.c
@@ -72,9 +72,8 @@ static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport,
continue;
}
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS,
- sizeof(*cmd), cmd,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS,
+ sizeof(*cmd), cmd, 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read codec capabilities (%ld)",
PTR_ERR(skb));
@@ -127,8 +126,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
struct hci_op_read_local_codec_caps caps;
__u8 i;
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
+ 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
@@ -158,7 +157,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
for (i = 0; i < std_codecs->num; i++) {
caps.id = std_codecs->codec[i];
caps.direction = 0x00;
- hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ hci_read_codec_capabilities(hdev,
+ LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps);
}
skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num)
@@ -178,7 +178,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
caps.cid = vnd_codecs->codec[i].cid;
caps.vid = vnd_codecs->codec[i].vid;
caps.direction = 0x00;
- hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ hci_read_codec_capabilities(hdev,
+ LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps);
}
error:
@@ -194,8 +195,8 @@ void hci_read_supported_codecs_v2(struct hci_dev *hdev)
struct hci_op_read_local_codec_caps caps;
__u8 i;
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL,
+ 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0540555b3704..d97fac4f7130 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2764,7 +2764,8 @@ int hci_register_suspend_notifier(struct hci_dev *hdev)
{
int ret = 0;
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ if (!hdev->suspend_notifier.notifier_call &&
+ !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
ret = register_pm_notifier(&hdev->suspend_notifier);
}
@@ -2776,8 +2777,11 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev)
{
int ret = 0;
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
+ if (hdev->suspend_notifier.notifier_call) {
ret = unregister_pm_notifier(&hdev->suspend_notifier);
+ if (!ret)
+ hdev->suspend_notifier.notifier_call = NULL;
+ }
return ret;
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 5a0296a4352e..f7e006a36382 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -269,7 +269,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
const void *param)
{
- bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode);
+ bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode);
hci_req_add_ev(req, opcode, plen, param, 0);
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 76c3107c9f91..1fc693122a47 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -12,6 +12,7 @@
#include <net/bluetooth/mgmt.h>
#include "hci_request.h"
+#include "hci_codec.h"
#include "hci_debugfs.h"
#include "smp.h"
#include "eir.h"
@@ -3780,7 +3781,8 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev)
static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[18] & 0x04) ||
- !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
+ test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING,
@@ -4238,11 +4240,12 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev)
/* Read local codec list if the HCI command is supported */
static int hci_read_local_codecs_sync(struct hci_dev *hdev)
{
- if (!(hdev->commands[29] & 0x20))
- return 0;
+ if (hdev->commands[45] & 0x04)
+ hci_read_supported_codecs_v2(hdev);
+ else if (hdev->commands[29] & 0x20)
+ hci_read_supported_codecs(hdev);
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
- HCI_CMD_TIMEOUT);
+ return 0;
}
/* Read local pairing options if the HCI command is supported */
@@ -4298,7 +4301,8 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev)
bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED);
if (!(hdev->commands[18] & 0x08) ||
- !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
+ test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
return 0;
if (enabled == hdev->err_data_reporting)
@@ -4457,6 +4461,9 @@ static const struct {
HCI_QUIRK_BROKEN(STORED_LINK_KEY,
"HCI Delete Stored Link Key command is advertised, "
"but not supported."),
+ HCI_QUIRK_BROKEN(ERR_DATA_REPORTING,
+ "HCI Read Default Erroneous Data Reporting command is "
+ "advertised, but not supported."),
HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER,
"HCI Read Transmit Power Level command is advertised, "
"but not supported."),
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index f825857db6d0..26db929b97c4 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -879,6 +879,7 @@ static int iso_listen_bis(struct sock *sk)
iso_pi(sk)->bc_sid);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
return err;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9c24947aa41e..9fdede5fe71c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4453,7 +4453,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
chan->ident = cmd->ident;
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp);
- chan->num_conf_rsp++;
+ if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP)
+ chan->num_conf_rsp++;
/* Reset config buffer. */
chan->conf_len = 0;
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index e78dadfc5829..2d434c1f4617 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -156,29 +156,29 @@ static bool bpf_dummy_ops_is_valid_access(int off, int size,
}
static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
u32 *next_btf_id,
enum bpf_type_flag *flag)
{
const struct btf_type *state;
+ const struct btf_type *t;
s32 type_id;
int err;
- type_id = btf_find_by_name_kind(btf, "bpf_dummy_ops_state",
+ type_id = btf_find_by_name_kind(reg->btf, "bpf_dummy_ops_state",
BTF_KIND_STRUCT);
if (type_id < 0)
return -EINVAL;
- state = btf_type_by_id(btf, type_id);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
+ state = btf_type_by_id(reg->btf, type_id);
if (t != state) {
bpf_log(log, "only access to bpf_dummy_ops_state is supported\n");
return -EACCES;
}
- err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ err = btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
if (err < 0)
return err;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fcb3e6c5e03c..2723623429ac 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -489,7 +489,6 @@ int noinline bpf_fentry_test1(int a)
return a + 1;
}
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
-ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
int noinline bpf_fentry_test2(int a, u64 b)
{
@@ -733,7 +732,15 @@ noinline void bpf_kfunc_call_test_destructive(void)
__diag_pop();
-ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+BTF_SET8_START(bpf_test_modify_return_ids)
+BTF_ID_FLAGS(func, bpf_modify_return_test)
+BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
+BTF_SET8_END(bpf_test_modify_return_ids)
+
+static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_test_modify_return_ids,
+};
BTF_SET8_START(test_sk_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -980,9 +987,6 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
{
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
- if (!skb->len)
- return -EINVAL;
-
if (!__skb)
return 0;
@@ -1131,7 +1135,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
}
sock_init_data(NULL, sk);
- skb = build_skb(data, 0);
+ skb = slab_build_skb(data);
if (!skb) {
kfree(data);
kfree(ctx);
@@ -1669,7 +1673,8 @@ static int __init bpf_prog_test_run_init(void)
};
int ret;
- ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+ ret = register_btf_fmodret_id_set(&bpf_test_modify_return_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set);
return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 321be94c445a..ae7d93c08880 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -754,73 +754,6 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
sizeof(struct in6_addr)),
};
-static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct net_device **pdev, struct br_mdb_entry **pentry,
- struct nlattr **mdb_attrs, struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct br_mdb_entry *entry;
- struct br_port_msg *bpm;
- struct nlattr *tb[MDBA_SET_ENTRY_MAX+1];
- struct net_device *dev;
- int err;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
- MDBA_SET_ENTRY_MAX, NULL, NULL);
- if (err < 0)
- return err;
-
- bpm = nlmsg_data(nlh);
- if (bpm->ifindex == 0) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex");
- return -EINVAL;
- }
-
- dev = __dev_get_by_index(net, bpm->ifindex);
- if (dev == NULL) {
- NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist");
- return -ENODEV;
- }
-
- if (!netif_is_bridge_master(dev)) {
- NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
- return -EOPNOTSUPP;
- }
-
- *pdev = dev;
-
- if (!tb[MDBA_SET_ENTRY]) {
- NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
- return -EINVAL;
- }
- if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
- return -EINVAL;
- }
-
- entry = nla_data(tb[MDBA_SET_ENTRY]);
- if (!is_valid_mdb_entry(entry, extack))
- return -EINVAL;
- *pentry = entry;
-
- if (tb[MDBA_SET_ENTRY_ATTRS]) {
- err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX,
- tb[MDBA_SET_ENTRY_ATTRS],
- br_mdbe_attrs_pol, extack);
- if (err)
- return err;
- if (mdb_attrs[MDBE_ATTR_SOURCE] &&
- !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE],
- entry->addr.proto, extack))
- return -EINVAL;
- } else {
- memset(mdb_attrs, 0,
- sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1));
- }
-
- return 0;
-}
-
static struct net_bridge_mcast *
__br_mdb_choose_context(struct net_bridge *br,
const struct br_mdb_entry *entry,
@@ -853,44 +786,26 @@ out:
return brmctx;
}
-static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_mdb_entry *entry,
- struct nlattr **mdb_attrs,
+static int br_mdb_add_group(const struct br_mdb_config *cfg,
struct netlink_ext_ack *extack)
{
struct net_bridge_mdb_entry *mp, *star_mp;
struct net_bridge_port_group __rcu **pp;
+ struct br_mdb_entry *entry = cfg->entry;
+ struct net_bridge_port *port = cfg->p;
+ struct net_bridge *br = cfg->br;
struct net_bridge_port_group *p;
struct net_bridge_mcast *brmctx;
- struct br_ip group, star_group;
+ struct br_ip group = cfg->group;
unsigned long now = jiffies;
unsigned char flags = 0;
+ struct br_ip star_group;
u8 filter_mode;
- __mdb_entry_to_br_ip(entry, &group, mdb_attrs);
-
brmctx = __br_mdb_choose_context(br, entry, extack);
if (!brmctx)
return -EINVAL;
- /* host join errors which can happen before creating the group */
- if (!port && !br_group_is_l2(&group)) {
- /* don't allow any flags for host-joined IP groups */
- if (entry->state) {
- NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
- return -EINVAL;
- }
- if (!br_multicast_is_star_g(&group)) {
- NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
- return -EINVAL;
- }
- }
-
- if (br_group_is_l2(&group) && entry->state != MDB_PERMANENT) {
- NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed");
- return -EINVAL;
- }
-
mp = br_multicast_new_group(br, &group);
if (IS_ERR(mp))
return PTR_ERR(mp);
@@ -959,107 +874,197 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return 0;
}
-static int __br_mdb_add(struct net *net, struct net_bridge *br,
- struct net_bridge_port *p,
- struct br_mdb_entry *entry,
- struct nlattr **mdb_attrs,
+static int __br_mdb_add(const struct br_mdb_config *cfg,
struct netlink_ext_ack *extack)
{
int ret;
- spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, entry, mdb_attrs, extack);
- spin_unlock_bh(&br->multicast_lock);
+ spin_lock_bh(&cfg->br->multicast_lock);
+ ret = br_mdb_add_group(cfg, extack);
+ spin_unlock_bh(&cfg->br->multicast_lock);
return ret;
}
-static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int br_mdb_config_attrs_init(struct nlattr *set_attrs,
+ struct br_mdb_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
- struct net *net = sock_net(skb->sk);
- struct net_bridge_vlan_group *vg;
- struct net_bridge_port *p = NULL;
- struct net_device *dev, *pdev;
- struct br_mdb_entry *entry;
- struct net_bridge_vlan *v;
- struct net_bridge *br;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
- if (err < 0)
+ err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX, set_attrs,
+ br_mdbe_attrs_pol, extack);
+ if (err)
return err;
- br = netdev_priv(dev);
+ if (mdb_attrs[MDBE_ATTR_SOURCE] &&
+ !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE],
+ cfg->entry->addr.proto, extack))
+ return -EINVAL;
+
+ __mdb_entry_to_br_ip(cfg->entry, &cfg->group, mdb_attrs);
- if (!netif_running(br->dev)) {
+ return 0;
+}
+
+static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
+ struct br_mdb_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
+ struct br_port_msg *bpm;
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, NULL, extack);
+ if (err)
+ return err;
+
+ memset(cfg, 0, sizeof(*cfg));
+
+ bpm = nlmsg_data(nlh);
+ if (!bpm->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, bpm->ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist");
+ return -ENODEV;
+ }
+
+ if (!netif_is_bridge_master(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
+ return -EOPNOTSUPP;
+ }
+
+ cfg->br = netdev_priv(dev);
+
+ if (!netif_running(cfg->br->dev)) {
NL_SET_ERR_MSG_MOD(extack, "Bridge device is not running");
return -EINVAL;
}
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
+ if (!br_opt_get(cfg->br, BROPT_MULTICAST_ENABLED)) {
NL_SET_ERR_MSG_MOD(extack, "Bridge's multicast processing is disabled");
return -EINVAL;
}
- if (entry->ifindex != br->dev->ifindex) {
- pdev = __dev_get_by_index(net, entry->ifindex);
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+ if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
+ return -EINVAL;
+ }
+
+ cfg->entry = nla_data(tb[MDBA_SET_ENTRY]);
+ if (!is_valid_mdb_entry(cfg->entry, extack))
+ return -EINVAL;
+
+ if (cfg->entry->ifindex != cfg->br->dev->ifindex) {
+ struct net_device *pdev;
+
+ pdev = __dev_get_by_index(net, cfg->entry->ifindex);
if (!pdev) {
NL_SET_ERR_MSG_MOD(extack, "Port net device doesn't exist");
return -ENODEV;
}
- p = br_port_get_rtnl(pdev);
- if (!p) {
+ cfg->p = br_port_get_rtnl(pdev);
+ if (!cfg->p) {
NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
return -EINVAL;
}
- if (p->br != br) {
+ if (cfg->p->br != cfg->br) {
NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
return -EINVAL;
}
- if (p->state == BR_STATE_DISABLED && entry->state != MDB_PERMANENT) {
+ }
+
+ if (tb[MDBA_SET_ENTRY_ATTRS])
+ return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg,
+ extack);
+ else
+ __mdb_entry_to_br_ip(cfg->entry, &cfg->group, NULL);
+
+ return 0;
+}
+
+static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct br_mdb_config cfg;
+ int err;
+
+ err = br_mdb_config_init(net, nlh, &cfg, extack);
+ if (err)
+ return err;
+
+ /* host join errors which can happen before creating the group */
+ if (!cfg.p && !br_group_is_l2(&cfg.group)) {
+ /* don't allow any flags for host-joined IP groups */
+ if (cfg.entry->state) {
+ NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
+ return -EINVAL;
+ }
+ if (!br_multicast_is_star_g(&cfg.group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
+ return -EINVAL;
+ }
+ }
+
+ if (br_group_is_l2(&cfg.group) && cfg.entry->state != MDB_PERMANENT) {
+ NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed");
+ return -EINVAL;
+ }
+
+ if (cfg.p) {
+ if (cfg.p->state == BR_STATE_DISABLED && cfg.entry->state != MDB_PERMANENT) {
NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent");
return -EINVAL;
}
- vg = nbp_vlan_group(p);
+ vg = nbp_vlan_group(cfg.p);
} else {
- vg = br_vlan_group(br);
+ vg = br_vlan_group(cfg.br);
}
/* If vlan filtering is enabled and VLAN is not specified
* install mdb entry on all vlans configured on the port.
*/
- if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
+ if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
- entry->vid = v->vid;
- err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
+ cfg.entry->vid = v->vid;
+ cfg.group.vid = v->vid;
+ err = __br_mdb_add(&cfg, extack);
if (err)
break;
}
} else {
- err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
+ err = __br_mdb_add(&cfg, extack);
}
return err;
}
-static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry,
- struct nlattr **mdb_attrs)
+static int __br_mdb_del(const struct br_mdb_config *cfg)
{
+ struct br_mdb_entry *entry = cfg->entry;
+ struct net_bridge *br = cfg->br;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct br_ip ip;
+ struct br_ip ip = cfg->group;
int err = -EINVAL;
- if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
- return -EINVAL;
-
- __mdb_entry_to_br_ip(entry, &ip, mdb_attrs);
-
spin_lock_bh(&br->multicast_lock);
mp = br_mdb_ip_get(br, &ip);
if (!mp)
@@ -1094,51 +1099,32 @@ unlock:
static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
- struct net_bridge_port *p = NULL;
- struct net_device *dev, *pdev;
- struct br_mdb_entry *entry;
struct net_bridge_vlan *v;
- struct net_bridge *br;
+ struct br_mdb_config cfg;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
- if (err < 0)
+ err = br_mdb_config_init(net, nlh, &cfg, extack);
+ if (err)
return err;
- br = netdev_priv(dev);
-
- if (entry->ifindex != br->dev->ifindex) {
- pdev = __dev_get_by_index(net, entry->ifindex);
- if (!pdev)
- return -ENODEV;
-
- p = br_port_get_rtnl(pdev);
- if (!p) {
- NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
- return -EINVAL;
- }
- if (p->br != br) {
- NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
- return -EINVAL;
- }
- vg = nbp_vlan_group(p);
- } else {
- vg = br_vlan_group(br);
- }
+ if (cfg.p)
+ vg = nbp_vlan_group(cfg.p);
+ else
+ vg = br_vlan_group(cfg.br);
/* If vlan filtering is enabled and VLAN is not specified
* delete mdb entry on all vlans configured on the port.
*/
- if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
+ if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
- entry->vid = v->vid;
- err = __br_mdb_del(br, entry, mdb_attrs);
+ cfg.entry->vid = v->vid;
+ cfg.group.vid = v->vid;
+ err = __br_mdb_del(&cfg);
}
} else {
- err = __br_mdb_del(br, entry, mdb_attrs);
+ err = __br_mdb_del(&cfg);
}
return err;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 5e988f0ed2c0..db4c3900ae95 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1273,7 +1273,7 @@ br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_i
struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port *port,
- struct br_ip *group,
+ const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags,
const unsigned char *src,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4c4fda930068..3997e16c15fc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -92,6 +92,13 @@ struct bridge_mcast_stats {
struct br_mcast_stats mstats;
struct u64_stats_sync syncp;
};
+
+struct br_mdb_config {
+ struct net_bridge *br;
+ struct net_bridge_port *p;
+ struct br_mdb_entry *entry;
+ struct br_ip group;
+};
#endif
/* net_bridge_mcast_port must be always defined due to forwarding stubs */
@@ -934,7 +941,8 @@ br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
struct net_bridge_mdb_entry *
br_multicast_new_group(struct net_bridge *br, struct br_ip *group);
struct net_bridge_port_group *
-br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
+br_multicast_new_port_group(struct net_bridge_port *port,
+ const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
u8 filter_mode, u8 rt_protocol);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 27dcdcc0b808..7343fd487dbe 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -446,7 +446,6 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
struct hlist_head *rcv_list;
struct can_dev_rcv_lists *dev_rcv_lists;
struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
- int err = 0;
/* insert new receiver (dev,canid,mask) -> (func,data) */
@@ -481,7 +480,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
rcv_lists_stats->rcv_entries);
spin_unlock_bh(&net->can.rcvlists_lock);
- return err;
+ return 0;
}
EXPORT_SYMBOL(can_rx_register);
@@ -677,7 +676,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
static int can_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_can_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
dev->type, skb->len);
@@ -692,7 +691,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canfd_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
dev->type, skb->len);
@@ -707,7 +706,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
static int canxl_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canxl_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n",
dev->type, skb->len);
diff --git a/net/can/raw.c b/net/can/raw.c
index 3eb7d3e2b541..81071cdb0301 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -857,6 +857,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb->dev = dev;
skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, sockc.tsflags);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 9d2288c0736e..bb378c33f542 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -310,7 +310,6 @@ bpf_sk_storage_ptr(void *owner)
return &sk->sk_bpf_storage;
}
-BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops sk_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -321,7 +320,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &sk_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
diff --git a/net/core/dev.c b/net/core/dev.c
index 7627c475d991..b76fb37b381e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10517,6 +10517,22 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+/**
+ * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default
+ * @dev: netdev to enable the IRQ coalescing on
+ *
+ * Sets a conservative default for SW IRQ coalescing. Users can use
+ * sysfs attributes to override the default values.
+ */
+void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
+{
+ WARN_ON(dev->reg_state == NETREG_REGISTERED);
+
+ dev->gro_flush_timeout = 20000;
+ dev->napi_defer_hard_irqs = 1;
+}
+EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
+
void netdev_freemem(struct net_device *dev)
{
char *addr = (char *)dev - dev->padded;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d93bc95cd7cb..6004bd0ccee4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -41,7 +41,7 @@ struct devlink_dev_stats {
struct devlink {
u32 index;
- struct list_head port_list;
+ struct xarray ports;
struct list_head rate_list;
struct list_head sb_list;
struct list_head dpipe_table_list;
@@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
+ (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
[DEVLINK_PORT_FN_ATTR_STATE] =
NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
DEVLINK_PORT_FN_STATE_ACTIVE),
+ [DEVLINK_PORT_FN_ATTR_CAPS] =
+ NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
@@ -382,19 +387,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
unsigned int port_index)
{
- struct devlink_port *devlink_port;
-
- list_for_each_entry(devlink_port, &devlink->port_list, list) {
- if (devlink_port->index == port_index)
- return devlink_port;
- }
- return NULL;
-}
-
-static bool devlink_port_index_exists(struct devlink *devlink,
- unsigned int port_index)
-{
- return devlink_port_get_by_index(devlink, port_index);
+ return xa_load(&devlink->ports, port_index);
}
static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
@@ -692,6 +685,87 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
return 0;
}
+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
+ u32 cap, bool is_enable)
+{
+ caps->selector |= cap;
+ if (is_enable)
+ caps->value |= cap;
+}
+
+static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_roce_get)
+ return 0;
+
+ err = ops->port_fn_roce_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_migratable_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ struct nla_bitfield32 caps = {};
+ int err;
+
+ err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ if (!caps.selector)
+ return 0;
+ err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+ caps.selector);
+ if (err)
+ return err;
+
+ *msg_updated = true;
+ return 0;
+}
+
static int
devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
struct genl_info *info,
@@ -1276,6 +1350,51 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
}
static int
+devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_migratable_set(devlink_port, enable, extack);
+}
+
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_roce_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 caps;
+ u32 caps_value;
+ int err;
+
+ caps = nla_get_bitfield32(attr);
+ caps_value = caps.value & caps.selector;
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
+ err = devlink_port_fn_roce_set(devlink_port,
+ caps_value & DEVLINK_PORT_FN_CAP_ROCE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ err = devlink_port_fn_mig_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_MIGRATABLE,
+ extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
@@ -1293,6 +1412,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
&msg_updated);
if (err)
goto out;
+ err = devlink_port_fn_caps_fill(ops, port, msg, extack,
+ &msg_updated);
+ if (err)
+ goto out;
err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
@@ -1565,14 +1688,14 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
{
struct devlink *devlink;
struct devlink_port *devlink_port;
+ unsigned long index, port_index;
int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err;
devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
devl_lock(devlink);
- list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
if (idx < start) {
idx++;
continue;
@@ -1644,11 +1767,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
}
}
- if (!ops->port_function_hw_addr_set) {
- NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
- return -EOPNOTSUPP;
- }
-
return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
extack);
}
@@ -1662,12 +1780,52 @@ static int devlink_port_fn_state_set(struct devlink_port *port,
state = nla_get_u8(attr);
ops = port->devlink->ops;
- if (!ops->port_fn_state_set) {
- NL_SET_ERR_MSG_MOD(extack,
- "Function does not support state setting");
+ return ops->port_fn_state_set(port, state, extack);
+}
+
+static int devlink_port_function_validate(struct devlink_port *devlink_port,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+ struct nlattr *attr;
+
+ if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
+ !ops->port_function_hw_addr_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Port doesn't support function attributes");
return -EOPNOTSUPP;
}
- return ops->port_fn_state_set(port, state, extack);
+ if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Function does not support state setting");
+ return -EOPNOTSUPP;
+ }
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ struct nla_bitfield32 caps;
+
+ caps = nla_get_bitfield32(attr);
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
+ !ops->port_fn_roce_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support RoCE function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ if (!ops->port_fn_migratable_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support migratable function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "migratable function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+ return 0;
}
static int devlink_port_function_set(struct devlink_port *port,
@@ -1684,12 +1842,24 @@ static int devlink_port_function_set(struct devlink_port *port,
return err;
}
+ err = devlink_port_function_validate(port, tb, extack);
+ if (err)
+ return err;
+
attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
if (attr) {
err = devlink_port_function_hw_addr_set(port, attr, extack);
if (err)
return err;
}
+
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ err = devlink_port_fn_caps_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
/* Keep this as the last function attribute set, so that when
* multiple port function attributes are set along with state,
* Those can be applied first before activating the state.
@@ -2886,10 +3056,11 @@ static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
{
struct devlink_port *devlink_port;
u16 pool_count = devlink_sb_pool_count(devlink_sb);
+ unsigned long port_index;
u16 pool_index;
int err;
- list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
for (pool_index = 0; pool_index < pool_count; pool_index++) {
if (*p_idx < start) {
(*p_idx)++;
@@ -3107,10 +3278,11 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
u32 portid, u32 seq)
{
struct devlink_port *devlink_port;
+ unsigned long port_index;
u16 tc_index;
int err;
- list_for_each_entry(devlink_port, &devlink->port_list, list) {
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
for (tc_index = 0;
tc_index < devlink_sb->ingress_tc_count; tc_index++) {
if (*p_idx < start) {
@@ -4269,9 +4441,10 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
DEVLINK_ATTR_PAD))
goto nla_put_failure;
- if (resource->size != resource->size_new)
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
- resource->size_new, DEVLINK_ATTR_PAD);
+ if (resource->size != resource->size_new &&
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
if (devlink_resource_occ_put(resource, skb))
goto nla_put_failure;
if (devlink_resource_size_params_put(resource, skb))
@@ -6207,6 +6380,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
{
struct devlink_region *region;
struct devlink_port *port;
+ unsigned long port_index;
int err = 0;
devl_lock(devlink);
@@ -6225,7 +6399,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
(*idx)++;
}
- list_for_each_entry(port, &devlink->port_list, list) {
+ xa_for_each(&devlink->ports, port_index, port) {
err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, idx,
start);
if (err)
@@ -6431,7 +6605,6 @@ unlock:
}
static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
- struct devlink *devlink,
u8 *chunk, u32 chunk_size,
u64 addr)
{
@@ -6461,39 +6634,37 @@ nla_put_failure:
#define DEVLINK_REGION_READ_CHUNK_SIZE 256
-static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
- struct devlink *devlink,
- struct devlink_region *region,
- struct nlattr **attrs,
- u64 start_offset,
- u64 end_offset,
- u64 *new_offset)
+typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack *extack);
+
+static int
+devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
+ void *cb_priv, u64 start_offset, u64 end_offset,
+ u64 *new_offset, struct netlink_ext_ack *extack)
{
- struct devlink_snapshot *snapshot;
u64 curr_offset = start_offset;
- u32 snapshot_id;
int err = 0;
+ u8 *data;
- *new_offset = start_offset;
+ /* Allocate and re-use a single buffer */
+ data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
- snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
- snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot)
- return -EINVAL;
+ *new_offset = start_offset;
while (curr_offset < end_offset) {
u32 data_size;
- u8 *data;
- if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE)
- data_size = end_offset - curr_offset;
- else
- data_size = DEVLINK_REGION_READ_CHUNK_SIZE;
+ data_size = min_t(u32, end_offset - curr_offset,
+ DEVLINK_REGION_READ_CHUNK_SIZE);
- data = &snapshot->data[curr_offset];
- err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink,
- data, data_size,
- curr_offset);
+ err = cb(cb_priv, data, data_size, curr_offset, extack);
+ if (err)
+ break;
+
+ err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
if (err)
break;
@@ -6501,21 +6672,57 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
}
*new_offset = curr_offset;
+ kfree(data);
+
return err;
}
+static int
+devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack __always_unused *extack)
+{
+ struct devlink_snapshot *snapshot = cb_priv;
+
+ memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
+
+ return 0;
+}
+
+static int
+devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->port_ops->read(region->port, region->port_ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
+static int
+devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->ops->read(region->devlink, region->ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
u64 ret_offset, start_offset, end_offset = U64_MAX;
struct nlattr **attrs = info->attrs;
struct devlink_port *port = NULL;
+ devlink_chunk_fill_t *region_cb;
struct devlink_region *region;
- struct nlattr *chunks_attr;
const char *region_name;
struct devlink *devlink;
unsigned int index;
+ void *region_cb_priv;
void *hdr;
int err;
@@ -6527,8 +6734,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
devl_lock(devlink);
- if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
- !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
+ if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
+ NL_SET_ERR_MSG(cb->extack, "No region name provided");
err = -EINVAL;
goto out_unlock;
}
@@ -6543,7 +6750,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
}
}
- region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
+ region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
+ region_name = nla_data(region_attr);
if (port)
region = devlink_port_region_get_by_name(port, region_name);
@@ -6551,10 +6759,51 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
region = devlink_region_get_by_name(devlink, region_name);
if (!region) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
err = -EINVAL;
goto out_unlock;
}
+ snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+ if (!snapshot_attr) {
+ if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!region->ops->read) {
+ NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
+ err = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ if (port)
+ region_cb = &devlink_region_port_direct_fill;
+ else
+ region_cb = &devlink_region_direct_fill;
+ region_cb_priv = region;
+ } else {
+ struct devlink_snapshot *snapshot;
+ u32 snapshot_id;
+
+ if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ snapshot_id = nla_get_u32(snapshot_attr);
+ snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+ if (!snapshot) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ region_cb = &devlink_region_snapshot_fill;
+ region_cb_priv = snapshot;
+ }
+
if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
if (!start_offset)
@@ -6603,10 +6852,9 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto nla_put_failure;
}
- err = devlink_nl_region_read_snapshot_fill(skb, devlink,
- region, attrs,
- start_offset,
- end_offset, &ret_offset);
+ err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
+ start_offset, end_offset, &ret_offset,
+ cb->extack);
if (err && err != -EMSGSIZE)
goto nla_put_failure;
@@ -6633,14 +6881,6 @@ out_unlock:
return err;
}
-int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
-}
-EXPORT_SYMBOL_GPL(devlink_info_driver_name_put);
-
int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
{
if (!req->msg)
@@ -6749,11 +6989,25 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req,
}
EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
+static int devlink_nl_driver_info_get(struct device_driver *drv,
+ struct devlink_info_req *req)
+{
+ if (!drv)
+ return 0;
+
+ if (drv->name[0])
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
+ drv->name);
+
+ return 0;
+}
+
static int
devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags, struct netlink_ext_ack *extack)
{
+ struct device *dev = devlink_to_dev(devlink);
struct devlink_info_req req = {};
void *hdr;
int err;
@@ -6767,7 +7021,13 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
goto err_cancel_msg;
req.msg = msg;
- err = devlink->ops->info_get(devlink, &req, extack);
+ if (devlink->ops->info_get) {
+ err = devlink->ops->info_get(devlink, &req, extack);
+ if (err)
+ goto err_cancel_msg;
+ }
+
+ err = devlink_nl_driver_info_get(dev->driver, &req);
if (err)
goto err_cancel_msg;
@@ -6786,9 +7046,6 @@ static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
struct sk_buff *msg;
int err;
- if (!devlink->ops->info_get)
- return -EOPNOTSUPP;
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -6814,7 +7071,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
int err = 0;
devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start || !devlink->ops->info_get)
+ if (idx < start)
goto inc;
devl_lock(devlink);
@@ -7846,8 +8103,6 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
return -ECANCELED;
}
- reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
-
if (reporter->auto_dump) {
mutex_lock(&reporter->dump_lock);
/* store current dump of current error, for later analysis */
@@ -7976,10 +8231,10 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
struct devlink_health_reporter *reporter;
+ unsigned long index, port_index;
struct devlink_port *port;
struct devlink *devlink;
int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err;
@@ -8008,7 +8263,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
devl_lock(devlink);
- list_for_each_entry(port, &devlink->port_list, list) {
+ xa_for_each(&devlink->ports, port_index, port) {
mutex_lock(&port->reporters_lock);
list_for_each_entry(reporter, &port->reporter_list, list) {
if (idx < start) {
@@ -9253,6 +9508,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 },
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -9726,9 +9982,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
devlink->dev = dev;
devlink->ops = ops;
+ xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
write_pnet(&devlink->_net, net);
- INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->linecard_list);
INIT_LIST_HEAD(&devlink->sb_list);
@@ -9780,12 +10036,13 @@ static void devlink_notify_register(struct devlink *devlink)
struct devlink_linecard *linecard;
struct devlink_rate *rate_node;
struct devlink_region *region;
+ unsigned long port_index;
devlink_notify(devlink, DEVLINK_CMD_NEW);
list_for_each_entry(linecard, &devlink->linecard_list, list)
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- list_for_each_entry(devlink_port, &devlink->port_list, list)
+ xa_for_each(&devlink->ports, port_index, devlink_port)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
@@ -9819,6 +10076,7 @@ static void devlink_notify_unregister(struct devlink *devlink)
struct devlink_port *devlink_port;
struct devlink_rate *rate_node;
struct devlink_region *region;
+ unsigned long port_index;
list_for_each_entry_reverse(param_item, &devlink->param_list, list)
devlink_param_notify(devlink, 0, param_item,
@@ -9841,7 +10099,7 @@ static void devlink_notify_unregister(struct devlink *devlink)
devlink_trap_policer_notify(devlink, policer_item,
DEVLINK_CMD_TRAP_POLICER_DEL);
- list_for_each_entry_reverse(devlink_port, &devlink->port_list, list)
+ xa_for_each(&devlink->ports, port_index, devlink_port)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
devlink_notify(devlink, DEVLINK_CMD_DEL);
}
@@ -9905,12 +10163,13 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->sb_list));
WARN_ON(!list_empty(&devlink->rate_list));
WARN_ON(!list_empty(&devlink->linecard_list));
- WARN_ON(!list_empty(&devlink->port_list));
+ WARN_ON(!xa_empty(&devlink->ports));
xa_destroy(&devlink->snapshot_ids);
+ xa_destroy(&devlink->ports);
- unregister_netdevice_notifier_net(devlink_net(devlink),
- &devlink->netdevice_nb);
+ WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink),
+ &devlink->netdevice_nb));
xa_erase(&devlinks, devlink->index);
@@ -10006,10 +10265,9 @@ int devl_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
unsigned int port_index)
{
- devl_assert_locked(devlink);
+ int err;
- if (devlink_port_index_exists(devlink, port_index))
- return -EEXIST;
+ devl_assert_locked(devlink);
ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
@@ -10019,7 +10277,11 @@ int devl_port_register(struct devlink *devlink,
spin_lock_init(&devlink_port->type_lock);
INIT_LIST_HEAD(&devlink_port->reporter_list);
mutex_init(&devlink_port->reporters_lock);
- list_add_tail(&devlink_port->list, &devlink->port_list);
+ err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
+ if (err) {
+ mutex_destroy(&devlink_port->reporters_lock);
+ return err;
+ }
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
@@ -10068,7 +10330,7 @@ void devl_port_unregister(struct devlink_port *devlink_port)
devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- list_del(&devlink_port->list);
+ xa_erase(&devlink_port->devlink->ports, devlink_port->index);
WARN_ON(!list_empty(&devlink_port->reporter_list));
mutex_destroy(&devlink_port->reporters_lock);
devlink_port->registered = false;
diff --git a/net/core/dst.c b/net/core/dst.c
index bc9c9be4e080..bb14a0392388 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -316,6 +316,8 @@ void metadata_dst_free(struct metadata_dst *md_dst)
if (md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
+ if (md_dst->type == METADATA_XFRM)
+ dst_release(md_dst->u.xfrm_info.dst_orig);
kfree(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free);
@@ -340,16 +342,18 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
{
-#ifdef CONFIG_DST_CACHE
int cpu;
for_each_possible_cpu(cpu) {
struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+#ifdef CONFIG_DST_CACHE
if (one_md_dst->type == METADATA_IP_TUNNEL)
dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
- }
#endif
+ if (one_md_dst->type == METADATA_XFRM)
+ dst_release(one_md_dst->u.xfrm_info.dst_orig);
+ }
free_percpu(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/filter.c b/net/core/filter.c
index da4f697e4fa4..929358677183 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -80,6 +80,7 @@
#include <net/tls.h>
#include <net/xdp.h>
#include <net/mptcp.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -2124,12 +2125,13 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
{
unsigned int mlen = skb_network_offset(skb);
+ if (unlikely(skb->len <= mlen)) {
+ kfree_skb(skb);
+ return -ERANGE;
+ }
+
if (mlen) {
__skb_pull(skb, mlen);
- if (unlikely(!skb->len)) {
- kfree_skb(skb);
- return -ERANGE;
- }
/* At ingress, the mac header has already been pulled once.
* At egress, skb_pospull_rcsum has to be done in case that
@@ -2149,7 +2151,7 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
u32 flags)
{
/* Verify that a link layer header is carried */
- if (unlikely(skb->mac_header >= skb->network_header)) {
+ if (unlikely(skb->mac_header >= skb->network_header || skb->len == 0)) {
kfree_skb(skb);
return -ERANGE;
}
@@ -4108,7 +4110,10 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
-/* XDP_REDIRECT works by a three-step process, implemented in the functions
+/**
+ * DOC: xdp redirect
+ *
+ * XDP_REDIRECT works by a three-step process, implemented in the functions
* below:
*
* 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
@@ -4123,7 +4128,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
* 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
* which will flush all the different bulk queues, thus completing the
* redirect.
- *
+ */
+/*
* Pointers to the map entries will be kept around for this whole sequence of
* steps, protected by RCU. However, there is no top-level rcu_read_lock() in
* the core code; instead, the RCU protection relies on everything happening
@@ -4414,10 +4420,10 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
+BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u64, key,
u64, flags)
{
- return map->ops->map_redirect(map, ifindex, flags);
+ return map->ops->map_redirect(map, key, flags);
}
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
@@ -5625,6 +5631,15 @@ static const struct bpf_func_proto bpf_bind_proto = {
};
#ifdef CONFIG_XFRM
+
+#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+struct metadata_dst __percpu *xfrm_bpf_md_dst;
+EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst);
+
+#endif
+
BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
struct bpf_xfrm_state *, to, u32, size, u64, flags)
{
@@ -7987,6 +8002,19 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
default:
return bpf_sk_base_func_proto(func_id);
}
+
+#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
+ /* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The
+ * kfuncs are defined in two different modules, and we want to be able
+ * to use them interchangably with the same BTF type ID. Because modules
+ * can't de-duplicate BTF IDs between each other, we need the type to be
+ * referenced in the vmlinux BTF or the verifier will get confused about
+ * the different types. So we add this dummy type reference which will
+ * be included in vmlinux BTF, allowing both modules to refer to the
+ * same type ID.
+ */
+ BTF_TYPE_EMIT(struct nf_conn___init);
+#endif
}
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
@@ -8651,28 +8679,25 @@ static bool tc_cls_act_is_valid_access(int off, int size,
DEFINE_MUTEX(nf_conn_btf_access_lock);
EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
-int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, int off, int size,
- enum bpf_access_type atype, u32 *next_btf_id,
- enum bpf_type_flag *flag);
+int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag);
EXPORT_SYMBOL_GPL(nfct_btf_struct_access);
static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
int ret = -EACCES;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_lock(&nf_conn_btf_access_lock);
if (nfct_btf_struct_access)
- ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
+ ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_unlock(&nf_conn_btf_access_lock);
return ret;
@@ -8738,21 +8763,18 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
static int xdp_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
int ret = -EACCES;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_lock(&nf_conn_btf_access_lock);
if (nfct_btf_struct_access)
- ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
+ ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
mutex_unlock(&nf_conn_btf_access_lock);
return ret;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3e81798ed3e0..25fb0bbc310f 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -296,7 +296,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
key->ct_zone = ct->zone.id;
#endif
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- key->ct_mark = ct->mark;
+ key->ct_mark = READ_ONCE(ct->mark);
#endif
cl = nf_ct_labels_find(ct);
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 6fac2f0ef074..711cd3b4347a 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -48,9 +48,11 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "RPL";
case LWTUNNEL_ENCAP_IOAM6:
return "IOAM6";
+ case LWTUNNEL_ENCAP_XFRM:
+ /* module autoload not supported for encap type */
+ return NULL;
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
- case LWTUNNEL_ENCAP_XFRM:
case LWTUNNEL_ENCAP_NONE:
case __LWTUNNEL_ENCAP_MAX:
/* should not have got here */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a77a85e357e0..952a54763358 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -307,7 +307,31 @@ static int neigh_del_timer(struct neighbour *n)
return 0;
}
-static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
+static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
+ int family)
+{
+ switch (family) {
+ case AF_INET:
+ return __in_dev_arp_parms_get_rcu(dev);
+ case AF_INET6:
+ return __in6_dev_nd_parms_get_rcu(dev);
+ }
+ return NULL;
+}
+
+static void neigh_parms_qlen_dec(struct net_device *dev, int family)
+{
+ struct neigh_parms *p;
+
+ rcu_read_lock();
+ p = neigh_get_dev_parms_rcu(dev, family);
+ if (p)
+ p->qlen--;
+ rcu_read_unlock();
+}
+
+static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
+ int family)
{
struct sk_buff_head tmp;
unsigned long flags;
@@ -321,13 +345,7 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
struct net_device *dev = skb->dev;
if (net == NULL || net_eq(dev_net(dev), net)) {
- struct in_device *in_dev;
-
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
- if (in_dev)
- in_dev->arp_parms->qlen--;
- rcu_read_unlock();
+ neigh_parms_qlen_dec(dev, family);
__skb_unlink(skb, list);
__skb_queue_tail(&tmp, skb);
}
@@ -409,7 +427,8 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
write_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev, skip_perm);
pneigh_ifdown_and_unlock(tbl, dev);
- pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
+ pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
+ tbl->family);
if (skb_queue_empty_lockless(&tbl->proxy_queue))
del_timer_sync(&tbl->proxy_timer);
return 0;
@@ -1621,13 +1640,8 @@ static void neigh_proxy_process(struct timer_list *t)
if (tdif <= 0) {
struct net_device *dev = skb->dev;
- struct in_device *in_dev;
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
- if (in_dev)
- in_dev->arp_parms->qlen--;
- rcu_read_unlock();
+ neigh_parms_qlen_dec(dev, tbl->family);
__skb_unlink(skb, &tbl->proxy_queue);
if (tbl->proxy_redo && netif_running(dev)) {
@@ -1821,7 +1835,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
cancel_delayed_work_sync(&tbl->managed_work);
cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue, NULL);
+ pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
pr_crit("neighbour leakage\n");
@@ -3539,18 +3553,6 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write,
return ret;
}
-static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
- int family)
-{
- switch (family) {
- case AF_INET:
- return __in_dev_arp_parms_get_rcu(dev);
- case AF_INET6:
- return __in6_dev_nd_parms_get_rcu(dev);
- }
- return NULL;
-}
-
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
int index)
{
diff --git a/net/core/of_net.c b/net/core/of_net.c
index f1a9bf7578e7..55d3fe229269 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -57,7 +57,7 @@ static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
return -ENODEV;
}
-static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
+int of_get_mac_address_nvmem(struct device_node *np, u8 *addr)
{
struct platform_device *pdev = of_find_device_by_node(np);
struct nvmem_cell *cell;
@@ -94,6 +94,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
return 0;
}
+EXPORT_SYMBOL(of_get_mac_address_nvmem);
/**
* of_get_mac_address()
@@ -140,7 +141,7 @@ int of_get_mac_address(struct device_node *np, u8 *addr)
if (!ret)
return 0;
- return of_get_mac_addr_nvmem(np, addr);
+ return of_get_mac_address_nvmem(np, addr);
}
EXPORT_SYMBOL(of_get_mac_address);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4bf95e36ed16..3cbba7099c0f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -270,12 +270,10 @@ static struct sk_buff *napi_skb_cache_get(void)
return skb;
}
-/* Caller must provide SKB that is memset cleared */
-static void __build_skb_around(struct sk_buff *skb, void *data,
- unsigned int frag_size)
+static inline void __finalize_skb_around(struct sk_buff *skb, void *data,
+ unsigned int size)
{
struct skb_shared_info *shinfo;
- unsigned int size = frag_size ? : ksize(data);
size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -297,15 +295,71 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
skb_set_kcov_handle(skb, kcov_common_handle());
}
+static inline void *__slab_build_skb(struct sk_buff *skb, void *data,
+ unsigned int *size)
+{
+ void *resized;
+
+ /* Must find the allocation size (and grow it to match). */
+ *size = ksize(data);
+ /* krealloc() will immediately return "data" when
+ * "ksize(data)" is requested: it is the existing upper
+ * bounds. As a result, GFP_ATOMIC will be ignored. Note
+ * that this "new" pointer needs to be passed back to the
+ * caller for use so the __alloc_size hinting will be
+ * tracked correctly.
+ */
+ resized = krealloc(data, *size, GFP_ATOMIC);
+ WARN_ON_ONCE(resized != data);
+ return resized;
+}
+
+/* build_skb() variant which can operate on slab buffers.
+ * Note that this should be used sparingly as slab buffers
+ * cannot be combined efficiently by GRO!
+ */
+struct sk_buff *slab_build_skb(void *data)
+{
+ struct sk_buff *skb;
+ unsigned int size;
+
+ skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ data = __slab_build_skb(skb, data, &size);
+ __finalize_skb_around(skb, data, size);
+
+ return skb;
+}
+EXPORT_SYMBOL(slab_build_skb);
+
+/* Caller must provide SKB that is memset cleared */
+static void __build_skb_around(struct sk_buff *skb, void *data,
+ unsigned int frag_size)
+{
+ unsigned int size = frag_size;
+
+ /* frag_size == 0 is considered deprecated now. Callers
+ * using slab buffer should use slab_build_skb() instead.
+ */
+ if (WARN_ONCE(size == 0, "Use slab_build_skb() instead"))
+ data = __slab_build_skb(skb, data, &size);
+
+ __finalize_skb_around(skb, data, size);
+}
+
/**
* __build_skb - build a network buffer
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data (must not be 0)
*
* Allocate a new &sk_buff. Caller provides space holding head and
- * skb_shared_info. @data must have been allocated by kmalloc() only if
- * @frag_size is 0, otherwise data should come from the page allocator
- * or vmalloc()
+ * skb_shared_info. @data must have been allocated from the page
+ * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc()
+ * allocation is deprecated, and callers should use slab_build_skb()
+ * instead.)
* The return is the new skb buffer.
* On a failure the return is %NULL, and @data is not freed.
* Notes :
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index e6b9ced3eda8..53d0251788aa 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -886,13 +886,16 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
ret = sk_psock_map_verd(ret, msg->sk_redir);
psock->apply_bytes = msg->apply_bytes;
if (ret == __SK_REDIRECT) {
- if (psock->sk_redir)
+ if (psock->sk_redir) {
sock_put(psock->sk_redir);
- psock->sk_redir = msg->sk_redir;
- if (!psock->sk_redir) {
+ psock->sk_redir = NULL;
+ }
+ if (!msg->sk_redir) {
ret = __SK_DROP;
goto out;
}
+ psock->redir_ingress = sk_msg_to_ingress(msg);
+ psock->sk_redir = msg->sk_redir;
sock_hold(psock->sk_redir);
}
out:
diff --git a/net/core/sock.c b/net/core/sock.c
index 4571914a4aa8..b0ab841e0aed 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -901,13 +901,20 @@ int sock_set_timestamping(struct sock *sk, int optname,
if (val & ~SOF_TIMESTAMPING_MASK)
return -EINVAL;
+ if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
+ !(val & SOF_TIMESTAMPING_OPT_ID))
+ return -EINVAL;
+
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk_is_tcp(sk)) {
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
return -EINVAL;
- atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
+ if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
+ else
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
} else {
atomic_set(&sk->sk_tskey, 0);
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 81beb16ab1eb..22fa2c5bc6ec 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -349,11 +349,13 @@ static void sock_map_free(struct bpf_map *map)
sk = xchg(psk, NULL);
if (sk) {
+ sock_hold(sk);
lock_sock(sk);
rcu_read_lock();
sock_map_unref(sk, psk);
rcu_read_unlock();
release_sock(sk);
+ sock_put(sk);
}
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 713b7b8dad7e..b780827f5e0a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -45,11 +45,10 @@ static unsigned int dccp_v4_pernet_id __read_mostly;
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
- struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
- __be32 daddr, nexthop, prev_sk_rcv_saddr;
struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
__be16 orig_sport, orig_dport;
+ __be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
@@ -91,26 +90,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
daddr = fl4->daddr;
if (inet->inet_saddr == 0) {
- if (inet_csk(sk)->icsk_bind2_hash) {
- prev_addr_hashbucket =
- inet_bhashfn_portaddr(&dccp_hashinfo, sk,
- sock_net(sk),
- inet->inet_num);
- prev_sk_rcv_saddr = sk->sk_rcv_saddr;
- }
- inet->inet_saddr = fl4->saddr;
- }
-
- sk_rcv_saddr_set(sk, inet->inet_saddr);
-
- if (prev_addr_hashbucket) {
- err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET);
if (err) {
- inet->inet_saddr = 0;
- sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
ip_rt_put(rt);
return err;
}
+ } else {
+ sk_rcv_saddr_set(sk, inet->inet_saddr);
}
inet->inet_dport = usin->sin_port;
@@ -157,6 +143,7 @@ failure:
* This unhashes the socket and releases the local port, if necessary.
*/
dccp_set_state(sk, DCCP_CLOSED);
+ inet_bhash2_reset_saddr(sk);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ae62b1591dea..4260fe466993 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -934,26 +934,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
}
if (saddr == NULL) {
- struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
- struct in6_addr prev_v6_rcv_saddr;
-
- if (icsk->icsk_bind2_hash) {
- prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo,
- sk, sock_net(sk),
- inet->inet_num);
- prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- }
-
saddr = &fl6.saddr;
- sk->sk_v6_rcv_saddr = *saddr;
-
- if (prev_addr_hashbucket) {
- err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
- if (err) {
- sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
- goto failure;
- }
- }
+
+ err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
+ if (err)
+ goto failure;
}
/* set the source address */
@@ -985,6 +970,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
late_failure:
dccp_set_state(sk, DCCP_CLOSED);
+ inet_bhash2_reset_saddr(sk);
__sk_dst_reset(sk);
failure:
inet->inet_dport = 0;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9494b0d224f9..a06b5641287a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -285,8 +285,7 @@ int dccp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
- inet_reset_saddr(sk);
+ inet_bhash2_reset_saddr(sk);
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 3eef72ce99a4..8e698bea99a3 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -18,6 +18,12 @@ if NET_DSA
# Drivers must select the appropriate tagging format(s)
+config NET_DSA_TAG_NONE
+ tristate "No-op tag driver"
+ help
+ Say Y or M if you want to enable support for switches which don't tag
+ frames over the CPU port.
+
config NET_DSA_TAG_AR9331
tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
help
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index bf57ef3bce2a..cc7e93a562fe 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -2,13 +2,14 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
dsa_core-y += \
+ devlink.o \
dsa.o \
- dsa2.o \
master.o \
netlink.o \
port.o \
slave.o \
switch.o \
+ tag.o \
tag_8021q.o
# tagging formats
@@ -20,6 +21,7 @@ obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o
obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+obj-$(CONFIG_NET_DSA_TAG_NONE) += tag_none.o
obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o
obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c
new file mode 100644
index 000000000000..431bf52290a1
--- /dev/null
+++ b/net/dsa/devlink.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DSA devlink handling
+ */
+
+#include <net/dsa.h>
+#include <net/devlink.h>
+
+#include "devlink.h"
+
+static int dsa_devlink_info_get(struct devlink *dl,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (ds->ops->devlink_info_get)
+ return ds->ops->devlink_info_get(ds, req, extack);
+
+ return -EOPNOTSUPP;
+}
+
+static int dsa_devlink_sb_pool_get(struct devlink *dl,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_pool_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index,
+ pool_info);
+}
+
+static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_pool_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size,
+ threshold_type, extack);
+}
+
+static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_port_pool_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index,
+ pool_index, p_threshold);
+}
+
+static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_port_pool_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index,
+ pool_index, threshold, extack);
+}
+
+static int
+dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_tc_pool_bind_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index,
+ tc_index, pool_type,
+ p_pool_index, p_threshold);
+}
+
+static int
+dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_tc_pool_bind_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index,
+ tc_index, pool_type,
+ pool_index, threshold,
+ extack);
+}
+
+static int dsa_devlink_sb_occ_snapshot(struct devlink *dl,
+ unsigned int sb_index)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_occ_snapshot)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_snapshot(ds, sb_index);
+}
+
+static int dsa_devlink_sb_occ_max_clear(struct devlink *dl,
+ unsigned int sb_index)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_occ_max_clear)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_max_clear(ds, sb_index);
+}
+
+static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp,
+ unsigned int sb_index,
+ u16 pool_index, u32 *p_cur,
+ u32 *p_max)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_occ_port_pool_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index,
+ pool_index, p_cur, p_max);
+}
+
+static int
+dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_occ_tc_port_bind_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port,
+ sb_index, tc_index,
+ pool_type, p_cur,
+ p_max);
+}
+
+static const struct devlink_ops dsa_devlink_ops = {
+ .info_get = dsa_devlink_info_get,
+ .sb_pool_get = dsa_devlink_sb_pool_get,
+ .sb_pool_set = dsa_devlink_sb_pool_set,
+ .sb_port_pool_get = dsa_devlink_sb_port_pool_get,
+ .sb_port_pool_set = dsa_devlink_sb_port_pool_set,
+ .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot,
+ .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear,
+ .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get,
+};
+
+int dsa_devlink_param_get(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_param_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_param_get(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
+
+int dsa_devlink_param_set(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_param_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_param_set(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_set);
+
+int dsa_devlink_params_register(struct dsa_switch *ds,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ return devlink_params_register(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_register);
+
+void dsa_devlink_params_unregister(struct dsa_switch *ds,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ devlink_params_unregister(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_unregister);
+
+int dsa_devlink_resource_register(struct dsa_switch *ds,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ return devlink_resource_register(ds->devlink, resource_name,
+ resource_size, resource_id,
+ parent_resource_id,
+ size_params);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_register);
+
+void dsa_devlink_resources_unregister(struct dsa_switch *ds)
+{
+ devlink_resources_unregister(ds->devlink);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister);
+
+void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ return devlink_resource_occ_get_register(ds->devlink, resource_id,
+ occ_get, occ_get_priv);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register);
+
+void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
+ u64 resource_id)
+{
+ devlink_resource_occ_get_unregister(ds->devlink, resource_id);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
+
+struct devlink_region *
+dsa_devlink_region_create(struct dsa_switch *ds,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ return devlink_region_create(ds->devlink, ops, region_max_snapshots,
+ region_size);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_region_create);
+
+struct devlink_region *
+dsa_devlink_port_region_create(struct dsa_switch *ds,
+ int port,
+ const struct devlink_port_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+
+ return devlink_port_region_create(&dp->devlink_port, ops,
+ region_max_snapshots,
+ region_size);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_port_region_create);
+
+void dsa_devlink_region_destroy(struct devlink_region *region)
+{
+ devlink_region_destroy(region);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_region_destroy);
+
+int dsa_port_devlink_setup(struct dsa_port *dp)
+{
+ struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch_tree *dst = dp->ds->dst;
+ struct devlink_port_attrs attrs = {};
+ struct devlink *dl = dp->ds->devlink;
+ struct dsa_switch *ds = dp->ds;
+ const unsigned char *id;
+ unsigned char len;
+ int err;
+
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_init(dl, dlp);
+
+ if (ds->ops->port_setup) {
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
+ id = (const unsigned char *)&dst->index;
+ len = sizeof(dst->index);
+
+ attrs.phys.port_number = dp->index;
+ memcpy(attrs.switch_id.id, id, len);
+ attrs.switch_id.id_len = len;
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED;
+ break;
+ case DSA_PORT_TYPE_CPU:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU;
+ break;
+ case DSA_PORT_TYPE_DSA:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA;
+ break;
+ case DSA_PORT_TYPE_USER:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ break;
+ }
+
+ devlink_port_attrs_set(dlp, &attrs);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err) {
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+ return err;
+ }
+
+ return 0;
+}
+
+void dsa_port_devlink_teardown(struct dsa_port *dp)
+{
+ struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch *ds = dp->ds;
+
+ devlink_port_unregister(dlp);
+
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+
+ devlink_port_fini(dlp);
+}
+
+void dsa_switch_devlink_register(struct dsa_switch *ds)
+{
+ devlink_register(ds->devlink);
+}
+
+void dsa_switch_devlink_unregister(struct dsa_switch *ds)
+{
+ devlink_unregister(ds->devlink);
+}
+
+int dsa_switch_devlink_alloc(struct dsa_switch *ds)
+{
+ struct dsa_devlink_priv *dl_priv;
+ struct devlink *dl;
+
+ /* Add the switch to devlink before calling setup, so that setup can
+ * add dpipe tables
+ */
+ dl = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
+ if (!dl)
+ return -ENOMEM;
+
+ ds->devlink = dl;
+
+ dl_priv = devlink_priv(ds->devlink);
+ dl_priv->ds = ds;
+
+ return 0;
+}
+
+void dsa_switch_devlink_free(struct dsa_switch *ds)
+{
+ devlink_free(ds->devlink);
+ ds->devlink = NULL;
+}
diff --git a/net/dsa/devlink.h b/net/dsa/devlink.h
new file mode 100644
index 000000000000..4d9f4f23705b
--- /dev/null
+++ b/net/dsa/devlink.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_DEVLINK_H
+#define __DSA_DEVLINK_H
+
+struct dsa_port;
+struct dsa_switch;
+
+int dsa_port_devlink_setup(struct dsa_port *dp);
+void dsa_port_devlink_teardown(struct dsa_port *dp);
+void dsa_switch_devlink_register(struct dsa_switch *ds);
+void dsa_switch_devlink_unregister(struct dsa_switch *ds);
+int dsa_switch_devlink_alloc(struct dsa_switch *ds);
+void dsa_switch_devlink_free(struct dsa_switch *ds);
+
+#endif
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 4afd3edbd64d..e5f156940c67 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1,472 +1,1637 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * net/dsa/dsa.c - Hardware switch handling
+ * DSA topology and switch handling
+ *
* Copyright (c) 2008-2009 Marvell Semiconductor
* Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
*/
#include <linux/device.h>
+#include <linux/err.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/netdevice.h>
-#include <linux/sysfs.h>
-#include <linux/ptp_classify.h>
-#include <net/dst_metadata.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <net/sch_generic.h>
+
+#include "devlink.h"
+#include "dsa.h"
+#include "master.h"
+#include "netlink.h"
+#include "port.h"
+#include "slave.h"
+#include "switch.h"
+#include "tag.h"
+
+#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG
+
+static DEFINE_MUTEX(dsa2_mutex);
+LIST_HEAD(dsa_tree_list);
-#include "dsa_priv.h"
+static struct workqueue_struct *dsa_owq;
-static LIST_HEAD(dsa_tag_drivers_list);
-static DEFINE_MUTEX(dsa_tag_drivers_lock);
+/* Track the bridges with forwarding offload enabled */
+static unsigned long dsa_fwd_offloading_bridges;
-static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
- struct net_device *dev)
+bool dsa_schedule_work(struct work_struct *work)
{
- /* Just return the original SKB */
- return skb;
+ return queue_work(dsa_owq, work);
}
-static const struct dsa_device_ops none_ops = {
- .name = "none",
- .proto = DSA_TAG_PROTO_NONE,
- .xmit = dsa_slave_notag_xmit,
- .rcv = NULL,
-};
+void dsa_flush_workqueue(void)
+{
+ flush_workqueue(dsa_owq);
+}
+EXPORT_SYMBOL_GPL(dsa_flush_workqueue);
-DSA_TAG_DRIVER(none_ops);
+/**
+ * dsa_lag_map() - Map LAG structure to a linear LAG array
+ * @dst: Tree in which to record the mapping.
+ * @lag: LAG structure that is to be mapped to the tree's array.
+ *
+ * dsa_lag_id/dsa_lag_by_id can then be used to translate between the
+ * two spaces. The size of the mapping space is determined by the
+ * driver by setting ds->num_lag_ids. It is perfectly legal to leave
+ * it unset if it is not needed, in which case these functions become
+ * no-ops.
+ */
+void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag)
+{
+ unsigned int id;
+
+ for (id = 1; id <= dst->lags_len; id++) {
+ if (!dsa_lag_by_id(dst, id)) {
+ dst->lags[id - 1] = lag;
+ lag->id = id;
+ return;
+ }
+ }
-static void dsa_tag_driver_register(struct dsa_tag_driver *dsa_tag_driver,
- struct module *owner)
+ /* No IDs left, which is OK. Some drivers do not need it. The
+ * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
+ * returns an error for this device when joining the LAG. The
+ * driver can then return -EOPNOTSUPP back to DSA, which will
+ * fall back to a software LAG.
+ */
+}
+
+/**
+ * dsa_lag_unmap() - Remove a LAG ID mapping
+ * @dst: Tree in which the mapping is recorded.
+ * @lag: LAG structure that was mapped.
+ *
+ * As there may be multiple users of the mapping, it is only removed
+ * if there are no other references to it.
+ */
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag)
{
- dsa_tag_driver->owner = owner;
+ unsigned int id;
- mutex_lock(&dsa_tag_drivers_lock);
- list_add_tail(&dsa_tag_driver->list, &dsa_tag_drivers_list);
- mutex_unlock(&dsa_tag_drivers_lock);
+ dsa_lags_foreach_id(id, dst) {
+ if (dsa_lag_by_id(dst, id) == lag) {
+ dst->lags[id - 1] = NULL;
+ lag->id = 0;
+ break;
+ }
+ }
}
-void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
- unsigned int count, struct module *owner)
+struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
+ const struct net_device *lag_dev)
{
- unsigned int i;
+ struct dsa_port *dp;
- for (i = 0; i < count; i++)
- dsa_tag_driver_register(dsa_tag_driver_array[i], owner);
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_lag_dev_get(dp) == lag_dev)
+ return dp->lag;
+
+ return NULL;
}
-static void dsa_tag_driver_unregister(struct dsa_tag_driver *dsa_tag_driver)
+struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
+ const struct net_device *br)
{
- mutex_lock(&dsa_tag_drivers_lock);
- list_del(&dsa_tag_driver->list);
- mutex_unlock(&dsa_tag_drivers_lock);
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_bridge_dev_get(dp) == br)
+ return dp->bridge;
+
+ return NULL;
}
-EXPORT_SYMBOL_GPL(dsa_tag_drivers_register);
-void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
- unsigned int count)
+static int dsa_bridge_num_find(const struct net_device *bridge_dev)
{
- unsigned int i;
+ struct dsa_switch_tree *dst;
- for (i = 0; i < count; i++)
- dsa_tag_driver_unregister(dsa_tag_driver_array[i]);
+ list_for_each_entry(dst, &dsa_tree_list, list) {
+ struct dsa_bridge *bridge;
+
+ bridge = dsa_tree_bridge_find(dst, bridge_dev);
+ if (bridge)
+ return bridge->num;
+ }
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(dsa_tag_drivers_unregister);
-const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
+unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
{
- return ops->name;
-};
+ unsigned int bridge_num = dsa_bridge_num_find(bridge_dev);
+
+ /* Switches without FDB isolation support don't get unique
+ * bridge numbering
+ */
+ if (!max)
+ return 0;
+
+ if (!bridge_num) {
+ /* First port that requests FDB isolation or TX forwarding
+ * offload for this bridge
+ */
+ bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges,
+ DSA_MAX_NUM_OFFLOADING_BRIDGES,
+ 1);
+ if (bridge_num >= max)
+ return 0;
+
+ set_bit(bridge_num, &dsa_fwd_offloading_bridges);
+ }
+
+ return bridge_num;
+}
+
+void dsa_bridge_num_put(const struct net_device *bridge_dev,
+ unsigned int bridge_num)
+{
+ /* Since we refcount bridges, we know that when we call this function
+ * it is no longer in use, so we can just go ahead and remove it from
+ * the bit mask.
+ */
+ clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
+}
+
+struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
+{
+ struct dsa_switch_tree *dst;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dst, &dsa_tree_list, list) {
+ if (dst->index != tree_index)
+ continue;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->ds->index != sw_index)
+ continue;
+
+ return dp->ds;
+ }
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_find);
+
+static struct dsa_switch_tree *dsa_tree_find(int index)
+{
+ struct dsa_switch_tree *dst;
+
+ list_for_each_entry(dst, &dsa_tree_list, list)
+ if (dst->index == index)
+ return dst;
+
+ return NULL;
+}
+
+static struct dsa_switch_tree *dsa_tree_alloc(int index)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return NULL;
+
+ dst->index = index;
+
+ INIT_LIST_HEAD(&dst->rtable);
+
+ INIT_LIST_HEAD(&dst->ports);
+
+ INIT_LIST_HEAD(&dst->list);
+ list_add_tail(&dst->list, &dsa_tree_list);
+
+ kref_init(&dst->refcount);
-/* Function takes a reference on the module owning the tagger,
- * so dsa_tag_driver_put must be called afterwards.
+ return dst;
+}
+
+static void dsa_tree_free(struct dsa_switch_tree *dst)
+{
+ if (dst->tag_ops)
+ dsa_tag_driver_put(dst->tag_ops);
+ list_del(&dst->list);
+ kfree(dst);
+}
+
+static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst)
+{
+ if (dst)
+ kref_get(&dst->refcount);
+
+ return dst;
+}
+
+static struct dsa_switch_tree *dsa_tree_touch(int index)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = dsa_tree_find(index);
+ if (dst)
+ return dsa_tree_get(dst);
+ else
+ return dsa_tree_alloc(index);
+}
+
+static void dsa_tree_release(struct kref *ref)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = container_of(ref, struct dsa_switch_tree, refcount);
+
+ dsa_tree_free(dst);
+}
+
+static void dsa_tree_put(struct dsa_switch_tree *dst)
+{
+ if (dst)
+ kref_put(&dst->refcount, dsa_tree_release);
+}
+
+static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
+ struct device_node *dn)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->dn == dn)
+ return dp;
+
+ return NULL;
+}
+
+static struct dsa_link *dsa_link_touch(struct dsa_port *dp,
+ struct dsa_port *link_dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst;
+ struct dsa_link *dl;
+
+ dst = ds->dst;
+
+ list_for_each_entry(dl, &dst->rtable, list)
+ if (dl->dp == dp && dl->link_dp == link_dp)
+ return dl;
+
+ dl = kzalloc(sizeof(*dl), GFP_KERNEL);
+ if (!dl)
+ return NULL;
+
+ dl->dp = dp;
+ dl->link_dp = link_dp;
+
+ INIT_LIST_HEAD(&dl->list);
+ list_add_tail(&dl->list, &dst->rtable);
+
+ return dl;
+}
+
+static bool dsa_port_setup_routing_table(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct device_node *dn = dp->dn;
+ struct of_phandle_iterator it;
+ struct dsa_port *link_dp;
+ struct dsa_link *dl;
+ int err;
+
+ of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
+ link_dp = dsa_tree_find_port_by_node(dst, it.node);
+ if (!link_dp) {
+ of_node_put(it.node);
+ return false;
+ }
+
+ dl = dsa_link_touch(dp, link_dp);
+ if (!dl) {
+ of_node_put(it.node);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
+{
+ bool complete = true;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_dsa(dp)) {
+ complete = dsa_port_setup_routing_table(dp);
+ if (!complete)
+ break;
+ }
+ }
+
+ return complete;
+}
+
+static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_cpu(dp))
+ return dp;
+
+ return NULL;
+}
+
+struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst)
+{
+ struct device_node *ethernet;
+ struct net_device *master;
+ struct dsa_port *cpu_dp;
+
+ cpu_dp = dsa_tree_find_first_cpu(dst);
+ ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0);
+ master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
+
+ return master;
+}
+
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
*/
-const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name)
+static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
- const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT);
- struct dsa_tag_driver *dsa_tag_driver;
+ struct dsa_port *cpu_dp, *dp;
- request_module("%s%s", DSA_TAG_DRIVER_ALIAS, name);
+ cpu_dp = dsa_tree_find_first_cpu(dst);
+ if (!cpu_dp) {
+ pr_err("DSA: tree %d has no CPU port\n", dst->index);
+ return -EINVAL;
+ }
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->cpu_dp)
+ continue;
- mutex_lock(&dsa_tag_drivers_lock);
- list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
- const struct dsa_device_ops *tmp = dsa_tag_driver->ops;
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
+ }
+
+ return 0;
+}
- if (strcmp(name, tmp->name))
+/* Perform initial assignment of CPU ports to user ports and DSA links in the
+ * fabric, giving preference to CPU ports local to each switch. Default to
+ * using the first CPU port in the switch tree if the port does not have a CPU
+ * port local to this switch.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp, *dp;
+
+ list_for_each_entry(cpu_dp, &dst->ports, list) {
+ if (!dsa_port_is_cpu(cpu_dp))
continue;
- if (!try_module_get(dsa_tag_driver->owner))
- break;
+ /* Prefer a local CPU port */
+ dsa_switch_for_each_port(dp, cpu_dp->ds) {
+ /* Prefer the first local CPU port found */
+ if (dp->cpu_dp)
+ continue;
- ops = tmp;
- break;
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
+ }
}
- mutex_unlock(&dsa_tag_drivers_lock);
- return ops;
+ return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = NULL;
}
-const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol)
+static int dsa_port_setup(struct dsa_port *dp)
{
- struct dsa_tag_driver *dsa_tag_driver;
- const struct dsa_device_ops *ops;
- bool found = false;
+ bool dsa_port_link_registered = false;
+ struct dsa_switch *ds = dp->ds;
+ bool dsa_port_enabled = false;
+ int err = 0;
- request_module("%sid-%d", DSA_TAG_DRIVER_ALIAS, tag_protocol);
+ if (dp->setup)
+ return 0;
- mutex_lock(&dsa_tag_drivers_lock);
- list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
- ops = dsa_tag_driver->ops;
- if (ops->proto == tag_protocol) {
- found = true;
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ return err;
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ dsa_port_disable(dp);
+ break;
+ case DSA_PORT_TYPE_CPU:
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for CPU port %d\n",
+ dp->index);
+ }
+
+ err = dsa_port_enable(dp, NULL);
+ if (err)
break;
+ dsa_port_enabled = true;
+
+ break;
+ case DSA_PORT_TYPE_DSA:
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for DSA port %d\n",
+ dp->index);
}
+
+ err = dsa_port_enable(dp, NULL);
+ if (err)
+ break;
+ dsa_port_enabled = true;
+
+ break;
+ case DSA_PORT_TYPE_USER:
+ of_get_mac_address(dp->dn, dp->mac);
+ err = dsa_slave_create(dp);
+ break;
}
- if (found) {
- if (!try_module_get(dsa_tag_driver->owner))
- ops = ERR_PTR(-ENOPROTOOPT);
- } else {
- ops = ERR_PTR(-ENOPROTOOPT);
+ if (err && dsa_port_enabled)
+ dsa_port_disable(dp);
+ if (err && dsa_port_link_registered)
+ dsa_shared_port_link_unregister_of(dp);
+ if (err) {
+ dsa_port_devlink_teardown(dp);
+ return err;
}
- mutex_unlock(&dsa_tag_drivers_lock);
+ dp->setup = true;
- return ops;
+ return 0;
}
-void dsa_tag_driver_put(const struct dsa_device_ops *ops)
+static void dsa_port_teardown(struct dsa_port *dp)
{
- struct dsa_tag_driver *dsa_tag_driver;
+ if (!dp->setup)
+ return;
- mutex_lock(&dsa_tag_drivers_lock);
- list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
- if (dsa_tag_driver->ops == ops) {
- module_put(dsa_tag_driver->owner);
- break;
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ dsa_port_disable(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
+ break;
+ case DSA_PORT_TYPE_DSA:
+ dsa_port_disable(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
+ break;
+ case DSA_PORT_TYPE_USER:
+ if (dp->slave) {
+ dsa_slave_destroy(dp->slave);
+ dp->slave = NULL;
}
+ break;
}
- mutex_unlock(&dsa_tag_drivers_lock);
+
+ dsa_port_devlink_teardown(dp);
+
+ dp->setup = false;
}
-static int dev_is_class(struct device *dev, void *class)
+static int dsa_port_setup_as_unused(struct dsa_port *dp)
{
- if (dev->class != NULL && !strcmp(dev->class->name, class))
- return 1;
+ dp->type = DSA_PORT_TYPE_UNUSED;
+ return dsa_port_setup(dp);
+}
+
+static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
+{
+ const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
+ struct dsa_switch_tree *dst = ds->dst;
+ int err;
+
+ if (tag_ops->proto == dst->default_proto)
+ goto connect;
+
+ rtnl_lock();
+ err = ds->ops->change_tag_protocol(ds, tag_ops->proto);
+ rtnl_unlock();
+ if (err) {
+ dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
+ tag_ops->name, ERR_PTR(err));
+ return err;
+ }
+
+connect:
+ if (tag_ops->connect) {
+ err = tag_ops->connect(ds);
+ if (err)
+ return err;
+ }
+
+ if (ds->ops->connect_tag_protocol) {
+ err = ds->ops->connect_tag_protocol(ds, tag_ops->proto);
+ if (err) {
+ dev_err(ds->dev,
+ "Unable to connect to tag protocol \"%s\": %pe\n",
+ tag_ops->name, ERR_PTR(err));
+ goto disconnect;
+ }
+ }
return 0;
+
+disconnect:
+ if (tag_ops->disconnect)
+ tag_ops->disconnect(ds);
+
+ return err;
}
-static struct device *dev_find_class(struct device *parent, char *class)
+static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds)
{
- if (dev_is_class(parent, class)) {
- get_device(parent);
- return parent;
- }
+ const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
- return device_find_child(parent, class, dev_is_class);
+ if (tag_ops->disconnect)
+ tag_ops->disconnect(ds);
}
-struct net_device *dsa_dev_to_net_device(struct device *dev)
+static int dsa_switch_setup(struct dsa_switch *ds)
{
- struct device *d;
+ struct device_node *dn;
+ int err;
- d = dev_find_class(dev, "net");
- if (d != NULL) {
- struct net_device *nd;
+ if (ds->setup)
+ return 0;
- nd = to_net_dev(d);
- dev_hold(nd);
- put_device(d);
+ /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
+ * driver and before ops->setup() has run, since the switch drivers and
+ * the slave MDIO bus driver rely on these values for probing PHY
+ * devices or not
+ */
+ ds->phys_mii_mask |= dsa_user_ports(ds);
- return nd;
+ err = dsa_switch_devlink_alloc(ds);
+ if (err)
+ return err;
+
+ err = dsa_switch_register_notifier(ds);
+ if (err)
+ goto devlink_free;
+
+ ds->configure_vlan_while_not_filtering = true;
+
+ err = ds->ops->setup(ds);
+ if (err < 0)
+ goto unregister_notifier;
+
+ err = dsa_switch_setup_tag_protocol(ds);
+ if (err)
+ goto teardown;
+
+ if (!ds->slave_mii_bus && ds->ops->phy_read) {
+ ds->slave_mii_bus = mdiobus_alloc();
+ if (!ds->slave_mii_bus) {
+ err = -ENOMEM;
+ goto teardown;
+ }
+
+ dsa_slave_mii_bus_init(ds);
+
+ dn = of_get_child_by_name(ds->dev->of_node, "mdio");
+
+ err = of_mdiobus_register(ds->slave_mii_bus, dn);
+ of_node_put(dn);
+ if (err < 0)
+ goto free_slave_mii_bus;
}
- return NULL;
+ dsa_switch_devlink_register(ds);
+
+ ds->setup = true;
+ return 0;
+
+free_slave_mii_bus:
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_free(ds->slave_mii_bus);
+teardown:
+ if (ds->ops->teardown)
+ ds->ops->teardown(ds);
+unregister_notifier:
+ dsa_switch_unregister_notifier(ds);
+devlink_free:
+ dsa_switch_devlink_free(ds);
+ return err;
}
-EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
-/* Determine if we should defer delivery of skb until we have a rx timestamp.
- *
- * Called from dsa_switch_rcv. For now, this will only work if tagging is
- * enabled on the switch. Normally the MAC driver would retrieve the hardware
- * timestamp when it reads the packet out of the hardware. However in a DSA
- * switch, the DSA driver owning the interface to which the packet is
- * delivered is never notified unless we do so here.
- */
-static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
- struct sk_buff *skb)
+static void dsa_switch_teardown(struct dsa_switch *ds)
{
- struct dsa_switch *ds = p->dp->ds;
- unsigned int type;
+ if (!ds->setup)
+ return;
- if (skb_headroom(skb) < ETH_HLEN)
- return false;
+ dsa_switch_devlink_unregister(ds);
- __skb_push(skb, ETH_HLEN);
+ if (ds->slave_mii_bus && ds->ops->phy_read) {
+ mdiobus_unregister(ds->slave_mii_bus);
+ mdiobus_free(ds->slave_mii_bus);
+ ds->slave_mii_bus = NULL;
+ }
- type = ptp_classify_raw(skb);
+ dsa_switch_teardown_tag_protocol(ds);
- __skb_pull(skb, ETH_HLEN);
+ if (ds->ops->teardown)
+ ds->ops->teardown(ds);
- if (type == PTP_CLASS_NONE)
- return false;
+ dsa_switch_unregister_notifier(ds);
- if (likely(ds->ops->port_rxtstamp))
- return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+ dsa_switch_devlink_free(ds);
- return false;
+ ds->setup = false;
+}
+
+/* First tear down the non-shared, then the shared ports. This ensures that
+ * all work items scheduled by our switchdev handlers for user ports have
+ * completed before we destroy the refcounting kept on the shared ports.
+ */
+static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_unused(dp))
+ dsa_port_teardown(dp);
+
+ dsa_flush_workqueue();
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
+ dsa_port_teardown(dp);
}
-static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *unused)
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
{
- struct metadata_dst *md_dst = skb_metadata_dst(skb);
- struct dsa_port *cpu_dp = dev->dsa_ptr;
- struct sk_buff *nskb = NULL;
- struct dsa_slave_priv *p;
+ struct dsa_port *dp;
- if (unlikely(!cpu_dp)) {
- kfree_skb(skb);
- return 0;
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
+}
+
+/* Bring shared ports up first, then non-shared ports */
+static int dsa_tree_setup_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+ int err = 0;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) {
+ err = dsa_port_setup(dp);
+ if (err)
+ goto teardown;
+ }
}
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- return 0;
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) {
+ err = dsa_port_setup(dp);
+ if (err) {
+ err = dsa_port_setup_as_unused(dp);
+ if (err)
+ goto teardown;
+ }
+ }
+ }
- if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) {
- unsigned int port = md_dst->u.port_info.port_id;
+ return 0;
+
+teardown:
+ dsa_tree_teardown_ports(dst);
- skb_dst_drop(skb);
- if (!skb_has_extensions(skb))
- skb->slow_gro = 0;
+ return err;
+}
- skb->dev = dsa_master_find_slave(dev, 0, port);
- if (likely(skb->dev)) {
- dsa_default_offload_fwd_mark(skb);
- nskb = skb;
+static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+ int err = 0;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ err = dsa_switch_setup(dp->ds);
+ if (err) {
+ dsa_tree_teardown_switches(dst);
+ break;
}
- } else {
- nskb = cpu_dp->rcv(skb, dev);
}
- if (!nskb) {
- kfree_skb(skb);
- return 0;
+ return err;
+}
+
+static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp;
+ int err = 0;
+
+ rtnl_lock();
+
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
+ bool admin_up = (master->flags & IFF_UP) &&
+ !qdisc_tx_is_noop(master);
+
+ err = dsa_master_setup(master, cpu_dp);
+ if (err)
+ break;
+
+ /* Replay master state event */
+ dsa_tree_master_admin_state_change(dst, master, admin_up);
+ dsa_tree_master_oper_state_change(dst, master,
+ netif_oper_up(master));
}
- skb = nskb;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
+ rtnl_unlock();
+
+ return err;
+}
+
+static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp;
+
+ rtnl_lock();
+
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
- if (unlikely(!dsa_slave_dev_check(skb->dev))) {
- /* Packet is to be injected directly on an upper
- * device, e.g. a team/bond, so skip all DSA-port
- * specific actions.
+ /* Synthesizing an "admin down" state is sufficient for
+ * the switches to get a notification if the master is
+ * currently up and running.
*/
- netif_rx(skb);
- return 0;
+ dsa_tree_master_admin_state_change(dst, master, false);
+
+ dsa_master_teardown(master);
}
- p = netdev_priv(skb->dev);
+ rtnl_unlock();
+}
- if (unlikely(cpu_dp->ds->untag_bridge_pvid)) {
- nskb = dsa_untag_bridge_pvid(skb);
- if (!nskb) {
- kfree_skb(skb);
- return 0;
- }
- skb = nskb;
+static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
+{
+ unsigned int len = 0;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->ds->num_lag_ids > len)
+ len = dp->ds->num_lag_ids;
}
- dev_sw_netstats_rx_add(skb->dev, skb->len);
+ if (!len)
+ return 0;
+
+ dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
+ if (!dst->lags)
+ return -ENOMEM;
+
+ dst->lags_len = len;
+ return 0;
+}
+
+static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
+{
+ kfree(dst->lags);
+}
- if (dsa_skb_defer_rx_timestamp(p, skb))
+static int dsa_tree_setup(struct dsa_switch_tree *dst)
+{
+ bool complete;
+ int err;
+
+ if (dst->setup) {
+ pr_err("DSA: tree %d already setup! Disjoint trees?\n",
+ dst->index);
+ return -EEXIST;
+ }
+
+ complete = dsa_tree_setup_routing_table(dst);
+ if (!complete)
return 0;
- gro_cells_receive(&p->gcells, skb);
+ err = dsa_tree_setup_cpu_ports(dst);
+ if (err)
+ return err;
+
+ err = dsa_tree_setup_switches(dst);
+ if (err)
+ goto teardown_cpu_ports;
+
+ err = dsa_tree_setup_ports(dst);
+ if (err)
+ goto teardown_switches;
+
+ err = dsa_tree_setup_master(dst);
+ if (err)
+ goto teardown_ports;
+
+ err = dsa_tree_setup_lags(dst);
+ if (err)
+ goto teardown_master;
+
+ dst->setup = true;
+
+ pr_info("DSA: tree %d setup\n", dst->index);
return 0;
+
+teardown_master:
+ dsa_tree_teardown_master(dst);
+teardown_ports:
+ dsa_tree_teardown_ports(dst);
+teardown_switches:
+ dsa_tree_teardown_switches(dst);
+teardown_cpu_ports:
+ dsa_tree_teardown_cpu_ports(dst);
+
+ return err;
}
-#ifdef CONFIG_PM_SLEEP
-static bool dsa_port_is_initialized(const struct dsa_port *dp)
+static void dsa_tree_teardown(struct dsa_switch_tree *dst)
{
- return dp->type == DSA_PORT_TYPE_USER && dp->slave;
+ struct dsa_link *dl, *next;
+
+ if (!dst->setup)
+ return;
+
+ dsa_tree_teardown_lags(dst);
+
+ dsa_tree_teardown_master(dst);
+
+ dsa_tree_teardown_ports(dst);
+
+ dsa_tree_teardown_switches(dst);
+
+ dsa_tree_teardown_cpu_ports(dst);
+
+ list_for_each_entry_safe(dl, next, &dst->rtable, list) {
+ list_del(&dl->list);
+ kfree(dl);
+ }
+
+ pr_info("DSA: tree %d torn down\n", dst->index);
+
+ dst->setup = false;
}
-int dsa_switch_suspend(struct dsa_switch *ds)
+static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst,
+ const struct dsa_device_ops *tag_ops)
+{
+ const struct dsa_device_ops *old_tag_ops = dst->tag_ops;
+ struct dsa_notifier_tag_proto_info info;
+ int err;
+
+ dst->tag_ops = tag_ops;
+
+ /* Notify the switches from this tree about the connection
+ * to the new tagger
+ */
+ info.tag_ops = tag_ops;
+ err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info);
+ if (err && err != -EOPNOTSUPP)
+ goto out_disconnect;
+
+ /* Notify the old tagger about the disconnection from this tree */
+ info.tag_ops = old_tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
+
+ return 0;
+
+out_disconnect:
+ info.tag_ops = tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
+ dst->tag_ops = old_tag_ops;
+
+ return err;
+}
+
+/* Since the dsa/tagging sysfs device attribute is per master, the assumption
+ * is that all DSA switches within a tree share the same tagger, otherwise
+ * they would have formed disjoint trees (different "dsa,member" values).
+ */
+int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
+ const struct dsa_device_ops *tag_ops,
+ const struct dsa_device_ops *old_tag_ops)
{
+ struct dsa_notifier_tag_proto_info info;
struct dsa_port *dp;
- int ret = 0;
+ int err = -EBUSY;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ /* At the moment we don't allow changing the tag protocol under
+ * traffic. The rtnl_mutex also happens to serialize concurrent
+ * attempts to change the tagging protocol. If we ever lift the IFF_UP
+ * restriction, there needs to be another mutex which serializes this.
+ */
+ dsa_tree_for_each_user_port(dp, dst) {
+ if (dsa_port_to_master(dp)->flags & IFF_UP)
+ goto out_unlock;
+
+ if (dp->slave->flags & IFF_UP)
+ goto out_unlock;
+ }
- /* Suspend slave network devices */
- dsa_switch_for_each_port(dp, ds) {
- if (!dsa_port_is_initialized(dp))
- continue;
+ /* Notify the tag protocol change */
+ info.tag_ops = tag_ops;
+ err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+ if (err)
+ goto out_unwind_tagger;
- ret = dsa_slave_suspend(dp->slave);
- if (ret)
- return ret;
+ err = dsa_tree_bind_tag_proto(dst, tag_ops);
+ if (err)
+ goto out_unwind_tagger;
+
+ rtnl_unlock();
+
+ return 0;
+
+out_unwind_tagger:
+ info.tag_ops = old_tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static void dsa_tree_master_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master)
+{
+ struct dsa_notifier_master_state_info info;
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+
+ info.master = master;
+ info.operational = dsa_port_master_is_operational(cpu_dp);
+
+ dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info);
+}
+
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ /* Don't keep track of admin state on LAG DSA masters,
+ * but rather just of physical DSA masters
+ */
+ if (netif_is_lag_master(master))
+ return;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (up && cpu_dp->master_oper_up))
+ notify = true;
+
+ cpu_dp->master_admin_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ /* Don't keep track of oper state on LAG DSA masters,
+ * but rather just of physical DSA masters
+ */
+ if (netif_is_lag_master(master))
+ return;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (cpu_dp->master_admin_up && up))
+ notify = true;
+
+ cpu_dp->master_oper_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
+static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_port *dp;
+
+ dsa_switch_for_each_port(dp, ds)
+ if (dp->index == index)
+ return dp;
+
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ if (!dp)
+ return NULL;
+
+ dp->ds = ds;
+ dp->index = index;
+
+ mutex_init(&dp->addr_lists_lock);
+ mutex_init(&dp->vlans_lock);
+ INIT_LIST_HEAD(&dp->fdbs);
+ INIT_LIST_HEAD(&dp->mdbs);
+ INIT_LIST_HEAD(&dp->vlans);
+ INIT_LIST_HEAD(&dp->list);
+ list_add_tail(&dp->list, &dst->ports);
+
+ return dp;
+}
+
+static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
+{
+ dp->type = DSA_PORT_TYPE_USER;
+ dp->name = name;
+
+ return 0;
+}
+
+static int dsa_port_parse_dsa(struct dsa_port *dp)
+{
+ dp->type = DSA_PORT_TYPE_DSA;
+
+ return 0;
+}
+
+static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
+ struct net_device *master)
+{
+ enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE;
+ struct dsa_switch *mds, *ds = dp->ds;
+ unsigned int mdp_upstream;
+ struct dsa_port *mdp;
+
+ /* It is possible to stack DSA switches onto one another when that
+ * happens the switch driver may want to know if its tagging protocol
+ * is going to work in such a configuration.
+ */
+ if (dsa_slave_dev_check(master)) {
+ mdp = dsa_slave_to_port(master);
+ mds = mdp->ds;
+ mdp_upstream = dsa_upstream_port(mds, mdp->index);
+ tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream,
+ DSA_TAG_PROTO_NONE);
}
- if (ds->ops->suspend)
- ret = ds->ops->suspend(ds);
+ /* If the master device is not itself a DSA slave in a disjoint DSA
+ * tree, then return immediately.
+ */
+ return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol);
+}
- return ret;
+static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
+ const char *user_protocol)
+{
+ const struct dsa_device_ops *tag_ops = NULL;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ enum dsa_tag_protocol default_proto;
+
+ /* Find out which protocol the switch would prefer. */
+ default_proto = dsa_get_tag_protocol(dp, master);
+ if (dst->default_proto) {
+ if (dst->default_proto != default_proto) {
+ dev_err(ds->dev,
+ "A DSA switch tree can have only one tagging protocol\n");
+ return -EINVAL;
+ }
+ } else {
+ dst->default_proto = default_proto;
+ }
+
+ /* See if the user wants to override that preference. */
+ if (user_protocol) {
+ if (!ds->ops->change_tag_protocol) {
+ dev_err(ds->dev, "Tag protocol cannot be modified\n");
+ return -EINVAL;
+ }
+
+ tag_ops = dsa_tag_driver_get_by_name(user_protocol);
+ if (IS_ERR(tag_ops)) {
+ dev_warn(ds->dev,
+ "Failed to find a tagging driver for protocol %s, using default\n",
+ user_protocol);
+ tag_ops = NULL;
+ }
+ }
+
+ if (!tag_ops)
+ tag_ops = dsa_tag_driver_get_by_id(default_proto);
+
+ if (IS_ERR(tag_ops)) {
+ if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
+ return -EPROBE_DEFER;
+
+ dev_warn(ds->dev, "No tagger for this switch\n");
+ return PTR_ERR(tag_ops);
+ }
+
+ if (dst->tag_ops) {
+ if (dst->tag_ops != tag_ops) {
+ dev_err(ds->dev,
+ "A DSA switch tree can have only one tagging protocol\n");
+
+ dsa_tag_driver_put(tag_ops);
+ return -EINVAL;
+ }
+
+ /* In the case of multiple CPU ports per switch, the tagging
+ * protocol is still reference-counted only per switch tree.
+ */
+ dsa_tag_driver_put(tag_ops);
+ } else {
+ dst->tag_ops = tag_ops;
+ }
+
+ dp->master = master;
+ dp->type = DSA_PORT_TYPE_CPU;
+ dsa_port_set_tag_protocol(dp, dst->tag_ops);
+ dp->dst = dst;
+
+ /* At this point, the tree may be configured to use a different
+ * tagger than the one chosen by the switch driver during
+ * .setup, in the case when a user selects a custom protocol
+ * through the DT.
+ *
+ * This is resolved by syncing the driver with the tree in
+ * dsa_switch_setup_tag_protocol once .setup has run and the
+ * driver is ready to accept calls to .change_tag_protocol. If
+ * the driver does not support the custom protocol at that
+ * point, the tree is wholly rejected, thereby ensuring that the
+ * tree and driver are always in agreement on the protocol to
+ * use.
+ */
+ return 0;
}
-EXPORT_SYMBOL_GPL(dsa_switch_suspend);
-int dsa_switch_resume(struct dsa_switch *ds)
+static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
+{
+ struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
+ const char *name = of_get_property(dn, "label", NULL);
+ bool link = of_property_read_bool(dn, "link");
+
+ dp->dn = dn;
+
+ if (ethernet) {
+ struct net_device *master;
+ const char *user_protocol;
+
+ master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
+ if (!master)
+ return -EPROBE_DEFER;
+
+ user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL);
+ return dsa_port_parse_cpu(dp, master, user_protocol);
+ }
+
+ if (link)
+ return dsa_port_parse_dsa(dp);
+
+ return dsa_port_parse_user(dp, name);
+}
+
+static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
+ struct device_node *dn)
{
+ struct device_node *ports, *port;
struct dsa_port *dp;
- int ret = 0;
+ int err = 0;
+ u32 reg;
+
+ ports = of_get_child_by_name(dn, "ports");
+ if (!ports) {
+ /* The second possibility is "ethernet-ports" */
+ ports = of_get_child_by_name(dn, "ethernet-ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return -EINVAL;
+ }
+ }
- if (ds->ops->resume)
- ret = ds->ops->resume(ds);
+ for_each_available_child_of_node(ports, port) {
+ err = of_property_read_u32(port, "reg", &reg);
+ if (err) {
+ of_node_put(port);
+ goto out_put_node;
+ }
- if (ret)
- return ret;
+ if (reg >= ds->num_ports) {
+ dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%u)\n",
+ port, reg, ds->num_ports);
+ of_node_put(port);
+ err = -EINVAL;
+ goto out_put_node;
+ }
- /* Resume slave network devices */
- dsa_switch_for_each_port(dp, ds) {
- if (!dsa_port_is_initialized(dp))
- continue;
+ dp = dsa_to_port(ds, reg);
- ret = dsa_slave_resume(dp->slave);
- if (ret)
- return ret;
+ err = dsa_port_parse_of(dp, port);
+ if (err) {
+ of_node_put(port);
+ goto out_put_node;
+ }
+ }
+
+out_put_node:
+ of_node_put(ports);
+ return err;
+}
+
+static int dsa_switch_parse_member_of(struct dsa_switch *ds,
+ struct device_node *dn)
+{
+ u32 m[2] = { 0, 0 };
+ int sz;
+
+ /* Don't error out if this optional property isn't found */
+ sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
+ if (sz < 0 && sz != -EINVAL)
+ return sz;
+
+ ds->index = m[1];
+
+ ds->dst = dsa_tree_touch(m[0]);
+ if (!ds->dst)
+ return -ENOMEM;
+
+ if (dsa_switch_find(ds->dst->index, ds->index)) {
+ dev_err(ds->dev,
+ "A DSA switch with index %d already exists in tree %d\n",
+ ds->index, ds->dst->index);
+ return -EEXIST;
}
+ if (ds->dst->last_switch < ds->index)
+ ds->dst->last_switch = ds->index;
+
return 0;
}
-EXPORT_SYMBOL_GPL(dsa_switch_resume);
-#endif
-static struct packet_type dsa_pack_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_XDSA),
- .func = dsa_switch_rcv,
-};
+static int dsa_switch_touch_ports(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+ int port;
-static struct workqueue_struct *dsa_owq;
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = dsa_port_touch(ds, port);
+ if (!dp)
+ return -ENOMEM;
+ }
-bool dsa_schedule_work(struct work_struct *work)
+ return 0;
+}
+
+static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
{
- return queue_work(dsa_owq, work);
+ int err;
+
+ err = dsa_switch_parse_member_of(ds, dn);
+ if (err)
+ return err;
+
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
+
+ return dsa_switch_parse_ports_of(ds, dn);
}
-void dsa_flush_workqueue(void)
+static int dev_is_class(struct device *dev, void *class)
{
- flush_workqueue(dsa_owq);
+ if (dev->class != NULL && !strcmp(dev->class->name, class))
+ return 1;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(dsa_flush_workqueue);
-int dsa_devlink_param_get(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
+static struct device *dev_find_class(struct device *parent, char *class)
+{
+ if (dev_is_class(parent, class)) {
+ get_device(parent);
+ return parent;
+ }
+
+ return device_find_child(parent, class, dev_is_class);
+}
+
+static struct net_device *dsa_dev_to_net_device(struct device *dev)
+{
+ struct device *d;
+
+ d = dev_find_class(dev, "net");
+ if (d != NULL) {
+ struct net_device *nd;
+
+ nd = to_net_dev(d);
+ dev_hold(nd);
+ put_device(d);
+
+ return nd;
+ }
+
+ return NULL;
+}
+
+static int dsa_port_parse(struct dsa_port *dp, const char *name,
+ struct device *dev)
+{
+ if (!strcmp(name, "cpu")) {
+ struct net_device *master;
+
+ master = dsa_dev_to_net_device(dev);
+ if (!master)
+ return -EPROBE_DEFER;
+
+ dev_put(master);
+
+ return dsa_port_parse_cpu(dp, master, NULL);
+ }
+
+ if (!strcmp(name, "dsa"))
+ return dsa_port_parse_dsa(dp);
+
+ return dsa_port_parse_user(dp, name);
+}
+
+static int dsa_switch_parse_ports(struct dsa_switch *ds,
+ struct dsa_chip_data *cd)
{
- struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+ bool valid_name_found = false;
+ struct dsa_port *dp;
+ struct device *dev;
+ const char *name;
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < DSA_MAX_PORTS; i++) {
+ name = cd->port_names[i];
+ dev = cd->netdev[i];
+ dp = dsa_to_port(ds, i);
+
+ if (!name)
+ continue;
+
+ err = dsa_port_parse(dp, name, dev);
+ if (err)
+ return err;
- if (!ds->ops->devlink_param_get)
- return -EOPNOTSUPP;
+ valid_name_found = true;
+ }
+
+ if (!valid_name_found && i == DSA_MAX_PORTS)
+ return -EINVAL;
- return ds->ops->devlink_param_get(ds, id, ctx);
+ return 0;
}
-EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
-int dsa_devlink_param_set(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
+static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
{
- struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+ int err;
- if (!ds->ops->devlink_param_set)
- return -EOPNOTSUPP;
+ ds->cd = cd;
+
+ /* We don't support interconnected switches nor multiple trees via
+ * platform data, so this is the unique switch of the tree.
+ */
+ ds->index = 0;
+ ds->dst = dsa_tree_touch(0);
+ if (!ds->dst)
+ return -ENOMEM;
- return ds->ops->devlink_param_set(ds, id, ctx);
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
+
+ return dsa_switch_parse_ports(ds, cd);
}
-EXPORT_SYMBOL_GPL(dsa_devlink_param_set);
-int dsa_devlink_params_register(struct dsa_switch *ds,
- const struct devlink_param *params,
- size_t params_count)
+static void dsa_switch_release_ports(struct dsa_switch *ds)
{
- return devlink_params_register(ds->devlink, params, params_count);
+ struct dsa_port *dp, *next;
+
+ dsa_switch_for_each_port_safe(dp, next, ds) {
+ WARN_ON(!list_empty(&dp->fdbs));
+ WARN_ON(!list_empty(&dp->mdbs));
+ WARN_ON(!list_empty(&dp->vlans));
+ list_del(&dp->list);
+ kfree(dp);
+ }
}
-EXPORT_SYMBOL_GPL(dsa_devlink_params_register);
-void dsa_devlink_params_unregister(struct dsa_switch *ds,
- const struct devlink_param *params,
- size_t params_count)
+static int dsa_switch_probe(struct dsa_switch *ds)
{
- devlink_params_unregister(ds->devlink, params, params_count);
+ struct dsa_switch_tree *dst;
+ struct dsa_chip_data *pdata;
+ struct device_node *np;
+ int err;
+
+ if (!ds->dev)
+ return -ENODEV;
+
+ pdata = ds->dev->platform_data;
+ np = ds->dev->of_node;
+
+ if (!ds->num_ports)
+ return -EINVAL;
+
+ if (np) {
+ err = dsa_switch_parse_of(ds, np);
+ if (err)
+ dsa_switch_release_ports(ds);
+ } else if (pdata) {
+ err = dsa_switch_parse(ds, pdata);
+ if (err)
+ dsa_switch_release_ports(ds);
+ } else {
+ err = -ENODEV;
+ }
+
+ if (err)
+ return err;
+
+ dst = ds->dst;
+ dsa_tree_get(dst);
+ err = dsa_tree_setup(dst);
+ if (err) {
+ dsa_switch_release_ports(ds);
+ dsa_tree_put(dst);
+ }
+
+ return err;
}
-EXPORT_SYMBOL_GPL(dsa_devlink_params_unregister);
-int dsa_devlink_resource_register(struct dsa_switch *ds,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
+int dsa_register_switch(struct dsa_switch *ds)
{
- return devlink_resource_register(ds->devlink, resource_name,
- resource_size, resource_id,
- parent_resource_id,
- size_params);
+ int err;
+
+ mutex_lock(&dsa2_mutex);
+ err = dsa_switch_probe(ds);
+ dsa_tree_put(ds->dst);
+ mutex_unlock(&dsa2_mutex);
+
+ return err;
}
-EXPORT_SYMBOL_GPL(dsa_devlink_resource_register);
+EXPORT_SYMBOL_GPL(dsa_register_switch);
-void dsa_devlink_resources_unregister(struct dsa_switch *ds)
+static void dsa_switch_remove(struct dsa_switch *ds)
{
- devlink_resources_unregister(ds->devlink);
+ struct dsa_switch_tree *dst = ds->dst;
+
+ dsa_tree_teardown(dst);
+ dsa_switch_release_ports(ds);
+ dsa_tree_put(dst);
}
-EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister);
-void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
+void dsa_unregister_switch(struct dsa_switch *ds)
{
- return devlink_resource_occ_get_register(ds->devlink, resource_id,
- occ_get, occ_get_priv);
+ mutex_lock(&dsa2_mutex);
+ dsa_switch_remove(ds);
+ mutex_unlock(&dsa2_mutex);
}
-EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register);
+EXPORT_SYMBOL_GPL(dsa_unregister_switch);
-void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
- u64 resource_id)
+/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+ * blocking that operation from completion, due to the dev_hold taken inside
+ * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
+ * the DSA master, so that the system can reboot successfully.
+ */
+void dsa_switch_shutdown(struct dsa_switch *ds)
{
- devlink_resource_occ_get_unregister(ds->devlink, resource_id);
+ struct net_device *master, *slave_dev;
+ struct dsa_port *dp;
+
+ mutex_lock(&dsa2_mutex);
+
+ if (!ds->setup)
+ goto out;
+
+ rtnl_lock();
+
+ dsa_switch_for_each_user_port(dp, ds) {
+ master = dsa_port_to_master(dp);
+ slave_dev = dp->slave;
+
+ netdev_upper_dev_unlink(master, slave_dev);
+ }
+
+ /* Disconnect from further netdevice notifiers on the master,
+ * since netdev_uses_dsa() will now return false.
+ */
+ dsa_switch_for_each_cpu_port(dp, ds)
+ dp->master->dsa_ptr = NULL;
+
+ rtnl_unlock();
+out:
+ mutex_unlock(&dsa2_mutex);
}
-EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
+EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
-struct devlink_region *
-dsa_devlink_region_create(struct dsa_switch *ds,
- const struct devlink_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
+#ifdef CONFIG_PM_SLEEP
+static bool dsa_port_is_initialized(const struct dsa_port *dp)
{
- return devlink_region_create(ds->devlink, ops, region_max_snapshots,
- region_size);
+ return dp->type == DSA_PORT_TYPE_USER && dp->slave;
}
-EXPORT_SYMBOL_GPL(dsa_devlink_region_create);
-struct devlink_region *
-dsa_devlink_port_region_create(struct dsa_switch *ds,
- int port,
- const struct devlink_port_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
+int dsa_switch_suspend(struct dsa_switch *ds)
{
- struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_port *dp;
+ int ret = 0;
+
+ /* Suspend slave network devices */
+ dsa_switch_for_each_port(dp, ds) {
+ if (!dsa_port_is_initialized(dp))
+ continue;
+
+ ret = dsa_slave_suspend(dp->slave);
+ if (ret)
+ return ret;
+ }
+
+ if (ds->ops->suspend)
+ ret = ds->ops->suspend(ds);
- return devlink_port_region_create(&dp->devlink_port, ops,
- region_max_snapshots,
- region_size);
+ return ret;
}
-EXPORT_SYMBOL_GPL(dsa_devlink_port_region_create);
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
-void dsa_devlink_region_destroy(struct devlink_region *region)
+int dsa_switch_resume(struct dsa_switch *ds)
{
- devlink_region_destroy(region);
+ struct dsa_port *dp;
+ int ret = 0;
+
+ if (ds->ops->resume)
+ ret = ds->ops->resume(ds);
+
+ if (ret)
+ return ret;
+
+ /* Resume slave network devices */
+ dsa_switch_for_each_port(dp, ds) {
+ if (!dsa_port_is_initialized(dp))
+ continue;
+
+ ret = dsa_slave_resume(dp->slave);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(dsa_devlink_region_destroy);
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
+#endif
struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
{
@@ -552,9 +1717,6 @@ static int __init dsa_init_module(void)
dev_add_pack(&dsa_pack_type);
- dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops),
- THIS_MODULE);
-
rc = rtnl_link_register(&dsa_link_ops);
if (rc)
goto netlink_register_fail;
@@ -562,7 +1724,6 @@ static int __init dsa_init_module(void)
return 0;
netlink_register_fail:
- dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops));
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
register_notifier_fail:
@@ -575,7 +1736,6 @@ module_init(dsa_init_module);
static void __exit dsa_cleanup_module(void)
{
rtnl_link_unregister(&dsa_link_ops);
- dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops));
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
diff --git a/net/dsa/dsa.h b/net/dsa/dsa.h
new file mode 100644
index 000000000000..b7e17ae1094d
--- /dev/null
+++ b/net/dsa/dsa.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_H
+#define __DSA_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct dsa_db;
+struct dsa_device_ops;
+struct dsa_lag;
+struct dsa_switch_tree;
+struct net_device;
+struct work_struct;
+
+extern struct list_head dsa_tree_list;
+
+bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b);
+bool dsa_schedule_work(struct work_struct *work);
+void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag);
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag);
+struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
+ const struct net_device *lag_dev);
+struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst);
+int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
+ const struct dsa_device_ops *tag_ops,
+ const struct dsa_device_ops *old_tag_ops);
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up);
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up);
+unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
+void dsa_bridge_num_put(const struct net_device *bridge_dev,
+ unsigned int bridge_num);
+struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
+ const struct net_device *br);
+
+#endif
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
deleted file mode 100644
index f8df55e2e23a..000000000000
--- a/net/dsa/dsa2.c
+++ /dev/null
@@ -1,1817 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dsa/dsa2.c - Hardware switch handling, binding version 2
- * Copyright (c) 2008-2009 Marvell Semiconductor
- * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
- * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
- */
-
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/slab.h>
-#include <linux/rtnetlink.h>
-#include <linux/of.h>
-#include <linux/of_mdio.h>
-#include <linux/of_net.h>
-#include <net/devlink.h>
-#include <net/sch_generic.h>
-
-#include "dsa_priv.h"
-
-static DEFINE_MUTEX(dsa2_mutex);
-LIST_HEAD(dsa_tree_list);
-
-/* Track the bridges with forwarding offload enabled */
-static unsigned long dsa_fwd_offloading_bridges;
-
-/**
- * dsa_tree_notify - Execute code for all switches in a DSA switch tree.
- * @dst: collection of struct dsa_switch devices to notify.
- * @e: event, must be of type DSA_NOTIFIER_*
- * @v: event-specific value.
- *
- * Given a struct dsa_switch_tree, this can be used to run a function once for
- * each member DSA switch. The other alternative of traversing the tree is only
- * through its ports list, which does not uniquely list the switches.
- */
-int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
-{
- struct raw_notifier_head *nh = &dst->nh;
- int err;
-
- err = raw_notifier_call_chain(nh, e, v);
-
- return notifier_to_errno(err);
-}
-
-/**
- * dsa_broadcast - Notify all DSA trees in the system.
- * @e: event, must be of type DSA_NOTIFIER_*
- * @v: event-specific value.
- *
- * Can be used to notify the switching fabric of events such as cross-chip
- * bridging between disjoint trees (such as islands of tagger-compatible
- * switches bridged by an incompatible middle switch).
- *
- * WARNING: this function is not reliable during probe time, because probing
- * between trees is asynchronous and not all DSA trees might have probed.
- */
-int dsa_broadcast(unsigned long e, void *v)
-{
- struct dsa_switch_tree *dst;
- int err = 0;
-
- list_for_each_entry(dst, &dsa_tree_list, list) {
- err = dsa_tree_notify(dst, e, v);
- if (err)
- break;
- }
-
- return err;
-}
-
-/**
- * dsa_lag_map() - Map LAG structure to a linear LAG array
- * @dst: Tree in which to record the mapping.
- * @lag: LAG structure that is to be mapped to the tree's array.
- *
- * dsa_lag_id/dsa_lag_by_id can then be used to translate between the
- * two spaces. The size of the mapping space is determined by the
- * driver by setting ds->num_lag_ids. It is perfectly legal to leave
- * it unset if it is not needed, in which case these functions become
- * no-ops.
- */
-void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag)
-{
- unsigned int id;
-
- for (id = 1; id <= dst->lags_len; id++) {
- if (!dsa_lag_by_id(dst, id)) {
- dst->lags[id - 1] = lag;
- lag->id = id;
- return;
- }
- }
-
- /* No IDs left, which is OK. Some drivers do not need it. The
- * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
- * returns an error for this device when joining the LAG. The
- * driver can then return -EOPNOTSUPP back to DSA, which will
- * fall back to a software LAG.
- */
-}
-
-/**
- * dsa_lag_unmap() - Remove a LAG ID mapping
- * @dst: Tree in which the mapping is recorded.
- * @lag: LAG structure that was mapped.
- *
- * As there may be multiple users of the mapping, it is only removed
- * if there are no other references to it.
- */
-void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag)
-{
- unsigned int id;
-
- dsa_lags_foreach_id(id, dst) {
- if (dsa_lag_by_id(dst, id) == lag) {
- dst->lags[id - 1] = NULL;
- lag->id = 0;
- break;
- }
- }
-}
-
-struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
- const struct net_device *lag_dev)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_lag_dev_get(dp) == lag_dev)
- return dp->lag;
-
- return NULL;
-}
-
-struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
- const struct net_device *br)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_bridge_dev_get(dp) == br)
- return dp->bridge;
-
- return NULL;
-}
-
-static int dsa_bridge_num_find(const struct net_device *bridge_dev)
-{
- struct dsa_switch_tree *dst;
-
- list_for_each_entry(dst, &dsa_tree_list, list) {
- struct dsa_bridge *bridge;
-
- bridge = dsa_tree_bridge_find(dst, bridge_dev);
- if (bridge)
- return bridge->num;
- }
-
- return 0;
-}
-
-unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
-{
- unsigned int bridge_num = dsa_bridge_num_find(bridge_dev);
-
- /* Switches without FDB isolation support don't get unique
- * bridge numbering
- */
- if (!max)
- return 0;
-
- if (!bridge_num) {
- /* First port that requests FDB isolation or TX forwarding
- * offload for this bridge
- */
- bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges,
- DSA_MAX_NUM_OFFLOADING_BRIDGES,
- 1);
- if (bridge_num >= max)
- return 0;
-
- set_bit(bridge_num, &dsa_fwd_offloading_bridges);
- }
-
- return bridge_num;
-}
-
-void dsa_bridge_num_put(const struct net_device *bridge_dev,
- unsigned int bridge_num)
-{
- /* Since we refcount bridges, we know that when we call this function
- * it is no longer in use, so we can just go ahead and remove it from
- * the bit mask.
- */
- clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
-}
-
-struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
-{
- struct dsa_switch_tree *dst;
- struct dsa_port *dp;
-
- list_for_each_entry(dst, &dsa_tree_list, list) {
- if (dst->index != tree_index)
- continue;
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (dp->ds->index != sw_index)
- continue;
-
- return dp->ds;
- }
- }
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_find);
-
-static struct dsa_switch_tree *dsa_tree_find(int index)
-{
- struct dsa_switch_tree *dst;
-
- list_for_each_entry(dst, &dsa_tree_list, list)
- if (dst->index == index)
- return dst;
-
- return NULL;
-}
-
-static struct dsa_switch_tree *dsa_tree_alloc(int index)
-{
- struct dsa_switch_tree *dst;
-
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
- if (!dst)
- return NULL;
-
- dst->index = index;
-
- INIT_LIST_HEAD(&dst->rtable);
-
- INIT_LIST_HEAD(&dst->ports);
-
- INIT_LIST_HEAD(&dst->list);
- list_add_tail(&dst->list, &dsa_tree_list);
-
- kref_init(&dst->refcount);
-
- return dst;
-}
-
-static void dsa_tree_free(struct dsa_switch_tree *dst)
-{
- if (dst->tag_ops)
- dsa_tag_driver_put(dst->tag_ops);
- list_del(&dst->list);
- kfree(dst);
-}
-
-static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst)
-{
- if (dst)
- kref_get(&dst->refcount);
-
- return dst;
-}
-
-static struct dsa_switch_tree *dsa_tree_touch(int index)
-{
- struct dsa_switch_tree *dst;
-
- dst = dsa_tree_find(index);
- if (dst)
- return dsa_tree_get(dst);
- else
- return dsa_tree_alloc(index);
-}
-
-static void dsa_tree_release(struct kref *ref)
-{
- struct dsa_switch_tree *dst;
-
- dst = container_of(ref, struct dsa_switch_tree, refcount);
-
- dsa_tree_free(dst);
-}
-
-static void dsa_tree_put(struct dsa_switch_tree *dst)
-{
- if (dst)
- kref_put(&dst->refcount, dsa_tree_release);
-}
-
-static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
- struct device_node *dn)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dp->dn == dn)
- return dp;
-
- return NULL;
-}
-
-static struct dsa_link *dsa_link_touch(struct dsa_port *dp,
- struct dsa_port *link_dp)
-{
- struct dsa_switch *ds = dp->ds;
- struct dsa_switch_tree *dst;
- struct dsa_link *dl;
-
- dst = ds->dst;
-
- list_for_each_entry(dl, &dst->rtable, list)
- if (dl->dp == dp && dl->link_dp == link_dp)
- return dl;
-
- dl = kzalloc(sizeof(*dl), GFP_KERNEL);
- if (!dl)
- return NULL;
-
- dl->dp = dp;
- dl->link_dp = link_dp;
-
- INIT_LIST_HEAD(&dl->list);
- list_add_tail(&dl->list, &dst->rtable);
-
- return dl;
-}
-
-static bool dsa_port_setup_routing_table(struct dsa_port *dp)
-{
- struct dsa_switch *ds = dp->ds;
- struct dsa_switch_tree *dst = ds->dst;
- struct device_node *dn = dp->dn;
- struct of_phandle_iterator it;
- struct dsa_port *link_dp;
- struct dsa_link *dl;
- int err;
-
- of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
- link_dp = dsa_tree_find_port_by_node(dst, it.node);
- if (!link_dp) {
- of_node_put(it.node);
- return false;
- }
-
- dl = dsa_link_touch(dp, link_dp);
- if (!dl) {
- of_node_put(it.node);
- return false;
- }
- }
-
- return true;
-}
-
-static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
-{
- bool complete = true;
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_dsa(dp)) {
- complete = dsa_port_setup_routing_table(dp);
- if (!complete)
- break;
- }
- }
-
- return complete;
-}
-
-static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_is_cpu(dp))
- return dp;
-
- return NULL;
-}
-
-struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst)
-{
- struct device_node *ethernet;
- struct net_device *master;
- struct dsa_port *cpu_dp;
-
- cpu_dp = dsa_tree_find_first_cpu(dst);
- ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0);
- master = of_find_net_device_by_node(ethernet);
- of_node_put(ethernet);
-
- return master;
-}
-
-/* Assign the default CPU port (the first one in the tree) to all ports of the
- * fabric which don't already have one as part of their own switch.
- */
-static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
-{
- struct dsa_port *cpu_dp, *dp;
-
- cpu_dp = dsa_tree_find_first_cpu(dst);
- if (!cpu_dp) {
- pr_err("DSA: tree %d has no CPU port\n", dst->index);
- return -EINVAL;
- }
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (dp->cpu_dp)
- continue;
-
- if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
- dp->cpu_dp = cpu_dp;
- }
-
- return 0;
-}
-
-/* Perform initial assignment of CPU ports to user ports and DSA links in the
- * fabric, giving preference to CPU ports local to each switch. Default to
- * using the first CPU port in the switch tree if the port does not have a CPU
- * port local to this switch.
- */
-static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
-{
- struct dsa_port *cpu_dp, *dp;
-
- list_for_each_entry(cpu_dp, &dst->ports, list) {
- if (!dsa_port_is_cpu(cpu_dp))
- continue;
-
- /* Prefer a local CPU port */
- dsa_switch_for_each_port(dp, cpu_dp->ds) {
- /* Prefer the first local CPU port found */
- if (dp->cpu_dp)
- continue;
-
- if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
- dp->cpu_dp = cpu_dp;
- }
- }
-
- return dsa_tree_setup_default_cpu(dst);
-}
-
-static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
- dp->cpu_dp = NULL;
-}
-
-static int dsa_port_devlink_setup(struct dsa_port *dp)
-{
- struct devlink_port *dlp = &dp->devlink_port;
- struct dsa_switch_tree *dst = dp->ds->dst;
- struct devlink_port_attrs attrs = {};
- struct devlink *dl = dp->ds->devlink;
- struct dsa_switch *ds = dp->ds;
- const unsigned char *id;
- unsigned char len;
- int err;
-
- memset(dlp, 0, sizeof(*dlp));
- devlink_port_init(dl, dlp);
-
- if (ds->ops->port_setup) {
- err = ds->ops->port_setup(ds, dp->index);
- if (err)
- return err;
- }
-
- id = (const unsigned char *)&dst->index;
- len = sizeof(dst->index);
-
- attrs.phys.port_number = dp->index;
- memcpy(attrs.switch_id.id, id, len);
- attrs.switch_id.id_len = len;
-
- switch (dp->type) {
- case DSA_PORT_TYPE_UNUSED:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED;
- break;
- case DSA_PORT_TYPE_CPU:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU;
- break;
- case DSA_PORT_TYPE_DSA:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA;
- break;
- case DSA_PORT_TYPE_USER:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
- break;
- }
-
- devlink_port_attrs_set(dlp, &attrs);
- err = devlink_port_register(dl, dlp, dp->index);
- if (err) {
- if (ds->ops->port_teardown)
- ds->ops->port_teardown(ds, dp->index);
- return err;
- }
-
- return 0;
-}
-
-static void dsa_port_devlink_teardown(struct dsa_port *dp)
-{
- struct devlink_port *dlp = &dp->devlink_port;
- struct dsa_switch *ds = dp->ds;
-
- devlink_port_unregister(dlp);
-
- if (ds->ops->port_teardown)
- ds->ops->port_teardown(ds, dp->index);
-
- devlink_port_fini(dlp);
-}
-
-static int dsa_port_setup(struct dsa_port *dp)
-{
- bool dsa_port_link_registered = false;
- struct dsa_switch *ds = dp->ds;
- bool dsa_port_enabled = false;
- int err = 0;
-
- if (dp->setup)
- return 0;
-
- err = dsa_port_devlink_setup(dp);
- if (err)
- return err;
-
- switch (dp->type) {
- case DSA_PORT_TYPE_UNUSED:
- dsa_port_disable(dp);
- break;
- case DSA_PORT_TYPE_CPU:
- if (dp->dn) {
- err = dsa_shared_port_link_register_of(dp);
- if (err)
- break;
- dsa_port_link_registered = true;
- } else {
- dev_warn(ds->dev,
- "skipping link registration for CPU port %d\n",
- dp->index);
- }
-
- err = dsa_port_enable(dp, NULL);
- if (err)
- break;
- dsa_port_enabled = true;
-
- break;
- case DSA_PORT_TYPE_DSA:
- if (dp->dn) {
- err = dsa_shared_port_link_register_of(dp);
- if (err)
- break;
- dsa_port_link_registered = true;
- } else {
- dev_warn(ds->dev,
- "skipping link registration for DSA port %d\n",
- dp->index);
- }
-
- err = dsa_port_enable(dp, NULL);
- if (err)
- break;
- dsa_port_enabled = true;
-
- break;
- case DSA_PORT_TYPE_USER:
- of_get_mac_address(dp->dn, dp->mac);
- err = dsa_slave_create(dp);
- break;
- }
-
- if (err && dsa_port_enabled)
- dsa_port_disable(dp);
- if (err && dsa_port_link_registered)
- dsa_shared_port_link_unregister_of(dp);
- if (err) {
- dsa_port_devlink_teardown(dp);
- return err;
- }
-
- dp->setup = true;
-
- return 0;
-}
-
-static void dsa_port_teardown(struct dsa_port *dp)
-{
- if (!dp->setup)
- return;
-
- switch (dp->type) {
- case DSA_PORT_TYPE_UNUSED:
- break;
- case DSA_PORT_TYPE_CPU:
- dsa_port_disable(dp);
- if (dp->dn)
- dsa_shared_port_link_unregister_of(dp);
- break;
- case DSA_PORT_TYPE_DSA:
- dsa_port_disable(dp);
- if (dp->dn)
- dsa_shared_port_link_unregister_of(dp);
- break;
- case DSA_PORT_TYPE_USER:
- if (dp->slave) {
- dsa_slave_destroy(dp->slave);
- dp->slave = NULL;
- }
- break;
- }
-
- dsa_port_devlink_teardown(dp);
-
- dp->setup = false;
-}
-
-static int dsa_port_setup_as_unused(struct dsa_port *dp)
-{
- dp->type = DSA_PORT_TYPE_UNUSED;
- return dsa_port_setup(dp);
-}
-
-static int dsa_devlink_info_get(struct devlink *dl,
- struct devlink_info_req *req,
- struct netlink_ext_ack *extack)
-{
- struct dsa_switch *ds = dsa_devlink_to_ds(dl);
-
- if (ds->ops->devlink_info_get)
- return ds->ops->devlink_info_get(ds, req, extack);
-
- return -EOPNOTSUPP;
-}
-
-static int dsa_devlink_sb_pool_get(struct devlink *dl,
- unsigned int sb_index, u16 pool_index,
- struct devlink_sb_pool_info *pool_info)
-{
- struct dsa_switch *ds = dsa_devlink_to_ds(dl);
-
- if (!ds->ops->devlink_sb_pool_get)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index,
- pool_info);
-}
-
-static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index,
- u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type,
- struct netlink_ext_ack *extack)
-{
- struct dsa_switch *ds = dsa_devlink_to_ds(dl);
-
- if (!ds->ops->devlink_sb_pool_set)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size,
- threshold_type, extack);
-}
-
-static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp,
- unsigned int sb_index, u16 pool_index,
- u32 *p_threshold)
-{
- struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
- int port = dsa_devlink_port_to_port(dlp);
-
- if (!ds->ops->devlink_sb_port_pool_get)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index,
- pool_index, p_threshold);
-}
-
-static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp,
- unsigned int sb_index, u16 pool_index,
- u32 threshold,
- struct netlink_ext_ack *extack)
-{
- struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
- int port = dsa_devlink_port_to_port(dlp);
-
- if (!ds->ops->devlink_sb_port_pool_set)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index,
- pool_index, threshold, extack);
-}
-
-static int
-dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp,
- unsigned int sb_index, u16 tc_index,
- enum devlink_sb_pool_type pool_type,
- u16 *p_pool_index, u32 *p_threshold)
-{
- struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
- int port = dsa_devlink_port_to_port(dlp);
-
- if (!ds->ops->devlink_sb_tc_pool_bind_get)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index,
- tc_index, pool_type,
- p_pool_index, p_threshold);
-}
-
-static int
-dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp,
- unsigned int sb_index, u16 tc_index,
- enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold,
- struct netlink_ext_ack *extack)
-{
- struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
- int port = dsa_devlink_port_to_port(dlp);
-
- if (!ds->ops->devlink_sb_tc_pool_bind_set)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index,
- tc_index, pool_type,
- pool_index, threshold,
- extack);
-}
-
-static int dsa_devlink_sb_occ_snapshot(struct devlink *dl,
- unsigned int sb_index)
-{
- struct dsa_switch *ds = dsa_devlink_to_ds(dl);
-
- if (!ds->ops->devlink_sb_occ_snapshot)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_occ_snapshot(ds, sb_index);
-}
-
-static int dsa_devlink_sb_occ_max_clear(struct devlink *dl,
- unsigned int sb_index)
-{
- struct dsa_switch *ds = dsa_devlink_to_ds(dl);
-
- if (!ds->ops->devlink_sb_occ_max_clear)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_occ_max_clear(ds, sb_index);
-}
-
-static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp,
- unsigned int sb_index,
- u16 pool_index, u32 *p_cur,
- u32 *p_max)
-{
- struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
- int port = dsa_devlink_port_to_port(dlp);
-
- if (!ds->ops->devlink_sb_occ_port_pool_get)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index,
- pool_index, p_cur, p_max);
-}
-
-static int
-dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp,
- unsigned int sb_index, u16 tc_index,
- enum devlink_sb_pool_type pool_type,
- u32 *p_cur, u32 *p_max)
-{
- struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
- int port = dsa_devlink_port_to_port(dlp);
-
- if (!ds->ops->devlink_sb_occ_tc_port_bind_get)
- return -EOPNOTSUPP;
-
- return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port,
- sb_index, tc_index,
- pool_type, p_cur,
- p_max);
-}
-
-static const struct devlink_ops dsa_devlink_ops = {
- .info_get = dsa_devlink_info_get,
- .sb_pool_get = dsa_devlink_sb_pool_get,
- .sb_pool_set = dsa_devlink_sb_pool_set,
- .sb_port_pool_get = dsa_devlink_sb_port_pool_get,
- .sb_port_pool_set = dsa_devlink_sb_port_pool_set,
- .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get,
- .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set,
- .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot,
- .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear,
- .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get,
- .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get,
-};
-
-static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
-{
- const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
- struct dsa_switch_tree *dst = ds->dst;
- int err;
-
- if (tag_ops->proto == dst->default_proto)
- goto connect;
-
- rtnl_lock();
- err = ds->ops->change_tag_protocol(ds, tag_ops->proto);
- rtnl_unlock();
- if (err) {
- dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
- tag_ops->name, ERR_PTR(err));
- return err;
- }
-
-connect:
- if (tag_ops->connect) {
- err = tag_ops->connect(ds);
- if (err)
- return err;
- }
-
- if (ds->ops->connect_tag_protocol) {
- err = ds->ops->connect_tag_protocol(ds, tag_ops->proto);
- if (err) {
- dev_err(ds->dev,
- "Unable to connect to tag protocol \"%s\": %pe\n",
- tag_ops->name, ERR_PTR(err));
- goto disconnect;
- }
- }
-
- return 0;
-
-disconnect:
- if (tag_ops->disconnect)
- tag_ops->disconnect(ds);
-
- return err;
-}
-
-static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds)
-{
- const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
-
- if (tag_ops->disconnect)
- tag_ops->disconnect(ds);
-}
-
-static int dsa_switch_setup(struct dsa_switch *ds)
-{
- struct dsa_devlink_priv *dl_priv;
- struct device_node *dn;
- int err;
-
- if (ds->setup)
- return 0;
-
- /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
- * driver and before ops->setup() has run, since the switch drivers and
- * the slave MDIO bus driver rely on these values for probing PHY
- * devices or not
- */
- ds->phys_mii_mask |= dsa_user_ports(ds);
-
- /* Add the switch to devlink before calling setup, so that setup can
- * add dpipe tables
- */
- ds->devlink =
- devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
- if (!ds->devlink)
- return -ENOMEM;
- dl_priv = devlink_priv(ds->devlink);
- dl_priv->ds = ds;
-
- err = dsa_switch_register_notifier(ds);
- if (err)
- goto devlink_free;
-
- ds->configure_vlan_while_not_filtering = true;
-
- err = ds->ops->setup(ds);
- if (err < 0)
- goto unregister_notifier;
-
- err = dsa_switch_setup_tag_protocol(ds);
- if (err)
- goto teardown;
-
- if (!ds->slave_mii_bus && ds->ops->phy_read) {
- ds->slave_mii_bus = mdiobus_alloc();
- if (!ds->slave_mii_bus) {
- err = -ENOMEM;
- goto teardown;
- }
-
- dsa_slave_mii_bus_init(ds);
-
- dn = of_get_child_by_name(ds->dev->of_node, "mdio");
-
- err = of_mdiobus_register(ds->slave_mii_bus, dn);
- of_node_put(dn);
- if (err < 0)
- goto free_slave_mii_bus;
- }
-
- ds->setup = true;
- devlink_register(ds->devlink);
- return 0;
-
-free_slave_mii_bus:
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_free(ds->slave_mii_bus);
-teardown:
- if (ds->ops->teardown)
- ds->ops->teardown(ds);
-unregister_notifier:
- dsa_switch_unregister_notifier(ds);
-devlink_free:
- devlink_free(ds->devlink);
- ds->devlink = NULL;
- return err;
-}
-
-static void dsa_switch_teardown(struct dsa_switch *ds)
-{
- if (!ds->setup)
- return;
-
- if (ds->devlink)
- devlink_unregister(ds->devlink);
-
- if (ds->slave_mii_bus && ds->ops->phy_read) {
- mdiobus_unregister(ds->slave_mii_bus);
- mdiobus_free(ds->slave_mii_bus);
- ds->slave_mii_bus = NULL;
- }
-
- dsa_switch_teardown_tag_protocol(ds);
-
- if (ds->ops->teardown)
- ds->ops->teardown(ds);
-
- dsa_switch_unregister_notifier(ds);
-
- if (ds->devlink) {
- devlink_free(ds->devlink);
- ds->devlink = NULL;
- }
-
- ds->setup = false;
-}
-
-/* First tear down the non-shared, then the shared ports. This ensures that
- * all work items scheduled by our switchdev handlers for user ports have
- * completed before we destroy the refcounting kept on the shared ports.
- */
-static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_is_user(dp) || dsa_port_is_unused(dp))
- dsa_port_teardown(dp);
-
- dsa_flush_workqueue();
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
- dsa_port_teardown(dp);
-}
-
-static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- dsa_switch_teardown(dp->ds);
-}
-
-/* Bring shared ports up first, then non-shared ports */
-static int dsa_tree_setup_ports(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
- int err = 0;
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) {
- err = dsa_port_setup(dp);
- if (err)
- goto teardown;
- }
- }
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) {
- err = dsa_port_setup(dp);
- if (err) {
- err = dsa_port_setup_as_unused(dp);
- if (err)
- goto teardown;
- }
- }
- }
-
- return 0;
-
-teardown:
- dsa_tree_teardown_ports(dst);
-
- return err;
-}
-
-static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
- int err = 0;
-
- list_for_each_entry(dp, &dst->ports, list) {
- err = dsa_switch_setup(dp->ds);
- if (err) {
- dsa_tree_teardown_switches(dst);
- break;
- }
- }
-
- return err;
-}
-
-static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
-{
- struct dsa_port *cpu_dp;
- int err = 0;
-
- rtnl_lock();
-
- dsa_tree_for_each_cpu_port(cpu_dp, dst) {
- struct net_device *master = cpu_dp->master;
- bool admin_up = (master->flags & IFF_UP) &&
- !qdisc_tx_is_noop(master);
-
- err = dsa_master_setup(master, cpu_dp);
- if (err)
- break;
-
- /* Replay master state event */
- dsa_tree_master_admin_state_change(dst, master, admin_up);
- dsa_tree_master_oper_state_change(dst, master,
- netif_oper_up(master));
- }
-
- rtnl_unlock();
-
- return err;
-}
-
-static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
-{
- struct dsa_port *cpu_dp;
-
- rtnl_lock();
-
- dsa_tree_for_each_cpu_port(cpu_dp, dst) {
- struct net_device *master = cpu_dp->master;
-
- /* Synthesizing an "admin down" state is sufficient for
- * the switches to get a notification if the master is
- * currently up and running.
- */
- dsa_tree_master_admin_state_change(dst, master, false);
-
- dsa_master_teardown(master);
- }
-
- rtnl_unlock();
-}
-
-static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
-{
- unsigned int len = 0;
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (dp->ds->num_lag_ids > len)
- len = dp->ds->num_lag_ids;
- }
-
- if (!len)
- return 0;
-
- dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
- if (!dst->lags)
- return -ENOMEM;
-
- dst->lags_len = len;
- return 0;
-}
-
-static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
-{
- kfree(dst->lags);
-}
-
-static int dsa_tree_setup(struct dsa_switch_tree *dst)
-{
- bool complete;
- int err;
-
- if (dst->setup) {
- pr_err("DSA: tree %d already setup! Disjoint trees?\n",
- dst->index);
- return -EEXIST;
- }
-
- complete = dsa_tree_setup_routing_table(dst);
- if (!complete)
- return 0;
-
- err = dsa_tree_setup_cpu_ports(dst);
- if (err)
- return err;
-
- err = dsa_tree_setup_switches(dst);
- if (err)
- goto teardown_cpu_ports;
-
- err = dsa_tree_setup_ports(dst);
- if (err)
- goto teardown_switches;
-
- err = dsa_tree_setup_master(dst);
- if (err)
- goto teardown_ports;
-
- err = dsa_tree_setup_lags(dst);
- if (err)
- goto teardown_master;
-
- dst->setup = true;
-
- pr_info("DSA: tree %d setup\n", dst->index);
-
- return 0;
-
-teardown_master:
- dsa_tree_teardown_master(dst);
-teardown_ports:
- dsa_tree_teardown_ports(dst);
-teardown_switches:
- dsa_tree_teardown_switches(dst);
-teardown_cpu_ports:
- dsa_tree_teardown_cpu_ports(dst);
-
- return err;
-}
-
-static void dsa_tree_teardown(struct dsa_switch_tree *dst)
-{
- struct dsa_link *dl, *next;
-
- if (!dst->setup)
- return;
-
- dsa_tree_teardown_lags(dst);
-
- dsa_tree_teardown_master(dst);
-
- dsa_tree_teardown_ports(dst);
-
- dsa_tree_teardown_switches(dst);
-
- dsa_tree_teardown_cpu_ports(dst);
-
- list_for_each_entry_safe(dl, next, &dst->rtable, list) {
- list_del(&dl->list);
- kfree(dl);
- }
-
- pr_info("DSA: tree %d torn down\n", dst->index);
-
- dst->setup = false;
-}
-
-static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst,
- const struct dsa_device_ops *tag_ops)
-{
- const struct dsa_device_ops *old_tag_ops = dst->tag_ops;
- struct dsa_notifier_tag_proto_info info;
- int err;
-
- dst->tag_ops = tag_ops;
-
- /* Notify the switches from this tree about the connection
- * to the new tagger
- */
- info.tag_ops = tag_ops;
- err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info);
- if (err && err != -EOPNOTSUPP)
- goto out_disconnect;
-
- /* Notify the old tagger about the disconnection from this tree */
- info.tag_ops = old_tag_ops;
- dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
-
- return 0;
-
-out_disconnect:
- info.tag_ops = tag_ops;
- dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
- dst->tag_ops = old_tag_ops;
-
- return err;
-}
-
-/* Since the dsa/tagging sysfs device attribute is per master, the assumption
- * is that all DSA switches within a tree share the same tagger, otherwise
- * they would have formed disjoint trees (different "dsa,member" values).
- */
-int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- const struct dsa_device_ops *tag_ops,
- const struct dsa_device_ops *old_tag_ops)
-{
- struct dsa_notifier_tag_proto_info info;
- struct dsa_port *dp;
- int err = -EBUSY;
-
- if (!rtnl_trylock())
- return restart_syscall();
-
- /* At the moment we don't allow changing the tag protocol under
- * traffic. The rtnl_mutex also happens to serialize concurrent
- * attempts to change the tagging protocol. If we ever lift the IFF_UP
- * restriction, there needs to be another mutex which serializes this.
- */
- dsa_tree_for_each_user_port(dp, dst) {
- if (dsa_port_to_master(dp)->flags & IFF_UP)
- goto out_unlock;
-
- if (dp->slave->flags & IFF_UP)
- goto out_unlock;
- }
-
- /* Notify the tag protocol change */
- info.tag_ops = tag_ops;
- err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
- if (err)
- goto out_unwind_tagger;
-
- err = dsa_tree_bind_tag_proto(dst, tag_ops);
- if (err)
- goto out_unwind_tagger;
-
- rtnl_unlock();
-
- return 0;
-
-out_unwind_tagger:
- info.tag_ops = old_tag_ops;
- dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
-out_unlock:
- rtnl_unlock();
- return err;
-}
-
-static void dsa_tree_master_state_change(struct dsa_switch_tree *dst,
- struct net_device *master)
-{
- struct dsa_notifier_master_state_info info;
- struct dsa_port *cpu_dp = master->dsa_ptr;
-
- info.master = master;
- info.operational = dsa_port_master_is_operational(cpu_dp);
-
- dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info);
-}
-
-void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
- bool up)
-{
- struct dsa_port *cpu_dp = master->dsa_ptr;
- bool notify = false;
-
- /* Don't keep track of admin state on LAG DSA masters,
- * but rather just of physical DSA masters
- */
- if (netif_is_lag_master(master))
- return;
-
- if ((dsa_port_master_is_operational(cpu_dp)) !=
- (up && cpu_dp->master_oper_up))
- notify = true;
-
- cpu_dp->master_admin_up = up;
-
- if (notify)
- dsa_tree_master_state_change(dst, master);
-}
-
-void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
- bool up)
-{
- struct dsa_port *cpu_dp = master->dsa_ptr;
- bool notify = false;
-
- /* Don't keep track of oper state on LAG DSA masters,
- * but rather just of physical DSA masters
- */
- if (netif_is_lag_master(master))
- return;
-
- if ((dsa_port_master_is_operational(cpu_dp)) !=
- (cpu_dp->master_admin_up && up))
- notify = true;
-
- cpu_dp->master_oper_up = up;
-
- if (notify)
- dsa_tree_master_state_change(dst, master);
-}
-
-static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
-{
- struct dsa_switch_tree *dst = ds->dst;
- struct dsa_port *dp;
-
- dsa_switch_for_each_port(dp, ds)
- if (dp->index == index)
- return dp;
-
- dp = kzalloc(sizeof(*dp), GFP_KERNEL);
- if (!dp)
- return NULL;
-
- dp->ds = ds;
- dp->index = index;
-
- mutex_init(&dp->addr_lists_lock);
- mutex_init(&dp->vlans_lock);
- INIT_LIST_HEAD(&dp->fdbs);
- INIT_LIST_HEAD(&dp->mdbs);
- INIT_LIST_HEAD(&dp->vlans);
- INIT_LIST_HEAD(&dp->list);
- list_add_tail(&dp->list, &dst->ports);
-
- return dp;
-}
-
-static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
-{
- dp->type = DSA_PORT_TYPE_USER;
- dp->name = name;
-
- return 0;
-}
-
-static int dsa_port_parse_dsa(struct dsa_port *dp)
-{
- dp->type = DSA_PORT_TYPE_DSA;
-
- return 0;
-}
-
-static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
- struct net_device *master)
-{
- enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE;
- struct dsa_switch *mds, *ds = dp->ds;
- unsigned int mdp_upstream;
- struct dsa_port *mdp;
-
- /* It is possible to stack DSA switches onto one another when that
- * happens the switch driver may want to know if its tagging protocol
- * is going to work in such a configuration.
- */
- if (dsa_slave_dev_check(master)) {
- mdp = dsa_slave_to_port(master);
- mds = mdp->ds;
- mdp_upstream = dsa_upstream_port(mds, mdp->index);
- tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream,
- DSA_TAG_PROTO_NONE);
- }
-
- /* If the master device is not itself a DSA slave in a disjoint DSA
- * tree, then return immediately.
- */
- return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol);
-}
-
-static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
- const char *user_protocol)
-{
- const struct dsa_device_ops *tag_ops = NULL;
- struct dsa_switch *ds = dp->ds;
- struct dsa_switch_tree *dst = ds->dst;
- enum dsa_tag_protocol default_proto;
-
- /* Find out which protocol the switch would prefer. */
- default_proto = dsa_get_tag_protocol(dp, master);
- if (dst->default_proto) {
- if (dst->default_proto != default_proto) {
- dev_err(ds->dev,
- "A DSA switch tree can have only one tagging protocol\n");
- return -EINVAL;
- }
- } else {
- dst->default_proto = default_proto;
- }
-
- /* See if the user wants to override that preference. */
- if (user_protocol) {
- if (!ds->ops->change_tag_protocol) {
- dev_err(ds->dev, "Tag protocol cannot be modified\n");
- return -EINVAL;
- }
-
- tag_ops = dsa_tag_driver_get_by_name(user_protocol);
- if (IS_ERR(tag_ops)) {
- dev_warn(ds->dev,
- "Failed to find a tagging driver for protocol %s, using default\n",
- user_protocol);
- tag_ops = NULL;
- }
- }
-
- if (!tag_ops)
- tag_ops = dsa_tag_driver_get_by_id(default_proto);
-
- if (IS_ERR(tag_ops)) {
- if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
- return -EPROBE_DEFER;
-
- dev_warn(ds->dev, "No tagger for this switch\n");
- return PTR_ERR(tag_ops);
- }
-
- if (dst->tag_ops) {
- if (dst->tag_ops != tag_ops) {
- dev_err(ds->dev,
- "A DSA switch tree can have only one tagging protocol\n");
-
- dsa_tag_driver_put(tag_ops);
- return -EINVAL;
- }
-
- /* In the case of multiple CPU ports per switch, the tagging
- * protocol is still reference-counted only per switch tree.
- */
- dsa_tag_driver_put(tag_ops);
- } else {
- dst->tag_ops = tag_ops;
- }
-
- dp->master = master;
- dp->type = DSA_PORT_TYPE_CPU;
- dsa_port_set_tag_protocol(dp, dst->tag_ops);
- dp->dst = dst;
-
- /* At this point, the tree may be configured to use a different
- * tagger than the one chosen by the switch driver during
- * .setup, in the case when a user selects a custom protocol
- * through the DT.
- *
- * This is resolved by syncing the driver with the tree in
- * dsa_switch_setup_tag_protocol once .setup has run and the
- * driver is ready to accept calls to .change_tag_protocol. If
- * the driver does not support the custom protocol at that
- * point, the tree is wholly rejected, thereby ensuring that the
- * tree and driver are always in agreement on the protocol to
- * use.
- */
- return 0;
-}
-
-static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
-{
- struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
- const char *name = of_get_property(dn, "label", NULL);
- bool link = of_property_read_bool(dn, "link");
-
- dp->dn = dn;
-
- if (ethernet) {
- struct net_device *master;
- const char *user_protocol;
-
- master = of_find_net_device_by_node(ethernet);
- of_node_put(ethernet);
- if (!master)
- return -EPROBE_DEFER;
-
- user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL);
- return dsa_port_parse_cpu(dp, master, user_protocol);
- }
-
- if (link)
- return dsa_port_parse_dsa(dp);
-
- return dsa_port_parse_user(dp, name);
-}
-
-static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
- struct device_node *dn)
-{
- struct device_node *ports, *port;
- struct dsa_port *dp;
- int err = 0;
- u32 reg;
-
- ports = of_get_child_by_name(dn, "ports");
- if (!ports) {
- /* The second possibility is "ethernet-ports" */
- ports = of_get_child_by_name(dn, "ethernet-ports");
- if (!ports) {
- dev_err(ds->dev, "no ports child node found\n");
- return -EINVAL;
- }
- }
-
- for_each_available_child_of_node(ports, port) {
- err = of_property_read_u32(port, "reg", &reg);
- if (err) {
- of_node_put(port);
- goto out_put_node;
- }
-
- if (reg >= ds->num_ports) {
- dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%u)\n",
- port, reg, ds->num_ports);
- of_node_put(port);
- err = -EINVAL;
- goto out_put_node;
- }
-
- dp = dsa_to_port(ds, reg);
-
- err = dsa_port_parse_of(dp, port);
- if (err) {
- of_node_put(port);
- goto out_put_node;
- }
- }
-
-out_put_node:
- of_node_put(ports);
- return err;
-}
-
-static int dsa_switch_parse_member_of(struct dsa_switch *ds,
- struct device_node *dn)
-{
- u32 m[2] = { 0, 0 };
- int sz;
-
- /* Don't error out if this optional property isn't found */
- sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
- if (sz < 0 && sz != -EINVAL)
- return sz;
-
- ds->index = m[1];
-
- ds->dst = dsa_tree_touch(m[0]);
- if (!ds->dst)
- return -ENOMEM;
-
- if (dsa_switch_find(ds->dst->index, ds->index)) {
- dev_err(ds->dev,
- "A DSA switch with index %d already exists in tree %d\n",
- ds->index, ds->dst->index);
- return -EEXIST;
- }
-
- if (ds->dst->last_switch < ds->index)
- ds->dst->last_switch = ds->index;
-
- return 0;
-}
-
-static int dsa_switch_touch_ports(struct dsa_switch *ds)
-{
- struct dsa_port *dp;
- int port;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = dsa_port_touch(ds, port);
- if (!dp)
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
-{
- int err;
-
- err = dsa_switch_parse_member_of(ds, dn);
- if (err)
- return err;
-
- err = dsa_switch_touch_ports(ds);
- if (err)
- return err;
-
- return dsa_switch_parse_ports_of(ds, dn);
-}
-
-static int dsa_port_parse(struct dsa_port *dp, const char *name,
- struct device *dev)
-{
- if (!strcmp(name, "cpu")) {
- struct net_device *master;
-
- master = dsa_dev_to_net_device(dev);
- if (!master)
- return -EPROBE_DEFER;
-
- dev_put(master);
-
- return dsa_port_parse_cpu(dp, master, NULL);
- }
-
- if (!strcmp(name, "dsa"))
- return dsa_port_parse_dsa(dp);
-
- return dsa_port_parse_user(dp, name);
-}
-
-static int dsa_switch_parse_ports(struct dsa_switch *ds,
- struct dsa_chip_data *cd)
-{
- bool valid_name_found = false;
- struct dsa_port *dp;
- struct device *dev;
- const char *name;
- unsigned int i;
- int err;
-
- for (i = 0; i < DSA_MAX_PORTS; i++) {
- name = cd->port_names[i];
- dev = cd->netdev[i];
- dp = dsa_to_port(ds, i);
-
- if (!name)
- continue;
-
- err = dsa_port_parse(dp, name, dev);
- if (err)
- return err;
-
- valid_name_found = true;
- }
-
- if (!valid_name_found && i == DSA_MAX_PORTS)
- return -EINVAL;
-
- return 0;
-}
-
-static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
-{
- int err;
-
- ds->cd = cd;
-
- /* We don't support interconnected switches nor multiple trees via
- * platform data, so this is the unique switch of the tree.
- */
- ds->index = 0;
- ds->dst = dsa_tree_touch(0);
- if (!ds->dst)
- return -ENOMEM;
-
- err = dsa_switch_touch_ports(ds);
- if (err)
- return err;
-
- return dsa_switch_parse_ports(ds, cd);
-}
-
-static void dsa_switch_release_ports(struct dsa_switch *ds)
-{
- struct dsa_port *dp, *next;
-
- dsa_switch_for_each_port_safe(dp, next, ds) {
- WARN_ON(!list_empty(&dp->fdbs));
- WARN_ON(!list_empty(&dp->mdbs));
- WARN_ON(!list_empty(&dp->vlans));
- list_del(&dp->list);
- kfree(dp);
- }
-}
-
-static int dsa_switch_probe(struct dsa_switch *ds)
-{
- struct dsa_switch_tree *dst;
- struct dsa_chip_data *pdata;
- struct device_node *np;
- int err;
-
- if (!ds->dev)
- return -ENODEV;
-
- pdata = ds->dev->platform_data;
- np = ds->dev->of_node;
-
- if (!ds->num_ports)
- return -EINVAL;
-
- if (np) {
- err = dsa_switch_parse_of(ds, np);
- if (err)
- dsa_switch_release_ports(ds);
- } else if (pdata) {
- err = dsa_switch_parse(ds, pdata);
- if (err)
- dsa_switch_release_ports(ds);
- } else {
- err = -ENODEV;
- }
-
- if (err)
- return err;
-
- dst = ds->dst;
- dsa_tree_get(dst);
- err = dsa_tree_setup(dst);
- if (err) {
- dsa_switch_release_ports(ds);
- dsa_tree_put(dst);
- }
-
- return err;
-}
-
-int dsa_register_switch(struct dsa_switch *ds)
-{
- int err;
-
- mutex_lock(&dsa2_mutex);
- err = dsa_switch_probe(ds);
- dsa_tree_put(ds->dst);
- mutex_unlock(&dsa2_mutex);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(dsa_register_switch);
-
-static void dsa_switch_remove(struct dsa_switch *ds)
-{
- struct dsa_switch_tree *dst = ds->dst;
-
- dsa_tree_teardown(dst);
- dsa_switch_release_ports(ds);
- dsa_tree_put(dst);
-}
-
-void dsa_unregister_switch(struct dsa_switch *ds)
-{
- mutex_lock(&dsa2_mutex);
- dsa_switch_remove(ds);
- mutex_unlock(&dsa2_mutex);
-}
-EXPORT_SYMBOL_GPL(dsa_unregister_switch);
-
-/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
- * blocking that operation from completion, due to the dev_hold taken inside
- * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
- * the DSA master, so that the system can reboot successfully.
- */
-void dsa_switch_shutdown(struct dsa_switch *ds)
-{
- struct net_device *master, *slave_dev;
- struct dsa_port *dp;
-
- mutex_lock(&dsa2_mutex);
-
- if (!ds->setup)
- goto out;
-
- rtnl_lock();
-
- dsa_switch_for_each_user_port(dp, ds) {
- master = dsa_port_to_master(dp);
- slave_dev = dp->slave;
-
- netdev_upper_dev_unlink(master, slave_dev);
- }
-
- /* Disconnect from further netdevice notifiers on the master,
- * since netdev_uses_dsa() will now return false.
- */
- dsa_switch_for_each_cpu_port(dp, ds)
- dp->master->dsa_ptr = NULL;
-
- rtnl_unlock();
-out:
- mutex_unlock(&dsa2_mutex);
-}
-EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
deleted file mode 100644
index 24e0ea218a35..000000000000
--- a/net/dsa/dsa_priv.h
+++ /dev/null
@@ -1,663 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * net/dsa/dsa_priv.h - Hardware switch handling
- * Copyright (c) 2008-2009 Marvell Semiconductor
- */
-
-#ifndef __DSA_PRIV_H
-#define __DSA_PRIV_H
-
-#include <linux/if_bridge.h>
-#include <linux/if_vlan.h>
-#include <linux/phy.h>
-#include <linux/netdevice.h>
-#include <linux/netpoll.h>
-#include <net/dsa.h>
-#include <net/gro_cells.h>
-
-#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG
-
-/* Create 2 modaliases per tagging protocol, one to auto-load the module
- * given the ID reported by get_tag_protocol(), and the other by name.
- */
-#define DSA_TAG_DRIVER_ALIAS "dsa_tag:"
-#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto, __name) \
- MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __name); \
- MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS "id-" \
- __stringify(__proto##_VALUE))
-
-struct dsa_tag_driver {
- const struct dsa_device_ops *ops;
- struct list_head list;
- struct module *owner;
-};
-
-void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
- unsigned int count,
- struct module *owner);
-void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
- unsigned int count);
-
-#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \
-static int __init dsa_tag_driver_module_init(void) \
-{ \
- dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \
- THIS_MODULE); \
- return 0; \
-} \
-module_init(dsa_tag_driver_module_init); \
- \
-static void __exit dsa_tag_driver_module_exit(void) \
-{ \
- dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \
-} \
-module_exit(dsa_tag_driver_module_exit)
-
-/**
- * module_dsa_tag_drivers() - Helper macro for registering DSA tag
- * drivers
- * @__ops_array: Array of tag driver structures
- *
- * Helper macro for DSA tag drivers which do not do anything special
- * in module init/exit. Each module may only use this macro once, and
- * calling it replaces module_init() and module_exit().
- */
-#define module_dsa_tag_drivers(__ops_array) \
-dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array))
-
-#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops
-
-/* Create a static structure we can build a linked list of dsa_tag
- * drivers
- */
-#define DSA_TAG_DRIVER(__ops) \
-static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \
- .ops = &__ops, \
-}
-
-/**
- * module_dsa_tag_driver() - Helper macro for registering a single DSA tag
- * driver
- * @__ops: Single tag driver structures
- *
- * Helper macro for DSA tag drivers which do not do anything special
- * in module init/exit. Each module may only use this macro once, and
- * calling it replaces module_init() and module_exit().
- */
-#define module_dsa_tag_driver(__ops) \
-DSA_TAG_DRIVER(__ops); \
- \
-static struct dsa_tag_driver *dsa_tag_driver_array[] = { \
- &DSA_TAG_DRIVER_NAME(__ops) \
-}; \
-module_dsa_tag_drivers(dsa_tag_driver_array)
-
-enum {
- DSA_NOTIFIER_AGEING_TIME,
- DSA_NOTIFIER_BRIDGE_JOIN,
- DSA_NOTIFIER_BRIDGE_LEAVE,
- DSA_NOTIFIER_FDB_ADD,
- DSA_NOTIFIER_FDB_DEL,
- DSA_NOTIFIER_HOST_FDB_ADD,
- DSA_NOTIFIER_HOST_FDB_DEL,
- DSA_NOTIFIER_LAG_FDB_ADD,
- DSA_NOTIFIER_LAG_FDB_DEL,
- DSA_NOTIFIER_LAG_CHANGE,
- DSA_NOTIFIER_LAG_JOIN,
- DSA_NOTIFIER_LAG_LEAVE,
- DSA_NOTIFIER_MDB_ADD,
- DSA_NOTIFIER_MDB_DEL,
- DSA_NOTIFIER_HOST_MDB_ADD,
- DSA_NOTIFIER_HOST_MDB_DEL,
- DSA_NOTIFIER_VLAN_ADD,
- DSA_NOTIFIER_VLAN_DEL,
- DSA_NOTIFIER_HOST_VLAN_ADD,
- DSA_NOTIFIER_HOST_VLAN_DEL,
- DSA_NOTIFIER_MTU,
- DSA_NOTIFIER_TAG_PROTO,
- DSA_NOTIFIER_TAG_PROTO_CONNECT,
- DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
- DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
- DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
- DSA_NOTIFIER_MASTER_STATE_CHANGE,
-};
-
-/* DSA_NOTIFIER_AGEING_TIME */
-struct dsa_notifier_ageing_time_info {
- unsigned int ageing_time;
-};
-
-/* DSA_NOTIFIER_BRIDGE_* */
-struct dsa_notifier_bridge_info {
- const struct dsa_port *dp;
- struct dsa_bridge bridge;
- bool tx_fwd_offload;
- struct netlink_ext_ack *extack;
-};
-
-/* DSA_NOTIFIER_FDB_* */
-struct dsa_notifier_fdb_info {
- const struct dsa_port *dp;
- const unsigned char *addr;
- u16 vid;
- struct dsa_db db;
-};
-
-/* DSA_NOTIFIER_LAG_FDB_* */
-struct dsa_notifier_lag_fdb_info {
- struct dsa_lag *lag;
- const unsigned char *addr;
- u16 vid;
- struct dsa_db db;
-};
-
-/* DSA_NOTIFIER_MDB_* */
-struct dsa_notifier_mdb_info {
- const struct dsa_port *dp;
- const struct switchdev_obj_port_mdb *mdb;
- struct dsa_db db;
-};
-
-/* DSA_NOTIFIER_LAG_* */
-struct dsa_notifier_lag_info {
- const struct dsa_port *dp;
- struct dsa_lag lag;
- struct netdev_lag_upper_info *info;
- struct netlink_ext_ack *extack;
-};
-
-/* DSA_NOTIFIER_VLAN_* */
-struct dsa_notifier_vlan_info {
- const struct dsa_port *dp;
- const struct switchdev_obj_port_vlan *vlan;
- struct netlink_ext_ack *extack;
-};
-
-/* DSA_NOTIFIER_MTU */
-struct dsa_notifier_mtu_info {
- const struct dsa_port *dp;
- int mtu;
-};
-
-/* DSA_NOTIFIER_TAG_PROTO_* */
-struct dsa_notifier_tag_proto_info {
- const struct dsa_device_ops *tag_ops;
-};
-
-/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
-struct dsa_notifier_tag_8021q_vlan_info {
- const struct dsa_port *dp;
- u16 vid;
-};
-
-/* DSA_NOTIFIER_MASTER_STATE_CHANGE */
-struct dsa_notifier_master_state_info {
- const struct net_device *master;
- bool operational;
-};
-
-struct dsa_switchdev_event_work {
- struct net_device *dev;
- struct net_device *orig_dev;
- struct work_struct work;
- unsigned long event;
- /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
- * SWITCHDEV_FDB_DEL_TO_DEVICE
- */
- unsigned char addr[ETH_ALEN];
- u16 vid;
- bool host_addr;
-};
-
-enum dsa_standalone_event {
- DSA_UC_ADD,
- DSA_UC_DEL,
- DSA_MC_ADD,
- DSA_MC_DEL,
-};
-
-struct dsa_standalone_event_work {
- struct work_struct work;
- struct net_device *dev;
- enum dsa_standalone_event event;
- unsigned char addr[ETH_ALEN];
- u16 vid;
-};
-
-struct dsa_slave_priv {
- /* Copy of CPU port xmit for faster access in slave transmit hot path */
- struct sk_buff * (*xmit)(struct sk_buff *skb,
- struct net_device *dev);
-
- struct gro_cells gcells;
-
- /* DSA port data, such as switch, port index, etc. */
- struct dsa_port *dp;
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
- struct netpoll *netpoll;
-#endif
-
- /* TC context */
- struct list_head mall_tc_list;
-};
-
-/* dsa.c */
-const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol);
-const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name);
-void dsa_tag_driver_put(const struct dsa_device_ops *ops);
-
-bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b);
-
-bool dsa_schedule_work(struct work_struct *work);
-const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
-
-static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
-{
- return ops->needed_headroom + ops->needed_tailroom;
-}
-
-/* master.c */
-int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
-void dsa_master_teardown(struct net_device *dev);
-int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
- struct netdev_lag_upper_info *uinfo,
- struct netlink_ext_ack *extack);
-void dsa_master_lag_teardown(struct net_device *lag_dev,
- struct dsa_port *cpu_dp);
-
-static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
- int device, int port)
-{
- struct dsa_port *cpu_dp = dev->dsa_ptr;
- struct dsa_switch_tree *dst = cpu_dp->dst;
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- if (dp->ds->index == device && dp->index == port &&
- dp->type == DSA_PORT_TYPE_USER)
- return dp->slave;
-
- return NULL;
-}
-
-/* netlink.c */
-extern struct rtnl_link_ops dsa_link_ops __read_mostly;
-
-/* port.c */
-bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr);
-void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
- const struct dsa_device_ops *tag_ops);
-int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
-int dsa_port_set_mst_state(struct dsa_port *dp,
- const struct switchdev_mst_state *state,
- struct netlink_ext_ack *extack);
-int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
-int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
-void dsa_port_disable_rt(struct dsa_port *dp);
-void dsa_port_disable(struct dsa_port *dp);
-int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
- struct netlink_ext_ack *extack);
-void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br);
-void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
-int dsa_port_lag_change(struct dsa_port *dp,
- struct netdev_lag_lower_state_info *linfo);
-int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
- struct netdev_lag_upper_info *uinfo,
- struct netlink_ext_ack *extack);
-void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
-void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
-int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
- struct netlink_ext_ack *extack);
-bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
-int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
-int dsa_port_mst_enable(struct dsa_port *dp, bool on,
- struct netlink_ext_ack *extack);
-int dsa_port_vlan_msti(struct dsa_port *dp,
- const struct switchdev_vlan_msti *msti);
-int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu);
-int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_standalone_host_fdb_add(struct dsa_port *dp,
- const unsigned char *addr, u16 vid);
-int dsa_port_standalone_host_fdb_del(struct dsa_port *dp,
- const unsigned char *addr, u16 vid);
-int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
-int dsa_port_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_mdb_del(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
- struct switchdev_brport_flags flags,
- struct netlink_ext_ack *extack);
-int dsa_port_bridge_flags(struct dsa_port *dp,
- struct switchdev_brport_flags flags,
- struct netlink_ext_ack *extack);
-int dsa_port_vlan_add(struct dsa_port *dp,
- const struct switchdev_obj_port_vlan *vlan,
- struct netlink_ext_ack *extack);
-int dsa_port_vlan_del(struct dsa_port *dp,
- const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_host_vlan_add(struct dsa_port *dp,
- const struct switchdev_obj_port_vlan *vlan,
- struct netlink_ext_ack *extack);
-int dsa_port_host_vlan_del(struct dsa_port *dp,
- const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_mrp_add(const struct dsa_port *dp,
- const struct switchdev_obj_mrp *mrp);
-int dsa_port_mrp_del(const struct dsa_port *dp,
- const struct switchdev_obj_mrp *mrp);
-int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
- const struct switchdev_obj_ring_role_mrp *mrp);
-int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
- const struct switchdev_obj_ring_role_mrp *mrp);
-int dsa_port_phylink_create(struct dsa_port *dp);
-void dsa_port_phylink_destroy(struct dsa_port *dp);
-int dsa_shared_port_link_register_of(struct dsa_port *dp);
-void dsa_shared_port_link_unregister_of(struct dsa_port *dp);
-int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
-void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
-int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
-void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
-void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc);
-int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
- struct netlink_ext_ack *extack);
-
-/* slave.c */
-extern const struct dsa_device_ops notag_netdev_ops;
-extern struct notifier_block dsa_slave_switchdev_notifier;
-extern struct notifier_block dsa_slave_switchdev_blocking_notifier;
-
-void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-int dsa_slave_create(struct dsa_port *dp);
-void dsa_slave_destroy(struct net_device *slave_dev);
-int dsa_slave_suspend(struct net_device *slave_dev);
-int dsa_slave_resume(struct net_device *slave_dev);
-int dsa_slave_register_notifier(void);
-void dsa_slave_unregister_notifier(void);
-void dsa_slave_sync_ha(struct net_device *dev);
-void dsa_slave_unsync_ha(struct net_device *dev);
-void dsa_slave_setup_tagger(struct net_device *slave);
-int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
-int dsa_slave_change_master(struct net_device *dev, struct net_device *master,
- struct netlink_ext_ack *extack);
-int dsa_slave_manage_vlan_filtering(struct net_device *dev,
- bool vlan_filtering);
-
-static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- return p->dp;
-}
-
-static inline struct net_device *
-dsa_slave_to_master(const struct net_device *dev)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_to_master(dp);
-}
-
-/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged
- * frames as untagged, since the bridge will not untag them.
- */
-static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
-{
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
- struct net_device *br = dsa_port_bridge_dev_get(dp);
- struct net_device *dev = skb->dev;
- struct net_device *upper_dev;
- u16 vid, pvid, proto;
- int err;
-
- if (!br || br_vlan_enabled(br))
- return skb;
-
- err = br_vlan_get_proto(br, &proto);
- if (err)
- return skb;
-
- /* Move VLAN tag from data to hwaccel */
- if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) {
- skb = skb_vlan_untag(skb);
- if (!skb)
- return NULL;
- }
-
- if (!skb_vlan_tag_present(skb))
- return skb;
-
- vid = skb_vlan_tag_get_id(skb);
-
- /* We already run under an RCU read-side critical section since
- * we are called from netif_receive_skb_list_internal().
- */
- err = br_vlan_get_pvid_rcu(dev, &pvid);
- if (err)
- return skb;
-
- if (vid != pvid)
- return skb;
-
- /* The sad part about attempting to untag from DSA is that we
- * don't know, unless we check, if the skb will end up in
- * the bridge's data path - br_allowed_ingress() - or not.
- * For example, there might be an 8021q upper for the
- * default_pvid of the bridge, which will steal VLAN-tagged traffic
- * from the bridge's data path. This is a configuration that DSA
- * supports because vlan_filtering is 0. In that case, we should
- * definitely keep the tag, to make sure it keeps working.
- */
- upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid);
- if (upper_dev)
- return skb;
-
- __vlan_hwaccel_clear_tag(skb);
-
- return skb;
-}
-
-/* For switches without hardware support for DSA tagging to be able
- * to support termination through the bridge.
- */
-static inline struct net_device *
-dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
-{
- struct dsa_port *cpu_dp = master->dsa_ptr;
- struct dsa_switch_tree *dst = cpu_dp->dst;
- struct bridge_vlan_info vinfo;
- struct net_device *slave;
- struct dsa_port *dp;
- int err;
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (dp->type != DSA_PORT_TYPE_USER)
- continue;
-
- if (!dp->bridge)
- continue;
-
- if (dp->stp_state != BR_STATE_LEARNING &&
- dp->stp_state != BR_STATE_FORWARDING)
- continue;
-
- /* Since the bridge might learn this packet, keep the CPU port
- * affinity with the port that will be used for the reply on
- * xmit.
- */
- if (dp->cpu_dp != cpu_dp)
- continue;
-
- slave = dp->slave;
-
- err = br_vlan_get_info_rcu(slave, vid, &vinfo);
- if (err)
- continue;
-
- return slave;
- }
-
- return NULL;
-}
-
-/* If the ingress port offloads the bridge, we mark the frame as autonomously
- * forwarded by hardware, so the software bridge doesn't forward in twice, back
- * to us, because we already did. However, if we're in fallback mode and we do
- * software bridging, we are not offloading it, therefore the dp->bridge
- * pointer is not populated, and flooding needs to be done by software (we are
- * effectively operating in standalone ports mode).
- */
-static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
-{
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
-
- skb->offload_fwd_mark = !!(dp->bridge);
-}
-
-/* Helper for removing DSA header tags from packets in the RX path.
- * Must not be called before skb_pull(len).
- * skb->data
- * |
- * v
- * | | | | | | | | | | | | | | | | | | |
- * +-----------------------+-----------------------+---------------+-------+
- * | Destination MAC | Source MAC | DSA header | EType |
- * +-----------------------+-----------------------+---------------+-------+
- * | |
- * <----- len -----> <----- len ----->
- * |
- * >>>>>>> v
- * >>>>>>> | | | | | | | | | | | | | | |
- * >>>>>>> +-----------------------+-----------------------+-------+
- * >>>>>>> | Destination MAC | Source MAC | EType |
- * +-----------------------+-----------------------+-------+
- * ^
- * |
- * skb->data
- */
-static inline void dsa_strip_etype_header(struct sk_buff *skb, int len)
-{
- memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
-}
-
-/* Helper for creating space for DSA header tags in TX path packets.
- * Must not be called before skb_push(len).
- *
- * Before:
- *
- * <<<<<<< | | | | | | | | | | | | | | |
- * ^ <<<<<<< +-----------------------+-----------------------+-------+
- * | <<<<<<< | Destination MAC | Source MAC | EType |
- * | +-----------------------+-----------------------+-------+
- * <----- len ----->
- * |
- * |
- * skb->data
- *
- * After:
- *
- * | | | | | | | | | | | | | | | | | | |
- * +-----------------------+-----------------------+---------------+-------+
- * | Destination MAC | Source MAC | DSA header | EType |
- * +-----------------------+-----------------------+---------------+-------+
- * ^ | |
- * | <----- len ----->
- * skb->data
- */
-static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
-{
- memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
-}
-
-/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
- * skb_mac_header(skb), which leaves skb->data pointing at the first byte after
- * what the DSA master perceives as the EtherType (the beginning of the L3
- * protocol). Since DSA EtherType header taggers treat the EtherType as part of
- * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
- * is located 2 bytes behind skb->data. Note that EtherType in this context
- * means the first 2 bytes of the DSA header, not the encapsulated EtherType
- * that will become visible after the DSA header is stripped.
- */
-static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb)
-{
- return skb->data - 2;
-}
-
-/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType
- * header taggers start exactly where the EtherType is (the EtherType is
- * treated as part of the DSA header).
- */
-static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
-{
- return skb->data + 2 * ETH_ALEN;
-}
-
-/* switch.c */
-int dsa_switch_register_notifier(struct dsa_switch *ds);
-void dsa_switch_unregister_notifier(struct dsa_switch *ds);
-
-static inline bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds)
-{
- return ds->ops->port_fdb_add && ds->ops->port_fdb_del &&
- ds->fdb_isolation && !ds->vlan_filtering_is_global &&
- !ds->needs_standalone_vlan_filtering;
-}
-
-static inline bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds)
-{
- return ds->ops->port_mdb_add && ds->ops->port_mdb_del &&
- ds->fdb_isolation && !ds->vlan_filtering_is_global &&
- !ds->needs_standalone_vlan_filtering;
-}
-
-/* dsa2.c */
-void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag);
-void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag);
-struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
- const struct net_device *lag_dev);
-struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst);
-int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
-int dsa_broadcast(unsigned long e, void *v);
-int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- const struct dsa_device_ops *tag_ops,
- const struct dsa_device_ops *old_tag_ops);
-void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
- bool up);
-void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
- bool up);
-unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
-void dsa_bridge_num_put(const struct net_device *bridge_dev,
- unsigned int bridge_num);
-struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
- const struct net_device *br);
-
-/* tag_8021q.c */
-int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
- struct dsa_notifier_tag_8021q_vlan_info *info);
-int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
- struct dsa_notifier_tag_8021q_vlan_info *info);
-
-extern struct list_head dsa_tree_list;
-
-#endif
diff --git a/net/dsa/master.c b/net/dsa/master.c
index e24f02743c21..26d90140d271 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -6,7 +6,15 @@
* Vivien Didelot <vivien.didelot@savoirfairelinux.com>
*/
-#include "dsa_priv.h"
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/dsa.h>
+
+#include "dsa.h"
+#include "master.h"
+#include "port.h"
+#include "tag.h"
static int dsa_master_get_regs_len(struct net_device *dev)
{
diff --git a/net/dsa/master.h b/net/dsa/master.h
new file mode 100644
index 000000000000..3fc0e610b5b5
--- /dev/null
+++ b/net/dsa/master.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_MASTER_H
+#define __DSA_MASTER_H
+
+struct dsa_port;
+struct net_device;
+struct netdev_lag_upper_info;
+struct netlink_ext_ack;
+
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
+void dsa_master_teardown(struct net_device *dev);
+int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack);
+void dsa_master_lag_teardown(struct net_device *lag_dev,
+ struct dsa_port *cpu_dp);
+
+#endif
diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c
index ecf9ed1de185..bd4bbaf851de 100644
--- a/net/dsa/netlink.c
+++ b/net/dsa/netlink.c
@@ -4,7 +4,8 @@
#include <linux/netdevice.h>
#include <net/rtnetlink.h>
-#include "dsa_priv.h"
+#include "netlink.h"
+#include "slave.h"
static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = {
[IFLA_DSA_MASTER] = { .type = NLA_U32 },
diff --git a/net/dsa/netlink.h b/net/dsa/netlink.h
new file mode 100644
index 000000000000..7eda2fa15722
--- /dev/null
+++ b/net/dsa/netlink.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_NETLINK_H
+#define __DSA_NETLINK_H
+
+extern struct rtnl_link_ops dsa_link_ops __read_mostly;
+
+#endif
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 707bd854cea2..67ad1adec2a2 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -12,7 +12,11 @@
#include <linux/of_mdio.h>
#include <linux/of_net.h>
-#include "dsa_priv.h"
+#include "dsa.h"
+#include "port.h"
+#include "slave.h"
+#include "switch.h"
+#include "tag_8021q.h"
/**
* dsa_port_notify - Notify the switching fabric of changes to a port
diff --git a/net/dsa/port.h b/net/dsa/port.h
new file mode 100644
index 000000000000..9c218660d223
--- /dev/null
+++ b/net/dsa/port.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_PORT_H
+#define __DSA_PORT_H
+
+#include <linux/types.h>
+#include <net/dsa.h>
+
+struct ifreq;
+struct netdev_lag_lower_state_info;
+struct netdev_lag_upper_info;
+struct netlink_ext_ack;
+struct switchdev_mst_state;
+struct switchdev_obj_port_mdb;
+struct switchdev_vlan_msti;
+struct phy_device;
+
+bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr);
+void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
+ const struct dsa_device_ops *tag_ops);
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
+int dsa_port_set_mst_state(struct dsa_port *dp,
+ const struct switchdev_mst_state *state,
+ struct netlink_ext_ack *extack);
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable_rt(struct dsa_port *dp);
+void dsa_port_disable(struct dsa_port *dp);
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
+ struct netlink_ext_ack *extack);
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br);
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
+int dsa_port_lag_change(struct dsa_port *dp,
+ struct netdev_lag_lower_state_info *linfo);
+int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack);
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
+void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+ struct netlink_ext_ack *extack);
+bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
+int dsa_port_mst_enable(struct dsa_port *dp, bool on,
+ struct netlink_ext_ack *extack);
+int dsa_port_vlan_msti(struct dsa_port *dp,
+ const struct switchdev_vlan_msti *msti);
+int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu);
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_standalone_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid);
+int dsa_port_standalone_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid);
+int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
+int dsa_port_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack);
+int dsa_port_bridge_flags(struct dsa_port *dp,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack);
+int dsa_port_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+int dsa_port_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_host_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+int dsa_port_host_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_mrp_add(const struct dsa_port *dp,
+ const struct switchdev_obj_mrp *mrp);
+int dsa_port_mrp_del(const struct dsa_port *dp,
+ const struct switchdev_obj_mrp *mrp);
+int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
+ const struct switchdev_obj_ring_role_mrp *mrp);
+int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
+ const struct switchdev_obj_ring_role_mrp *mrp);
+int dsa_port_phylink_create(struct dsa_port *dp);
+void dsa_port_phylink_destroy(struct dsa_port *dp);
+int dsa_shared_port_link_register_of(struct dsa_port *dp);
+void dsa_shared_port_link_unregister_of(struct dsa_port *dp);
+int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
+void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
+void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc);
+int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
+ struct netlink_ext_ack *extack);
+
+#endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 24d8ad36fc8b..aab79c355224 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -22,7 +22,54 @@
#include <net/dcbnl.h>
#include <linux/netpoll.h>
-#include "dsa_priv.h"
+#include "dsa.h"
+#include "port.h"
+#include "master.h"
+#include "netlink.h"
+#include "slave.h"
+#include "tag.h"
+
+struct dsa_switchdev_event_work {
+ struct net_device *dev;
+ struct net_device *orig_dev;
+ struct work_struct work;
+ unsigned long event;
+ /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
+ * SWITCHDEV_FDB_DEL_TO_DEVICE
+ */
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+ bool host_addr;
+};
+
+enum dsa_standalone_event {
+ DSA_UC_ADD,
+ DSA_UC_DEL,
+ DSA_MC_ADD,
+ DSA_MC_DEL,
+};
+
+struct dsa_standalone_event_work {
+ struct work_struct work;
+ struct net_device *dev;
+ enum dsa_standalone_event event;
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
+static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds)
+{
+ return ds->ops->port_fdb_add && ds->ops->port_fdb_del &&
+ ds->fdb_isolation && !ds->vlan_filtering_is_global &&
+ !ds->needs_standalone_vlan_filtering;
+}
+
+static bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds)
+{
+ return ds->ops->port_mdb_add && ds->ops->port_mdb_del &&
+ ds->fdb_isolation && !ds->vlan_filtering_is_global &&
+ !ds->needs_standalone_vlan_filtering;
+}
static void dsa_slave_standalone_event_work(struct work_struct *work)
{
diff --git a/net/dsa/slave.h b/net/dsa/slave.h
new file mode 100644
index 000000000000..d0abe609e00d
--- /dev/null
+++ b/net/dsa/slave.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_SLAVE_H
+#define __DSA_SLAVE_H
+
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/list.h>
+#include <linux/netpoll.h>
+#include <linux/types.h>
+#include <net/dsa.h>
+#include <net/gro_cells.h>
+
+struct net_device;
+struct netlink_ext_ack;
+
+extern struct notifier_block dsa_slave_switchdev_notifier;
+extern struct notifier_block dsa_slave_switchdev_blocking_notifier;
+
+struct dsa_slave_priv {
+ /* Copy of CPU port xmit for faster access in slave transmit hot path */
+ struct sk_buff * (*xmit)(struct sk_buff *skb,
+ struct net_device *dev);
+
+ struct gro_cells gcells;
+
+ /* DSA port data, such as switch, port index, etc. */
+ struct dsa_port *dp;
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct netpoll *netpoll;
+#endif
+
+ /* TC context */
+ struct list_head mall_tc_list;
+};
+
+void dsa_slave_mii_bus_init(struct dsa_switch *ds);
+int dsa_slave_create(struct dsa_port *dp);
+void dsa_slave_destroy(struct net_device *slave_dev);
+int dsa_slave_suspend(struct net_device *slave_dev);
+int dsa_slave_resume(struct net_device *slave_dev);
+int dsa_slave_register_notifier(void);
+void dsa_slave_unregister_notifier(void);
+void dsa_slave_sync_ha(struct net_device *dev);
+void dsa_slave_unsync_ha(struct net_device *dev);
+void dsa_slave_setup_tagger(struct net_device *slave);
+int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
+int dsa_slave_change_master(struct net_device *dev, struct net_device *master,
+ struct netlink_ext_ack *extack);
+int dsa_slave_manage_vlan_filtering(struct net_device *dev,
+ bool vlan_filtering);
+
+static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
+ return p->dp;
+}
+
+static inline struct net_device *
+dsa_slave_to_master(const struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dsa_port_to_master(dp);
+}
+
+#endif
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index ce56acdba203..d5bc4bb7310d 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -12,7 +12,12 @@
#include <linux/if_vlan.h>
#include <net/switchdev.h>
-#include "dsa_priv.h"
+#include "dsa.h"
+#include "netlink.h"
+#include "port.h"
+#include "slave.h"
+#include "switch.h"
+#include "tag_8021q.h"
static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
@@ -1013,6 +1018,52 @@ static int dsa_switch_event(struct notifier_block *nb,
return notifier_from_errno(err);
}
+/**
+ * dsa_tree_notify - Execute code for all switches in a DSA switch tree.
+ * @dst: collection of struct dsa_switch devices to notify.
+ * @e: event, must be of type DSA_NOTIFIER_*
+ * @v: event-specific value.
+ *
+ * Given a struct dsa_switch_tree, this can be used to run a function once for
+ * each member DSA switch. The other alternative of traversing the tree is only
+ * through its ports list, which does not uniquely list the switches.
+ */
+int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
+{
+ struct raw_notifier_head *nh = &dst->nh;
+ int err;
+
+ err = raw_notifier_call_chain(nh, e, v);
+
+ return notifier_to_errno(err);
+}
+
+/**
+ * dsa_broadcast - Notify all DSA trees in the system.
+ * @e: event, must be of type DSA_NOTIFIER_*
+ * @v: event-specific value.
+ *
+ * Can be used to notify the switching fabric of events such as cross-chip
+ * bridging between disjoint trees (such as islands of tagger-compatible
+ * switches bridged by an incompatible middle switch).
+ *
+ * WARNING: this function is not reliable during probe time, because probing
+ * between trees is asynchronous and not all DSA trees might have probed.
+ */
+int dsa_broadcast(unsigned long e, void *v)
+{
+ struct dsa_switch_tree *dst;
+ int err = 0;
+
+ list_for_each_entry(dst, &dsa_tree_list, list) {
+ err = dsa_tree_notify(dst, e, v);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
int dsa_switch_register_notifier(struct dsa_switch *ds)
{
ds->nb.notifier_call = dsa_switch_event;
diff --git a/net/dsa/switch.h b/net/dsa/switch.h
new file mode 100644
index 000000000000..15e67b95eb6e
--- /dev/null
+++ b/net/dsa/switch.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_SWITCH_H
+#define __DSA_SWITCH_H
+
+#include <net/dsa.h>
+
+struct netlink_ext_ack;
+
+enum {
+ DSA_NOTIFIER_AGEING_TIME,
+ DSA_NOTIFIER_BRIDGE_JOIN,
+ DSA_NOTIFIER_BRIDGE_LEAVE,
+ DSA_NOTIFIER_FDB_ADD,
+ DSA_NOTIFIER_FDB_DEL,
+ DSA_NOTIFIER_HOST_FDB_ADD,
+ DSA_NOTIFIER_HOST_FDB_DEL,
+ DSA_NOTIFIER_LAG_FDB_ADD,
+ DSA_NOTIFIER_LAG_FDB_DEL,
+ DSA_NOTIFIER_LAG_CHANGE,
+ DSA_NOTIFIER_LAG_JOIN,
+ DSA_NOTIFIER_LAG_LEAVE,
+ DSA_NOTIFIER_MDB_ADD,
+ DSA_NOTIFIER_MDB_DEL,
+ DSA_NOTIFIER_HOST_MDB_ADD,
+ DSA_NOTIFIER_HOST_MDB_DEL,
+ DSA_NOTIFIER_VLAN_ADD,
+ DSA_NOTIFIER_VLAN_DEL,
+ DSA_NOTIFIER_HOST_VLAN_ADD,
+ DSA_NOTIFIER_HOST_VLAN_DEL,
+ DSA_NOTIFIER_MTU,
+ DSA_NOTIFIER_TAG_PROTO,
+ DSA_NOTIFIER_TAG_PROTO_CONNECT,
+ DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
+ DSA_NOTIFIER_MASTER_STATE_CHANGE,
+};
+
+/* DSA_NOTIFIER_AGEING_TIME */
+struct dsa_notifier_ageing_time_info {
+ unsigned int ageing_time;
+};
+
+/* DSA_NOTIFIER_BRIDGE_* */
+struct dsa_notifier_bridge_info {
+ const struct dsa_port *dp;
+ struct dsa_bridge bridge;
+ bool tx_fwd_offload;
+ struct netlink_ext_ack *extack;
+};
+
+/* DSA_NOTIFIER_FDB_* */
+struct dsa_notifier_fdb_info {
+ const struct dsa_port *dp;
+ const unsigned char *addr;
+ u16 vid;
+ struct dsa_db db;
+};
+
+/* DSA_NOTIFIER_LAG_FDB_* */
+struct dsa_notifier_lag_fdb_info {
+ struct dsa_lag *lag;
+ const unsigned char *addr;
+ u16 vid;
+ struct dsa_db db;
+};
+
+/* DSA_NOTIFIER_MDB_* */
+struct dsa_notifier_mdb_info {
+ const struct dsa_port *dp;
+ const struct switchdev_obj_port_mdb *mdb;
+ struct dsa_db db;
+};
+
+/* DSA_NOTIFIER_LAG_* */
+struct dsa_notifier_lag_info {
+ const struct dsa_port *dp;
+ struct dsa_lag lag;
+ struct netdev_lag_upper_info *info;
+ struct netlink_ext_ack *extack;
+};
+
+/* DSA_NOTIFIER_VLAN_* */
+struct dsa_notifier_vlan_info {
+ const struct dsa_port *dp;
+ const struct switchdev_obj_port_vlan *vlan;
+ struct netlink_ext_ack *extack;
+};
+
+/* DSA_NOTIFIER_MTU */
+struct dsa_notifier_mtu_info {
+ const struct dsa_port *dp;
+ int mtu;
+};
+
+/* DSA_NOTIFIER_TAG_PROTO_* */
+struct dsa_notifier_tag_proto_info {
+ const struct dsa_device_ops *tag_ops;
+};
+
+/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
+struct dsa_notifier_tag_8021q_vlan_info {
+ const struct dsa_port *dp;
+ u16 vid;
+};
+
+/* DSA_NOTIFIER_MASTER_STATE_CHANGE */
+struct dsa_notifier_master_state_info {
+ const struct net_device *master;
+ bool operational;
+};
+
+int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
+int dsa_broadcast(unsigned long e, void *v);
+
+int dsa_switch_register_notifier(struct dsa_switch *ds);
+void dsa_switch_unregister_notifier(struct dsa_switch *ds);
+
+#endif
diff --git a/net/dsa/tag.c b/net/dsa/tag.c
new file mode 100644
index 000000000000..383721e167d6
--- /dev/null
+++ b/net/dsa/tag.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DSA tagging protocol handling
+ *
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/ptp_classify.h>
+#include <linux/skbuff.h>
+#include <net/dsa.h>
+#include <net/dst_metadata.h>
+
+#include "slave.h"
+#include "tag.h"
+
+static LIST_HEAD(dsa_tag_drivers_list);
+static DEFINE_MUTEX(dsa_tag_drivers_lock);
+
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ unsigned int type;
+
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+
+ __skb_push(skb, ETH_HLEN);
+
+ type = ptp_classify_raw(skb);
+
+ __skb_pull(skb, ETH_HLEN);
+
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ if (likely(ds->ops->port_rxtstamp))
+ return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+ return false;
+}
+
+static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *unused)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct sk_buff *nskb = NULL;
+ struct dsa_slave_priv *p;
+
+ if (unlikely(!cpu_dp)) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ return 0;
+
+ if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) {
+ unsigned int port = md_dst->u.port_info.port_id;
+
+ skb_dst_drop(skb);
+ if (!skb_has_extensions(skb))
+ skb->slow_gro = 0;
+
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (likely(skb->dev)) {
+ dsa_default_offload_fwd_mark(skb);
+ nskb = skb;
+ }
+ } else {
+ nskb = cpu_dp->rcv(skb, dev);
+ }
+
+ if (!nskb) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ skb = nskb;
+ skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ if (unlikely(!dsa_slave_dev_check(skb->dev))) {
+ /* Packet is to be injected directly on an upper
+ * device, e.g. a team/bond, so skip all DSA-port
+ * specific actions.
+ */
+ netif_rx(skb);
+ return 0;
+ }
+
+ p = netdev_priv(skb->dev);
+
+ if (unlikely(cpu_dp->ds->untag_bridge_pvid)) {
+ nskb = dsa_untag_bridge_pvid(skb);
+ if (!nskb) {
+ kfree_skb(skb);
+ return 0;
+ }
+ skb = nskb;
+ }
+
+ dev_sw_netstats_rx_add(skb->dev, skb->len);
+
+ if (dsa_skb_defer_rx_timestamp(p, skb))
+ return 0;
+
+ gro_cells_receive(&p->gcells, skb);
+
+ return 0;
+}
+
+struct packet_type dsa_pack_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_XDSA),
+ .func = dsa_switch_rcv,
+};
+
+static void dsa_tag_driver_register(struct dsa_tag_driver *dsa_tag_driver,
+ struct module *owner)
+{
+ dsa_tag_driver->owner = owner;
+
+ mutex_lock(&dsa_tag_drivers_lock);
+ list_add_tail(&dsa_tag_driver->list, &dsa_tag_drivers_list);
+ mutex_unlock(&dsa_tag_drivers_lock);
+}
+
+void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
+ unsigned int count, struct module *owner)
+{
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ dsa_tag_driver_register(dsa_tag_driver_array[i], owner);
+}
+
+static void dsa_tag_driver_unregister(struct dsa_tag_driver *dsa_tag_driver)
+{
+ mutex_lock(&dsa_tag_drivers_lock);
+ list_del(&dsa_tag_driver->list);
+ mutex_unlock(&dsa_tag_drivers_lock);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_drivers_register);
+
+void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
+ unsigned int count)
+{
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ dsa_tag_driver_unregister(dsa_tag_driver_array[i]);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_drivers_unregister);
+
+const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
+{
+ return ops->name;
+};
+
+/* Function takes a reference on the module owning the tagger,
+ * so dsa_tag_driver_put must be called afterwards.
+ */
+const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name)
+{
+ const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT);
+ struct dsa_tag_driver *dsa_tag_driver;
+
+ request_module("%s%s", DSA_TAG_DRIVER_ALIAS, name);
+
+ mutex_lock(&dsa_tag_drivers_lock);
+ list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
+ const struct dsa_device_ops *tmp = dsa_tag_driver->ops;
+
+ if (strcmp(name, tmp->name))
+ continue;
+
+ if (!try_module_get(dsa_tag_driver->owner))
+ break;
+
+ ops = tmp;
+ break;
+ }
+ mutex_unlock(&dsa_tag_drivers_lock);
+
+ return ops;
+}
+
+const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol)
+{
+ struct dsa_tag_driver *dsa_tag_driver;
+ const struct dsa_device_ops *ops;
+ bool found = false;
+
+ request_module("%sid-%d", DSA_TAG_DRIVER_ALIAS, tag_protocol);
+
+ mutex_lock(&dsa_tag_drivers_lock);
+ list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
+ ops = dsa_tag_driver->ops;
+ if (ops->proto == tag_protocol) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ if (!try_module_get(dsa_tag_driver->owner))
+ ops = ERR_PTR(-ENOPROTOOPT);
+ } else {
+ ops = ERR_PTR(-ENOPROTOOPT);
+ }
+
+ mutex_unlock(&dsa_tag_drivers_lock);
+
+ return ops;
+}
+
+void dsa_tag_driver_put(const struct dsa_device_ops *ops)
+{
+ struct dsa_tag_driver *dsa_tag_driver;
+
+ mutex_lock(&dsa_tag_drivers_lock);
+ list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
+ if (dsa_tag_driver->ops == ops) {
+ module_put(dsa_tag_driver->owner);
+ break;
+ }
+ }
+ mutex_unlock(&dsa_tag_drivers_lock);
+}
diff --git a/net/dsa/tag.h b/net/dsa/tag.h
new file mode 100644
index 000000000000..7cfbca824f1c
--- /dev/null
+++ b/net/dsa/tag.h
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_TAG_H
+#define __DSA_TAG_H
+
+#include <linux/if_vlan.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <net/dsa.h>
+
+#include "port.h"
+#include "slave.h"
+
+struct dsa_tag_driver {
+ const struct dsa_device_ops *ops;
+ struct list_head list;
+ struct module *owner;
+};
+
+extern struct packet_type dsa_pack_type;
+
+const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol);
+const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name);
+void dsa_tag_driver_put(const struct dsa_device_ops *ops);
+const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
+
+static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
+{
+ return ops->needed_headroom + ops->needed_tailroom;
+}
+
+static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
+ int device, int port)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->ds->index == device && dp->index == port &&
+ dp->type == DSA_PORT_TYPE_USER)
+ return dp->slave;
+
+ return NULL;
+}
+
+/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged
+ * frames as untagged, since the bridge will not untag them.
+ */
+static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
+{
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct net_device *br = dsa_port_bridge_dev_get(dp);
+ struct net_device *dev = skb->dev;
+ struct net_device *upper_dev;
+ u16 vid, pvid, proto;
+ int err;
+
+ if (!br || br_vlan_enabled(br))
+ return skb;
+
+ err = br_vlan_get_proto(br, &proto);
+ if (err)
+ return skb;
+
+ /* Move VLAN tag from data to hwaccel */
+ if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) {
+ skb = skb_vlan_untag(skb);
+ if (!skb)
+ return NULL;
+ }
+
+ if (!skb_vlan_tag_present(skb))
+ return skb;
+
+ vid = skb_vlan_tag_get_id(skb);
+
+ /* We already run under an RCU read-side critical section since
+ * we are called from netif_receive_skb_list_internal().
+ */
+ err = br_vlan_get_pvid_rcu(dev, &pvid);
+ if (err)
+ return skb;
+
+ if (vid != pvid)
+ return skb;
+
+ /* The sad part about attempting to untag from DSA is that we
+ * don't know, unless we check, if the skb will end up in
+ * the bridge's data path - br_allowed_ingress() - or not.
+ * For example, there might be an 8021q upper for the
+ * default_pvid of the bridge, which will steal VLAN-tagged traffic
+ * from the bridge's data path. This is a configuration that DSA
+ * supports because vlan_filtering is 0. In that case, we should
+ * definitely keep the tag, to make sure it keeps working.
+ */
+ upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid);
+ if (upper_dev)
+ return skb;
+
+ __vlan_hwaccel_clear_tag(skb);
+
+ return skb;
+}
+
+/* For switches without hardware support for DSA tagging to be able
+ * to support termination through the bridge.
+ */
+static inline struct net_device *
+dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct bridge_vlan_info vinfo;
+ struct net_device *slave;
+ struct dsa_port *dp;
+ int err;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->type != DSA_PORT_TYPE_USER)
+ continue;
+
+ if (!dp->bridge)
+ continue;
+
+ if (dp->stp_state != BR_STATE_LEARNING &&
+ dp->stp_state != BR_STATE_FORWARDING)
+ continue;
+
+ /* Since the bridge might learn this packet, keep the CPU port
+ * affinity with the port that will be used for the reply on
+ * xmit.
+ */
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
+ slave = dp->slave;
+
+ err = br_vlan_get_info_rcu(slave, vid, &vinfo);
+ if (err)
+ continue;
+
+ return slave;
+ }
+
+ return NULL;
+}
+
+/* If the ingress port offloads the bridge, we mark the frame as autonomously
+ * forwarded by hardware, so the software bridge doesn't forward in twice, back
+ * to us, because we already did. However, if we're in fallback mode and we do
+ * software bridging, we are not offloading it, therefore the dp->bridge
+ * pointer is not populated, and flooding needs to be done by software (we are
+ * effectively operating in standalone ports mode).
+ */
+static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
+{
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+
+ skb->offload_fwd_mark = !!(dp->bridge);
+}
+
+/* Helper for removing DSA header tags from packets in the RX path.
+ * Must not be called before skb_pull(len).
+ * skb->data
+ * |
+ * v
+ * | | | | | | | | | | | | | | | | | | |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | Destination MAC | Source MAC | DSA header | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | |
+ * <----- len -----> <----- len ----->
+ * |
+ * >>>>>>> v
+ * >>>>>>> | | | | | | | | | | | | | | |
+ * >>>>>>> +-----------------------+-----------------------+-------+
+ * >>>>>>> | Destination MAC | Source MAC | EType |
+ * +-----------------------+-----------------------+-------+
+ * ^
+ * |
+ * skb->data
+ */
+static inline void dsa_strip_etype_header(struct sk_buff *skb, int len)
+{
+ memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
+}
+
+/* Helper for creating space for DSA header tags in TX path packets.
+ * Must not be called before skb_push(len).
+ *
+ * Before:
+ *
+ * <<<<<<< | | | | | | | | | | | | | | |
+ * ^ <<<<<<< +-----------------------+-----------------------+-------+
+ * | <<<<<<< | Destination MAC | Source MAC | EType |
+ * | +-----------------------+-----------------------+-------+
+ * <----- len ----->
+ * |
+ * |
+ * skb->data
+ *
+ * After:
+ *
+ * | | | | | | | | | | | | | | | | | | |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | Destination MAC | Source MAC | DSA header | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * ^ | |
+ * | <----- len ----->
+ * skb->data
+ */
+static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
+{
+ memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
+}
+
+/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
+ * skb_mac_header(skb), which leaves skb->data pointing at the first byte after
+ * what the DSA master perceives as the EtherType (the beginning of the L3
+ * protocol). Since DSA EtherType header taggers treat the EtherType as part of
+ * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
+ * is located 2 bytes behind skb->data. Note that EtherType in this context
+ * means the first 2 bytes of the DSA header, not the encapsulated EtherType
+ * that will become visible after the DSA header is stripped.
+ */
+static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb)
+{
+ return skb->data - 2;
+}
+
+/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType
+ * header taggers start exactly where the EtherType is (the EtherType is
+ * treated as part of the DSA header).
+ */
+static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
+{
+ return skb->data + 2 * ETH_ALEN;
+}
+
+/* Create 2 modaliases per tagging protocol, one to auto-load the module
+ * given the ID reported by get_tag_protocol(), and the other by name.
+ */
+#define DSA_TAG_DRIVER_ALIAS "dsa_tag:"
+#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto, __name) \
+ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __name); \
+ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS "id-" \
+ __stringify(__proto##_VALUE))
+
+void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
+ unsigned int count,
+ struct module *owner);
+void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
+ unsigned int count);
+
+#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \
+static int __init dsa_tag_driver_module_init(void) \
+{ \
+ dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \
+ THIS_MODULE); \
+ return 0; \
+} \
+module_init(dsa_tag_driver_module_init); \
+ \
+static void __exit dsa_tag_driver_module_exit(void) \
+{ \
+ dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \
+} \
+module_exit(dsa_tag_driver_module_exit)
+
+/**
+ * module_dsa_tag_drivers() - Helper macro for registering DSA tag
+ * drivers
+ * @__ops_array: Array of tag driver structures
+ *
+ * Helper macro for DSA tag drivers which do not do anything special
+ * in module init/exit. Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
+ */
+#define module_dsa_tag_drivers(__ops_array) \
+dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array))
+
+#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops
+
+/* Create a static structure we can build a linked list of dsa_tag
+ * drivers
+ */
+#define DSA_TAG_DRIVER(__ops) \
+static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \
+ .ops = &__ops, \
+}
+
+/**
+ * module_dsa_tag_driver() - Helper macro for registering a single DSA tag
+ * driver
+ * @__ops: Single tag driver structures
+ *
+ * Helper macro for DSA tag drivers which do not do anything special
+ * in module init/exit. Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
+ */
+#define module_dsa_tag_driver(__ops) \
+DSA_TAG_DRIVER(__ops); \
+ \
+static struct dsa_tag_driver *dsa_tag_driver_array[] = { \
+ &DSA_TAG_DRIVER_NAME(__ops) \
+}; \
+module_dsa_tag_drivers(dsa_tag_driver_array)
+
+#endif
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 34e5ec5d3e23..b1263917fcb2 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -7,7 +7,10 @@
#include <linux/if_vlan.h>
#include <linux/dsa/8021q.h>
-#include "dsa_priv.h"
+#include "port.h"
+#include "switch.h"
+#include "tag.h"
+#include "tag_8021q.h"
/* Binary structure of the fake 12-bit VID field (when the TPID is
* ETH_P_DSA_8021Q):
@@ -60,6 +63,20 @@
#define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \
DSA_8021Q_PORT_MASK)
+struct dsa_tag_8021q_vlan {
+ struct list_head list;
+ int port;
+ u16 vid;
+ refcount_t refcount;
+};
+
+struct dsa_8021q_context {
+ struct dsa_switch *ds;
+ struct list_head vlans;
+ /* EtherType of RX VID, used for filtering on master interface */
+ __be16 proto;
+};
+
u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num)
{
/* The VBID value of 0 is reserved for precise TX, but it is also
diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h
new file mode 100644
index 000000000000..b75cbaa028ef
--- /dev/null
+++ b/net/dsa/tag_8021q.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_TAG_8021Q_H
+#define __DSA_TAG_8021Q_H
+
+#include <net/dsa.h>
+
+#include "switch.h"
+
+struct sk_buff;
+struct net_device;
+
+struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
+ u16 tpid, u16 tci);
+
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
+ int *vbid);
+
+struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master,
+ int vbid);
+
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
+
+#endif
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index bfa161a4f502..7f3b7d730b85 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -7,7 +7,7 @@
#include <linux/bitfield.h>
#include <linux/etherdevice.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define AR9331_NAME "ar9331"
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 9e7477ed70f1..10239daa5745 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -10,7 +10,7 @@
#include <linux/list.h>
#include <linux/slab.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define BRCM_NAME "brcm"
#define BRCM_LEGACY_NAME "brcm-legacy"
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 9fe77f5cc759..1fd7fa26db64 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -50,7 +50,7 @@
#include <linux/list.h>
#include <linux/slab.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define DSA_NAME "dsa"
#define EDSA_NAME "edsa"
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index 020050dff3e4..e279cd9057b0 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -10,7 +10,7 @@
#include <linux/skbuff.h>
#include <net/dsa.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define GSWIP_NAME "gswip"
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index 03fd5f2877c8..03a1fb9c87a9 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -11,7 +11,7 @@
#include <linux/skbuff.h>
#include <net/dsa.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define HELLCREEK_NAME "hellcreek"
@@ -51,7 +51,8 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
return NULL;
}
- pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
+ if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN))
+ return NULL;
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 37db5156f9a3..080e5c369f5b 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -7,7 +7,8 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <net/dsa.h>
-#include "dsa_priv.h"
+
+#include "tag.h"
#define KSZ8795_NAME "ksz8795"
#define KSZ9477_NAME "ksz9477"
@@ -26,7 +27,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
if (!skb->dev)
return NULL;
- pskb_trim_rcsum(skb, skb->len - len);
+ if (pskb_trim_rcsum(skb, skb->len - len))
+ return NULL;
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 4118292ed218..c25f5536706b 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -7,7 +7,7 @@
#include <linux/list.h>
#include <linux/slab.h>
-#include "dsa_priv.h"
+#include "tag.h"
/* To define the outgoing port and to discover the incoming port a regular
* VLAN tag is used by the LAN9303. But its VID meaning is 'special':
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 8948c4f99f8e..40af80452747 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -8,7 +8,7 @@
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define MTK_NAME "mtk"
diff --git a/net/dsa/tag_none.c b/net/dsa/tag_none.c
new file mode 100644
index 000000000000..d2fd179c4227
--- /dev/null
+++ b/net/dsa/tag_none.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * net/dsa/tag_none.c - Traffic handling for switches with no tag
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ *
+ * WARNING: do not use this for new switches. In case of no hardware
+ * tagging support, look at tag_8021q.c instead.
+ */
+
+#include "tag.h"
+
+#define NONE_NAME "none"
+
+static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Just return the original SKB */
+ return skb;
+}
+
+static const struct dsa_device_ops none_ops = {
+ .name = NONE_NAME,
+ .proto = DSA_TAG_PROTO_NONE,
+ .xmit = dsa_slave_notag_xmit,
+};
+
+module_dsa_tag_driver(none_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_NONE, NONE_NAME);
+MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 8cc31ab47e28..28ebecafdd24 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -2,7 +2,8 @@
/* Copyright 2019 NXP
*/
#include <linux/dsa/ocelot.h>
-#include "dsa_priv.h"
+
+#include "tag.h"
#define OCELOT_NAME "ocelot"
#define SEVILLE_NAME "seville"
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index d1ec68001487..1f0b8c20eba5 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -10,7 +10,9 @@
*/
#include <linux/dsa/8021q.h>
#include <linux/dsa/ocelot.h>
-#include "dsa_priv.h"
+
+#include "tag.h"
+#include "tag_8021q.h"
#define OCELOT_8021Q_NAME "ocelot-8021q"
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 73d6e111228d..e757c8de06f1 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -8,7 +8,7 @@
#include <net/dsa.h>
#include <linux/dsa/tag_qca.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define QCA_NAME "qca"
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 18b52d77d200..c327314b95e3 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -18,7 +18,7 @@
#include <linux/etherdevice.h>
#include <linux/bits.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define RTL4_A_NAME "rtl4a"
diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
index 030a8cf0ad48..4f67834fd121 100644
--- a/net/dsa/tag_rtl8_4.c
+++ b/net/dsa/tag_rtl8_4.c
@@ -77,7 +77,7 @@
#include <linux/bits.h>
#include <linux/etherdevice.h>
-#include "dsa_priv.h"
+#include "tag.h"
/* Protocols supported:
*
diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c
index b9135069f9fc..437a6820ac42 100644
--- a/net/dsa/tag_rzn1_a5psw.c
+++ b/net/dsa/tag_rzn1_a5psw.c
@@ -10,7 +10,7 @@
#include <linux/if_ether.h>
#include <net/dsa.h>
-#include "dsa_priv.h"
+#include "tag.h"
/* To define the outgoing port and to discover the incoming port a TAG is
* inserted after Src MAC :
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 3b6e642a90e9..1c2ceba4771b 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -5,7 +5,9 @@
#include <linux/dsa/sja1105.h>
#include <linux/dsa/8021q.h>
#include <linux/packing.h>
-#include "dsa_priv.h"
+
+#include "tag.h"
+#include "tag_8021q.h"
#define SJA1105_NAME "sja1105"
#define SJA1110_NAME "sja1110"
@@ -668,7 +670,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
* padding and trailer we need to account for the fact that
* skb->data points to skb_mac_header(skb) + ETH_HLEN.
*/
- pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN);
+ if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN))
+ return NULL;
/* Trap-to-host frame, no timestamp trailer */
} else {
*source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 8754dfe680f6..7361b9106382 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -8,7 +8,7 @@
#include <linux/list.h>
#include <linux/slab.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define TRAILER_NAME "trailer"
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
index dc935dd90f98..af19969f9bc4 100644
--- a/net/dsa/tag_xrs700x.c
+++ b/net/dsa/tag_xrs700x.c
@@ -7,7 +7,7 @@
#include <linux/bitops.h>
-#include "dsa_priv.h"
+#include "tag.h"
#define XRS700X_NAME "xrs700x"
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index e02daa74e833..2edc8b796a4e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -398,7 +398,7 @@ EXPORT_SYMBOL(alloc_etherdev_mqs);
ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
{
- return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
+ return sysfs_emit(buf, "%*phC\n", len, addr);
}
EXPORT_SYMBOL(sysfs_format_mac);
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 72ab0944262a..228f13df2e18 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -4,7 +4,7 @@ obj-y += ioctl.o common.o
obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
-ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
+ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 21cfe8557205..6f399afc2ff2 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -417,6 +417,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = {
[const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo",
[const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw",
[const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc",
+ [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp",
};
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 99272a67525c..c2f1a542e6fa 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2013,7 +2013,8 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
} else {
/* Driver expects to be called at twice the frequency in rc */
int n = rc * 2, interval = HZ / n;
- u64 count = n * id.data, i = 0;
+ u64 count = mul_u32_u32(n, id.data);
+ u64 i = 0;
do {
rtnl_lock();
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 1a4c11356c96..aee98be6237f 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -287,6 +287,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
[ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops,
[ETHTOOL_MSG_PSE_GET] = &ethnl_pse_request_ops,
+ [ETHTOOL_MSG_RSS_GET] = &ethnl_rss_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -1040,6 +1041,12 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_pse_set_policy,
.maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_RSS_GET,
+ .doit = ethnl_default_doit,
+ .policy = ethnl_rss_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 1bfd374f9718..3753787ba233 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -346,6 +346,7 @@ extern const struct ethnl_request_ops ethnl_stats_request_ops;
extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
extern const struct ethnl_request_ops ethnl_module_request_ops;
extern const struct ethnl_request_ops ethnl_pse_request_ops;
+extern const struct ethnl_request_ops ethnl_rss_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -386,6 +387,7 @@ extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER +
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
+extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1];
int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
new file mode 100644
index 000000000000..ebe6145aed3f
--- /dev/null
+++ b/net/ethtool/rss.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+struct rss_req_info {
+ struct ethnl_req_info base;
+ u32 rss_context;
+};
+
+struct rss_reply_data {
+ struct ethnl_reply_data base;
+ u32 indir_size;
+ u32 hkey_size;
+ u32 hfunc;
+ u32 *indir_table;
+ u8 *hkey;
+};
+
+#define RSS_REQINFO(__req_base) \
+ container_of(__req_base, struct rss_req_info, base)
+
+#define RSS_REPDATA(__reply_base) \
+ container_of(__reply_base, struct rss_reply_data, base)
+
+const struct nla_policy ethnl_rss_get_policy[] = {
+ [ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 },
+};
+
+static int
+rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct rss_req_info *request = RSS_REQINFO(req_info);
+
+ if (tb[ETHTOOL_A_RSS_CONTEXT])
+ request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]);
+
+ return 0;
+}
+
+static int
+rss_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base, struct genl_info *info)
+{
+ struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ struct rss_req_info *request = RSS_REQINFO(req_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_ops *ops;
+ u32 total_size, indir_bytes;
+ u8 dev_hfunc = 0;
+ u8 *rss_config;
+ int ret;
+
+ ops = dev->ethtool_ops;
+ if (!ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ /* Some drivers don't handle rss_context */
+ if (request->rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+
+ data->indir_size = 0;
+ data->hkey_size = 0;
+ if (ops->get_rxfh_indir_size)
+ data->indir_size = ops->get_rxfh_indir_size(dev);
+ if (ops->get_rxfh_key_size)
+ data->hkey_size = ops->get_rxfh_key_size(dev);
+
+ indir_bytes = data->indir_size * sizeof(u32);
+ total_size = indir_bytes + data->hkey_size;
+ rss_config = kzalloc(total_size, GFP_KERNEL);
+ if (!rss_config) {
+ ret = -ENOMEM;
+ goto out_ops;
+ }
+
+ if (data->indir_size)
+ data->indir_table = (u32 *)rss_config;
+
+ if (data->hkey_size)
+ data->hkey = rss_config + indir_bytes;
+
+ if (request->rss_context)
+ ret = ops->get_rxfh_context(dev, data->indir_table, data->hkey,
+ &dev_hfunc, request->rss_context);
+ else
+ ret = ops->get_rxfh(dev, data->indir_table, data->hkey,
+ &dev_hfunc);
+
+ if (ret)
+ goto out_ops;
+
+ data->hfunc = dev_hfunc;
+out_ops:
+ ethnl_ops_complete(dev);
+ return ret;
+}
+
+static int
+rss_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ int len;
+
+ len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
+ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
+ nla_total_size(data->hkey_size); /* _RSS_HKEY */
+
+ return len;
+}
+
+static int
+rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+
+ if (nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc) ||
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table) ||
+ nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static void rss_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+ const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+
+ kfree(data->indir_table);
+}
+
+const struct ethnl_request_ops ethnl_rss_request_ops = {
+ .request_cmd = ETHTOOL_MSG_RSS_GET,
+ .reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_RSS_HEADER,
+ .req_info_size = sizeof(struct rss_req_info),
+ .reply_data_size = sizeof(struct rss_reply_data),
+
+ .parse_request = rss_parse_request,
+ .prepare_data = rss_prepare_data,
+ .reply_size = rss_reply_size,
+ .fill_reply = rss_fill_reply,
+ .cleanup_data = rss_cleanup_data,
+};
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index de476a417631..1a195efc79cd 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/debugfs.h>
-#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
@@ -21,7 +20,6 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
{
struct hsr_priv *priv = (struct hsr_priv *)sfp->private;
struct hsr_node *node;
- int i;
seq_printf(sfp, "Node Table entries for (%s) device\n",
(priv->prot_version == PRP_V1 ? "PRP" : "HSR"));
@@ -33,28 +31,22 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
seq_puts(sfp, "DAN-H\n");
rcu_read_lock();
-
- for (i = 0 ; i < priv->hash_buckets; i++) {
- hlist_for_each_entry_rcu(node, &priv->node_db[i], mac_list) {
- /* skip self node */
- if (hsr_addr_is_self(priv, node->macaddress_A))
- continue;
- seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
- seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
- seq_printf(sfp, "%10lx, ",
- node->time_in[HSR_PT_SLAVE_A]);
- seq_printf(sfp, "%10lx, ",
- node->time_in[HSR_PT_SLAVE_B]);
- seq_printf(sfp, "%14x, ", node->addr_B_port);
-
- if (priv->prot_version == PRP_V1)
- seq_printf(sfp, "%5x, %5x, %5x\n",
- node->san_a, node->san_b,
- (node->san_a == 0 &&
- node->san_b == 0));
- else
- seq_printf(sfp, "%5x\n", 1);
- }
+ list_for_each_entry_rcu(node, &priv->node_db, mac_list) {
+ /* skip self node */
+ if (hsr_addr_is_self(priv, node->macaddress_A))
+ continue;
+ seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
+ seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
+ seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]);
+ seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]);
+ seq_printf(sfp, "%14x, ", node->addr_B_port);
+
+ if (priv->prot_version == PRP_V1)
+ seq_printf(sfp, "%5x, %5x, %5x\n",
+ node->san_a, node->san_b,
+ (node->san_a == 0 && node->san_b == 0));
+ else
+ seq_printf(sfp, "%5x\n", 1);
}
rcu_read_unlock();
return 0;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 6ffef47e9be5..5a236aae2366 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -219,7 +219,9 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb->dev = master->dev;
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
+ spin_lock_bh(&hsr->seqnr_lock);
hsr_forward_skb(skb, master);
+ spin_unlock_bh(&hsr->seqnr_lock);
} else {
dev_core_stats_tx_dropped_inc(dev);
dev_kfree_skb_any(skb);
@@ -278,7 +280,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
__u8 type = HSR_TLV_LIFE_CHECK;
struct hsr_sup_payload *hsr_sp;
struct hsr_sup_tag *hsr_stag;
- unsigned long irqflags;
struct sk_buff *skb;
*interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
@@ -299,7 +300,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
/* From HSRv1 on we have separate supervision sequence numbers. */
- spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
+ spin_lock_bh(&hsr->seqnr_lock);
if (hsr->prot_version > 0) {
hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
hsr->sup_sequence_nr++;
@@ -307,7 +308,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_stag->sequence_nr = htons(hsr->sequence_nr);
hsr->sequence_nr++;
}
- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
hsr_stag->tlv.HSR_TLV_type = type;
/* TODO: Why 12 in HSRv0? */
@@ -318,11 +318,13 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
- if (skb_put_padto(skb, ETH_ZLEN))
+ if (skb_put_padto(skb, ETH_ZLEN)) {
+ spin_unlock_bh(&hsr->seqnr_lock);
return;
+ }
hsr_forward_skb(skb, master);
-
+ spin_unlock_bh(&hsr->seqnr_lock);
return;
}
@@ -332,7 +334,6 @@ static void send_prp_supervision_frame(struct hsr_port *master,
struct hsr_priv *hsr = master->hsr;
struct hsr_sup_payload *hsr_sp;
struct hsr_sup_tag *hsr_stag;
- unsigned long irqflags;
struct sk_buff *skb;
skb = hsr_init_skb(master);
@@ -347,7 +348,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0));
/* From HSRv1 on we have separate supervision sequence numbers. */
- spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
+ spin_lock_bh(&hsr->seqnr_lock);
hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
hsr->sup_sequence_nr++;
hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
@@ -358,13 +359,12 @@ static void send_prp_supervision_frame(struct hsr_port *master,
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
if (skb_put_padto(skb, ETH_ZLEN)) {
- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
+ spin_unlock_bh(&hsr->seqnr_lock);
return;
}
- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
-
hsr_forward_skb(skb, master);
+ spin_unlock_bh(&hsr->seqnr_lock);
}
/* Announce (supervision frame) timer function
@@ -444,7 +444,7 @@ void hsr_dev_setup(struct net_device *dev)
dev->header_ops = &hsr_header_ops;
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
- dev->priv_flags |= IFF_NO_QUEUE;
+ dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
dev->needs_free_netdev = true;
@@ -485,16 +485,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
{
bool unregister = false;
struct hsr_priv *hsr;
- int res, i;
+ int res;
hsr = netdev_priv(hsr_dev);
INIT_LIST_HEAD(&hsr->ports);
- INIT_HLIST_HEAD(&hsr->self_node_db);
- hsr->hash_buckets = HSR_HSIZE;
- get_random_bytes(&hsr->hash_seed, sizeof(hsr->hash_seed));
- for (i = 0; i < hsr->hash_buckets; i++)
- INIT_HLIST_HEAD(&hsr->node_db[i]);
-
+ INIT_LIST_HEAD(&hsr->node_db);
spin_lock_init(&hsr->list_lock);
eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index a50429a62f74..629daacc9607 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -351,17 +351,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
struct hsr_node *node_src)
{
bool was_multicast_frame;
- int res;
+ int res, recv_len;
was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST);
hsr_addr_subst_source(node_src, skb);
skb_pull(skb, ETH_HLEN);
+ recv_len = skb->len;
res = netif_rx(skb);
if (res == NET_RX_DROP) {
dev->stats.rx_dropped++;
} else {
dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_bytes += recv_len;
if (was_multicast_frame)
dev->stats.multicast++;
}
@@ -499,7 +500,6 @@ static void handle_std_frame(struct sk_buff *skb,
{
struct hsr_port *port = frame->port_rcv;
struct hsr_priv *hsr = port->hsr;
- unsigned long irqflags;
frame->skb_hsr = NULL;
frame->skb_prp = NULL;
@@ -509,10 +509,9 @@ static void handle_std_frame(struct sk_buff *skb,
frame->is_from_san = true;
} else {
/* Sequence nr for the master node */
- spin_lock_irqsave(&hsr->seqnr_lock, irqflags);
+ lockdep_assert_held(&hsr->seqnr_lock);
frame->sequence_nr = hsr->sequence_nr;
hsr->sequence_nr++;
- spin_unlock_irqrestore(&hsr->seqnr_lock, irqflags);
}
}
@@ -570,23 +569,20 @@ static int fill_frame_info(struct hsr_frame_info *frame,
struct ethhdr *ethhdr;
__be16 proto;
int ret;
- u32 hash;
/* Check if skb contains ethhdr */
if (skb->mac_len < sizeof(struct ethhdr))
return -EINVAL;
memset(frame, 0, sizeof(*frame));
-
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
- hash = hsr_mac_hash(port->hsr, ethhdr->h_source);
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, &hsr->node_db[hash], skb,
+ frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
frame->is_supervision,
port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
frame->is_vlan = false;
proto = ethhdr->h_proto;
@@ -616,11 +612,13 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
{
struct hsr_frame_info frame;
+ rcu_read_lock();
if (fill_frame_info(&frame, skb, port) < 0)
goto out_drop;
hsr_register_frame_in(frame.node_src, port, frame.sequence_nr);
hsr_forward_do(&frame);
+ rcu_read_unlock();
/* Gets called for ingress frames as well as egress from master port.
* So check and increment stats for master port only here.
*/
@@ -635,6 +633,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
return;
out_drop:
+ rcu_read_unlock();
port->dev->stats.tx_dropped++;
kfree_skb(skb);
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 584e21788799..00db74d96583 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -15,37 +15,10 @@
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <linux/rculist.h>
-#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
#include "hsr_netlink.h"
-#ifdef CONFIG_LOCKDEP
-int lockdep_hsr_is_held(spinlock_t *lock)
-{
- return lockdep_is_held(lock);
-}
-#endif
-
-u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr)
-{
- u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed);
-
- return reciprocal_scale(hash, hsr->hash_buckets);
-}
-
-struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock)
-{
- struct hlist_node *first;
-
- first = rcu_dereference_bh_check(hlist_first_rcu(head),
- lockdep_hsr_is_held(lock));
- if (first)
- return hlist_entry(first, struct hsr_node, mac_list);
-
- return NULL;
-}
-
/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
* false otherwise.
*/
@@ -65,30 +38,32 @@ static bool seq_nr_after(u16 a, u16 b)
bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
- struct hsr_node *node;
+ struct hsr_self_node *sn;
+ bool ret = false;
- node = hsr_node_get_first(&hsr->self_node_db, &hsr->list_lock);
- if (!node) {
+ rcu_read_lock();
+ sn = rcu_dereference(hsr->self_node);
+ if (!sn) {
WARN_ONCE(1, "HSR: No self node\n");
- return false;
+ goto out;
}
- if (ether_addr_equal(addr, node->macaddress_A))
- return true;
- if (ether_addr_equal(addr, node->macaddress_B))
- return true;
-
- return false;
+ if (ether_addr_equal(addr, sn->macaddress_A) ||
+ ether_addr_equal(addr, sn->macaddress_B))
+ ret = true;
+out:
+ rcu_read_unlock();
+ return ret;
}
/* Search for mac entry. Caller must hold rcu read lock.
*/
-static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db,
+static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
const unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
- hlist_for_each_entry_rcu(node, node_db, mac_list) {
+ list_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, addr))
return node;
}
@@ -96,58 +71,51 @@ static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db,
return NULL;
}
-/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
+/* Helper for device init; the self_node is used in hsr_rcv() to recognize
* frames from self that's been looped over the HSR ring.
*/
int hsr_create_self_node(struct hsr_priv *hsr,
const unsigned char addr_a[ETH_ALEN],
const unsigned char addr_b[ETH_ALEN])
{
- struct hlist_head *self_node_db = &hsr->self_node_db;
- struct hsr_node *node, *oldnode;
+ struct hsr_self_node *sn, *old;
- node = kmalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
+ sn = kmalloc(sizeof(*sn), GFP_KERNEL);
+ if (!sn)
return -ENOMEM;
- ether_addr_copy(node->macaddress_A, addr_a);
- ether_addr_copy(node->macaddress_B, addr_b);
+ ether_addr_copy(sn->macaddress_A, addr_a);
+ ether_addr_copy(sn->macaddress_B, addr_b);
spin_lock_bh(&hsr->list_lock);
- oldnode = hsr_node_get_first(self_node_db, &hsr->list_lock);
- if (oldnode) {
- hlist_replace_rcu(&oldnode->mac_list, &node->mac_list);
- spin_unlock_bh(&hsr->list_lock);
- kfree_rcu(oldnode, rcu_head);
- } else {
- hlist_add_tail_rcu(&node->mac_list, self_node_db);
- spin_unlock_bh(&hsr->list_lock);
- }
+ old = rcu_replace_pointer(hsr->self_node, sn,
+ lockdep_is_held(&hsr->list_lock));
+ spin_unlock_bh(&hsr->list_lock);
+ if (old)
+ kfree_rcu(old, rcu_head);
return 0;
}
void hsr_del_self_node(struct hsr_priv *hsr)
{
- struct hlist_head *self_node_db = &hsr->self_node_db;
- struct hsr_node *node;
+ struct hsr_self_node *old;
spin_lock_bh(&hsr->list_lock);
- node = hsr_node_get_first(self_node_db, &hsr->list_lock);
- if (node) {
- hlist_del_rcu(&node->mac_list);
- kfree_rcu(node, rcu_head);
- }
+ old = rcu_replace_pointer(hsr->self_node, NULL,
+ lockdep_is_held(&hsr->list_lock));
spin_unlock_bh(&hsr->list_lock);
+ if (old)
+ kfree_rcu(old, rcu_head);
}
-void hsr_del_nodes(struct hlist_head *node_db)
+void hsr_del_nodes(struct list_head *node_db)
{
struct hsr_node *node;
- struct hlist_node *tmp;
+ struct hsr_node *tmp;
- hlist_for_each_entry_safe(node, tmp, node_db, mac_list)
- kfree_rcu(node, rcu_head);
+ list_for_each_entry_safe(node, tmp, node_db, mac_list)
+ kfree(node);
}
void prp_handle_san_frame(bool san, enum hsr_port_type port,
@@ -168,7 +136,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
* originating from the newly added node.
*/
static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
- struct hlist_head *node_db,
+ struct list_head *node_db,
unsigned char addr[],
u16 seq_out, bool san,
enum hsr_port_type rx_port)
@@ -182,6 +150,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
return NULL;
ether_addr_copy(new_node->macaddress_A, addr);
+ spin_lock_init(&new_node->seq_out_lock);
/* We are only interested in time diffs here, so use current jiffies
* as initialization. (0 could trigger an spurious ring error warning).
@@ -198,14 +167,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
hsr->proto_ops->handle_san_frame(san, rx_port, new_node);
spin_lock_bh(&hsr->list_lock);
- hlist_for_each_entry_rcu(node, node_db, mac_list,
- lockdep_hsr_is_held(&hsr->list_lock)) {
+ list_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
goto out;
}
- hlist_add_tail_rcu(&new_node->mac_list, node_db);
+ list_add_tail_rcu(&new_node->mac_list, node_db);
spin_unlock_bh(&hsr->list_lock);
return new_node;
out:
@@ -225,7 +194,7 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup)
/* Get the hsr_node from which 'skb' was sent.
*/
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port)
{
@@ -241,7 +210,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
ethhdr = (struct ethhdr *)skb_mac_header(skb);
- hlist_for_each_entry_rcu(node, node_db, mac_list) {
+ list_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
if (hsr->proto_ops->update_san_info)
hsr->proto_ops->update_san_info(node, is_sup);
@@ -291,12 +260,11 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
struct hsr_sup_tlv *hsr_sup_tlv;
struct hsr_node *node_real;
struct sk_buff *skb = NULL;
- struct hlist_head *node_db;
+ struct list_head *node_db;
struct ethhdr *ethhdr;
int i;
unsigned int pull_size = 0;
unsigned int total_pull_size = 0;
- u32 hash;
/* Here either frame->skb_hsr or frame->skb_prp should be
* valid as supervision frame always will have protocol
@@ -334,13 +302,11 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Merge node_curr (registered on macaddress_B) into node_real */
- node_db = port_rcv->hsr->node_db;
- hash = hsr_mac_hash(hsr, hsr_sp->macaddress_A);
- node_real = find_node_by_addr_A(&node_db[hash], hsr_sp->macaddress_A);
+ node_db = &port_rcv->hsr->node_db;
+ node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
if (!node_real)
/* No frame received from AddrA of this node yet */
- node_real = hsr_add_node(hsr, &node_db[hash],
- hsr_sp->macaddress_A,
+ node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
HSR_SEQNR_START - 1, true,
port_rcv->type);
if (!node_real)
@@ -374,14 +340,14 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Check if redbox mac and node mac are equal. */
- if (!ether_addr_equal(node_real->macaddress_A,
- hsr_sp->macaddress_A)) {
+ if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) {
/* This is a redbox supervision frame for a VDAN! */
goto done;
}
}
ether_addr_copy(node_real->macaddress_B, ethhdr->h_source);
+ spin_lock_bh(&node_real->seq_out_lock);
for (i = 0; i < HSR_PT_PORTS; i++) {
if (!node_curr->time_in_stale[i] &&
time_after(node_curr->time_in[i], node_real->time_in[i])) {
@@ -392,12 +358,16 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i]))
node_real->seq_out[i] = node_curr->seq_out[i];
}
+ spin_unlock_bh(&node_real->seq_out_lock);
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
- hlist_del_rcu(&node_curr->mac_list);
+ if (!node_curr->removed) {
+ list_del_rcu(&node_curr->mac_list);
+ node_curr->removed = true;
+ kfree_rcu(node_curr, rcu_head);
+ }
spin_unlock_bh(&hsr->list_lock);
- kfree_rcu(node_curr, rcu_head);
done:
/* Push back here */
@@ -433,7 +403,6 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
struct hsr_port *port)
{
struct hsr_node *node_dst;
- u32 hash;
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: Mac header not set\n", __func__);
@@ -443,8 +412,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest))
return;
- hash = hsr_mac_hash(port->hsr, eth_hdr(skb)->h_dest);
- node_dst = find_node_by_addr_A(&port->hsr->node_db[hash],
+ node_dst = find_node_by_addr_A(&port->hsr->node_db,
eth_hdr(skb)->h_dest);
if (!node_dst) {
if (net_ratelimit())
@@ -484,13 +452,17 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr)
{
+ spin_lock_bh(&node->seq_out_lock);
if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) &&
time_is_after_jiffies(node->time_out[port->type] +
- msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)))
+ msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) {
+ spin_unlock_bh(&node->seq_out_lock);
return 1;
+ }
node->time_out[port->type] = jiffies;
node->seq_out[port->type] = sequence_nr;
+ spin_unlock_bh(&node->seq_out_lock);
return 0;
}
@@ -520,71 +492,60 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
void hsr_prune_nodes(struct timer_list *t)
{
struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
- struct hlist_node *tmp;
struct hsr_node *node;
+ struct hsr_node *tmp;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
- int i;
spin_lock_bh(&hsr->list_lock);
+ list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
+ /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
+ * nor time_in[HSR_PT_SLAVE_B], will ever be updated for
+ * the master port. Thus the master node will be repeatedly
+ * pruned leading to packet loss.
+ */
+ if (hsr_addr_is_self(hsr, node->macaddress_A))
+ continue;
+
+ /* Shorthand */
+ time_a = node->time_in[HSR_PT_SLAVE_A];
+ time_b = node->time_in[HSR_PT_SLAVE_B];
+
+ /* Check for timestamps old enough to risk wrap-around */
+ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_A] = true;
+ if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_B] = true;
+
+ /* Get age of newest frame from node.
+ * At least one time_in is OK here; nodes get pruned long
+ * before both time_ins can get stale
+ */
+ timestamp = time_a;
+ if (node->time_in_stale[HSR_PT_SLAVE_A] ||
+ (!node->time_in_stale[HSR_PT_SLAVE_B] &&
+ time_after(time_b, time_a)))
+ timestamp = time_b;
+
+ /* Warn of ring error only as long as we get frames at all */
+ if (time_is_after_jiffies(timestamp +
+ msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
+ rcu_read_lock();
+ port = get_late_port(hsr, node);
+ if (port)
+ hsr_nl_ringerror(hsr, node->macaddress_A, port);
+ rcu_read_unlock();
+ }
- for (i = 0; i < hsr->hash_buckets; i++) {
- hlist_for_each_entry_safe(node, tmp, &hsr->node_db[i],
- mac_list) {
- /* Don't prune own node.
- * Neither time_in[HSR_PT_SLAVE_A]
- * nor time_in[HSR_PT_SLAVE_B], will ever be updated
- * for the master port. Thus the master node will be
- * repeatedly pruned leading to packet loss.
- */
- if (hsr_addr_is_self(hsr, node->macaddress_A))
- continue;
-
- /* Shorthand */
- time_a = node->time_in[HSR_PT_SLAVE_A];
- time_b = node->time_in[HSR_PT_SLAVE_B];
-
- /* Check for timestamps old enough to
- * risk wrap-around
- */
- if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_A] = true;
- if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_B] = true;
-
- /* Get age of newest frame from node.
- * At least one time_in is OK here; nodes get pruned
- * long before both time_ins can get stale
- */
- timestamp = time_a;
- if (node->time_in_stale[HSR_PT_SLAVE_A] ||
- (!node->time_in_stale[HSR_PT_SLAVE_B] &&
- time_after(time_b, time_a)))
- timestamp = time_b;
-
- /* Warn of ring error only as long as we get
- * frames at all
- */
- if (time_is_after_jiffies(timestamp +
- msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
- rcu_read_lock();
- port = get_late_port(hsr, node);
- if (port)
- hsr_nl_ringerror(hsr,
- node->macaddress_A,
- port);
- rcu_read_unlock();
- }
-
- /* Prune old entries */
- if (time_is_before_jiffies(timestamp +
- msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
- hsr_nl_nodedown(hsr, node->macaddress_A);
- hlist_del_rcu(&node->mac_list);
- /* Note that we need to free this
- * entry later:
- */
+ /* Prune old entries */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+ if (!node->removed) {
+ list_del_rcu(&node->mac_list);
+ node->removed = true;
+ /* Note that we need to free this entry later: */
kfree_rcu(node, rcu_head);
}
}
@@ -600,20 +561,17 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
- u32 hash;
-
- hash = hsr_mac_hash(hsr, addr);
if (!_pos) {
- node = hsr_node_get_first(&hsr->node_db[hash],
- &hsr->list_lock);
+ node = list_first_or_null_rcu(&hsr->node_db,
+ struct hsr_node, mac_list);
if (node)
ether_addr_copy(addr, node->macaddress_A);
return node;
}
node = _pos;
- hlist_for_each_entry_continue_rcu(node, mac_list) {
+ list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) {
ether_addr_copy(addr, node->macaddress_A);
return node;
}
@@ -633,11 +591,8 @@ int hsr_get_node_data(struct hsr_priv *hsr,
struct hsr_node *node;
struct hsr_port *port;
unsigned long tdiff;
- u32 hash;
-
- hash = hsr_mac_hash(hsr, addr);
- node = find_node_by_addr_A(&hsr->node_db[hash], addr);
+ node = find_node_by_addr_A(&hsr->node_db, addr);
if (!node)
return -ENOENT;
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index f3762e9e42b5..b23556251d62 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -28,17 +28,9 @@ struct hsr_frame_info {
bool is_from_san;
};
-#ifdef CONFIG_LOCKDEP
-int lockdep_hsr_is_held(spinlock_t *lock);
-#else
-#define lockdep_hsr_is_held(lock) 1
-#endif
-
-u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr);
-struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock);
void hsr_del_self_node(struct hsr_priv *hsr);
-void hsr_del_nodes(struct hlist_head *node_db);
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
+void hsr_del_nodes(struct list_head *node_db);
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port);
void hsr_handle_sup_frame(struct hsr_frame_info *frame);
@@ -76,7 +68,9 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
void prp_update_san_info(struct hsr_node *node, bool is_sup);
struct hsr_node {
- struct hlist_node mac_list;
+ struct list_head mac_list;
+ /* Protect R/W access to seq_out */
+ spinlock_t seq_out_lock;
unsigned char macaddress_A[ETH_ALEN];
unsigned char macaddress_B[ETH_ALEN];
/* Local slave through which AddrB frames are received from this node */
@@ -88,6 +82,7 @@ struct hsr_node {
bool san_a;
bool san_b;
u16 seq_out[HSR_PT_PORTS];
+ bool removed;
struct rcu_head rcu_head;
};
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index b158ba409f9a..5584c80a5c79 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -47,9 +47,6 @@
#define HSR_V1_SUP_LSDUSIZE 52
-#define HSR_HSIZE_SHIFT 8
-#define HSR_HSIZE BIT(HSR_HSIZE_SHIFT)
-
/* The helper functions below assumes that 'path' occupies the 4 most
* significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or
* equivalently, the 4 most significant bits of HSR tag byte 14).
@@ -185,11 +182,17 @@ struct hsr_proto_ops {
void (*update_san_info)(struct hsr_node *node, bool is_sup);
};
+struct hsr_self_node {
+ unsigned char macaddress_A[ETH_ALEN];
+ unsigned char macaddress_B[ETH_ALEN];
+ struct rcu_head rcu_head;
+};
+
struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
- struct hlist_head node_db[HSR_HSIZE]; /* Known HSR nodes */
- struct hlist_head self_node_db; /* MACs of slaves */
+ struct list_head node_db; /* Known HSR nodes */
+ struct hsr_self_node __rcu *self_node; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
struct timer_list prune_timer;
int announce_count;
@@ -199,8 +202,6 @@ struct hsr_priv {
spinlock_t seqnr_lock; /* locking for sequence_nr */
spinlock_t list_lock; /* locking for node list */
struct hsr_proto_ops *proto_ops;
- u32 hash_buckets;
- u32 hash_seed;
#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set
* based on SLAVE_A or SLAVE_B
*/
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 7174a9092900..78fe40eb9f01 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -105,7 +105,6 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
static void hsr_dellink(struct net_device *dev, struct list_head *head)
{
struct hsr_priv *hsr = netdev_priv(dev);
- int i;
del_timer_sync(&hsr->prune_timer);
del_timer_sync(&hsr->announce_timer);
@@ -114,8 +113,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
hsr_del_ports(hsr);
hsr_del_self_node(hsr);
- for (i = 0; i < hsr->hash_buckets; i++)
- hsr_del_nodes(&hsr->node_db[i]);
+ hsr_del_nodes(&hsr->node_db);
unregister_netdevice_queue(dev, head);
}
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index b33d1b5eda87..248ad5e46969 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -26,10 +26,12 @@ static struct genl_family nl802154_fam;
/* multicast groups */
enum nl802154_multicast_groups {
NL802154_MCGRP_CONFIG,
+ NL802154_MCGRP_SCAN,
};
static const struct genl_multicast_group nl802154_mcgrps[] = {
[NL802154_MCGRP_CONFIG] = { .name = "config", },
+ [NL802154_MCGRP_SCAN] = { .name = "scan", },
};
/* returns ERR_PTR values */
@@ -216,6 +218,9 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_PID] = { .type = NLA_U32 },
[NL802154_ATTR_NETNS_FD] = { .type = NLA_U32 },
+
+ [NL802154_ATTR_COORDINATOR] = { .type = NLA_NESTED },
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
[NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
[NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, },
@@ -1281,6 +1286,104 @@ static int nl802154_wpan_phy_netns(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static int nl802154_prep_scan_event_msg(struct sk_buff *msg,
+ struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ u32 portid, u32 seq, int flags, u8 cmd,
+ struct ieee802154_coord_desc *desc)
+{
+ struct nlattr *nla;
+ void *hdr;
+
+ hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx))
+ goto nla_put_failure;
+
+ if (wpan_dev->netdev &&
+ nla_put_u32(msg, NL802154_ATTR_IFINDEX, wpan_dev->netdev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV,
+ wpan_dev_id(wpan_dev), NL802154_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla = nla_nest_start_noflag(msg, NL802154_ATTR_COORDINATOR);
+ if (!nla)
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL802154_COORD_PANID, IEEE802154_PAN_ID_LEN,
+ &desc->addr.pan_id))
+ goto nla_put_failure;
+
+ if (desc->addr.mode == IEEE802154_ADDR_SHORT) {
+ if (nla_put(msg, NL802154_COORD_ADDR,
+ IEEE802154_SHORT_ADDR_LEN,
+ &desc->addr.short_addr))
+ goto nla_put_failure;
+ } else {
+ if (nla_put(msg, NL802154_COORD_ADDR,
+ IEEE802154_EXTENDED_ADDR_LEN,
+ &desc->addr.extended_addr))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u8(msg, NL802154_COORD_CHANNEL, desc->channel))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_COORD_PAGE, desc->page))
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, NL802154_COORD_SUPERFRAME_SPEC,
+ desc->superframe_spec))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_COORD_LINK_QUALITY, desc->link_quality))
+ goto nla_put_failure;
+
+ if (desc->gts_permit && nla_put_flag(msg, NL802154_COORD_GTS_PERMIT))
+ goto nla_put_failure;
+
+ /* TODO: NL802154_COORD_PAYLOAD_DATA if any */
+
+ nla_nest_end(msg, nla);
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+
+ return -EMSGSIZE;
+}
+
+int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ struct ieee802154_coord_desc *desc)
+{
+ struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(wpan_phy);
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = nl802154_prep_scan_event_msg(msg, rdev, wpan_dev, 0, 0, 0,
+ NL802154_CMD_SCAN_EVENT,
+ desc);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ return genlmsg_multicast_netns(&nl802154_fam, wpan_phy_net(wpan_phy),
+ msg, 0, NL802154_MCGRP_SCAN, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(nl802154_scan_event);
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
[NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h
index 8c4b6d08954c..89b805500032 100644
--- a/net/ieee802154/nl802154.h
+++ b/net/ieee802154/nl802154.h
@@ -4,5 +4,7 @@
int nl802154_init(void);
void nl802154_exit(void);
+int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ struct ieee802154_coord_desc *desc);
#endif /* __IEEE802154_NL802154_H */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5b4d86701822..ab4a06be489b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1230,7 +1230,6 @@ EXPORT_SYMBOL(inet_unregister_protosw);
static int inet_sk_reselect_saddr(struct sock *sk)
{
- struct inet_bind_hashbucket *prev_addr_hashbucket;
struct inet_sock *inet = inet_sk(sk);
__be32 old_saddr = inet->inet_saddr;
__be32 daddr = inet->inet_daddr;
@@ -1260,16 +1259,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
return 0;
}
- prev_addr_hashbucket =
- inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk,
- sock_net(sk), inet->inet_num);
-
- inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
-
- err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET);
if (err) {
- inet->inet_saddr = old_saddr;
- inet->inet_rcv_saddr = old_saddr;
ip_rt_put(rt);
return err;
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 6da16ae6a962..4517d2bd186a 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -61,7 +61,9 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size,
if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info))
return false;
- if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id)
+ if (base_type(info->reg_type) == PTR_TO_BTF_ID &&
+ !bpf_type_has_unsafe_modifiers(info->reg_type) &&
+ info->btf_id == sock_id)
/* promote it to tcp_sock */
info->btf_id = tcp_sock_id;
@@ -69,18 +71,17 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size,
}
static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
+ const struct btf_type *t;
size_t end;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
- flag);
+ return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
if (t != tcp_sock_type) {
bpf_log(log, "only read is supported\n");
return -EACCES;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 170152772d33..3969fa805679 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -314,6 +314,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ if (xo->seq.low < seq)
+ xo->seq.hi++;
+
esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
ip_hdr(skb)->tot_len = htons(skb->len);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f361d3d56be2..b5736ef16ed2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -841,6 +841,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
return -EINVAL;
}
+ if (!cfg->fc_table)
+ cfg->fc_table = RT_TABLE_MAIN;
+
return 0;
errout:
return err;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f721c308248b..ce9ff3c62e84 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -423,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
nfi->fib_prefsrc == fi->fib_prefsrc &&
nfi->fib_priority == fi->fib_priority &&
nfi->fib_type == fi->fib_type &&
+ nfi->fib_tb_id == fi->fib_tb_id &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
@@ -888,9 +889,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
return 1;
}
- /* cannot match on nexthop object attributes */
- if (fi->nh)
- return 1;
+ if (fi->nh) {
+ if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp)
+ return 1;
+ return 0;
+ }
if (cfg->fc_oif || cfg->fc_gw_family) {
struct fib_nh *nh;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 452ff177e4da..74d403dbd2b4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -126,7 +126,7 @@ struct key_vector {
/* This list pointer if valid if (pos | bits) == 0 (LEAF) */
struct hlist_head leaf;
/* This array is valid if (pos | bits) > 0 (TNODE) */
- struct key_vector __rcu *tnode[0];
+ DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode);
};
};
@@ -1381,8 +1381,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
/* The alias was already inserted, so the node must exist. */
l = l ? l : fib_find_node(t, &tp, key);
- if (WARN_ON_ONCE(!l))
+ if (WARN_ON_ONCE(!l)) {
+ err = -ENOENT;
goto out_free_new_fa;
+ }
if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
new_fa) {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 033bf3c2538f..3cec471a2cd2 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -858,34 +858,80 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in
return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}
-int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk)
+static void inet_update_saddr(struct sock *sk, void *saddr, int family)
+{
+ if (family == AF_INET) {
+ inet_sk(sk)->inet_saddr = *(__be32 *)saddr;
+ sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else {
+ sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr;
+ }
+#endif
+}
+
+static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset)
{
struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_bind_hashbucket *head, *head2;
struct inet_bind2_bucket *tb2, *new_tb2;
int l3mdev = inet_sk_bound_l3mdev(sk);
- struct inet_bind_hashbucket *head2;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
+ int bhash;
+
+ if (!inet_csk(sk)->icsk_bind2_hash) {
+ /* Not bind()ed before. */
+ if (reset)
+ inet_reset_saddr(sk);
+ else
+ inet_update_saddr(sk, saddr, family);
+
+ return 0;
+ }
/* Allocate a bind2 bucket ahead of time to avoid permanently putting
* the bhash2 table in an inconsistent state if a new tb2 bucket
* allocation fails.
*/
new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
- if (!new_tb2)
+ if (!new_tb2) {
+ if (reset) {
+ /* The (INADDR_ANY, port) bucket might have already
+ * been freed, then we cannot fixup icsk_bind2_hash,
+ * so we give up and unlink sk from bhash/bhash2 not
+ * to leave inconsistency in bhash2.
+ */
+ inet_put_port(sk);
+ inet_reset_saddr(sk);
+ }
+
return -ENOMEM;
+ }
+ bhash = inet_bhashfn(net, port, hinfo->bhash_size);
+ head = &hinfo->bhash[bhash];
head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
- if (prev_saddr) {
- spin_lock_bh(&prev_saddr->lock);
- __sk_del_bind2_node(sk);
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- inet_csk(sk)->icsk_bind2_hash);
- spin_unlock_bh(&prev_saddr->lock);
- }
+ /* If we change saddr locklessly, another thread
+ * iterating over bhash might see corrupted address.
+ */
+ spin_lock_bh(&head->lock);
- spin_lock_bh(&head2->lock);
+ spin_lock(&head2->lock);
+ __sk_del_bind2_node(sk);
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash);
+ spin_unlock(&head2->lock);
+
+ if (reset)
+ inet_reset_saddr(sk);
+ else
+ inet_update_saddr(sk, saddr, family);
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+
+ spin_lock(&head2->lock);
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
if (!tb2) {
tb2 = new_tb2;
@@ -893,15 +939,29 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc
}
sk_add_bind2_node(sk, &tb2->owners);
inet_csk(sk)->icsk_bind2_hash = tb2;
- spin_unlock_bh(&head2->lock);
+ spin_unlock(&head2->lock);
+
+ spin_unlock_bh(&head->lock);
if (tb2 != new_tb2)
kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);
return 0;
}
+
+int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family)
+{
+ return __inet_bhash2_update_saddr(sk, saddr, family, false);
+}
EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);
+void inet_bhash2_reset_saddr(struct sock *sk)
+{
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ __inet_bhash2_update_saddr(sk, NULL, 0, true);
+}
+EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr);
+
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a4ccef3e6935..ffff46cdcb58 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1492,24 +1492,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel_parm *p = &t->parms;
__be16 o_flags = p->o_flags;
- if (t->erspan_ver <= 2) {
- if (t->erspan_ver != 0 && !t->collect_md)
- o_flags |= TUNNEL_KEY;
-
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
- goto nla_put_failure;
-
- if (t->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
- goto nla_put_failure;
- } else if (t->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
- goto nla_put_failure;
- }
- }
-
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
@@ -1550,6 +1532,34 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ if (t->erspan_ver <= 2) {
+ if (t->erspan_ver != 0 && !t->collect_md)
+ t->parms.o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ goto nla_put_failure;
+
+ if (t->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+ } else if (t->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ goto nla_put_failure;
+ }
+ }
+
+ return ipgre_fill_info(skb, dev);
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static void erspan_setup(struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1628,7 +1638,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = {
.changelink = erspan_changelink,
.dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
- .fill_info = ipgre_fill_info,
+ .fill_info = erspan_fill_info,
.get_link_net = ip_tunnel_get_link_net,
};
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 1b512390b3cf..e880ce77322a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -366,6 +366,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
iph->tos, dev);
if (unlikely(err))
goto drop_error;
+ } else {
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY))
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f8e176c77d1c..b3cc416ed292 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
switch (ctinfo) {
case IP_CT_NEW:
- ct->mark = hash;
+ WRITE_ONCE(ct->mark, hash);
break;
case IP_CT_RELATED:
case IP_CT_RELATED_REPLY:
@@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef DEBUG
nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
- pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
+ pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark));
if (!clusterip_responsible(cipinfo->config, hash)) {
pr_debug("not responsible\n");
return NF_DROP;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index bb9854c2b7a1..409ec2a1f95b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -49,6 +49,11 @@
#include <net/transp_v6.h>
#endif
+#define ping_portaddr_for_each_entry(__sk, node, list) \
+ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
+#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \
+ hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
+
struct ping_table {
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
spinlock_t lock;
@@ -192,7 +197,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
return NULL;
}
- ping_portaddr_for_each_entry(sk, hnode, hslot) {
+ ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) {
isk = inet_sk(sk);
pr_debug("iterate\n");
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4a69c5fcfedc..001947136b0a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3114,8 +3114,7 @@ int tcp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
- inet_reset_saddr(sk);
+ inet_bhash2_reset_saddr(sk);
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
@@ -4465,11 +4464,8 @@ bool tcp_alloc_md5sig_pool(void)
if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool_populated) {
+ if (!tcp_md5sig_pool_populated)
__tcp_alloc_md5sig_pool();
- if (tcp_md5sig_pool_populated)
- static_branch_inc(&tcp_md5_needed);
- }
mutex_unlock(&tcp_md5sig_mutex);
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index cf9c3e8f7ccb..94aad3870c5f 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -45,8 +45,11 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
tmp->sg.end = i;
if (apply) {
apply_bytes -= size;
- if (!apply_bytes)
+ if (!apply_bytes) {
+ if (sge->length)
+ sk_msg_iter_var_prev(i);
break;
+ }
}
} while (i != msg->sg.end);
@@ -131,10 +134,9 @@ static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
return ret;
}
-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
- u32 bytes, int flags)
+int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
+ struct sk_msg *msg, u32 bytes, int flags)
{
- bool ingress = sk_msg_to_ingress(msg);
struct sk_psock *psock = sk_psock_get(sk);
int ret;
@@ -276,10 +278,10 @@ msg_bytes_ready:
static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, int *copied, int flags)
{
- bool cork = false, enospc = sk_msg_full(msg);
+ bool cork = false, enospc = sk_msg_full(msg), redir_ingress;
struct sock *sk_redir;
u32 tosend, origsize, sent, delta = 0;
- u32 eval = __SK_NONE;
+ u32 eval;
int ret;
more_data:
@@ -310,6 +312,7 @@ more_data:
tosend = msg->sg.size;
if (psock->apply_bytes && psock->apply_bytes < tosend)
tosend = psock->apply_bytes;
+ eval = __SK_NONE;
switch (psock->eval) {
case __SK_PASS:
@@ -321,6 +324,7 @@ more_data:
sk_msg_apply_bytes(psock, tosend);
break;
case __SK_REDIRECT:
+ redir_ingress = psock->redir_ingress;
sk_redir = psock->sk_redir;
sk_msg_apply_bytes(psock, tosend);
if (!psock->apply_bytes) {
@@ -337,7 +341,8 @@ more_data:
release_sock(sk);
origsize = msg->sg.size;
- ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+ ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
+ msg, tosend, flags);
sent = origsize - msg->sg.size;
if (eval == __SK_REDIRECT)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0ae291e53eab..1efacbe948da 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6845,7 +6845,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
xchg(&queue->synflood_warned, 1) == 0) {
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_family == AF_INET6) {
net_info_ratelimited("%s: Possible SYN flooding on port [%pI6c]:%u. %s.\n",
- proto, &sk->sk_v6_rcv_saddr,
+ proto, inet6_rcv_saddr(sk),
sk->sk_num, msg);
} else {
net_info_ratelimited("%s: Possible SYN flooding on port %pI4:%u. %s.\n",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f0343538d1f8..8320d0ecb13a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -199,15 +199,14 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_timewait_death_row *tcp_death_row;
- __be32 daddr, nexthop, prev_sk_rcv_saddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct ip_options_rcu *inet_opt;
struct net *net = sock_net(sk);
__be16 orig_sport, orig_dport;
+ __be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
@@ -251,24 +250,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (!inet->inet_saddr) {
- if (inet_csk(sk)->icsk_bind2_hash) {
- prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
- sk, net, inet->inet_num);
- prev_sk_rcv_saddr = sk->sk_rcv_saddr;
- }
- inet->inet_saddr = fl4->saddr;
- }
-
- sk_rcv_saddr_set(sk, inet->inet_saddr);
-
- if (prev_addr_hashbucket) {
- err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET);
if (err) {
- inet->inet_saddr = 0;
- sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
ip_rt_put(rt);
return err;
}
+ } else {
+ sk_rcv_saddr_set(sk, inet->inet_saddr);
}
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -343,6 +331,7 @@ failure:
* if necessary.
*/
tcp_set_state(sk, TCP_CLOSE);
+ inet_bhash2_reset_saddr(sk);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
@@ -1064,7 +1053,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
* We need to maintain these in the sk structure.
*/
-DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
+DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ);
EXPORT_SYMBOL(tcp_md5_needed);
static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new)
@@ -1172,10 +1161,25 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);
+static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_info *md5sig;
+
+ md5sig = kmalloc(sizeof(*md5sig), gfp);
+ if (!md5sig)
+ return -ENOMEM;
+
+ sk_gso_disable(sk);
+ INIT_HLIST_HEAD(&md5sig->head);
+ rcu_assign_pointer(tp->md5sig_info, md5sig);
+ return 0;
+}
+
/* This can be called on a newly created socket, from other files */
-int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index, u8 flags,
- const u8 *newkey, u8 newkeylen, gfp_t gfp)
+static int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen, int l3index, u8 flags,
+ const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
struct tcp_md5sig_key *key;
@@ -1204,15 +1208,6 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
md5sig = rcu_dereference_protected(tp->md5sig_info,
lockdep_sock_is_held(sk));
- if (!md5sig) {
- md5sig = kmalloc(sizeof(*md5sig), gfp);
- if (!md5sig)
- return -ENOMEM;
-
- sk_gso_disable(sk);
- INIT_HLIST_HEAD(&md5sig->head);
- rcu_assign_pointer(tp->md5sig_info, md5sig);
- }
key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
if (!key)
@@ -1234,8 +1229,59 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
hlist_add_head_rcu(&key->node, &md5sig->head);
return 0;
}
+
+int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen, int l3index, u8 flags,
+ const u8 *newkey, u8 newkeylen)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
+ if (tcp_md5sig_info_add(sk, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!static_branch_inc(&tcp_md5_needed.key)) {
+ struct tcp_md5sig_info *md5sig;
+
+ md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk));
+ rcu_assign_pointer(tp->md5sig_info, NULL);
+ kfree_rcu(md5sig, rcu);
+ return -EUSERS;
+ }
+ }
+
+ return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags,
+ newkey, newkeylen, GFP_KERNEL);
+}
EXPORT_SYMBOL(tcp_md5_do_add);
+int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen, int l3index,
+ struct tcp_md5sig_key *key)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
+ if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC)))
+ return -ENOMEM;
+
+ if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) {
+ struct tcp_md5sig_info *md5sig;
+
+ md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk));
+ net_warn_ratelimited("Too many TCP-MD5 keys in the system\n");
+ rcu_assign_pointer(tp->md5sig_info, NULL);
+ kfree_rcu(md5sig, rcu);
+ return -EUSERS;
+ }
+ }
+
+ return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index,
+ key->flags, key->key, key->keylen,
+ sk_gfp_mask(sk, GFP_ATOMIC));
+}
+EXPORT_SYMBOL(tcp_md5_key_copy);
+
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
u8 prefixlen, int l3index, u8 flags)
{
@@ -1322,7 +1368,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags,
- cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+ cmd.tcpm_key, cmd.tcpm_keylen);
}
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
@@ -1573,14 +1619,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
addr = (union tcp_md5_addr *)&newinet->inet_daddr;
key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
if (key) {
- /*
- * We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags,
- key->key, key->keylen, GFP_ATOMIC);
+ if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key))
+ goto put_and_exit;
sk_gso_disable(newsk);
}
#endif
@@ -2272,6 +2312,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_clear_md5_list(sk);
kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
tp->md5sig_info = NULL;
+ static_branch_slow_dec_deferred(&tcp_md5_needed);
}
#endif
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index c375f603a16c..e002f2e1d4f2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -240,6 +240,40 @@ kill:
}
EXPORT_SYMBOL(tcp_timewait_state_process);
+static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw)
+{
+#ifdef CONFIG_TCP_MD5SIG
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_md5sig_key *key;
+
+ /*
+ * The timewait bucket does not have the key DB from the
+ * sock structure. We just make a quick copy of the
+ * md5 key being used (if indeed we are using one)
+ * so the timewait ack generating code has the key.
+ */
+ tcptw->tw_md5_key = NULL;
+ if (!static_branch_unlikely(&tcp_md5_needed.key))
+ return;
+
+ key = tp->af_specific->md5_lookup(sk, sk);
+ if (key) {
+ tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
+ if (!tcptw->tw_md5_key)
+ return;
+ if (!tcp_alloc_md5sig_pool())
+ goto out_free;
+ if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key))
+ goto out_free;
+ }
+ return;
+out_free:
+ WARN_ON_ONCE(1);
+ kfree(tcptw->tw_md5_key);
+ tcptw->tw_md5_key = NULL;
+#endif
+}
+
/*
* Move a socket to time-wait or dead fin-wait-2 state.
*/
@@ -282,26 +316,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
#endif
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * The timewait bucket does not have the key DB from the
- * sock structure. We just make a quick copy of the
- * md5 key being used (if indeed we are using one)
- * so the timewait ack generating code has the key.
- */
- do {
- tcptw->tw_md5_key = NULL;
- if (static_branch_unlikely(&tcp_md5_needed)) {
- struct tcp_md5sig_key *key;
-
- key = tp->af_specific->md5_lookup(sk, sk);
- if (key) {
- tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
- }
- }
- } while (0);
-#endif
+ tcp_time_wait_init(sk, tcptw);
/* Get the TIME_WAIT timeout firing. */
if (timeo < rto)
@@ -337,11 +352,13 @@ EXPORT_SYMBOL(tcp_time_wait);
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
- if (static_branch_unlikely(&tcp_md5_needed)) {
+ if (static_branch_unlikely(&tcp_md5_needed.key)) {
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
- if (twsk->tw_md5_key)
+ if (twsk->tw_md5_key) {
kfree_rcu(twsk->tw_md5_key, rcu);
+ static_branch_slow_dec_deferred(&tcp_md5_needed);
+ }
}
#endif
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 894410dc9293..71d01cf3c13e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -766,7 +766,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (static_branch_unlikely(&tcp_md5_needed) &&
+ if (static_branch_unlikely(&tcp_md5_needed.key) &&
rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
@@ -922,7 +922,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (static_branch_unlikely(&tcp_md5_needed) &&
+ if (static_branch_unlikely(&tcp_md5_needed.key) &&
rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index aedde65e2268..1f01e15ca24f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -387,7 +387,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
return __udp_gso_segment(skb, features, false);
mss = skb_shinfo(skb)->gso_size;
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index bc3a043a5d5c..029219749785 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -624,6 +624,8 @@ __udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
continue;
nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
+ if (!nest)
+ return -EMSGSIZE;
if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
utn->entries[table][j].port) ||
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 79d43548279c..75c02992c520 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -56,12 +56,11 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
__be32 seq;
__be32 spi;
int nhoff;
- int err;
if (!pskb_pull(skb, offset))
return NULL;
- if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
+ if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0)
goto out;
xo = xfrm_offload(skb);
@@ -346,6 +345,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ if (xo->seq.low < seq)
+ xo->seq.hi++;
+
esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
len = skb->len - sizeof(struct ipv6hdr);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e19507614f64..60fd91bb5171 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -920,6 +920,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (err < 0)
goto fail;
+ /* We prevent @rt from being freed. */
+ rcu_read_lock();
+
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
@@ -943,6 +946,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
+ rcu_read_unlock();
return 0;
}
@@ -950,6 +954,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
+ rcu_read_unlock();
return err;
slow_path_clean:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f676be14e6b6..11b736a76bd7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -292,24 +292,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (!saddr) {
- struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
- struct in6_addr prev_v6_rcv_saddr;
-
- if (icsk->icsk_bind2_hash) {
- prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
- sk, net, inet->inet_num);
- prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- }
saddr = &fl6.saddr;
- sk->sk_v6_rcv_saddr = *saddr;
- if (prev_addr_hashbucket) {
- err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
- if (err) {
- sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
- goto failure;
- }
- }
+ err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
+ if (err)
+ goto failure;
}
/* set the source address */
@@ -359,6 +346,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
late_failure:
tcp_set_state(sk, TCP_CLOSE);
+ inet_bhash2_reset_saddr(sk);
failure:
inet->inet_dport = 0;
sk->sk_route_caps = 0;
@@ -677,12 +665,11 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
AF_INET, prefixlen, l3index, flags,
- cmd.tcpm_key, cmd.tcpm_keylen,
- GFP_KERNEL);
+ cmd.tcpm_key, cmd.tcpm_keylen);
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
AF_INET6, prefixlen, l3index, flags,
- cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+ cmd.tcpm_key, cmd.tcpm_keylen);
}
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
@@ -1377,14 +1364,14 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Copy over the MD5 key from the original socket */
key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
if (key) {
- /* We're using one, so create a matching key
- * on the newsk structure. If we fail to get
- * memory, then we end up not copying the key
- * across. Shucks.
- */
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
- sk_gfp_mask(sk, GFP_ATOMIC));
+ const union tcp_md5_addr *addr;
+
+ addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
+ if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
+ goto out;
+ }
}
#endif
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e0e10f6bcdc1..c39c1e32f980 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,7 +42,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
return __udp_gso_segment(skb, features, true);
mss = skb_shinfo(skb)->gso_size;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4a4b0e49ec92..ea435eba3053 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -287,9 +287,13 @@ int __init xfrm6_init(void)
if (ret)
goto out_state;
- register_pernet_subsys(&xfrm6_net_ops);
+ ret = register_pernet_subsys(&xfrm6_net_ops);
+ if (ret)
+ goto out_protocol;
out:
return ret;
+out_protocol:
+ xfrm6_protocol_fini();
out_state:
xfrm6_state_fini();
out_policy:
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c85df5b958d2..2bdbcec781cd 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1377,13 +1377,13 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
max_spi = range->sadb_spirange_max;
}
- err = verify_spi_info(x->id.proto, min_spi, max_spi);
+ err = verify_spi_info(x->id.proto, min_spi, max_spi, NULL);
if (err) {
xfrm_state_put(x);
return err;
}
- err = xfrm_alloc_spi(x, min_spi, max_spi);
+ err = xfrm_alloc_spi(x, min_spi, max_spi, NULL);
resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x);
if (IS_ERR(resp_skb)) {
@@ -2626,7 +2626,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
}
return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i,
- kma ? &k : NULL, net, NULL, 0);
+ kma ? &k : NULL, net, NULL, 0, NULL);
out:
return err;
@@ -2905,7 +2905,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2923,7 +2923,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;
- if (!(ealg_tmpl_set(t, ealg) && ealg->available))
+ if (!(ealg_tmpl_set(t, ealg)))
continue;
for (k = 1; ; k++) {
@@ -2934,16 +2934,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
}
return sz + sizeof(struct sadb_prop);
}
-static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
{
struct sadb_prop *p;
+ int sz = 0;
int i;
p = skb_put(skb, sizeof(struct sadb_prop));
@@ -2971,13 +2972,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
c->sadb_comb_soft_addtime = 20*60*60;
c->sadb_comb_hard_usetime = 8*60*60;
c->sadb_comb_soft_usetime = 7*60*60;
+ sz += sizeof(*c);
}
}
+
+ return sz + sizeof(*p);
}
-static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
{
struct sadb_prop *p;
+ int sz = 0;
int i, k;
p = skb_put(skb, sizeof(struct sadb_prop));
@@ -3019,8 +3024,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
c->sadb_comb_soft_addtime = 20*60*60;
c->sadb_comb_hard_usetime = 8*60*60;
c->sadb_comb_soft_usetime = 7*60*60;
+ sz += sizeof(*c);
}
}
+
+ return sz + sizeof(*p);
}
static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c)
@@ -3150,6 +3158,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
struct sadb_x_sec_ctx *sec_ctx;
struct xfrm_sec_ctx *xfrm_ctx;
int ctx_size = 0;
+ int alg_size = 0;
sockaddr_size = pfkey_sockaddr_size(x->props.family);
if (!sockaddr_size)
@@ -3161,16 +3170,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
sizeof(struct sadb_x_policy);
if (x->id.proto == IPPROTO_AH)
- size += count_ah_combs(t);
+ alg_size = count_ah_combs(t);
else if (x->id.proto == IPPROTO_ESP)
- size += count_esp_combs(t);
+ alg_size = count_esp_combs(t);
if ((xfrm_ctx = x->security)) {
ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
}
- skb = alloc_skb(size + 16, GFP_ATOMIC);
+ skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
@@ -3224,10 +3233,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
pol->sadb_x_policy_priority = xp->priority;
/* Set sadb_comb's. */
+ alg_size = 0;
if (x->id.proto == IPPROTO_AH)
- dump_ah_combs(skb, t);
+ alg_size = dump_ah_combs(skb, t);
else if (x->id.proto == IPPROTO_ESP)
- dump_esp_combs(skb, t);
+ alg_size = dump_esp_combs(skb, t);
+
+ hdr->sadb_msg_len += alg_size / 8;
/* security context */
if (xfrm_ctx) {
@@ -3382,7 +3394,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
hdr->sadb_msg_len = size / sizeof(uint64_t);
hdr->sadb_msg_errno = 0;
hdr->sadb_msg_reserved = 0;
- hdr->sadb_msg_seq = x->km.seq = get_acqseq();
+ hdr->sadb_msg_seq = x->km.seq;
hdr->sadb_msg_pid = 0;
/* SA */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 754fdda8a5f5..9a1415fe3fa7 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1474,11 +1474,12 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
}
sk = sock->sk;
- write_lock(&sk->sk_callback_lock);
-
+ write_lock_bh(&sk->sk_callback_lock);
ret = l2tp_validate_socket(sk, net, tunnel->encap);
if (ret < 0)
- goto err_sock;
+ goto err_inval_sock;
+ rcu_assign_sk_user_data(sk, tunnel);
+ write_unlock_bh(&sk->sk_callback_lock);
tunnel->l2tp_net = net;
pn = l2tp_pernet(net);
@@ -1507,8 +1508,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
};
setup_udp_tunnel_sock(net, sock, &udp_cfg);
- } else {
- rcu_assign_sk_user_data(sk, tunnel);
}
tunnel->old_sk_destruct = sk->sk_destruct;
@@ -1522,16 +1521,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
if (tunnel->fd >= 0)
sockfd_put(sock);
- write_unlock(&sk->sk_callback_lock);
return 0;
err_sock:
+ write_lock_bh(&sk->sk_callback_lock);
+ rcu_assign_sk_user_data(sk, NULL);
+err_inval_sock:
+ write_unlock_bh(&sk->sk_callback_lock);
+
if (tunnel->fd < 0)
sock_release(sock);
else
sockfd_put(sock);
-
- write_unlock(&sk->sk_callback_lock);
err:
return ret;
}
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 2e66598fac79..e8ebd343e2bf 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -452,6 +452,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
(status->encoding == RX_ENC_HE && streams > 8)))
return 0;
+ if (idx >= MCS_GROUP_RATES)
+ return 0;
+
duration = airtime_mcs_groups[group].duration[idx];
duration <<= airtime_mcs_groups[group].shift;
*overhead = 36 + (streams << 2);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c848fe04dd44..8f9a2ab502b3 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -576,7 +576,7 @@ static struct ieee80211_key *
ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id,
u8 key_idx, bool pairwise, const u8 *mac_addr)
{
- struct ieee80211_local *local = sdata->local;
+ struct ieee80211_local *local __maybe_unused = sdata->local;
struct ieee80211_link_data *link = &sdata->deflink;
struct ieee80211_key *key;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7c4ce716c939..d49a5906a943 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1849,8 +1849,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
- synchronize_net();
-
+ /* do_stop will synchronize_rcu() first thing */
ieee80211_do_stop(sdata, false);
ieee80211_teardown_sdata(sdata);
@@ -2179,6 +2178,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ndev->hw_features |= ndev->features &
MAC80211_SUPPORTED_FEATURES_TX;
+ sdata->vif.netdev_features = local->hw.netdev_features;
netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
@@ -2195,6 +2195,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = cfg80211_register_netdevice(ndev);
if (ret) {
+ ieee80211_if_free(ndev);
free_netdev(ndev);
return ret;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a804e0220ed7..0aee2392dd29 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3932,7 +3932,6 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
struct ieee80211_elems_parse_params parse_params = {
.start = elem_start,
.len = elem_len,
- .bss = cbss,
.link_id = link_id == assoc_data->assoc_link_id ? -1 : link_id,
.from_ap = true,
};
@@ -4017,6 +4016,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
parse_params.start = bss_ies->data;
parse_params.len = bss_ies->len;
+ parse_params.bss = cbss;
bss_elems = ieee802_11_parse_elems_full(&parse_params);
if (!bss_elems) {
ret = false;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c28c6fbf786e..7e3ab6e1b28f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2403,7 +2403,6 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx)
static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
{
- struct ieee80211_hdr *hdr = (void *)rx->skb->data;
struct sk_buff *skb = rx->skb;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
@@ -2414,31 +2413,6 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
if (status->flag & RX_FLAG_DECRYPTED)
return 0;
- /* check mesh EAPOL frames first */
- if (unlikely(rx->sta && ieee80211_vif_is_mesh(&rx->sdata->vif) &&
- ieee80211_is_data(fc))) {
- struct ieee80211s_hdr *mesh_hdr;
- u16 hdr_len = ieee80211_hdrlen(fc);
- u16 ethertype_offset;
- __be16 ethertype;
-
- if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr))
- goto drop_check;
-
- /* make sure fixed part of mesh header is there, also checks skb len */
- if (!pskb_may_pull(rx->skb, hdr_len + 6))
- goto drop_check;
-
- mesh_hdr = (struct ieee80211s_hdr *)(skb->data + hdr_len);
- ethertype_offset = hdr_len + ieee80211_get_mesh_hdrlen(mesh_hdr) +
- sizeof(rfc1042_header);
-
- if (skb_copy_bits(rx->skb, ethertype_offset, &ethertype, 2) == 0 &&
- ethertype == rx->sdata->control_port_protocol)
- return 0;
- }
-
-drop_check:
/* Drop unencrypted frames if key is set. */
if (unlikely(!ieee80211_has_protected(fc) &&
!ieee80211_is_any_nullfunc(fc) &&
@@ -2892,8 +2866,16 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
hdr = (struct ieee80211_hdr *) skb->data;
mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
- if (ieee80211_drop_unencrypted(rx, hdr->frame_control))
- return RX_DROP_MONITOR;
+ if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) {
+ int offset = hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr) +
+ sizeof(rfc1042_header);
+ __be16 ethertype;
+
+ if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr) ||
+ skb_copy_bits(rx->skb, offset, &ethertype, 2) != 0 ||
+ ethertype != rx->sdata->control_port_protocol)
+ return RX_DROP_MONITOR;
+ }
/* frame is in RMC, don't forward */
if (ieee80211_is_data(hdr->frame_control) &&
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 165ac0711d71..2171cd1ca807 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1343,7 +1343,7 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
return NULL;
txq = sta->sta.txq[tid];
- } else if (vif) {
+ } else {
txq = vif->txq;
}
@@ -1355,7 +1355,11 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
{
- IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
+ struct sk_buff *next;
+ codel_time_t now = codel_get_time();
+
+ skb_list_walk_safe(skb, skb, next)
+ IEEE80211_SKB_CB(skb)->control.enqueue_time = now;
}
static u32 codel_skb_len_func(const struct sk_buff *skb)
@@ -3578,55 +3582,79 @@ ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
return TX_CONTINUE;
}
-static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
- struct ieee80211_fast_tx *fast_tx,
- struct sk_buff *skb)
+static netdev_features_t
+ieee80211_sdata_netdev_features(struct ieee80211_sub_if_data *sdata)
{
- struct ieee80211_local *local = sdata->local;
- u16 ethertype = (skb->data[12] << 8) | skb->data[13];
- int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
- int hw_headroom = sdata->local->hw.extra_tx_headroom;
- struct ethhdr eth;
- struct ieee80211_tx_info *info;
- struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
- struct ieee80211_tx_data tx;
- ieee80211_tx_result r;
- struct tid_ampdu_tx *tid_tx = NULL;
- u8 tid = IEEE80211_NUM_TIDS;
+ if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
+ return sdata->vif.netdev_features;
- /* control port protocol needs a lot of special handling */
- if (cpu_to_be16(ethertype) == sdata->control_port_protocol)
- return false;
+ if (!sdata->bss)
+ return 0;
- /* only RFC 1042 SNAP */
- if (ethertype < ETH_P_802_3_MIN)
- return false;
+ sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
+ return sdata->vif.netdev_features;
+}
- /* don't handle TX status request here either */
- if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)
- return false;
+static struct sk_buff *
+ieee80211_tx_skb_fixup(struct sk_buff *skb, netdev_features_t features)
+{
+ if (skb_is_gso(skb)) {
+ struct sk_buff *segs;
- if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
- tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
- tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
- if (tid_tx) {
- if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state))
- return false;
- if (tid_tx->timeout)
- tid_tx->last_tx = jiffies;
- }
+ segs = skb_gso_segment(skb, features);
+ if (!segs)
+ return skb;
+ if (IS_ERR(segs))
+ goto free;
+
+ consume_skb(skb);
+ return segs;
}
- /* after this point (skb is modified) we cannot return false */
+ if (skb_needs_linearize(skb, features) && __skb_linearize(skb))
+ goto free;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ int ofs = skb_checksum_start_offset(skb);
+
+ if (skb->encapsulation)
+ skb_set_inner_transport_header(skb, ofs);
+ else
+ skb_set_transport_header(skb, ofs);
+
+ if (skb_csum_hwoffload_help(skb, features))
+ goto free;
+ }
+
+ skb_mark_not_on_list(skb);
+ return skb;
+
+free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct ieee80211_fast_tx *fast_tx,
+ struct sk_buff *skb, u8 tid, bool ampdu)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
+ struct ieee80211_tx_info *info;
+ struct ieee80211_tx_data tx;
+ ieee80211_tx_result r;
+ int hw_headroom = sdata->local->hw.extra_tx_headroom;
+ int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
+ struct ethhdr eth;
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
- return true;
+ return;
if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
- return true;
+ return;
/* will not be crypto-handled beyond what we do here, so use false
* as the may-encrypt argument for the resize to not account for
@@ -3635,10 +3663,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
if (unlikely(ieee80211_skb_resize(sdata, skb,
max_t(int, extra_head + hw_headroom -
skb_headroom(skb), 0),
- ENCRYPT_NO))) {
- kfree_skb(skb);
- return true;
- }
+ ENCRYPT_NO)))
+ goto free;
memcpy(&eth, skb->data, ETH_HLEN - 2);
hdr = skb_push(skb, extra_head);
@@ -3652,7 +3678,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
info->control.vif = &sdata->vif;
info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
IEEE80211_TX_CTL_DONTFRAG |
- (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
+ (ampdu ? IEEE80211_TX_CTL_AMPDU : 0);
info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT |
u32_encode_bits(IEEE80211_LINK_UNSPECIFIED,
IEEE80211_TX_CTRL_MLO_LINK);
@@ -3676,16 +3702,14 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
tx.key = fast_tx->key;
if (ieee80211_queue_skb(local, sdata, sta, skb))
- return true;
+ return;
tx.skb = skb;
r = ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
fast_tx->key, &tx);
tx.skb = NULL;
- if (r == TX_DROP) {
- kfree_skb(skb);
- return true;
- }
+ if (r == TX_DROP)
+ goto free;
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
@@ -3693,6 +3717,56 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
__skb_queue_tail(&tx.skbs, skb);
ieee80211_tx_frags(local, &sdata->vif, sta, &tx.skbs, false);
+ return;
+
+free:
+ kfree_skb(skb);
+}
+
+static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct ieee80211_fast_tx *fast_tx,
+ struct sk_buff *skb)
+{
+ u16 ethertype = (skb->data[12] << 8) | skb->data[13];
+ struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
+ struct tid_ampdu_tx *tid_tx = NULL;
+ struct sk_buff *next;
+ u8 tid = IEEE80211_NUM_TIDS;
+
+ /* control port protocol needs a lot of special handling */
+ if (cpu_to_be16(ethertype) == sdata->control_port_protocol)
+ return false;
+
+ /* only RFC 1042 SNAP */
+ if (ethertype < ETH_P_802_3_MIN)
+ return false;
+
+ /* don't handle TX status request here either */
+ if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)
+ return false;
+
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
+ if (tid_tx) {
+ if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state))
+ return false;
+ if (tid_tx->timeout)
+ tid_tx->last_tx = jiffies;
+ }
+ }
+
+ /* after this point (skb is modified) we cannot return false */
+ skb = ieee80211_tx_skb_fixup(skb, ieee80211_sdata_netdev_features(sdata));
+ if (!skb)
+ return true;
+
+ skb_list_walk_safe(skb, skb, next) {
+ skb_mark_not_on_list(skb);
+ __ieee80211_xmit_fast(sdata, sta, fast_tx, skb, tid, tid_tx);
+ }
+
return true;
}
@@ -4192,31 +4266,14 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
goto out;
}
- if (skb_is_gso(skb)) {
- struct sk_buff *segs;
-
- segs = skb_gso_segment(skb, 0);
- if (IS_ERR(segs)) {
- goto out_free;
- } else if (segs) {
- consume_skb(skb);
- skb = segs;
- }
- } else {
- /* we cannot process non-linear frames on this path */
- if (skb_linearize(skb))
- goto out_free;
-
- /* the frame could be fragmented, software-encrypted, and other
- * things so we cannot really handle checksum offload with it -
- * fix it up in software before we handle anything else.
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- skb_set_transport_header(skb,
- skb_checksum_start_offset(skb));
- if (skb_checksum_help(skb))
- goto out_free;
- }
+ /* the frame could be fragmented, software-encrypted, and other
+ * things so we cannot really handle checksum or GSO offload.
+ * fix it up in software before we handle anything else.
+ */
+ skb = ieee80211_tx_skb_fixup(skb, 0);
+ if (!skb) {
+ len = 0;
+ goto out;
}
skb_list_walk_safe(skb, skb, next) {
@@ -4434,9 +4491,11 @@ normal:
return NETDEV_TX_OK;
}
-static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, struct sta_info *sta,
- bool txpending)
+
+
+static bool __ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, struct sta_info *sta,
+ bool txpending)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_control control = {};
@@ -4445,14 +4504,6 @@ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
unsigned long flags;
int q = info->hw_queue;
- if (sta)
- sk_pacing_shift_update(skb->sk, local->hw.tx_sk_pacing_shift);
-
- ieee80211_tpt_led_trig_tx(local, skb->len);
-
- if (ieee80211_queue_skb(local, sdata, sta, skb))
- return true;
-
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
if (local->queue_stop_reasons[q] ||
@@ -4479,6 +4530,26 @@ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
return true;
}
+static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, struct sta_info *sta,
+ bool txpending)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct sk_buff *next;
+ bool ret = true;
+
+ if (ieee80211_queue_skb(local, sdata, sta, skb))
+ return true;
+
+ skb_list_walk_safe(skb, skb, next) {
+ skb_mark_not_on_list(skb);
+ if (!__ieee80211_tx_8023(sdata, skb, sta, txpending))
+ ret = false;
+ }
+
+ return ret;
+}
+
static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
struct net_device *dev, struct sta_info *sta,
struct ieee80211_key *key, struct sk_buff *skb)
@@ -4486,9 +4557,13 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_info *info;
struct ieee80211_local *local = sdata->local;
struct tid_ampdu_tx *tid_tx;
+ struct sk_buff *seg, *next;
+ unsigned int skbs = 0, len = 0;
+ u16 queue;
u8 tid;
- skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, sta, skb));
+ queue = ieee80211_select_queue(sdata, sta, skb);
+ skb_set_queue_mapping(skb, queue);
if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)) &&
test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
@@ -4498,9 +4573,6 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
if (unlikely(!skb))
return;
- info = IEEE80211_SKB_CB(skb);
- memset(info, 0, sizeof(*info));
-
ieee80211_aggr_check(sdata, sta, skb);
tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
@@ -4514,22 +4586,20 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
return;
}
- info->flags |= IEEE80211_TX_CTL_AMPDU;
if (tid_tx->timeout)
tid_tx->last_tx = jiffies;
}
- if (unlikely(skb->sk &&
- skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
- info->ack_frame_id = ieee80211_store_ack_skb(local, skb,
- &info->flags, NULL);
-
- info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+ skb = ieee80211_tx_skb_fixup(skb, ieee80211_sdata_netdev_features(sdata));
+ if (!skb)
+ return;
- dev_sw_netstats_tx_add(dev, 1, skb->len);
+ info = IEEE80211_SKB_CB(skb);
+ memset(info, 0, sizeof(*info));
+ if (tid_tx)
+ info->flags |= IEEE80211_TX_CTL_AMPDU;
- sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
- sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++;
+ info->hw_queue = sdata->vif.hw_queue[queue];
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
@@ -4541,6 +4611,24 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
if (key)
info->control.hw_key = &key->conf;
+ skb_list_walk_safe(skb, seg, next) {
+ skbs++;
+ len += seg->len;
+ if (seg != skb)
+ memcpy(IEEE80211_SKB_CB(seg), info, sizeof(*info));
+ }
+
+ if (unlikely(skb->sk &&
+ skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
+ info->ack_frame_id = ieee80211_store_ack_skb(local, skb,
+ &info->flags, NULL);
+
+ dev_sw_netstats_tx_add(dev, skbs, len);
+ sta->deflink.tx_stats.packets[queue] += skbs;
+ sta->deflink.tx_stats.bytes[queue] += len;
+
+ ieee80211_tpt_led_trig_tx(local, len);
+
ieee80211_tx_8023(sdata, skb, sta, false);
return;
@@ -4582,6 +4670,7 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
key->conf.cipher == WLAN_CIPHER_SUITE_TKIP))
goto skip_offload;
+ sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift);
ieee80211_8023_xmit(sdata, dev, sta, key, skb);
goto out;
@@ -4774,9 +4863,9 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
ps->dtim_count--;
}
- tim = pos = skb_put(skb, 6);
+ tim = pos = skb_put(skb, 5);
*pos++ = WLAN_EID_TIM;
- *pos++ = 4;
+ *pos++ = 3;
*pos++ = ps->dtim_count;
*pos++ = link_conf->dtim_period;
@@ -4807,13 +4896,17 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
/* Bitmap control */
*pos++ = n1 | aid0;
/* Part Virt Bitmap */
- skb_put(skb, n2 - n1);
- memcpy(pos, ps->tim + n1, n2 - n1 + 1);
+ skb_put_data(skb, ps->tim + n1, n2 - n1 + 1);
tim[1] = n2 - n1 + 4;
} else {
*pos++ = aid0; /* Bitmap control */
- *pos++ = 0; /* Part Virt Bitmap */
+
+ if (ieee80211_get_link_sband(link)->band != NL80211_BAND_S1GHZ) {
+ tim[1] = 4;
+ /* Part Virt Bitmap */
+ skb_put_u8(skb, 0);
+ }
}
}
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index d9b50884d34e..ac0b28025fb0 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -254,7 +254,6 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata,
enum nl802154_iftype iftype)
{
struct ieee802154_local *local = sdata->local;
- struct wpan_dev *wpan_dev = &sdata->wpan_dev;
struct ieee802154_sub_if_data *nsdata;
/* we hold the RTNL here so can safely walk the list */
@@ -262,13 +261,13 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata,
if (nsdata != sdata && ieee802154_sdata_running(nsdata)) {
int ret;
- /* TODO currently we don't support multiple node types
- * we need to run skb_clone at rx path. Check if there
- * exist really an use case if we need to support
- * multiple node types at the same time.
+ /* TODO currently we don't support multiple node/coord
+ * types we need to run skb_clone at rx path. Check if
+ * there exist really an use case if we need to support
+ * multiple node/coord types at the same time.
*/
- if (wpan_dev->iftype == NL802154_IFTYPE_NODE &&
- nsdata->wpan_dev.iftype == NL802154_IFTYPE_NODE)
+ if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR &&
+ nsdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR)
return -EBUSY;
/* check all phy mac sublayer settings are the same.
@@ -565,6 +564,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
switch (type) {
+ case NL802154_IFTYPE_COORD:
case NL802154_IFTYPE_NODE:
ieee802154_be64_to_le64(&wpan_dev->extended_addr,
sdata->dev->dev_addr);
@@ -624,6 +624,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
ieee802154_le64_to_be64(ndev->perm_addr,
&local->hw.phy->perm_extended_addr);
switch (type) {
+ case NL802154_IFTYPE_COORD:
case NL802154_IFTYPE_NODE:
ndev->type = ARPHRD_IEEE802154;
if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) {
@@ -650,6 +651,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
sdata->dev = ndev;
sdata->wpan_dev.wpan_phy = local->hw.phy;
sdata->local = local;
+ INIT_LIST_HEAD(&sdata->wpan_dev.list);
/* setup type-dependent data */
ret = ieee802154_setup_sdata(sdata, type);
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 40fab08df24b..3ed31daf7b9c 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -107,7 +107,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE;
/* always supported */
- phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE);
+ phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE) | BIT(NL802154_IFTYPE_COORD);
return &local->hw;
}
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 0724aac8f48c..c2aae2a6d6a6 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -208,6 +208,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
int ret;
struct ieee802154_sub_if_data *sdata;
struct ieee802154_hdr hdr;
+ struct sk_buff *skb2;
ret = ieee802154_parse_frame_start(skb, &hdr);
if (ret) {
@@ -217,7 +218,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
}
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->wpan_dev.iftype != NL802154_IFTYPE_NODE)
+ if (sdata->wpan_dev.iftype == NL802154_IFTYPE_MONITOR)
continue;
if (!ieee802154_sdata_running(sdata))
@@ -230,12 +231,12 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
sdata->required_filtering == IEEE802154_FILTERING_4_FRAME_FIELDS)
continue;
- ieee802154_subif_frame(sdata, skb, &hdr);
- skb = NULL;
- break;
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2) {
+ skb2->dev = sdata->dev;
+ ieee802154_subif_frame(sdata, skb2, &hdr);
+ }
}
-
- kfree_skb(skb);
}
static void
@@ -274,7 +275,7 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb)
WARN_ON_ONCE(softirq_count() == 0);
if (local->suspended)
- goto drop;
+ goto free_skb;
/* TODO: When a transceiver omits the checksum here, we
* add an own calculated one. This is currently an ugly
@@ -292,20 +293,17 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb)
/* Level 1 filtering: Check the FCS by software when relevant */
if (local->hw.phy->filtering == IEEE802154_FILTERING_NONE) {
crc = crc_ccitt(0, skb->data, skb->len);
- if (crc) {
- rcu_read_unlock();
+ if (crc)
goto drop;
- }
}
/* remove crc */
skb_trim(skb, skb->len - 2);
__ieee802154_rx_handle_packet(local, skb);
- rcu_read_unlock();
-
- return;
drop:
+ rcu_read_unlock();
+free_skb:
kfree_skb(skb);
}
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index df855c33daf2..689396d6c76a 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -264,6 +264,31 @@ TRACE_EVENT(802154_drv_set_promiscuous_mode,
BOOL_TO_STR(__entry->on))
);
+TRACE_EVENT(802154_new_scan_event,
+ TP_PROTO(struct ieee802154_coord_desc *desc),
+ TP_ARGS(desc),
+ TP_STRUCT__entry(
+ __field(__le16, pan_id)
+ __field(__le64, addr)
+ __field(u8, channel)
+ __field(u8, page)
+ ),
+ TP_fast_assign(
+ __entry->page = desc->page;
+ __entry->channel = desc->channel;
+ __entry->pan_id = desc->addr.pan_id;
+ __entry->addr = desc->addr.extended_addr;
+ ),
+ TP_printk("panid: %u, coord_addr: 0x%llx, page: %u, channel: %u",
+ __le16_to_cpu(__entry->pan_id), __le64_to_cpu(__entry->addr),
+ __entry->page, __entry->channel)
+);
+
+DEFINE_EVENT(802154_new_scan_event, 802154_scan_event,
+ TP_PROTO(struct ieee802154_coord_desc *desc),
+ TP_ARGS(desc)
+);
+
#endif /* !__MAC802154_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 6e7df47c9584..a3829ce548f9 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
new file mode 100644
index 000000000000..d237d142171c
--- /dev/null
+++ b/net/mptcp/fastopen.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* MPTCP Fast Open Mechanism
+ *
+ * Copyright (c) 2021-2022, Dmytro SHYTYI
+ */
+
+#include "protocol.h"
+
+void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req)
+{
+ struct sock *ssk = subflow->tcp_sock;
+ struct sock *sk = subflow->conn;
+ struct sk_buff *skb;
+ struct tcp_sock *tp;
+
+ tp = tcp_sk(ssk);
+
+ subflow->is_mptfo = 1;
+
+ skb = skb_peek(&ssk->sk_receive_queue);
+ if (WARN_ON_ONCE(!skb))
+ return;
+
+ /* dequeue the skb from sk receive queue */
+ __skb_unlink(skb, &ssk->sk_receive_queue);
+ skb_ext_reset(skb);
+ skb_orphan(skb);
+
+ /* We copy the fastopen data, but that don't belong to the mptcp sequence
+ * space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset()
+ */
+ tp->copied_seq += skb->len;
+ subflow->ssn_offset += skb->len;
+
+ /* initialize a dummy sequence number, we will update it at MPC
+ * completion, if needed
+ */
+ MPTCP_SKB_CB(skb)->map_seq = -skb->len;
+ MPTCP_SKB_CB(skb)->end_seq = 0;
+ MPTCP_SKB_CB(skb)->offset = 0;
+ MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+
+ mptcp_data_lock(sk);
+
+ mptcp_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+
+ sk->sk_data_ready(sk);
+
+ mptcp_data_unlock(sk);
+}
+
+void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct sk_buff *skb;
+
+ mptcp_data_lock(sk);
+ skb = skb_peek_tail(&sk->sk_receive_queue);
+ if (skb) {
+ WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
+ pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk,
+ MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
+ MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
+ MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
+ MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
+ }
+
+ pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
+ mptcp_data_unlock(sk);
+}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 30d289044e71..5ded85e2c374 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
{
u8 subtype = *ptr >> 4;
int expected_opsize;
+ u16 subopt;
u8 version;
u8 flags;
u8 i;
@@ -38,11 +39,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
else
expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
+ subopt = OPTION_MPTCP_MPC_ACK;
} else {
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) {
expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
- else
+ subopt = OPTION_MPTCP_MPC_SYNACK;
+ } else {
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
+ subopt = OPTION_MPTCP_MPC_SYN;
+ }
}
/* Cfr RFC 8684 Section 3.3.0:
@@ -85,7 +90,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
- mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
+ mp_opt->suboptions |= subopt;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
ptr += 8;
@@ -934,7 +939,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
!subflow->request_join)
tcp_send_ack(ssk);
- goto fully_established;
+ goto check_notify;
}
/* we must process OoO packets before the first subflow is fully
@@ -945,17 +950,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
if (subflow->mp_join)
goto reset;
+ if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ goto set_fully_established;
return subflow->mp_capable;
}
- if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
- ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
+ if (subflow->remote_key_valid &&
+ (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
+ ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
- goto fully_established;
+ goto check_notify;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -974,11 +982,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
-fully_established:
+check_notify:
/* if the subflow is not already linked into the conn_list, we can't
* notify the PM: this subflow is still on the listener queue
* and the PM possibly acquiring the subflow lock could race with
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d66fbd558263..2ea7eae43bdb 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1029,6 +1029,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
+
return 0;
}
@@ -1188,7 +1190,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) {
if (!require_family)
- return err;
+ return 0;
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing family");
@@ -1222,7 +1224,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
- return err;
+ return 0;
}
int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
@@ -2092,7 +2094,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
void mptcp_event_addr_announced(const struct sock *ssk,
@@ -2149,7 +2151,59 @@ void mptcp_event_addr_announced(const struct sock *ssk,
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
+}
+
+void mptcp_event_pm_listener(const struct sock *ssk,
+ enum mptcp_event_type event)
+{
+ const struct inet_sock *issk = inet_sk(ssk);
+ struct net *net = sock_net(ssk);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, event);
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family))
+ goto nla_put_failure;
+
+ if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport))
+ goto nla_put_failure;
+
+ switch (ssk->sk_family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr))
+ goto nla_put_failure;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6: {
+ const struct ipv6_pinfo *np = inet6_sk(ssk);
+
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr))
+ goto nla_put_failure;
+ break;
+ }
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(skb, nlh);
+ mptcp_nl_mcast_send(net, skb, GFP_KERNEL);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
}
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
@@ -2197,6 +2251,9 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
if (mptcp_event_sub_closed(skb, msk, ssk) < 0)
goto nla_put_failure;
break;
+ case MPTCP_EVENT_LISTENER_CREATED:
+ case MPTCP_EVENT_LISTENER_CLOSED:
+ break;
}
genlmsg_end(skb, nlh);
@@ -2204,7 +2261,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
static const struct genl_small_ops mptcp_pm_ops[] = {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3796d1bfef6b..f6f93957275b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -36,15 +36,6 @@ struct mptcp6_sock {
};
#endif
-struct mptcp_skb_cb {
- u64 map_seq;
- u64 end_seq;
- u32 offset;
- u8 has_rxtstamp:1;
-};
-
-#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
-
enum {
MPTCP_CMSG_TS = BIT(0),
MPTCP_CMSG_INQ = BIT(1),
@@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb)
mptcp_rmem_uncharge(sk, len);
}
-static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk;
@@ -1711,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int ret = 0;
long timeo;
- /* we don't support FASTOPEN yet */
- if (msg->msg_flags & MSG_FASTOPEN)
- return -EOPNOTSUPP;
-
/* silently ignore everything else */
- msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
+ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN;
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
+ if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
+ msg->msg_flags & MSG_FASTOPEN))) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
@@ -2352,12 +2340,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out;
}
- /* if we are invoked by the msk cleanup code, the subflow is
- * already orphaned
- */
- if (ssk->sk_socket)
- sock_orphan(ssk);
-
+ sock_orphan(ssk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2372,6 +2355,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
tcp_set_state(ssk, TCP_CLOSE);
mptcp_subflow_queue_clean(ssk);
inet_csk_listen_stop(ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
__tcp_close(ssk, 0);
@@ -2940,7 +2924,11 @@ cleanup:
if (ssk == msk->first)
subflow->fail_tout = 0;
- sock_orphan(ssk);
+ /* detach from the parent socket, but allow data_ready to
+ * push incoming data into the mptcp stack, to properly ack it
+ */
+ ssk->sk_socket = NULL;
+ ssk->sk_wq = NULL;
unlock_sock_fast(ssk, slow);
}
sock_orphan(sk);
@@ -3049,7 +3037,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
struct mptcp_sock *msk;
- u64 ack_seq;
if (!nsk)
return NULL;
@@ -3075,15 +3062,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
- if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
- msk->can_ack = true;
- msk->remote_key = mp_opt->sndr_key;
- mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
- ack_seq++;
- WRITE_ONCE(msk->ack_seq, ack_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
- }
-
sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV);
@@ -3356,7 +3334,6 @@ void mptcp_finish_connect(struct sock *ssk)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
struct sock *sk;
- u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk);
sk = subflow->conn;
@@ -3364,22 +3341,16 @@ void mptcp_finish_connect(struct sock *ssk)
pr_debug("msk=%p, token=%u", sk, subflow->token);
- mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
- ack_seq++;
- subflow->map_seq = ack_seq;
+ subflow->map_seq = subflow->iasn;
subflow->map_subflow_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
*/
- WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0);
@@ -3677,6 +3648,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (!err)
mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
+
unlock:
release_sock(sock->sk);
return err;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6a09ab99a12d..955fb3d88eb3 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -126,6 +126,15 @@
#define MPTCP_CONNECTED 6
#define MPTCP_RESET_SCHEDULER 7
+struct mptcp_skb_cb {
+ u64 map_seq;
+ u64 end_seq;
+ u32 offset;
+ u8 has_rxtstamp:1;
+};
+
+#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+
static inline bool before64(__u64 seq1, __u64 seq2)
{
return (__s64)(seq1 - seq2) < 0;
@@ -467,17 +476,22 @@ struct mptcp_subflow_context {
send_fastclose : 1,
send_infinite_map : 1,
rx_eof : 1,
- can_ack : 1, /* only after processing the remote a key */
+ remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
local_id_valid : 1, /* local_id is correctly initialized */
- valid_csum_seen : 1; /* at least one csum validated */
+ valid_csum_seen : 1, /* at least one csum validated */
+ is_mptfo : 1, /* subflow is doing TFO */
+ __unused : 8;
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
u32 remote_token;
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ union {
+ u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
+ u64 iasn; /* initial ack sequence number, MPC subflows only */
+ };
u8 local_id;
u8 remote_id;
u8 reset_seen:1;
@@ -603,7 +617,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- struct mptcp_options_received *mp_opt);
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -619,6 +633,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
+void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
@@ -824,8 +839,15 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
+void mptcp_event_pm_listener(const struct sock *ssk,
+ enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
+void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
+void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req);
+
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) &
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index f62f6483ef77..d4b1e6ec1b36 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -559,7 +559,9 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY:
case TCP_INQ:
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
return true;
}
@@ -569,9 +571,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
* TCP_REPAIR_WINDOW are not supported, better avoid this mess
*/
- /* TCP_FASTOPEN_KEY, TCP_FASTOPEN are not supported because
- * fastopen for the listener side is currently unsupported
- */
}
return false;
}
@@ -741,7 +740,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
}
release_sock(sk);
- return err;
+ return 0;
}
static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
@@ -801,7 +800,9 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
return 0;
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
@@ -1166,7 +1167,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_INFO:
case TCP_CC_INFO:
case TCP_DEFER_ACCEPT:
+ case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
+ case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 437a283ba6ea..29904303f5c2 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -307,7 +307,48 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
return NULL;
}
+static void subflow_prep_synack(const struct sock *sk, struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ /* clear tstamp_ok, as needed depending on cookie */
+ if (foc && foc->len > -1)
+ ireq->tstamp_ok = 0;
+
+ if (synack_type == TCP_SYNACK_FASTOPEN)
+ mptcp_fastopen_subflow_synack_set_params(subflow, req);
+}
+
+static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+ struct flowi *fl,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
+{
+ subflow_prep_synack(sk, req, foc, synack_type);
+
+ return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc,
+ synack_type, syn_skb);
+}
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+ struct flowi *fl,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
+{
+ subflow_prep_synack(sk, req, foc, synack_type);
+
+ return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc,
+ synack_type, syn_skb);
+}
+
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
@@ -392,11 +433,33 @@ static void mptcp_set_connected(struct sock *sk)
mptcp_data_unlock(sk);
}
+static void subflow_set_remote_key(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ /* active MPC subflow will reach here multiple times:
+ * at subflow_finish_connect() time and at 4th ack time
+ */
+ if (subflow->remote_key_valid)
+ return;
+
+ subflow->remote_key_valid = 1;
+ subflow->remote_key = mp_opt->sndr_key;
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn);
+ subflow->iasn++;
+
+ WRITE_ONCE(msk->remote_key, subflow->remote_key);
+ WRITE_ONCE(msk->ack_seq, subflow->iasn);
+ WRITE_ONCE(msk->can_ack, true);
+ atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -404,6 +467,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ msk = mptcp_sk(parent);
mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
@@ -416,19 +480,16 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
- pr_fallback(mptcp_sk(subflow->conn));
+ pr_fallback(msk);
goto fallback;
}
if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
- WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+ WRITE_ONCE(msk->csum_enabled, true);
if (mp_opt.deny_join_id0)
- WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow->can_ack = 1;
- subflow->remote_key = mp_opt.sndr_key;
- pr_debug("subflow=%p, remote_key=%llu", subflow,
- subflow->remote_key);
+ subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
mptcp_set_connected(parent);
@@ -466,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
- if (subflow_use_different_dport(mptcp_sk(parent), sk)) {
+ if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),
ntohs(inet_sk(parent)->inet_dport));
@@ -474,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
+ mptcp_rcv_space_init(msk, sk);
mptcp_set_connected(parent);
}
return;
@@ -637,14 +698,16 @@ static void subflow_drop_ctx(struct sock *ssk)
}
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- struct mptcp_options_received *mp_opt)
+ const struct mptcp_options_received *mp_opt)
{
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- subflow->remote_key = mp_opt->sndr_key;
+ subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
- subflow->can_ack = 1;
WRITE_ONCE(msk->fully_established, true);
+
+ if (subflow->is_mptfo)
+ mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -760,7 +823,7 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- if (mp_opt.suboptions & OPTIONS_MPTCP_MPC)
+ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK)
mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -1198,16 +1261,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
if (WARN_ON_ONCE(!skb))
goto no_data;
- /* if msk lacks the remote key, this subflow must provide an
- * MP_CAPABLE-based mapping
- */
- if (unlikely(!READ_ONCE(msk->can_ack))) {
- if (!subflow->mpc_map)
- goto fallback;
- WRITE_ONCE(msk->remote_key, subflow->remote_key);
- WRITE_ONCE(msk->ack_seq, subflow->map_seq);
- WRITE_ONCE(msk->can_ack, true);
- }
+ if (unlikely(!READ_ONCE(msk->can_ack)))
+ goto fallback;
old_ack = READ_ONCE(msk->ack_seq);
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
@@ -1480,6 +1535,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
+ subflow->remote_key_valid = 1;
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
@@ -1747,16 +1803,16 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk)
for (msk = head; msk; msk = next) {
struct sock *sk = (struct sock *)msk;
- bool slow, do_cancel_work;
+ bool do_cancel_work;
sock_hold(sk);
- slow = lock_sock_fast_nested(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
next = msk->dl_next;
msk->first = NULL;
msk->dl_next = NULL;
do_cancel_work = __mptcp_close(sk, 0);
- unlock_sock_fast(sk, slow);
+ release_sock(sk);
if (do_cancel_work)
mptcp_cancel_work(sk);
sock_put(sk);
@@ -1873,6 +1929,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
+ new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->remote_id = subflow_req->remote_id;
new_ctx->token = subflow_req->token;
@@ -1929,6 +1986,7 @@ void __init mptcp_subflow_init(void)
subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;
+ subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack;
subflow_specific = ipv4_specific;
subflow_specific.conn_request = subflow_v4_conn_request;
@@ -1942,6 +2000,7 @@ void __init mptcp_subflow_init(void)
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
+ subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack;
subflow_v6_specific = ipv6_specific;
subflow_v6_specific.conn_request = subflow_v6_conn_request;
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index dda8b76b7798..fd2236ee9a79 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -228,7 +228,8 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb,
len += max(payload, padding_bytes);
cmd = skb_put_zero(skb, len);
- memcpy(&cmd->mfr_id, nca->data, nca->payload);
+ unsafe_memcpy(&cmd->mfr_id, nca->data, nca->payload,
+ /* skb allocated with enough to load the payload */);
ncsi_cmd_build_header(&cmd->cmd.common, nca);
return 0;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0846bd75b1da..f71b41c7ce2f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -459,6 +459,9 @@ config NF_NAT_REDIRECT
config NF_NAT_MASQUERADE
bool
+config NF_NAT_OVS
+ bool
+
config NETFILTER_SYNPROXY
tristate
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1d4db1943936..3754eb06fb41 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o
obj-$(CONFIG_NF_NAT) += nf_nat.o
nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
+nf_nat-$(CONFIG_NF_NAT_OVS) += nf_nat_ovs.o
ifeq ($(CONFIG_NF_NAT),m)
nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index fdb5225a3e5c..7c2399541771 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -928,7 +928,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
#ifdef IP_SET_HASH_WITH_MULTI
if (h->bucketsize >= AHASH_MAX_TUNED)
goto set_full;
- else if (h->bucketsize < multi)
+ else if (h->bucketsize <= multi)
h->bucketsize += AHASH_INIT_SIZE;
#endif
if (n->size >= AHASH_MAX(h)) {
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 556f74268a28..e30513cefd90 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -153,18 +153,16 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
return -ERANGE;
- if (retried) {
+ if (retried)
ip = ntohl(h->next.ip);
- e.ip = htonl(ip);
- }
for (; ip <= ip_to;) {
+ e.ip = htonl(ip);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
ip += hosts;
- e.ip = htonl(ip);
- if (e.ip == 0)
+ if (ip == 0)
return 0;
ret = 0;
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 8639e7efd0e2..24002bc61e07 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -191,19 +191,16 @@ BTF_ID(struct, nf_conn___init)
/* Check writes into `struct nf_conn` */
static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
- const struct btf *btf,
- const struct btf_type *t, int off,
- int size, enum bpf_access_type atype,
- u32 *next_btf_id,
- enum bpf_type_flag *flag)
+ const struct bpf_reg_state *reg,
+ int off, int size, enum bpf_access_type atype,
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
- const struct btf_type *ncit;
- const struct btf_type *nct;
+ const struct btf_type *ncit, *nct, *t;
size_t end;
- ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
- nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);
-
+ ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
+ nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
if (t != nct && t != ncit) {
bpf_log(log, "only read is supported\n");
return -EACCES;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 057ebdcc25d7..5c3cf0834af0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -887,7 +887,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone = nf_ct_zone(ct);
if (!nf_ct_ext_valid_pre(ct->ext)) {
- NF_CT_STAT_INC(net, insert_failed);
+ NF_CT_STAT_INC_ATOMIC(net, insert_failed);
return -ETIMEDOUT;
}
@@ -934,7 +934,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (!nf_ct_ext_valid_post(ct->ext)) {
nf_ct_kill(ct);
- NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return -ETIMEDOUT;
}
@@ -1271,7 +1271,7 @@ chaintoolong:
*/
if (!nf_ct_ext_valid_post(ct->ext)) {
nf_ct_kill(ct);
- NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return NF_DROP;
}
@@ -1777,7 +1777,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- ct->mark = exp->master->mark;
+ ct->mark = READ_ONCE(exp->master->mark);
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
ct->secmark = exp->master->secmark;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7562b215b932..1286ae7d4609 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -330,7 +330,12 @@ nla_put_failure:
#ifdef CONFIG_NF_CONNTRACK_MARK
static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
{
- if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark)))
+ u32 mark = READ_ONCE(ct->mark);
+
+ if (!mark)
+ return 0;
+
+ if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
goto nla_put_failure;
return 0;
@@ -826,8 +831,8 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- if ((events & (1 << IPCT_MARK) || ct->mark)
- && ctnetlink_dump_mark(skb, ct) < 0)
+ if (events & (1 << IPCT_MARK) &&
+ ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
nlmsg_end(skb, nlh);
@@ -1154,7 +1159,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- if ((ct->mark & filter->mark.mask) != filter->mark.val)
+ if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val)
goto ignore_entry;
#endif
status = (u32)READ_ONCE(ct->status);
@@ -2002,9 +2007,9 @@ static void ctnetlink_change_mark(struct nf_conn *ct,
mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
mark = ntohl(nla_get_be32(cda[CTA_MARK]));
- newmark = (ct->mark & mask) ^ mark;
- if (newmark != ct->mark)
- ct->mark = newmark;
+ newmark = (READ_ONCE(ct->mark) & mask) ^ mark;
+ if (newmark != READ_ONCE(ct->mark))
+ WRITE_ONCE(ct->mark, newmark);
}
#endif
@@ -2730,7 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_MARK
- if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0)
+ if (ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index a884c06abf09..0250725e38a4 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -366,7 +366,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
#if defined(CONFIG_NF_CONNTRACK_MARK)
- seq_printf(s, "mark=%u ", ct->mark);
+ seq_printf(s, "mark=%u ", READ_ONCE(ct->mark));
#endif
ct_show_secctx(s, ct);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b04645ced89b..0fdcdb2c9ae4 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -997,13 +997,13 @@ static void flow_offload_queue_work(struct flow_offload_work *offload)
struct net *net = read_pnet(&offload->flowtable->net);
if (offload->cmd == FLOW_CLS_REPLACE) {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_add);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add);
queue_work(nf_flow_offload_add_wq, &offload->work);
} else if (offload->cmd == FLOW_CLS_DESTROY) {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_del);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del);
queue_work(nf_flow_offload_del_wq, &offload->work);
} else {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_stats);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats);
queue_work(nf_flow_offload_stats_wq, &offload->work);
}
}
@@ -1098,6 +1098,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
struct flow_block_cb *block_cb, *next;
int err = 0;
+ down_write(&flowtable->flow_block_lock);
switch (cmd) {
case FLOW_BLOCK_BIND:
list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
@@ -1112,6 +1113,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
WARN_ON_ONCE(1);
err = -EOPNOTSUPP;
}
+ up_write(&flowtable->flow_block_lock);
return err;
}
@@ -1168,7 +1170,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
extack);
+ down_write(&flowtable->flow_block_lock);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
+ up_write(&flowtable->flow_block_lock);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_nat_ovs.c b/net/netfilter/nf_nat_ovs.c
new file mode 100644
index 000000000000..551abd2da614
--- /dev/null
+++ b/net/netfilter/nf_nat_ovs.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Support nat functions for openvswitch and used by OVS and TC conntrack. */
+
+#include <net/netfilter/nf_nat.h>
+
+/* Modelled after nf_nat_ipv[46]_fn().
+ * range is only used for new, uninitialized NAT state.
+ * Returns either NF_ACCEPT or NF_DROP.
+ */
+static int nf_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, int *action,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype)
+{
+ __be16 proto = skb_protocol(skb, true);
+ int hooknum, err = NF_ACCEPT;
+
+ /* See HOOK2MANIP(). */
+ if (maniptype == NF_NAT_MANIP_SRC)
+ hooknum = NF_INET_LOCAL_IN; /* Source NAT */
+ else
+ hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ if (proto == htons(ETH_P_IP) &&
+ ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ hooknum))
+ err = NF_DROP;
+ goto out;
+ } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
+ __be16 frag_off;
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ int hdrlen = ipv6_skip_exthdr(skb,
+ sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct,
+ ctinfo,
+ hooknum,
+ hdrlen))
+ err = NF_DROP;
+ goto out;
+ }
+ }
+ /* Non-ICMP, fall thru to initialize if needed. */
+ fallthrough;
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ /* Initialize according to the NAT action. */
+ err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
+ /* Action is set up to establish a new
+ * mapping.
+ */
+ ? nf_nat_setup_info(ct, range, maniptype)
+ : nf_nat_alloc_null_binding(ct, hooknum);
+ if (err != NF_ACCEPT)
+ goto out;
+ }
+ break;
+
+ case IP_CT_ESTABLISHED:
+ case IP_CT_ESTABLISHED_REPLY:
+ break;
+
+ default:
+ err = NF_DROP;
+ goto out;
+ }
+
+ err = nf_nat_packet(ct, ctinfo, hooknum, skb);
+ if (err == NF_ACCEPT)
+ *action |= BIT(maniptype);
+out:
+ return err;
+}
+
+int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, int *action,
+ const struct nf_nat_range2 *range, bool commit)
+{
+ enum nf_nat_manip_type maniptype;
+ int err, ct_action = *action;
+
+ *action = 0;
+
+ /* Add NAT extension if not confirmed yet. */
+ if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
+ return NF_DROP; /* Can't NAT. */
+
+ if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
+ (ctinfo != IP_CT_RELATED || commit)) {
+ /* NAT an established or related connection like before. */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
+ /* This is the REPLY direction for a connection
+ * for which NAT was applied in the forward
+ * direction. Do the reverse NAT.
+ */
+ maniptype = ct->status & IPS_SRC_NAT
+ ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
+ else
+ maniptype = ct->status & IPS_SRC_NAT
+ ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
+ } else if (ct_action & BIT(NF_NAT_MANIP_SRC)) {
+ maniptype = NF_NAT_MANIP_SRC;
+ } else if (ct_action & BIT(NF_NAT_MANIP_DST)) {
+ maniptype = NF_NAT_MANIP_DST;
+ } else {
+ return NF_ACCEPT;
+ }
+
+ err = nf_ct_nat_execute(skb, ct, ctinfo, action, range, maniptype);
+ if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+ if (ct->status & IPS_SRC_NAT) {
+ if (maniptype == NF_NAT_MANIP_SRC)
+ maniptype = NF_NAT_MANIP_DST;
+ else
+ maniptype = NF_NAT_MANIP_SRC;
+
+ err = nf_ct_nat_execute(skb, ct, ctinfo, action, range,
+ maniptype);
+ } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ err = nf_ct_nat_execute(skb, ct, ctinfo, action, NULL,
+ NF_NAT_MANIP_SRC);
+ }
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 833850a4780f..832b881f7c17 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6006,7 +6006,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
&timeout);
if (err)
return err;
- } else if (set->flags & NFT_SET_TIMEOUT) {
+ } else if (set->flags & NFT_SET_TIMEOUT &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END)) {
timeout = set->timeout;
}
@@ -6072,7 +6073,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -EOPNOTSUPP;
goto err_set_elem_expr;
}
- } else if (set->num_exprs > 0) {
+ } else if (set->num_exprs > 0 &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END)) {
err = nft_set_elem_expr_clone(ctx, set, expr_array);
if (err < 0)
goto err_set_elem_expr_clone;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a0696d7ea10c..c68e2151defe 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -98,7 +98,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
return;
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
- *dest = ct->mark;
+ *dest = READ_ONCE(ct->mark);
return;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
@@ -297,8 +297,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
- if (ct->mark != value) {
- ct->mark = value;
+ if (READ_ONCE(ct->mark) != value) {
+ WRITE_ONCE(ct->mark, value);
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 4f9299b9dcdd..06d46d182634 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_field *f;
+ const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
@@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
}
/* Validate */
+ start_p = start;
+ end_p = end;
nft_pipapo_for_each_field(f, i, m) {
- const u8 *start_p = start, *end_p = end;
-
if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
return -ENOSPC;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index e5ebc0810675..ad3c033db64e 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
u_int32_t new_targetmark;
struct nf_conn *ct;
u_int32_t newmark;
+ u_int32_t oldmark;
ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL)
@@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
switch (info->mode) {
case XT_CONNMARK_SET:
- newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
+ oldmark = READ_ONCE(ct->mark);
+ newmark = (oldmark & ~info->ctmask) ^ info->ctmark;
if (info->shift_dir == D_SHIFT_RIGHT)
newmark >>= info->shift_bits;
else
newmark <<= info->shift_bits;
- if (ct->mark != newmark) {
- ct->mark = newmark;
+ if (READ_ONCE(ct->mark) != newmark) {
+ WRITE_ONCE(ct->mark, newmark);
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
@@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
else
new_targetmark <<= info->shift_bits;
- newmark = (ct->mark & ~info->ctmask) ^
+ newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^
new_targetmark;
- if (ct->mark != newmark) {
- ct->mark = newmark;
+ if (READ_ONCE(ct->mark) != newmark) {
+ WRITE_ONCE(ct->mark, newmark);
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_RESTORE:
- new_targetmark = (ct->mark & info->ctmask);
+ new_targetmark = (READ_ONCE(ct->mark) & info->ctmask);
if (info->shift_dir == D_SHIFT_RIGHT)
new_targetmark >>= info->shift_bits;
else
@@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ct == NULL)
return false;
- return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+ return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert;
}
static int connmark_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index dbe5258e13ff..fff755dde30d 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -543,7 +543,7 @@ static int nci_open_device(struct nci_dev *ndev)
skb_queue_purge(&ndev->tx_q);
ndev->ops->close(ndev);
- ndev->flags = 0;
+ ndev->flags &= BIT(NCI_UNREG);
}
done:
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index aa5e712adf07..3d36ea5701f0 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb)
nci_plen(skb->data));
conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data));
- if (!conn_info)
+ if (!conn_info) {
+ kfree_skb(skb);
return;
+ }
/* strip the nci data header */
skb_pull(skb, NCI_DATA_HDR_SIZE);
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 282c51051dcc..994a0a1efb58 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
target->sens_res = nfca_poll->sens_res;
target->sel_res = nfca_poll->sel_res;
target->nfcid1_len = nfca_poll->nfcid1_len;
+ if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1))
+ return -EPROTO;
if (target->nfcid1_len > 0) {
memcpy(target->nfcid1, nfca_poll->nfcid1,
target->nfcid1_len);
@@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params;
target->sensb_res_len = nfcb_poll->sensb_res_len;
+ if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res))
+ return -EPROTO;
if (target->sensb_res_len > 0) {
memcpy(target->sensb_res, nfcb_poll->sensb_res,
target->sensb_res_len);
@@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params;
target->sensf_res_len = nfcf_poll->sensf_res_len;
+ if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res))
+ return -EPROTO;
if (target->sensf_res_len > 0) {
memcpy(target->sensf_res, nfcf_poll->sensf_res,
target->sensf_res_len);
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 15bd287f5cbd..747d537a3f06 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -15,6 +15,7 @@ config OPENVSWITCH
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
+ select NF_NAT_OVS if NF_NAT
help
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 4348321856af..c8b137649ca4 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -152,7 +152,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
static u32 ovs_ct_get_mark(const struct nf_conn *ct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- return ct ? ct->mark : 0;
+ return ct ? READ_ONCE(ct->mark) : 0;
#else
return 0;
#endif
@@ -340,9 +340,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
u32 new_mark;
- new_mark = ct_mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
+ new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask));
+ if (READ_ONCE(ct->mark) != new_mark) {
+ WRITE_ONCE(ct->mark, new_mark);
if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
key->ct.mark = new_mark;
@@ -726,147 +726,27 @@ static void ovs_nat_update_key(struct sw_flow_key *key,
}
}
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
-{
- int hooknum, nh_off, err = NF_ACCEPT;
-
- nh_off = skb_network_offset(skb);
- skb_pull_rcsum(skb, nh_off);
-
- /* See HOOK2MANIP(). */
- if (maniptype == NF_NAT_MANIP_SRC)
- hooknum = NF_INET_LOCAL_IN; /* Source NAT */
- else
- hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- if (IS_ENABLED(CONFIG_NF_NAT) &&
- skb->protocol == htons(ETH_P_IP) &&
- ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
- err = NF_DROP;
- goto push;
- } else if (IS_ENABLED(CONFIG_IPV6) &&
- skb->protocol == htons(ETH_P_IPV6)) {
- __be16 frag_off;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- int hdrlen = ipv6_skip_exthdr(skb,
- sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct,
- ctinfo,
- hooknum,
- hdrlen))
- err = NF_DROP;
- goto push;
- }
- }
- /* Non-ICMP, fall thru to initialize if needed. */
- fallthrough;
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- /* Initialize according to the NAT action. */
- err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
- /* Action is set up to establish a new
- * mapping.
- */
- ? nf_nat_setup_info(ct, range, maniptype)
- : nf_nat_alloc_null_binding(ct, hooknum);
- if (err != NF_ACCEPT)
- goto push;
- }
- break;
-
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
-
- default:
- err = NF_DROP;
- goto push;
- }
-
- err = nf_nat_packet(ct, ctinfo, hooknum, skb);
-push:
- skb_push_rcsum(skb, nh_off);
-
- /* Update the flow key if NAT successful. */
- if (err == NF_ACCEPT)
- ovs_nat_update_key(key, skb, maniptype);
-
- return err;
-}
-
/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- enum nf_nat_manip_type maniptype;
- int err;
+ int err, action = 0;
- /* Add NAT extension if not confirmed yet. */
- if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
- return NF_ACCEPT; /* Can't NAT. */
+ if (!(info->nat & OVS_CT_NAT))
+ return NF_ACCEPT;
+ if (info->nat & OVS_CT_SRC_NAT)
+ action |= BIT(NF_NAT_MANIP_SRC);
+ if (info->nat & OVS_CT_DST_NAT)
+ action |= BIT(NF_NAT_MANIP_DST);
- /* Determine NAT type.
- * Check if the NAT type can be deduced from the tracked connection.
- * Make sure new expected connections (IP_CT_RELATED) are NATted only
- * when committing.
- */
- if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
- ct->status & IPS_NAT_MASK &&
- (ctinfo != IP_CT_RELATED || info->commit)) {
- /* NAT an established or related connection like before. */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
- /* This is the REPLY direction for a connection
- * for which NAT was applied in the forward
- * direction. Do the reverse NAT.
- */
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
- else
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
- } else if (info->nat & OVS_CT_SRC_NAT) {
- maniptype = NF_NAT_MANIP_SRC;
- } else if (info->nat & OVS_CT_DST_NAT) {
- maniptype = NF_NAT_MANIP_DST;
- } else {
- return NF_ACCEPT; /* Connection is not NATed. */
- }
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
-
- if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
- if (ct->status & IPS_SRC_NAT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- maniptype = NF_NAT_MANIP_DST;
- else
- maniptype = NF_NAT_MANIP_SRC;
-
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
- maniptype, key);
- } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
- err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC, key);
- }
- }
+ err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+
+ if (action & BIT(NF_NAT_MANIP_SRC))
+ ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
+ if (action & BIT(NF_NAT_MANIP_DST))
+ ovs_nat_update_key(key, skb, NF_NAT_MANIP_DST);
return err;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 861dfb8daf4a..932bcf766d63 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -209,6 +209,26 @@ static struct vport *new_vport(const struct vport_parms *parms)
return vport;
}
+static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info,
+ bool upcall_result)
+{
+ struct vport *p = OVS_CB(skb)->input_vport;
+ struct vport_upcall_stats_percpu *stats;
+
+ if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
+ upcall_info->cmd != OVS_PACKET_CMD_ACTION)
+ return;
+
+ stats = this_cpu_ptr(p->upcall_stats);
+ u64_stats_update_begin(&stats->syncp);
+ if (upcall_result)
+ u64_stats_inc(&stats->n_success);
+ else
+ u64_stats_inc(&stats->n_fail);
+ u64_stats_update_end(&stats->syncp);
+}
+
void ovs_dp_detach_port(struct vport *p)
{
ASSERT_OVSL();
@@ -216,6 +236,9 @@ void ovs_dp_detach_port(struct vport *p)
/* First drop references to device. */
hlist_del_rcu(&p->dp_hash_node);
+ /* Free percpu memory */
+ free_percpu(p->upcall_stats);
+
/* Then destroy it. */
ovs_vport_del(p);
}
@@ -305,6 +328,8 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
else
err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
+
+ ovs_vport_update_upcall_stats(skb, upcall_info, !err);
if (err)
goto err;
@@ -1826,6 +1851,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_portids;
}
+ vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+ if (!vport->upcall_stats) {
+ err = -ENOMEM;
+ goto err_destroy_portids;
+ }
+
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_NEW);
BUG_ON(err < 0);
@@ -2098,6 +2129,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
OVS_VPORT_ATTR_PAD))
goto nla_put_failure;
+ if (ovs_vport_get_upcall_stats(vport, skb))
+ goto nla_put_failure;
+
if (ovs_vport_get_upcall_portids(vport, skb))
goto nla_put_failure;
@@ -2279,6 +2313,12 @@ restart:
goto exit_unlock_free;
}
+ vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+ if (!vport->upcall_stats) {
+ err = -ENOMEM;
+ goto exit_unlock_free;
+ }
+
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2508,6 +2548,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
+ [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
static const struct genl_small_ops dp_vport_genl_ops[] = {
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 82a74f998966..7e0f5c45b512 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -285,6 +285,56 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
}
/**
+ * ovs_vport_get_upcall_stats - retrieve upcall stats
+ *
+ * @vport: vport from which to retrieve the stats.
+ * @skb: sk_buff where upcall stats should be appended.
+ *
+ * Retrieves upcall stats for the given device.
+ *
+ * Must be called with ovs_mutex or rcu_read_lock.
+ */
+int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb)
+{
+ struct nlattr *nla;
+ int i;
+
+ __u64 tx_success = 0;
+ __u64 tx_fail = 0;
+
+ for_each_possible_cpu(i) {
+ const struct vport_upcall_stats_percpu *stats;
+ unsigned int start;
+
+ stats = per_cpu_ptr(vport->upcall_stats, i);
+ do {
+ start = u64_stats_fetch_begin(&stats->syncp);
+ tx_success += u64_stats_read(&stats->n_success);
+ tx_fail += u64_stats_read(&stats->n_fail);
+ } while (u64_stats_fetch_retry(&stats->syncp, start));
+ }
+
+ nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_UPCALL_STATS);
+ if (!nla)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_SUCCESS, tx_success,
+ OVS_VPORT_ATTR_PAD)) {
+ nla_nest_cancel(skb, nla);
+ return -EMSGSIZE;
+ }
+
+ if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_FAIL, tx_fail,
+ OVS_VPORT_ATTR_PAD)) {
+ nla_nest_cancel(skb, nla);
+ return -EMSGSIZE;
+ }
+ nla_nest_end(skb, nla);
+
+ return 0;
+}
+
+/**
* ovs_vport_get_options - retrieve device options
*
* @vport: vport from which to retrieve the options.
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 6ff45e8a0868..3e71ca8ad8a7 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -32,6 +32,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name);
void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
+int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb);
+
int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);
@@ -65,6 +67,7 @@ struct vport_portids {
* @hash_node: Element in @dev_table hash table in vport.c.
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure.
+ * @upcall_stats: Upcall stats of every ports.
* @detach_list: list used for detaching vport in net-exit call.
* @rcu: RCU callback head for deferred destruction.
*/
@@ -78,6 +81,7 @@ struct vport {
struct hlist_node hash_node;
struct hlist_node dp_hash_node;
const struct vport_ops *ops;
+ struct vport_upcall_stats_percpu __percpu *upcall_stats;
struct list_head detach_list;
struct rcu_head rcu;
@@ -137,6 +141,18 @@ struct vport_ops {
struct list_head list;
};
+/**
+ * struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for
+ * a given vport.
+ * @n_success: Number of packets that upcall to userspace succeed.
+ * @n_fail: Number of packets that upcall to userspace failed.
+ */
+struct vport_upcall_stats_percpu {
+ struct u64_stats_sync syncp;
+ u64_stats_t n_success;
+ u64_stats_t n_fail;
+};
+
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
const struct vport_parms *);
void ovs_vport_free(struct vport *);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 44f20cf8a0c0..41c4ccc3a5d6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2294,8 +2294,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb->ip_summed == CHECKSUM_PARTIAL)
status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
status |= TP_STATUS_CSUM_VALID;
if (snaplen > res)
@@ -3521,8 +3520,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
aux.tp_status |= TP_STATUS_CSUM_VALID;
aux.tp_len = origlen;
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index accd35c05577..7ae023b37a83 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -58,4 +58,11 @@ config RXKAD
See Documentation/networking/rxrpc.rst.
+config RXPERF
+ tristate "RxRPC test service"
+ help
+ Provide an rxperf service tester. This listens on UDP port 7009 for
+ incoming calls from the rxperf program (an example of which can be
+ found in OpenAFS).
+
endif
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index fdeba488fc6e..e76d3459d78e 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -16,6 +16,7 @@ rxrpc-y := \
conn_service.o \
input.o \
insecure.o \
+ io_thread.o \
key.o \
local_event.o \
local_object.o \
@@ -36,3 +37,6 @@ rxrpc-y := \
rxrpc-$(CONFIG_PROC_FS) += proc.o
rxrpc-$(CONFIG_RXKAD) += rxkad.o
rxrpc-$(CONFIG_SYSCTL) += sysctl.o
+
+
+obj-$(CONFIG_RXPERF) += rxperf.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index aacdd96a9886..7ea576f6ba4b 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -194,8 +194,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
service_in_use:
write_unlock(&local->services_lock);
- rxrpc_unuse_local(local);
- rxrpc_put_local(local);
+ rxrpc_unuse_local(local, rxrpc_local_unuse_bind);
+ rxrpc_put_local(local, rxrpc_local_put_bind);
ret = -EADDRINUSE;
error_unlock:
release_sock(&rx->sk);
@@ -328,7 +328,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
mutex_unlock(&call->user_mutex);
}
- rxrpc_put_peer(cp.peer);
+ rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p", call);
return call;
}
@@ -359,9 +359,9 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
/* Make sure we're not going to call back into a kernel service */
if (call->notify_rx) {
- spin_lock_bh(&call->notify_lock);
+ spin_lock(&call->notify_lock);
call->notify_rx = rxrpc_dummy_notify_rx;
- spin_unlock_bh(&call->notify_lock);
+ spin_unlock(&call->notify_lock);
}
mutex_unlock(&call->user_mutex);
@@ -812,14 +812,12 @@ static int rxrpc_shutdown(struct socket *sock, int flags)
lock_sock(sk);
- spin_lock_bh(&sk->sk_receive_queue.lock);
if (sk->sk_state < RXRPC_CLOSE) {
sk->sk_state = RXRPC_CLOSE;
sk->sk_shutdown = SHUTDOWN_MASK;
} else {
ret = -ESHUTDOWN;
}
- spin_unlock_bh(&sk->sk_receive_queue.lock);
rxrpc_discard_prealloc(rx);
@@ -872,9 +870,7 @@ static int rxrpc_release_sock(struct sock *sk)
break;
}
- spin_lock_bh(&sk->sk_receive_queue.lock);
sk->sk_state = RXRPC_CLOSE;
- spin_unlock_bh(&sk->sk_receive_queue.lock);
if (rx->local && rcu_access_pointer(rx->local->service) == rx) {
write_lock(&rx->local->services_lock);
@@ -888,8 +884,8 @@ static int rxrpc_release_sock(struct sock *sk)
flush_workqueue(rxrpc_workqueue);
rxrpc_purge_queue(&sk->sk_receive_queue);
- rxrpc_unuse_local(rx->local);
- rxrpc_put_local(rx->local);
+ rxrpc_unuse_local(rx->local, rxrpc_local_unuse_release_sock);
+ rxrpc_put_local(rx->local, rxrpc_local_put_release_sock);
rx->local = NULL;
key_put(rx->key);
rx->key = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 0273a9029229..e7dccab7b741 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -36,6 +36,8 @@ struct rxrpc_txbuf;
* to pass supplementary information.
*/
enum rxrpc_skb_mark {
+ RXRPC_SKB_MARK_PACKET, /* Received packet */
+ RXRPC_SKB_MARK_ERROR, /* Error notification */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
};
@@ -76,7 +78,7 @@ struct rxrpc_net {
bool kill_all_client_conns;
atomic_t nr_client_conns;
spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
- spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */
+ struct mutex client_conn_discard_lock; /* Prevent multiple discarders */
struct list_head idle_client_conns;
struct work_struct client_conn_reaper;
struct timer_list client_conn_reap_timer;
@@ -99,6 +101,9 @@ struct rxrpc_net {
atomic_t stat_tx_data_retrans;
atomic_t stat_tx_data_send;
atomic_t stat_tx_data_send_frag;
+ atomic_t stat_tx_data_send_fail;
+ atomic_t stat_tx_data_underflow;
+ atomic_t stat_tx_data_cwnd_reset;
atomic_t stat_rx_data;
atomic_t stat_rx_data_reqack;
atomic_t stat_rx_data_jumbo;
@@ -110,6 +115,8 @@ struct rxrpc_net {
atomic_t stat_rx_acks[256];
atomic_t stat_why_req_ack[8];
+
+ atomic_t stat_io_loop;
};
/*
@@ -279,13 +286,11 @@ struct rxrpc_local {
struct rxrpc_net *rxnet; /* The network ns in which this resides */
struct hlist_node link;
struct socket *socket; /* my UDP socket */
- struct work_struct processor;
- struct list_head ack_tx_queue; /* List of ACKs that need sending */
- spinlock_t ack_tx_lock; /* ACK list lock */
+ struct task_struct *io_thread;
struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
- struct sk_buff_head reject_queue; /* packets awaiting rejection */
- struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */
+ struct sk_buff_head rx_queue; /* Received packets */
+ struct list_head call_attend_q; /* Calls requiring immediate attention */
struct rb_root client_bundles; /* Client connection bundles by socket params */
spinlock_t client_bundles_lock; /* Lock for client_bundles */
spinlock_t lock; /* access lock */
@@ -403,11 +408,18 @@ enum rxrpc_conn_proto_state {
* RxRPC client connection bundle.
*/
struct rxrpc_bundle {
- struct rxrpc_conn_parameters params;
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Remote endpoint */
+ struct key *key; /* Security details */
refcount_t ref;
+ atomic_t active; /* Number of active users */
unsigned int debug_id;
+ u32 security_level; /* Security level selected */
+ u16 service_id; /* Service ID for this connection */
bool try_upgrade; /* True if the bundle is attempting upgrade */
bool alloc_conn; /* True if someone's getting a conn */
+ bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
short alloc_error; /* Error from last conn allocation */
spinlock_t channel_lock;
struct rb_node local_node; /* Node in local->client_conns */
@@ -423,9 +435,13 @@ struct rxrpc_bundle {
*/
struct rxrpc_connection {
struct rxrpc_conn_proto proto;
- struct rxrpc_conn_parameters params;
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Remote endpoint */
+ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ struct key *key; /* Security details */
refcount_t ref;
+ atomic_t active; /* Active count for service conns */
struct rcu_head rcu;
struct list_head cache_link;
@@ -446,6 +462,7 @@ struct rxrpc_connection {
struct timer_list timer; /* Conn event timer */
struct work_struct processor; /* connection event processor */
+ struct work_struct destructor; /* In-process-context destroyer */
struct rxrpc_bundle *bundle; /* Client connection bundle */
struct rb_node service_node; /* Node in peer->service_conns */
struct list_head proc_link; /* link in procfs list */
@@ -470,9 +487,13 @@ struct rxrpc_connection {
atomic_t serial; /* packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 service_id; /* Service ID, possibly upgraded */
+ u32 security_level; /* Security level selected */
u8 security_ix; /* security type */
u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
u8 bundle_shift; /* Index into bundle->avail_chans */
+ bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
+ u16 orig_service_id; /* Originally requested service ID */
short error; /* Local error code */
};
@@ -501,22 +522,19 @@ enum rxrpc_call_flag {
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
- RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
RXRPC_CALL_KERNEL, /* The call was made by the kernel */
RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */
- RXRPC_CALL_DELAY_ACK_PENDING, /* DELAY ACK generation is pending */
- RXRPC_CALL_IDLE_ACK_PENDING, /* IDLE ACK generation is pending */
+ RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
+ RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */
};
/*
* Events that can be raised on a call.
*/
enum rxrpc_call_event {
- RXRPC_CALL_EV_ABORT, /* need to generate abort */
- RXRPC_CALL_EV_RESEND, /* Tx resend required */
- RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */
RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */
+ RXRPC_CALL_EV_INITIAL_PING, /* Send initial ping for a new service call */
};
/*
@@ -569,10 +587,13 @@ struct rxrpc_call {
struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_peer *peer; /* Peer record for remote address */
+ struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_sock __rcu *socket; /* socket responsible */
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ struct key *key; /* Security details */
const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
+ struct sockaddr_rxrpc dest_srx; /* Destination address */
unsigned long delay_ack_at; /* When DELAY ACK needs to happen */
unsigned long ack_lost_at; /* When ACK is figured as lost */
unsigned long resend_at; /* When next resend needs to happen */
@@ -584,7 +605,7 @@ struct rxrpc_call {
u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
struct timer_list timer; /* Combined event timer */
- struct work_struct processor; /* Event processor */
+ struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
struct list_head link; /* link in master call list */
struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */
@@ -593,6 +614,7 @@ struct rxrpc_call {
struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
struct list_head sock_link; /* Link in rx->sock_calls */
struct rb_node sock_node; /* Node in rx->calls */
+ struct list_head attend_link; /* Link in local->call_attend_q */
struct rxrpc_txbuf *tx_pending; /* Tx buffer being filled */
wait_queue_head_t waitq; /* Wait queue for channel or Tx */
s64 tx_total_len; /* Total length left to be transmitted (or -1) */
@@ -606,20 +628,22 @@ struct rxrpc_call {
enum rxrpc_call_state state; /* current state of call */
enum rxrpc_call_completion completion; /* Call completion condition */
refcount_t ref;
- u16 service_id; /* service ID */
u8 security_ix; /* Security type */
enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
+ u32 security_level; /* Security level selected */
int debug_id; /* debug ID for printks */
unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
unsigned short rx_pkt_len; /* Current recvmsg packet len */
/* Transmitted data tracking. */
spinlock_t tx_lock; /* Transmit queue lock */
+ struct list_head tx_sendmsg; /* Sendmsg prepared packets */
struct list_head tx_buffer; /* Buffer of transmissible packets */
rxrpc_seq_t tx_bottom; /* First packet in buffer */
rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
+ rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
u16 tx_backoff; /* Delay to insert due to Tx failure */
u8 tx_winsize; /* Maximum size of Tx window */
@@ -634,13 +658,13 @@ struct rxrpc_call {
rxrpc_seq_t rx_consumed; /* Highest packet consumed */
rxrpc_serial_t rx_serial; /* Highest serial received for this call */
u8 rx_winsize; /* Size of Rx window */
- spinlock_t input_lock; /* Lock for packet input to this call */
/* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
* is fixed, we keep these numbers in terms of segments (ie. DATA
* packets) rather than bytes.
*/
#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
+#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4)
u8 cong_cwnd; /* Congestion window size */
u8 cong_extra; /* Extra to send for congestion management */
u8 cong_ssthresh; /* Slow-start threshold */
@@ -675,11 +699,7 @@ struct rxrpc_call {
rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
- rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
- rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */
- struct sk_buff *acks_soft_tbl; /* The last ACK packet with NAKs in it */
- spinlock_t acks_ack_lock; /* Access to ->acks_last_ack */
};
/*
@@ -738,9 +758,8 @@ struct rxrpc_send_params {
*/
struct rxrpc_txbuf {
struct rcu_head rcu;
- struct list_head call_link; /* Link in call->tx_queue */
+ struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
struct list_head tx_link; /* Link in live Enc queue or Tx queue */
- struct rxrpc_call *call; /* Call to which belongs */
ktime_t last_sent; /* Time at which last transmitted */
refcount_t ref;
rxrpc_seq_t seq; /* Sequence number of this packet */
@@ -792,9 +811,9 @@ extern struct workqueue_struct *rxrpc_workqueue;
*/
int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
void rxrpc_discard_prealloc(struct rxrpc_sock *);
-struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *,
- struct rxrpc_sock *,
- struct sk_buff *);
+bool rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
+ struct rxrpc_connection *, struct sockaddr_rxrpc *,
+ struct sk_buff *);
void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
@@ -807,14 +826,14 @@ void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
enum rxrpc_propose_ack_trace);
void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
-void rxrpc_process_call(struct work_struct *);
+void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
void rxrpc_reduce_call_timer(struct rxrpc_call *call,
unsigned long expire_at,
unsigned long now,
enum rxrpc_timer_trace why);
-void rxrpc_delete_call_timer(struct rxrpc_call *call);
+void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
* call_object.c
@@ -823,6 +842,7 @@ extern const char *const rxrpc_call_states[];
extern const char *const rxrpc_call_completions[];
extern struct kmem_cache *rxrpc_call_jar;
+void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what);
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
@@ -834,10 +854,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
-bool __rxrpc_queue_call(struct rxrpc_call *);
-bool rxrpc_queue_call(struct rxrpc_call *);
-void rxrpc_see_call(struct rxrpc_call *);
-bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op);
+void rxrpc_see_call(struct rxrpc_call *, enum rxrpc_call_trace);
+struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_cleanup_call(struct rxrpc_call *);
@@ -862,14 +880,14 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry;
extern struct idr rxrpc_client_conn_ids;
void rxrpc_destroy_client_conn_ids(void);
-struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *);
-void rxrpc_put_bundle(struct rxrpc_bundle *);
+struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
+void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
gfp_t);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
-void rxrpc_put_client_conn(struct rxrpc_connection *);
+void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
void rxrpc_discard_expired_client_conns(struct work_struct *);
void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
@@ -879,6 +897,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *);
*/
void rxrpc_process_connection(struct work_struct *);
void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
+int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
/*
* conn_object.c
@@ -886,18 +905,20 @@ void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
extern unsigned int rxrpc_connection_expiry;
extern unsigned int rxrpc_closed_conn_expiry;
-struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
-struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
- struct sk_buff *,
- struct rxrpc_peer **);
+struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t);
+struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *,
+ struct sockaddr_rxrpc *,
+ struct sk_buff *);
void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *);
void rxrpc_disconnect_call(struct rxrpc_call *);
-void rxrpc_kill_connection(struct rxrpc_connection *);
-bool rxrpc_queue_conn(struct rxrpc_connection *);
-void rxrpc_see_connection(struct rxrpc_connection *);
-struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *);
-struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *);
-void rxrpc_put_service_conn(struct rxrpc_connection *);
+void rxrpc_kill_client_conn(struct rxrpc_connection *);
+void rxrpc_queue_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
+void rxrpc_see_connection(struct rxrpc_connection *, enum rxrpc_conn_trace);
+struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *,
+ enum rxrpc_conn_trace);
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *,
+ enum rxrpc_conn_trace);
+void rxrpc_put_connection(struct rxrpc_connection *, enum rxrpc_conn_trace);
void rxrpc_service_connection_reaper(struct work_struct *);
void rxrpc_destroy_all_connections(struct rxrpc_net *);
@@ -911,17 +932,6 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
return !rxrpc_conn_is_client(conn);
}
-static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
-{
- if (!conn)
- return;
-
- if (rxrpc_conn_is_client(conn))
- rxrpc_put_client_conn(conn);
- else
- rxrpc_put_service_conn(conn);
-}
-
static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
unsigned long expire_at)
{
@@ -941,7 +951,20 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
/*
* input.c
*/
-int rxrpc_input_packet(struct sock *, struct sk_buff *);
+void rxrpc_congestion_degrade(struct rxrpc_call *);
+void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *);
+void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
+
+/*
+ * io_thread.c
+ */
+int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
+void rxrpc_error_report(struct sock *);
+int rxrpc_io_thread(void *data);
+static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
+{
+ wake_up_process(local->io_thread);
+}
/*
* insecure.c
@@ -960,28 +983,41 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
/*
* local_event.c
*/
-extern void rxrpc_process_local_events(struct rxrpc_local *);
+void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_host_header *hdr,
+ struct sk_buff *skb);
/*
* local_object.c
*/
struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
-struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
-struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
-void rxrpc_put_local(struct rxrpc_local *);
-struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *);
-void rxrpc_unuse_local(struct rxrpc_local *);
-void rxrpc_queue_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace);
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_put_local(struct rxrpc_local *, enum rxrpc_local_trace);
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_unuse_local(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_destroy_local(struct rxrpc_local *local);
void rxrpc_destroy_all_locals(struct rxrpc_net *);
-static inline bool __rxrpc_unuse_local(struct rxrpc_local *local)
+static inline bool __rxrpc_use_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- return atomic_dec_return(&local->active_users) == 0;
+ int r, u;
+
+ r = refcount_read(&local->ref);
+ u = atomic_fetch_add_unless(&local->active_users, 1, 0);
+ trace_rxrpc_local(local->debug_id, why, r, u);
+ return u != 0;
}
-static inline bool __rxrpc_use_local(struct rxrpc_local *local)
+static inline void rxrpc_see_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0;
+ int r, u;
+
+ r = refcount_read(&local->ref);
+ u = atomic_read(&local->active_users);
+ trace_rxrpc_local(local->debug_id, why, r, u);
}
/*
@@ -1008,16 +1044,17 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-void rxrpc_transmit_ack_packets(struct rxrpc_local *);
+int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
-void rxrpc_reject_packets(struct rxrpc_local *);
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
/*
* peer_event.c
*/
-void rxrpc_error_report(struct sock *);
+void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *);
void rxrpc_peer_keepalive_worker(struct work_struct *);
/*
@@ -1027,14 +1064,15 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *,
struct sockaddr_rxrpc *, gfp_t);
-struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
+ enum rxrpc_peer_trace);
void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *,
struct rxrpc_peer *);
void rxrpc_destroy_all_peers(struct rxrpc_net *);
-struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
-struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
-void rxrpc_put_peer(struct rxrpc_peer *);
-void rxrpc_put_peer_locked(struct rxrpc_peer *);
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace);
+void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
+void rxrpc_put_peer_locked(struct rxrpc_peer *, enum rxrpc_peer_trace);
/*
* proc.c
@@ -1096,6 +1134,7 @@ extern const struct rxrpc_security rxkad;
int __init rxrpc_init_security(void);
const struct rxrpc_security *rxrpc_security_lookup(u8);
void rxrpc_exit_security(void);
+int rxrpc_init_client_call_security(struct rxrpc_call *);
int rxrpc_init_client_conn_security(struct rxrpc_connection *);
const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *,
struct sk_buff *);
@@ -1118,7 +1157,6 @@ int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int);
* skbuff.c
*/
void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
-void rxrpc_packet_destructor(struct sk_buff *);
void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace);
@@ -1189,23 +1227,17 @@ extern unsigned int rxrpc_debug;
#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
-#define kproto(FMT,...) dbgprintk("### "FMT ,##__VA_ARGS__)
-#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__)
#if defined(__KDEBUG)
#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
-#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__)
-#define _net(FMT,...) knet(FMT,##__VA_ARGS__)
#elif defined(CONFIG_AF_RXRPC_DEBUG)
#define RXRPC_DEBUG_KENTER 0x01
#define RXRPC_DEBUG_KLEAVE 0x02
#define RXRPC_DEBUG_KDEBUG 0x04
-#define RXRPC_DEBUG_KPROTO 0x08
-#define RXRPC_DEBUG_KNET 0x10
#define _enter(FMT,...) \
do { \
@@ -1225,24 +1257,10 @@ do { \
kdebug(FMT,##__VA_ARGS__); \
} while (0)
-#define _proto(FMT,...) \
-do { \
- if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \
- kproto(FMT,##__VA_ARGS__); \
-} while (0)
-
-#define _net(FMT,...) \
-do { \
- if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \
- knet(FMT,##__VA_ARGS__); \
-} while (0)
-
#else
#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__)
-#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__)
-#define _net(FMT,...) no_printk("@@@ "FMT ,##__VA_ARGS__)
#endif
/*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 48790ee77019..d1850863507f 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -38,7 +38,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
unsigned long user_call_ID, gfp_t gfp,
unsigned int debug_id)
{
- const void *here = __builtin_return_address(0);
struct rxrpc_call *call, *xcall;
struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
struct rb_node *parent, **pp;
@@ -70,7 +69,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
head = b->peer_backlog_head;
tail = READ_ONCE(b->peer_backlog_tail);
if (CIRC_CNT(head, tail, size) < max) {
- struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp);
+ struct rxrpc_peer *peer;
+
+ peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc);
if (!peer)
return -ENOMEM;
b->peer_backlog[head] = peer;
@@ -89,9 +90,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
b->conn_backlog[head] = conn;
smp_store_release(&b->conn_backlog_head,
(head + 1) & (size - 1));
-
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- refcount_read(&conn->ref), here);
}
/* Now it gets complicated, because calls get registered with the
@@ -102,10 +100,10 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
call->state = RXRPC_CALL_SERVER_PREALLOC;
+ __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events);
- trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
- refcount_read(&call->ref),
- here, (const void *)user_call_ID);
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ user_call_ID, rxrpc_call_new_prealloc_service);
write_lock(&rx->call_lock);
@@ -126,11 +124,11 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
call->user_call_ID = user_call_ID;
call->notify_rx = notify_rx;
if (user_attach_call) {
- rxrpc_get_call(call, rxrpc_call_got_kernel);
+ rxrpc_get_call(call, rxrpc_call_get_kernel_service);
user_attach_call(call, user_call_ID);
}
- rxrpc_get_call(call, rxrpc_call_got_userid);
+ rxrpc_get_call(call, rxrpc_call_get_userid);
rb_link_node(&call->sock_node, parent, pp);
rb_insert_color(&call->sock_node, &rx->calls);
set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
@@ -140,9 +138,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
list_add_tail_rcu(&call->link, &rxnet->calls);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
b->call_backlog[call_head] = call;
smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
@@ -190,14 +188,14 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
/* Make sure that there aren't any incoming calls in progress before we
* clear the preallocation buffers.
*/
- spin_lock_bh(&rx->incoming_lock);
- spin_unlock_bh(&rx->incoming_lock);
+ spin_lock(&rx->incoming_lock);
+ spin_unlock(&rx->incoming_lock);
head = b->peer_backlog_head;
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
- rxrpc_put_local(peer->local);
+ rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
@@ -230,7 +228,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
}
rxrpc_call_completed(call);
rxrpc_release_call(rx, call);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_discard_prealloc);
tail = (tail + 1) & (size - 1);
}
@@ -238,21 +236,6 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
}
/*
- * Ping the other end to fill our RTT cache and to retrieve the rwind
- * and MTU parameters.
- */
-static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- ktime_t now = skb->tstamp;
-
- if (call->peer->rtt_count < 3 ||
- ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
- rxrpc_send_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
- rxrpc_propose_ack_ping_for_params);
-}
-
-/*
* Allocate a new incoming call from the prealloc pool, along with a connection
* and a peer as necessary.
*/
@@ -261,6 +244,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
struct rxrpc_peer *peer,
struct rxrpc_connection *conn,
const struct rxrpc_security *sec,
+ struct sockaddr_rxrpc *peer_srx,
struct sk_buff *skb)
{
struct rxrpc_backlog *b = rx->backlog;
@@ -286,12 +270,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
return NULL;
if (!conn) {
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn))
peer = NULL;
if (!peer) {
peer = b->peer_backlog[peer_tail];
- if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0)
- return NULL;
+ peer->srx = *peer_srx;
b->peer_backlog[peer_tail] = NULL;
smp_store_release(&b->peer_backlog_tail,
(peer_tail + 1) &
@@ -305,12 +288,13 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
b->conn_backlog[conn_tail] = NULL;
smp_store_release(&b->conn_backlog_tail,
(conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
- conn->params.local = rxrpc_get_local(local);
- conn->params.peer = peer;
- rxrpc_see_connection(conn);
+ conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn);
+ conn->peer = peer;
+ rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn);
rxrpc_new_incoming_connection(rx, conn, sec, skb);
} else {
- rxrpc_get_connection(conn);
+ rxrpc_get_connection(conn, rxrpc_conn_get_service_conn);
+ atomic_inc(&conn->active);
}
/* And now we can allocate and set up a new call */
@@ -319,43 +303,69 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
smp_store_release(&b->call_backlog_tail,
(call_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_accept);
+ call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call);
call->conn = conn;
call->security = conn->security;
call->security_ix = conn->security_ix;
- call->peer = rxrpc_get_peer(conn->params.peer);
+ call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept);
+ call->dest_srx = peer->srx;
call->cong_ssthresh = call->peer->cong_ssthresh;
call->tx_last_sent = ktime_get_real();
return call;
}
/*
- * Set up a new incoming call. Called in BH context with the RCU read lock
- * held.
+ * Set up a new incoming call. Called from the I/O thread.
*
* If this is for a kernel service, when we allocate the call, it will have
* three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the
* retainer ref obtained from the backlog buffer. Prealloc calls for userspace
- * services only have the ref from the backlog buffer. We want to pass this
- * ref to non-BH context to dispose of.
+ * services only have the ref from the backlog buffer.
*
* If we want to report an error, we mark the skb with the packet type and
- * abort code and return NULL.
- *
- * The call is returned with the user access mutex held.
+ * abort code and return false.
*/
-struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
- struct rxrpc_sock *rx,
- struct sk_buff *skb)
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
const struct rxrpc_security *sec = NULL;
- struct rxrpc_connection *conn;
- struct rxrpc_peer *peer = NULL;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_call *call = NULL;
+ struct rxrpc_sock *rx;
_enter("");
+ /* Don't set up a call for anything other than the first DATA packet. */
+ if (sp->hdr.seq != 1 ||
+ sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ return true; /* Just discard */
+
+ rcu_read_lock();
+
+ /* Weed out packets to services we're not offering. Packets that would
+ * begin a call are explicitly rejected and the rest are just
+ * discarded.
+ */
+ rx = rcu_dereference(local->service);
+ if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
+ sp->hdr.serviceId != rx->second_service)
+ ) {
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+ sp->hdr.seq == 1)
+ goto unsupported_service;
+ goto discard;
+ }
+
+ if (!conn) {
+ sec = rxrpc_get_incoming_security(rx, skb);
+ if (!sec)
+ goto reject;
+ }
+
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
@@ -366,20 +376,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
goto no_call;
}
- /* The peer, connection and call may all have sprung into existence due
- * to a duplicate packet being handled on another CPU in parallel, so
- * we have to recheck the routing. However, we're now holding
- * rx->incoming_lock, so the values should remain stable.
- */
- conn = rxrpc_find_connection_rcu(local, skb, &peer);
-
- if (!conn) {
- sec = rxrpc_get_incoming_security(rx, skb);
- if (!sec)
- goto no_call;
- }
-
- call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, skb);
+ call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx,
+ skb);
if (!call) {
skb->mark = RXRPC_SKB_MARK_REJECT_BUSY;
goto no_call;
@@ -396,50 +394,41 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
rx->notify_new_call(&rx->sk, call, call->user_call_ID);
spin_lock(&conn->state_lock);
- switch (conn->state) {
- case RXRPC_CONN_SERVICE_UNSECURED:
+ if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
- rxrpc_queue_conn(call->conn);
- break;
-
- case RXRPC_CONN_SERVICE:
- write_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE)
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
- write_unlock(&call->state_lock);
- break;
-
- case RXRPC_CONN_REMOTELY_ABORTED:
- rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->abort_code, conn->error);
- break;
- case RXRPC_CONN_LOCALLY_ABORTED:
- rxrpc_abort_call("CON", call, sp->hdr.seq,
- conn->abort_code, conn->error);
- break;
- default:
- BUG();
+ rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge);
}
spin_unlock(&conn->state_lock);
- spin_unlock(&rx->incoming_lock);
- rxrpc_send_ping(call, skb);
+ spin_unlock(&rx->incoming_lock);
+ rcu_read_unlock();
- /* We have to discard the prealloc queue's ref here and rely on a
- * combination of the RCU read lock and refs held either by the socket
- * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel
- * service to prevent the call from being deallocated too early.
- */
- rxrpc_put_call(call, rxrpc_call_put);
+ if (hlist_unhashed(&call->error_link)) {
+ spin_lock(&call->peer->lock);
+ hlist_add_head(&call->error_link, &call->peer->error_targets);
+ spin_unlock(&call->peer->lock);
+ }
_leave(" = %p{%d}", call, call->debug_id);
- return call;
-
+ rxrpc_input_call_event(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ return true;
+
+unsupported_service:
+ trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_INVALID_OPERATION, EOPNOTSUPP);
+ skb->priority = RX_INVALID_OPERATION;
+ goto reject;
no_call:
spin_unlock(&rx->incoming_lock);
- _leave(" = NULL [%u]", skb->mark);
- return NULL;
+reject:
+ rcu_read_unlock();
+ _leave(" = f [%u]", skb->mark);
+ return false;
+discard:
+ rcu_read_unlock();
+ return true;
}
/*
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 1e21a708390e..b2cf448fb02c 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -69,21 +69,15 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
{
- struct rxrpc_local *local = call->conn->params.local;
struct rxrpc_txbuf *txb;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return;
- if (ack_reason == RXRPC_ACK_DELAY &&
- test_and_set_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags)) {
- trace_rxrpc_drop_ack(call, why, ack_reason, serial, false);
- return;
- }
rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK,
- in_softirq() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
+ rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
if (!txb) {
kleave(" = -ENOMEM");
return;
@@ -101,22 +95,9 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
txb->ack.reason = ack_reason;
txb->ack.nAcks = 0;
- if (!rxrpc_try_get_call(call, rxrpc_call_got)) {
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_nomem);
- return;
- }
-
- spin_lock_bh(&local->ack_tx_lock);
- list_add_tail(&txb->tx_link, &local->ack_tx_queue);
- spin_unlock_bh(&local->ack_tx_lock);
trace_rxrpc_send_ack(call, why, ack_reason, serial);
-
- if (in_task()) {
- rxrpc_transmit_ack_packets(call->peer->local);
- } else {
- rxrpc_get_local(local);
- rxrpc_queue_local(local);
- }
+ rxrpc_send_ack_packet(call, txb);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
}
/*
@@ -130,11 +111,10 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
/*
* Perform retransmission of NAK'd and unack'd packets.
*/
-static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
struct rxrpc_ackpacket *ack = NULL;
struct rxrpc_txbuf *txb;
- struct sk_buff *ack_skb = NULL;
unsigned long resend_at;
rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
ktime_t now, max_age, oldest, ack_ts;
@@ -148,32 +128,21 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
oldest = now;
- /* See if there's an ACK saved with a soft-ACK table in it. */
- if (call->acks_soft_tbl) {
- spin_lock_bh(&call->acks_ack_lock);
- ack_skb = call->acks_soft_tbl;
- if (ack_skb) {
- rxrpc_get_skb(ack_skb, rxrpc_skb_ack);
- ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- }
- spin_unlock_bh(&call->acks_ack_lock);
- }
-
if (list_empty(&call->tx_buffer))
goto no_resend;
- spin_lock(&call->tx_lock);
-
if (list_empty(&call->tx_buffer))
goto no_further_resend;
- trace_rxrpc_resend(call);
+ trace_rxrpc_resend(call, ack_skb);
txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
/* Scan the soft ACK table without dropping the lock and resend any
* explicitly NAK'd packets.
*/
- if (ack) {
+ if (ack_skb) {
+ ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
+
for (i = 0; i < ack->nAcks; i++) {
rxrpc_seq_t seq;
@@ -197,8 +166,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
if (list_empty(&txb->tx_link)) {
- rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans);
- rxrpc_get_call(call, rxrpc_call_got_tx);
list_add_tail(&txb->tx_link, &retrans_queue);
set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
}
@@ -242,7 +209,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
do_resend:
unacked = true;
if (list_empty(&txb->tx_link)) {
- rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans);
list_add_tail(&txb->tx_link, &retrans_queue);
set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
@@ -250,10 +216,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
no_further_resend:
- spin_unlock(&call->tx_lock);
no_resend:
- rxrpc_free_skb(ack_skb, rxrpc_skb_freed);
-
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
resend_at += jiffies + rxrpc_get_rto_backoff(call->peer,
!list_empty(&retrans_queue));
@@ -267,7 +230,7 @@ no_resend:
* retransmitting data.
*/
if (list_empty(&retrans_queue)) {
- rxrpc_reduce_call_timer(call, resend_at, now_j,
+ rxrpc_reduce_call_timer(call, resend_at, jiffies,
rxrpc_timer_set_for_resend);
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
@@ -277,76 +240,134 @@ no_resend:
goto out;
}
+ /* Retransmit the queue */
while ((txb = list_first_entry_or_null(&retrans_queue,
struct rxrpc_txbuf, tx_link))) {
list_del_init(&txb->tx_link);
- rxrpc_send_data_packet(call, txb);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans);
-
- trace_rxrpc_retransmit(call, txb->seq,
- ktime_to_ns(ktime_sub(txb->last_sent,
- max_age)));
+ rxrpc_transmit_one(call, txb);
}
out:
_leave("");
}
+static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
+{
+ unsigned int winsize = min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize;
+ rxrpc_seq_t tx_top = call->tx_top;
+ int space;
+
+ space = wtop - tx_top;
+ return space > 0;
+}
+
+/*
+ * Decant some if the sendmsg prepared queue into the transmission buffer.
+ */
+static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (rxrpc_is_client_call(call) &&
+ !test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ rxrpc_expose_client_call(call);
+
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ spin_lock(&call->tx_lock);
+ list_del(&txb->call_link);
+ spin_unlock(&call->tx_lock);
+
+ call->tx_top = txb->seq;
+ list_add_tail(&txb->call_link, &call->tx_buffer);
+
+ rxrpc_transmit_one(call, txb);
+
+ if (!rxrpc_tx_window_has_space(call))
+ break;
+ }
+}
+
+static void rxrpc_transmit_some_data(struct rxrpc_call *call)
+{
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ if (list_empty(&call->tx_sendmsg))
+ return;
+ fallthrough;
+
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ if (!rxrpc_tx_window_has_space(call))
+ return;
+ if (list_empty(&call->tx_sendmsg)) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow);
+ return;
+ }
+ rxrpc_decant_prepared_tx(call);
+ break;
+ default:
+ return;
+ }
+}
+
+/*
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
+ */
+static void rxrpc_send_initial_ping(struct rxrpc_call *call)
+{
+ if (call->peer->rtt_count < 3 ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_params);
+}
+
/*
* Handle retransmission and deferred ACK/abort generation.
*/
-void rxrpc_process_call(struct work_struct *work)
+void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
- struct rxrpc_call *call =
- container_of(work, struct rxrpc_call, processor);
unsigned long now, next, t;
- unsigned int iterations = 0;
rxrpc_serial_t ackr_serial;
+ bool resend = false, expired = false;
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_input);
//printk("\n--------------------\n");
_enter("{%d,%s,%lx}",
call->debug_id, rxrpc_call_states[call->state], call->events);
-recheck_state:
- /* Limit the number of times we do this before returning to the manager */
- iterations++;
- if (iterations > 5)
- goto requeue;
-
- if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
- rxrpc_send_abort_packet(call);
- goto recheck_state;
- }
-
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom)
- rxrpc_shrink_call_tx_buffer(call);
+ if (call->state == RXRPC_CALL_COMPLETE)
+ goto out;
- if (call->state == RXRPC_CALL_COMPLETE) {
- rxrpc_delete_call_timer(call);
- goto out_put;
- }
+ if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
+ goto out;
- /* Work out if any timeouts tripped */
+ /* If we see our async-event poke, check for timeout trippage. */
now = jiffies;
t = READ_ONCE(call->expect_rx_by);
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
- set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ expired = true;
}
t = READ_ONCE(call->expect_req_by);
if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
- set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ expired = true;
}
t = READ_ONCE(call->expect_term_by);
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
- set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ expired = true;
}
t = READ_ONCE(call->delay_ack_at);
@@ -385,11 +406,26 @@ recheck_state:
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
- set_bit(RXRPC_CALL_EV_RESEND, &call->events);
+ resend = true;
}
+ if (skb)
+ rxrpc_input_call_packet(call, skb);
+
+ rxrpc_transmit_some_data(call);
+
+ if (skb) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK)
+ rxrpc_congestion_degrade(call);
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
+ rxrpc_send_initial_ping(call);
+
/* Process events */
- if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
+ if (expired) {
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
@@ -397,52 +433,50 @@ recheck_state:
} else {
rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
}
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- goto recheck_state;
+ rxrpc_send_abort_packet(call);
+ goto out;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
- call->acks_lost_top = call->tx_top;
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- }
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
- call->state != RXRPC_CALL_CLIENT_RECV_REPLY) {
- rxrpc_resend(call, now);
- goto recheck_state;
- }
+ if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY)
+ rxrpc_resend(call, NULL);
+
+ if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_rx_idle);
+
+ if (atomic_read(&call->ackr_nr_unacked) > 2)
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_input_data);
/* Make sure the timer is restarted */
- next = call->expect_rx_by;
+ if (call->state != RXRPC_CALL_COMPLETE) {
+ next = call->expect_rx_by;
#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
- set(call->expect_req_by);
- set(call->expect_term_by);
- set(call->delay_ack_at);
- set(call->ack_lost_at);
- set(call->resend_at);
- set(call->keepalive_at);
- set(call->ping_at);
-
- now = jiffies;
- if (time_after_eq(now, next))
- goto recheck_state;
+ set(call->expect_req_by);
+ set(call->expect_term_by);
+ set(call->delay_ack_at);
+ set(call->ack_lost_at);
+ set(call->resend_at);
+ set(call->keepalive_at);
+ set(call->ping_at);
- rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ now = jiffies;
+ if (time_after_eq(now, next))
+ rxrpc_poke_call(call, rxrpc_call_poke_timer_now);
- /* other events may have been raised since we started checking */
- if (call->events && call->state < RXRPC_CALL_COMPLETE)
- goto requeue;
+ rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ }
-out_put:
- rxrpc_put_call(call, rxrpc_call_put);
out:
+ if (call->state == RXRPC_CALL_COMPLETE)
+ del_timer_sync(&call->timer);
+ if (call->acks_hard_ack != call->tx_bottom)
+ rxrpc_shrink_call_tx_buffer(call);
_leave("");
- return;
-
-requeue:
- __rxrpc_queue_call(call);
- goto out;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 1befe22cd301..be5eb8cdf549 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -45,6 +45,24 @@ static struct semaphore rxrpc_call_limiter =
static struct semaphore rxrpc_kernel_call_limiter =
__SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000);
+void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
+{
+ struct rxrpc_local *local = call->local;
+ bool busy;
+
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ spin_lock_bh(&local->lock);
+ busy = !list_empty(&call->attend_link);
+ trace_rxrpc_poke_call(call, busy, what);
+ if (!busy) {
+ rxrpc_get_call(call, rxrpc_call_get_poke);
+ list_add_tail(&call->attend_link, &local->call_attend_q);
+ }
+ spin_unlock_bh(&local->lock);
+ rxrpc_wake_up_io_thread(local);
+ }
+}
+
static void rxrpc_call_timer_expired(struct timer_list *t)
{
struct rxrpc_call *call = from_timer(call, t, timer);
@@ -53,9 +71,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
if (call->state < RXRPC_CALL_COMPLETE) {
trace_rxrpc_timer_expired(call, jiffies);
- __rxrpc_queue_call(call);
- } else {
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
}
@@ -64,21 +80,14 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call,
unsigned long now,
enum rxrpc_timer_trace why)
{
- if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) {
- trace_rxrpc_timer(call, why, now);
- if (timer_reduce(&call->timer, expire_at))
- rxrpc_put_call(call, rxrpc_call_put_notimer);
- }
-}
-
-void rxrpc_delete_call_timer(struct rxrpc_call *call)
-{
- if (del_timer_sync(&call->timer))
- rxrpc_put_call(call, rxrpc_call_put_timer);
+ trace_rxrpc_timer(call, why, now);
+ timer_reduce(&call->timer, expire_at);
}
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
+static void rxrpc_destroy_call(struct work_struct *);
+
/*
* find an extant server call
* - called in process context with IRQs enabled
@@ -110,7 +119,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
return NULL;
found_extant_call:
- rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_get_call(call, rxrpc_call_get_sendmsg);
read_unlock(&rx->call_lock);
_leave(" = %p [%d]", call, refcount_read(&call->ref));
return call;
@@ -139,20 +148,20 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
&rxrpc_call_user_mutex_lock_class_key);
timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
- INIT_WORK(&call->processor, &rxrpc_process_call);
+ INIT_WORK(&call->destroyer, rxrpc_destroy_call);
INIT_LIST_HEAD(&call->link);
INIT_LIST_HEAD(&call->chan_wait_link);
INIT_LIST_HEAD(&call->accept_link);
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
+ INIT_LIST_HEAD(&call->attend_link);
+ INIT_LIST_HEAD(&call->tx_sendmsg);
INIT_LIST_HEAD(&call->tx_buffer);
skb_queue_head_init(&call->recvmsg_queue);
skb_queue_head_init(&call->rx_oos_queue);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
- spin_lock_init(&call->input_lock);
- spin_lock_init(&call->acks_ack_lock);
rwlock_init(&call->state_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
@@ -185,22 +194,45 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
*/
static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
struct sockaddr_rxrpc *srx,
+ struct rxrpc_conn_parameters *cp,
+ struct rxrpc_call_params *p,
gfp_t gfp,
unsigned int debug_id)
{
struct rxrpc_call *call;
ktime_t now;
+ int ret;
_enter("");
call = rxrpc_alloc_call(rx, gfp, debug_id);
if (!call)
return ERR_PTR(-ENOMEM);
- call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
- call->service_id = srx->srx_service;
now = ktime_get_real();
- call->acks_latest_ts = now;
- call->cong_tstamp = now;
+ call->acks_latest_ts = now;
+ call->cong_tstamp = now;
+ call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
+ call->dest_srx = *srx;
+ call->interruptibility = p->interruptibility;
+ call->tx_total_len = p->tx_total_len;
+ call->key = key_get(cp->key);
+ call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call);
+ if (p->kernel)
+ __set_bit(RXRPC_CALL_KERNEL, &call->flags);
+ if (cp->upgrade)
+ __set_bit(RXRPC_CALL_UPGRADE, &call->flags);
+ if (cp->exclusive)
+ __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+
+ ret = rxrpc_init_client_call_security(call);
+ if (ret < 0) {
+ __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
+ rxrpc_put_call(call, rxrpc_call_put_discard_error);
+ return ERR_PTR(ret);
+ }
+
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ p->user_call_ID, rxrpc_call_new_client);
_leave(" = %p", call);
return call;
@@ -218,6 +250,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
call->ack_lost_at = j;
call->resend_at = j;
call->ping_at = j;
+ call->keepalive_at = j;
call->expect_rx_by = j;
call->expect_req_by = j;
call->expect_term_by = j;
@@ -270,7 +303,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_net *rxnet;
struct semaphore *limiter;
struct rb_node *parent, **pp;
- const void *here = __builtin_return_address(0);
int ret;
_enter("%p,%lx", rx, p->user_call_ID);
@@ -281,7 +313,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return ERR_PTR(-ERESTARTSYS);
}
- call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
+ call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id);
if (IS_ERR(call)) {
release_sock(&rx->sk);
up(limiter);
@@ -289,14 +321,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return call;
}
- call->interruptibility = p->interruptibility;
- call->tx_total_len = p->tx_total_len;
- trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
- refcount_read(&call->ref),
- here, (const void *)p->user_call_ID);
- if (p->kernel)
- __set_bit(RXRPC_CALL_KERNEL, &call->flags);
-
/* We need to protect a partially set up call against the user as we
* will be acting outside the socket lock.
*/
@@ -322,7 +346,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
rcu_assign_pointer(call->socket, rx);
call->user_call_ID = p->user_call_ID;
__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
- rxrpc_get_call(call, rxrpc_call_got_userid);
+ rxrpc_get_call(call, rxrpc_call_get_userid);
rb_link_node(&call->sock_node, parent, pp);
rb_insert_color(&call->sock_node, &rx->calls);
list_add(&call->sock_link, &rx->sock_calls);
@@ -330,9 +354,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
list_add_tail_rcu(&call->link, &rxnet->calls);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
/* From this point on, the call is protected by its own lock. */
release_sock(&rx->sk);
@@ -344,13 +368,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
if (ret < 0)
goto error_attached_to_socket;
- trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
- refcount_read(&call->ref), here, NULL);
+ rxrpc_see_call(call, rxrpc_call_see_connected);
rxrpc_start_call_timer(call);
- _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
-
_leave(" = %p [new]", call);
return call;
@@ -364,11 +385,11 @@ error_dup_user_ID:
release_sock(&rx->sk);
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, -EEXIST);
- trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- refcount_read(&call->ref), here, ERR_PTR(-EEXIST));
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
+ rxrpc_call_see_userid_exists);
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_userid_exists);
_leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
@@ -378,8 +399,8 @@ error_dup_user_ID:
* leave the error to recvmsg() to deal with.
*/
error_attached_to_socket:
- trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- refcount_read(&call->ref), here, ERR_PTR(ret));
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
+ rxrpc_call_see_connect_failed);
set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, ret);
@@ -403,11 +424,34 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
rcu_assign_pointer(call->socket, rx);
call->call_id = sp->hdr.callNumber;
- call->service_id = sp->hdr.serviceId;
+ call->dest_srx.srx_service = sp->hdr.serviceId;
call->cid = sp->hdr.cid;
call->state = RXRPC_CALL_SERVER_SECURING;
call->cong_tstamp = skb->tstamp;
+ spin_lock(&conn->state_lock);
+
+ switch (conn->state) {
+ case RXRPC_CONN_SERVICE_UNSECURED:
+ case RXRPC_CONN_SERVICE_CHALLENGING:
+ call->state = RXRPC_CALL_SERVER_SECURING;
+ break;
+ case RXRPC_CONN_SERVICE:
+ call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ break;
+
+ case RXRPC_CONN_REMOTELY_ABORTED:
+ __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ conn->abort_code, conn->error);
+ break;
+ case RXRPC_CONN_LOCALLY_ABORTED:
+ __rxrpc_abort_call("CON", call, 1,
+ conn->abort_code, conn->error);
+ break;
+ default:
+ BUG();
+ }
+
/* Set the channel for this call. We don't get channel_lock as we're
* only defending against the data_ready handler (which we're called
* from) and the RESPONSE packet parser (which is only really
@@ -418,86 +462,48 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
conn->channels[chan].call_counter = call->call_id;
conn->channels[chan].call_id = call->call_id;
rcu_assign_pointer(conn->channels[chan].call, call);
+ spin_unlock(&conn->state_lock);
- spin_lock(&conn->params.peer->lock);
- hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets);
- spin_unlock(&conn->params.peer->lock);
-
- _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+ spin_lock(&conn->peer->lock);
+ hlist_add_head(&call->error_link, &conn->peer->error_targets);
+ spin_unlock(&conn->peer->lock);
rxrpc_start_call_timer(call);
_leave("");
}
/*
- * Queue a call's work processor, getting a ref to pass to the work queue.
- */
-bool rxrpc_queue_call(struct rxrpc_call *call)
-{
- const void *here = __builtin_return_address(0);
- int n;
-
- if (!__refcount_inc_not_zero(&call->ref, &n))
- return false;
- if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
- here, NULL);
- else
- rxrpc_put_call(call, rxrpc_call_put_noqueue);
- return true;
-}
-
-/*
- * Queue a call's work processor, passing the callers ref to the work queue.
- */
-bool __rxrpc_queue_call(struct rxrpc_call *call)
-{
- const void *here = __builtin_return_address(0);
- int n = refcount_read(&call->ref);
- ASSERTCMP(n, >=, 1);
- if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
- here, NULL);
- else
- rxrpc_put_call(call, rxrpc_call_put_noqueue);
- return true;
-}
-
-/*
* Note the re-emergence of a call.
*/
-void rxrpc_see_call(struct rxrpc_call *call)
+void rxrpc_see_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
- const void *here = __builtin_return_address(0);
if (call) {
- int n = refcount_read(&call->ref);
+ int r = refcount_read(&call->ref);
- trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
- here, NULL);
+ trace_rxrpc_call(call->debug_id, r, 0, why);
}
}
-bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *call,
+ enum rxrpc_call_trace why)
{
- const void *here = __builtin_return_address(0);
- int n;
+ int r;
- if (!__refcount_inc_not_zero(&call->ref, &n))
- return false;
- trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
- return true;
+ if (!call || !__refcount_inc_not_zero(&call->ref, &r))
+ return NULL;
+ trace_rxrpc_call(call->debug_id, r + 1, 0, why);
+ return call;
}
/*
* Note the addition of a ref on a call.
*/
-void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
- const void *here = __builtin_return_address(0);
- int n;
+ int r;
- __refcount_inc(&call->ref, &n);
- trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
+ __refcount_inc(&call->ref, &r);
+ trace_rxrpc_call(call->debug_id, r + 1, 0, why);
}
/*
@@ -514,15 +520,13 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
*/
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
- const void *here = __builtin_return_address(0);
struct rxrpc_connection *conn = call->conn;
bool put = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
- trace_rxrpc_call(call->debug_id, rxrpc_call_release,
- refcount_read(&call->ref),
- here, (const void *)call->flags);
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ call->flags, rxrpc_call_see_release);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -530,10 +534,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
BUG();
rxrpc_put_call_slot(call);
- rxrpc_delete_call_timer(call);
+ del_timer_sync(&call->timer);
/* Make sure we don't get any more notifications */
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -546,16 +550,16 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->recvmsg_link.next = NULL;
call->recvmsg_link.prev = NULL;
- write_unlock_bh(&rx->recvmsg_lock);
+ write_unlock(&rx->recvmsg_lock);
if (put)
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_unnotify);
write_lock(&rx->call_lock);
if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
rb_erase(&call->sock_node, &rx->calls);
memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
- rxrpc_put_call(call, rxrpc_call_put_userid);
+ rxrpc_put_call(call, rxrpc_call_put_userid_exists);
}
list_del(&call->sock_link);
@@ -584,17 +588,17 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
}
while (!list_empty(&rx->sock_calls)) {
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
- rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_get_call(call, rxrpc_call_get_release_sock);
rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
_leave("");
@@ -603,26 +607,24 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
/*
* release a call
*/
-void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
struct rxrpc_net *rxnet = call->rxnet;
- const void *here = __builtin_return_address(0);
unsigned int debug_id = call->debug_id;
bool dead;
- int n;
+ int r;
ASSERT(call != NULL);
- dead = __refcount_dec_and_test(&call->ref, &n);
- trace_rxrpc_call(debug_id, op, n, here, NULL);
+ dead = __refcount_dec_and_test(&call->ref, &r);
+ trace_rxrpc_call(debug_id, r - 1, 0, why);
if (dead) {
- _debug("call %d dead", call->debug_id);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
list_del_init(&call->link);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
}
rxrpc_cleanup_call(call);
@@ -630,36 +632,45 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
}
/*
- * Final call destruction - but must be done in process context.
+ * Free up the call under RCU.
*/
-static void rxrpc_destroy_call(struct work_struct *work)
+static void rxrpc_rcu_free_call(struct rcu_head *rcu)
{
- struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
- struct rxrpc_net *rxnet = call->rxnet;
-
- rxrpc_delete_call_timer(call);
+ struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+ struct rxrpc_net *rxnet = READ_ONCE(call->rxnet);
- rxrpc_put_connection(call->conn);
- rxrpc_put_peer(call->peer);
kmem_cache_free(rxrpc_call_jar, call);
if (atomic_dec_and_test(&rxnet->nr_calls))
wake_up_var(&rxnet->nr_calls);
}
/*
- * Final call destruction under RCU.
+ * Final call destruction - but must be done in process context.
*/
-static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+static void rxrpc_destroy_call(struct work_struct *work)
{
- struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+ struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer);
+ struct rxrpc_txbuf *txb;
- if (in_softirq()) {
- INIT_WORK(&call->processor, rxrpc_destroy_call);
- if (!rxrpc_queue_work(&call->processor))
- BUG();
- } else {
- rxrpc_destroy_call(&call->processor);
+ del_timer_sync(&call->timer);
+
+ rxrpc_cleanup_ring(call);
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
}
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
+ }
+
+ rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
+ rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
+ rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
+ rxrpc_put_local(call->local, rxrpc_local_put_call);
+ call_rcu(&call->rcu, rxrpc_rcu_free_call);
}
/*
@@ -667,25 +678,20 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
*/
void rxrpc_cleanup_call(struct rxrpc_call *call)
{
- struct rxrpc_txbuf *txb;
-
- _net("DESTROY CALL %d", call->debug_id);
-
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
- rxrpc_cleanup_ring(call);
- while ((txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link))) {
- list_del(&txb->call_link);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
- }
- rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
- rxrpc_free_skb(call->acks_soft_tbl, rxrpc_skb_cleaned);
+ del_timer(&call->timer);
- call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
+ if (rcu_read_lock_held())
+ /* Can't use the rxrpc workqueue as we need to cancel/flush
+ * something that may be running/waiting there.
+ */
+ schedule_work(&call->destroyer);
+ else
+ rxrpc_destroy_call(&call->destroyer);
}
/*
@@ -700,14 +706,14 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
_enter("");
if (!list_empty(&rxnet->calls)) {
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
while (!list_empty(&rxnet->calls)) {
call = list_entry(rxnet->calls.next,
struct rxrpc_call, link);
_debug("Zapping call %p", call);
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_zap);
list_del_init(&call->link);
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
@@ -715,12 +721,12 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
rxrpc_call_states[call->state],
call->flags, call->events);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
cond_resched();
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
}
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
}
atomic_dec(&rxnet->nr_calls);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index f020f308ed9e..a08e33c9e54b 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
DEFINE_IDR(rxrpc_client_conn_ids);
static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
+static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
+
/*
* Get a connection ID and epoch for a client connection from the global pool.
* The connection struct pointer is then recorded in the idr radix tree. The
@@ -49,7 +51,7 @@ static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
gfp_t gfp)
{
- struct rxrpc_net *rxnet = conn->params.local->rxnet;
+ struct rxrpc_net *rxnet = conn->rxnet;
int id;
_enter("");
@@ -120,36 +122,47 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
bundle = kzalloc(sizeof(*bundle), gfp);
if (bundle) {
- bundle->params = *cp;
- rxrpc_get_peer(bundle->params.peer);
+ bundle->local = cp->local;
+ bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle);
+ bundle->key = cp->key;
+ bundle->exclusive = cp->exclusive;
+ bundle->upgrade = cp->upgrade;
+ bundle->service_id = cp->service_id;
+ bundle->security_level = cp->security_level;
refcount_set(&bundle->ref, 1);
+ atomic_set(&bundle->active, 1);
spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
+ trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
}
return bundle;
}
-struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle)
+struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle,
+ enum rxrpc_bundle_trace why)
{
- refcount_inc(&bundle->ref);
+ int r;
+
+ __refcount_inc(&bundle->ref, &r);
+ trace_rxrpc_bundle(bundle->debug_id, r + 1, why);
return bundle;
}
static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
- rxrpc_put_peer(bundle->params.peer);
+ trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
+ rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
kfree(bundle);
}
-void rxrpc_put_bundle(struct rxrpc_bundle *bundle)
+void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
{
- unsigned int d = bundle->debug_id;
+ unsigned int id = bundle->debug_id;
bool dead;
int r;
dead = __refcount_dec_and_test(&bundle->ref, &r);
-
- _debug("PUT B=%x %d", d, r);
+ trace_rxrpc_bundle(id, r - 1, why);
if (dead)
rxrpc_free_bundle(bundle);
}
@@ -161,12 +174,12 @@ static struct rxrpc_connection *
rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet = bundle->params.local->rxnet;
+ struct rxrpc_net *rxnet = bundle->local->rxnet;
int ret;
_enter("");
- conn = rxrpc_alloc_connection(gfp);
+ conn = rxrpc_alloc_connection(rxnet, gfp);
if (!conn) {
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
@@ -174,10 +187,16 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
refcount_set(&conn->ref, 1);
conn->bundle = bundle;
- conn->params = bundle->params;
+ conn->local = bundle->local;
+ conn->peer = bundle->peer;
+ conn->key = bundle->key;
+ conn->exclusive = bundle->exclusive;
+ conn->upgrade = bundle->upgrade;
+ conn->orig_service_id = bundle->service_id;
+ conn->security_level = bundle->security_level;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
conn->state = RXRPC_CONN_CLIENT;
- conn->service_id = conn->params.service_id;
+ conn->service_id = conn->orig_service_id;
ret = rxrpc_get_client_connection_id(conn, gfp);
if (ret < 0)
@@ -192,14 +211,13 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- rxrpc_get_bundle(bundle);
- rxrpc_get_peer(conn->params.peer);
- rxrpc_get_local(conn->params.local);
- key_get(conn->params.key);
+ rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
+ rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn);
+ rxrpc_get_local(conn->local, rxrpc_local_get_client_conn);
+ key_get(conn->key);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
- refcount_read(&conn->ref),
- __builtin_return_address(0));
+ trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
+ rxrpc_conn_new_client);
atomic_inc(&rxnet->nr_client_conns);
trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
@@ -225,7 +243,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
if (!conn)
goto dont_reuse;
- rxnet = conn->params.local->rxnet;
+ rxnet = conn->rxnet;
if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
goto dont_reuse;
@@ -282,7 +300,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
while (p) {
bundle = rb_entry(p, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->params.X - (long)cp->X)
+#define cmp(X) ((long)bundle->X - (long)cp->X)
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level) ?:
@@ -311,7 +329,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
parent = *pp;
bundle = rb_entry(parent, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->params.X - (long)cp->X)
+#define cmp(X) ((long)bundle->X - (long)cp->X)
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level) ?:
@@ -329,7 +347,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
rb_link_node(&candidate->local_node, parent, pp);
rb_insert_color(&candidate->local_node, &local->client_bundles);
- rxrpc_get_bundle(candidate);
+ rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
spin_unlock(&local->client_bundles_lock);
_leave(" = %u [new]", candidate->debug_id);
return candidate;
@@ -337,7 +355,8 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
found_bundle_free:
rxrpc_free_bundle(candidate);
found_bundle:
- rxrpc_get_bundle(bundle);
+ rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
+ atomic_inc(&bundle->active);
spin_unlock(&local->client_bundles_lock);
_leave(" = %u [found]", bundle->debug_id);
return bundle;
@@ -436,6 +455,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
if (old)
trace_rxrpc_client(old, -1, rxrpc_client_replace);
candidate->bundle_shift = shift;
+ atomic_inc(&bundle->active);
bundle->conns[i] = candidate;
for (j = 0; j < RXRPC_MAXCALLS; j++)
set_bit(shift + j, &bundle->avail_chans);
@@ -451,10 +471,10 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
if (candidate) {
_debug("discard C=%x", candidate->debug_id);
trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
- rxrpc_put_connection(candidate);
+ rxrpc_put_connection(candidate, rxrpc_conn_put_discard);
}
- rxrpc_put_connection(old);
+ rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
_leave("");
}
@@ -525,23 +545,21 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
clear_bit(conn->bundle_shift + channel, &bundle->avail_chans);
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_activate_client);
list_del_init(&call->chan_wait_link);
- call->peer = rxrpc_get_peer(conn->params.peer);
- call->conn = rxrpc_get_connection(conn);
+ call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call);
+ call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
call->security = conn->security;
call->security_ix = conn->security_ix;
- call->service_id = conn->service_id;
+ call->dest_srx.srx_service = conn->service_id;
trace_rxrpc_connect_call(call);
- _net("CONNECT call %08x:%08x as call %d on conn %d",
- call->cid, call->call_id, call->debug_id, conn->debug_id);
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
/* Paired with the read barrier in rxrpc_connect_call(). This orders
* cid and epoch in the connection wrt to call_id without the need to
@@ -566,7 +584,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
*/
static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn)
{
- struct rxrpc_net *rxnet = bundle->params.local->rxnet;
+ struct rxrpc_net *rxnet = bundle->local->rxnet;
bool drop_ref;
if (!list_empty(&conn->cache_link)) {
@@ -578,7 +596,7 @@ static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connecti
}
spin_unlock(&rxnet->client_conn_cache_lock);
if (drop_ref)
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
}
}
@@ -726,7 +744,8 @@ granted_channel:
smp_rmb();
out_put_bundle:
- rxrpc_put_bundle(bundle);
+ rxrpc_deactivate_bundle(bundle);
+ rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call);
out:
_leave(" = %d", ret);
return ret;
@@ -767,6 +786,10 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
if (chan->call_counter >= INT_MAX)
set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
trace_rxrpc_client(conn, channel, rxrpc_client_exposed);
+
+ spin_lock(&call->peer->lock);
+ hlist_add_head(&call->error_link, &call->peer->error_targets);
+ spin_unlock(&call->peer->lock);
}
}
@@ -791,7 +814,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
{
struct rxrpc_connection *conn;
struct rxrpc_channel *chan = NULL;
- struct rxrpc_net *rxnet = bundle->params.local->rxnet;
+ struct rxrpc_net *rxnet = bundle->local->rxnet;
unsigned int channel;
bool may_reuse;
u32 cid;
@@ -881,7 +904,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
conn->idle_timestamp = jiffies;
- rxrpc_get_connection(conn);
+ rxrpc_get_connection(conn, rxrpc_conn_get_idle);
spin_lock(&rxnet->client_conn_cache_lock);
list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
spin_unlock(&rxnet->client_conn_cache_lock);
@@ -901,9 +924,8 @@ out:
static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
{
struct rxrpc_bundle *bundle = conn->bundle;
- struct rxrpc_local *local = bundle->params.local;
unsigned int bindex;
- bool need_drop = false, need_put = false;
+ bool need_drop = false;
int i;
_enter("C=%x", conn->debug_id);
@@ -922,15 +944,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
}
spin_unlock(&bundle->channel_lock);
- /* If there are no more connections, remove the bundle */
- if (!bundle->avail_chans) {
- _debug("maybe unbundle");
- spin_lock(&local->client_bundles_lock);
+ if (need_drop) {
+ rxrpc_deactivate_bundle(bundle);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unbundle);
+ }
+}
+
+/*
+ * Drop the active count on a bundle.
+ */
+static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+{
+ struct rxrpc_local *local = bundle->local;
+ bool need_put = false;
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
- if (bundle->conns[i])
- break;
- if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) {
+ if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
+ if (!bundle->exclusive) {
_debug("erase bundle");
rb_erase(&bundle->local_node, &local->client_bundles);
need_put = true;
@@ -938,20 +967,16 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
spin_unlock(&local->client_bundles_lock);
if (need_put)
- rxrpc_put_bundle(bundle);
+ rxrpc_put_bundle(bundle, rxrpc_bundle_put_discard);
}
-
- if (need_drop)
- rxrpc_put_connection(conn);
- _leave("");
}
/*
* Clean up a dead client connection.
*/
-static void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
+void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_local *local = conn->params.local;
+ struct rxrpc_local *local = conn->local;
struct rxrpc_net *rxnet = local->rxnet;
_enter("C=%x", conn->debug_id);
@@ -960,23 +985,6 @@ static void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
atomic_dec(&rxnet->nr_client_conns);
rxrpc_put_client_connection_id(conn);
- rxrpc_kill_connection(conn);
-}
-
-/*
- * Clean up a dead client connections.
- */
-void rxrpc_put_client_conn(struct rxrpc_connection *conn)
-{
- const void *here = __builtin_return_address(0);
- unsigned int debug_id = conn->debug_id;
- bool dead;
- int r;
-
- dead = __refcount_dec_and_test(&conn->ref, &r);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here);
- if (dead)
- rxrpc_kill_client_conn(conn);
}
/*
@@ -1002,7 +1010,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work)
}
/* Don't double up on the discarding */
- if (!spin_trylock(&rxnet->client_conn_discard_lock)) {
+ if (!mutex_trylock(&rxnet->client_conn_discard_lock)) {
_leave(" [already]");
return;
}
@@ -1030,7 +1038,7 @@ next:
expiry = rxrpc_conn_idle_client_expiry;
if (nr_conns > rxrpc_reap_client_connections)
expiry = rxrpc_conn_idle_client_fast_expiry;
- if (conn->params.local->service_closed)
+ if (conn->local->service_closed)
expiry = rxrpc_closed_conn_expiry * HZ;
conn_expires_at = conn->idle_timestamp + expiry;
@@ -1040,13 +1048,15 @@ next:
goto not_yet_expired;
}
+ atomic_dec(&conn->active);
trace_rxrpc_client(conn, -1, rxrpc_client_discard);
list_del_init(&conn->cache_link);
spin_unlock(&rxnet->client_conn_cache_lock);
rxrpc_unbundle_conn(conn);
- rxrpc_put_connection(conn); /* Drop the ->cache_link ref */
+ /* Drop the ->cache_link ref */
+ rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle);
nr_conns--;
goto next;
@@ -1065,7 +1075,7 @@ not_yet_expired:
out:
spin_unlock(&rxnet->client_conn_cache_lock);
- spin_unlock(&rxnet->client_conn_discard_lock);
+ mutex_unlock(&rxnet->client_conn_discard_lock);
_leave("");
}
@@ -1104,7 +1114,8 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns,
cache_link) {
- if (conn->params.local == local) {
+ if (conn->local == local) {
+ atomic_dec(&conn->active);
trace_rxrpc_client(conn, -1, rxrpc_client_discard);
list_move(&conn->cache_link, &graveyard);
}
@@ -1117,7 +1128,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
struct rxrpc_connection, cache_link);
list_del_init(&conn->cache_link);
rxrpc_unbundle_conn(conn);
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn, rxrpc_conn_put_local_dead);
}
_leave(" [culled]");
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index aab069701398..480364bcbf85 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -52,8 +52,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
if (skb && call_id != sp->hdr.callNumber)
return;
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -86,8 +86,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
break;
case RXRPC_PACKET_TYPE_ACK:
- mtu = conn->params.peer->if_mtu;
- mtu -= conn->params.peer->hdrsize;
+ mtu = conn->peer->if_mtu;
+ mtu -= conn->peer->hdrsize;
pkt.ack.bufferSpace = 0;
pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
pkt.ack.firstPacket = htonl(chan->last_seq + 1);
@@ -122,19 +122,17 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
switch (chan->last_type) {
case RXRPC_PACKET_TYPE_ABORT:
- _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code);
break;
case RXRPC_PACKET_TYPE_ACK:
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
pkt.ack.reason, 0);
- _proto("Tx ACK %%%u [re]", serial);
break;
}
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret,
rxrpc_tx_point_call_final_resend);
@@ -200,9 +198,9 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
_enter("%d,,%u,%u", conn->debug_id, error, abort_code);
/* generate a connection-level abort */
- spin_lock_bh(&conn->state_lock);
+ spin_lock(&conn->state_lock);
if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
_leave(" = 0 [already dead]");
return 0;
}
@@ -211,10 +209,10 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
conn->abort_code = abort_code;
conn->state = RXRPC_CONN_LOCALLY_ABORTED;
set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -242,9 +240,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
serial = atomic_inc_return(&conn->serial);
rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
whdr.serial = htonl(serial);
- _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code);
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_conn_abort);
@@ -254,7 +251,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ conn->peer->last_tx_at = ktime_get_seconds();
_leave(" = 0");
return 0;
@@ -268,12 +265,12 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
_enter("%p", call);
if (call) {
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
if (call->state == RXRPC_CALL_SERVER_SECURING) {
call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
rxrpc_notify_socket(call);
}
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
}
@@ -285,8 +282,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
u32 *_abort_code)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- __be32 wtmp;
- u32 abort_code;
int loop, ret;
if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
@@ -308,17 +303,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return 0;
case RXRPC_PACKET_TYPE_ABORT:
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &wtmp, sizeof(wtmp)) < 0) {
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_abort"));
- return -EPROTO;
- }
- abort_code = ntohl(wtmp);
- _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
-
conn->error = -ECONNABORTED;
- conn->abort_code = abort_code;
+ conn->abort_code = skb->priority;
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
@@ -334,23 +320,23 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return ret;
ret = conn->security->init_connection_security(
- conn, conn->params.key->payload.data[0]);
+ conn, conn->key->payload.data[0]);
if (ret < 0)
return ret;
spin_lock(&conn->bundle->channel_lock);
- spin_lock_bh(&conn->state_lock);
+ spin_lock(&conn->state_lock);
if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
conn->state = RXRPC_CONN_SERVICE;
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
rxrpc_call_is_secure(
rcu_dereference_protected(
conn->channels[loop].call,
lockdep_is_held(&conn->bundle->channel_lock)));
} else {
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
}
spin_unlock(&conn->bundle->channel_lock);
@@ -451,7 +437,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
- rxrpc_see_skb(skb, rxrpc_skb_seen);
+ rxrpc_see_skb(skb, rxrpc_skb_see_conn_work);
ret = rxrpc_process_event(conn, skb, &abort_code);
switch (ret) {
case -EPROTO:
@@ -463,7 +449,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
goto requeue_and_leave;
case -ECONNABORTED:
default:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
break;
}
}
@@ -477,7 +463,7 @@ requeue_and_leave:
protocol_error:
if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
return;
}
@@ -486,14 +472,70 @@ void rxrpc_process_connection(struct work_struct *work)
struct rxrpc_connection *conn =
container_of(work, struct rxrpc_connection, processor);
- rxrpc_see_connection(conn);
+ rxrpc_see_connection(conn, rxrpc_conn_see_work);
- if (__rxrpc_use_local(conn->params.local)) {
+ if (__rxrpc_use_local(conn->local, rxrpc_local_use_conn_work)) {
rxrpc_do_process_connection(conn);
- rxrpc_unuse_local(conn->params.local);
+ rxrpc_unuse_local(conn->local, rxrpc_local_unuse_conn_work);
}
+}
- rxrpc_put_connection(conn);
- _leave("");
- return;
+/*
+ * post connection-level events to the connection
+ * - this includes challenges, responses, some aborts and call terminal packet
+ * retransmission.
+ */
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ _enter("%p,%p", conn, skb);
+
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_work);
+ skb_queue_tail(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_rx_work);
+}
+
+/*
+ * Input a connection-level packet.
+ */
+int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
+ _leave(" = -ECONNABORTED [%u]", conn->state);
+ return -ECONNABORTED;
+ }
+
+ _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_DATA:
+ case RXRPC_PACKET_TYPE_ACK:
+ rxrpc_conn_retransmit_call(conn, skb,
+ sp->hdr.cid & RXRPC_CHANNELMASK);
+ return 0;
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ /* Just ignore BUSY packets for now. */
+ return 0;
+
+ case RXRPC_PACKET_TYPE_ABORT:
+ conn->error = -ECONNABORTED;
+ conn->abort_code = skb->priority;
+ conn->state = RXRPC_CONN_REMOTELY_ABORTED;
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
+ return -ECONNABORTED;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ rxrpc_post_packet_to_conn(conn, skb);
+ return 0;
+
+ default:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_conn_pkt"));
+ return -EPROTO;
+ }
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 156bd26daf74..3c8f83dacb2b 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -19,20 +19,23 @@
unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60;
unsigned int __read_mostly rxrpc_closed_conn_expiry = 10;
-static void rxrpc_destroy_connection(struct rcu_head *);
+static void rxrpc_clean_up_connection(struct work_struct *work);
+static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
+ unsigned long reap_at);
static void rxrpc_connection_timer(struct timer_list *timer)
{
struct rxrpc_connection *conn =
container_of(timer, struct rxrpc_connection, timer);
- rxrpc_queue_conn(conn);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_timer);
}
/*
* allocate a new connection
*/
-struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
+ gfp_t gfp)
{
struct rxrpc_connection *conn;
@@ -42,10 +45,12 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
if (conn) {
INIT_LIST_HEAD(&conn->cache_link);
timer_setup(&conn->timer, &rxrpc_connection_timer, 0);
- INIT_WORK(&conn->processor, &rxrpc_process_connection);
+ INIT_WORK(&conn->processor, rxrpc_process_connection);
+ INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
skb_queue_head_init(&conn->rx_queue);
+ conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
spin_lock_init(&conn->state_lock);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
@@ -67,89 +72,55 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
*
* The caller must be holding the RCU read lock.
*/
-struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
- struct sk_buff *skb,
- struct rxrpc_peer **_peer)
+struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
{
struct rxrpc_connection *conn;
- struct rxrpc_conn_proto k;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct sockaddr_rxrpc srx;
struct rxrpc_peer *peer;
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
- goto not_found;
-
- if (srx.transport.family != local->srx.transport.family &&
- (srx.transport.family == AF_INET &&
- local->srx.transport.family != AF_INET6)) {
- pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
- srx.transport.family,
- local->srx.transport.family);
+ /* Look up client connections by connection ID alone as their IDs are
+ * unique for this machine.
+ */
+ conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
+ if (!conn || refcount_read(&conn->ref) == 0) {
+ _debug("no conn");
goto not_found;
}
- k.epoch = sp->hdr.epoch;
- k.cid = sp->hdr.cid & RXRPC_CIDMASK;
-
- if (rxrpc_to_server(sp)) {
- /* We need to look up service connections by the full protocol
- * parameter set. We look up the peer first as an intermediate
- * step and then the connection from the peer's tree.
- */
- peer = rxrpc_lookup_peer_rcu(local, &srx);
- if (!peer)
- goto not_found;
- *_peer = peer;
- conn = rxrpc_find_service_conn_rcu(peer, skb);
- if (!conn || refcount_read(&conn->ref) == 0)
- goto not_found;
- _leave(" = %p", conn);
- return conn;
- } else {
- /* Look up client connections by connection ID alone as their
- * IDs are unique for this machine.
- */
- conn = idr_find(&rxrpc_client_conn_ids,
- sp->hdr.cid >> RXRPC_CIDSHIFT);
- if (!conn || refcount_read(&conn->ref) == 0) {
- _debug("no conn");
- goto not_found;
- }
+ if (conn->proto.epoch != sp->hdr.epoch ||
+ conn->local != local)
+ goto not_found;
- if (conn->proto.epoch != k.epoch ||
- conn->params.local != local)
+ peer = conn->peer;
+ switch (srx->transport.family) {
+ case AF_INET:
+ if (peer->srx.transport.sin.sin_port !=
+ srx->transport.sin.sin_port ||
+ peer->srx.transport.sin.sin_addr.s_addr !=
+ srx->transport.sin.sin_addr.s_addr)
goto not_found;
-
- peer = conn->params.peer;
- switch (srx.transport.family) {
- case AF_INET:
- if (peer->srx.transport.sin.sin_port !=
- srx.transport.sin.sin_port ||
- peer->srx.transport.sin.sin_addr.s_addr !=
- srx.transport.sin.sin_addr.s_addr)
- goto not_found;
- break;
+ break;
#ifdef CONFIG_AF_RXRPC_IPV6
- case AF_INET6:
- if (peer->srx.transport.sin6.sin6_port !=
- srx.transport.sin6.sin6_port ||
- memcmp(&peer->srx.transport.sin6.sin6_addr,
- &srx.transport.sin6.sin6_addr,
- sizeof(struct in6_addr)) != 0)
- goto not_found;
- break;
+ case AF_INET6:
+ if (peer->srx.transport.sin6.sin6_port !=
+ srx->transport.sin6.sin6_port ||
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr)) != 0)
+ goto not_found;
+ break;
#endif
- default:
- BUG();
- }
-
- _leave(" = %p", conn);
- return conn;
+ default:
+ BUG();
}
+ _leave(" = %p", conn);
+ return conn;
+
not_found:
_leave(" = NULL");
return NULL;
@@ -210,9 +181,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
call->peer->cong_ssthresh = call->cong_ssthresh;
if (!hlist_unhashed(&call->error_link)) {
- spin_lock_bh(&call->peer->lock);
- hlist_del_rcu(&call->error_link);
- spin_unlock_bh(&call->peer->lock);
+ spin_lock(&call->peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&call->peer->lock);
}
if (rxrpc_is_client_call(call))
@@ -224,79 +195,45 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
conn->idle_timestamp = jiffies;
-}
-
-/*
- * Kill off a connection.
- */
-void rxrpc_kill_connection(struct rxrpc_connection *conn)
-{
- struct rxrpc_net *rxnet = conn->params.local->rxnet;
-
- ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
- !rcu_access_pointer(conn->channels[1].call) &&
- !rcu_access_pointer(conn->channels[2].call) &&
- !rcu_access_pointer(conn->channels[3].call));
- ASSERT(list_empty(&conn->cache_link));
-
- write_lock(&rxnet->conn_lock);
- list_del_init(&conn->proc_link);
- write_unlock(&rxnet->conn_lock);
-
- /* Drain the Rx queue. Note that even though we've unpublished, an
- * incoming packet could still be being added to our Rx queue, so we
- * will need to drain it again in the RCU cleanup handler.
- */
- rxrpc_purge_queue(&conn->rx_queue);
-
- /* Leave final destruction to RCU. The connection processor work item
- * must carry a ref on the connection to prevent us getting here whilst
- * it is queued or running.
- */
- call_rcu(&conn->rcu, rxrpc_destroy_connection);
+ if (atomic_dec_and_test(&conn->active))
+ rxrpc_set_service_reap_timer(conn->rxnet,
+ jiffies + rxrpc_connection_expiry);
}
/*
* Queue a connection's work processor, getting a ref to pass to the work
* queue.
*/
-bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+void rxrpc_queue_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
- int r;
-
- if (!__refcount_inc_not_zero(&conn->ref, &r))
- return false;
- if (rxrpc_queue_work(&conn->processor))
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here);
- else
- rxrpc_put_connection(conn);
- return true;
+ if (atomic_read(&conn->active) >= 0 &&
+ rxrpc_queue_work(&conn->processor))
+ rxrpc_see_connection(conn, why);
}
/*
* Note the re-emergence of a connection.
*/
-void rxrpc_see_connection(struct rxrpc_connection *conn)
+void rxrpc_see_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
if (conn) {
- int n = refcount_read(&conn->ref);
+ int r = refcount_read(&conn->ref);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
+ trace_rxrpc_conn(conn->debug_id, r, why);
}
}
/*
* Get a ref on a connection.
*/
-struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
+struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
__refcount_inc(&conn->ref, &r);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here);
+ trace_rxrpc_conn(conn->debug_id, r + 1, why);
return conn;
}
@@ -304,14 +241,14 @@ struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
* Try to get a ref on a connection.
*/
struct rxrpc_connection *
-rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+rxrpc_get_connection_maybe(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
if (conn) {
if (__refcount_inc_not_zero(&conn->ref, &r))
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here);
+ trace_rxrpc_conn(conn->debug_id, r + 1, why);
else
conn = NULL;
}
@@ -329,49 +266,95 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
}
/*
- * Release a service connection
+ * destroy a virtual connection
*/
-void rxrpc_put_service_conn(struct rxrpc_connection *conn)
+static void rxrpc_rcu_free_connection(struct rcu_head *rcu)
{
- const void *here = __builtin_return_address(0);
- unsigned int debug_id = conn->debug_id;
- int r;
+ struct rxrpc_connection *conn =
+ container_of(rcu, struct rxrpc_connection, rcu);
+ struct rxrpc_net *rxnet = conn->rxnet;
- __refcount_dec(&conn->ref, &r);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here);
- if (r - 1 == 1)
- rxrpc_set_service_reap_timer(conn->params.local->rxnet,
- jiffies + rxrpc_connection_expiry);
+ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
+
+ trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
+ rxrpc_conn_free);
+ kfree(conn);
+
+ if (atomic_dec_and_test(&rxnet->nr_conns))
+ wake_up_var(&rxnet->nr_conns);
}
/*
- * destroy a virtual connection
+ * Clean up a dead connection.
*/
-static void rxrpc_destroy_connection(struct rcu_head *rcu)
+static void rxrpc_clean_up_connection(struct work_struct *work)
{
struct rxrpc_connection *conn =
- container_of(rcu, struct rxrpc_connection, rcu);
+ container_of(work, struct rxrpc_connection, destructor);
+ struct rxrpc_net *rxnet = conn->rxnet;
- _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
+ ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
+ !rcu_access_pointer(conn->channels[1].call) &&
+ !rcu_access_pointer(conn->channels[2].call) &&
+ !rcu_access_pointer(conn->channels[3].call));
+ ASSERT(list_empty(&conn->cache_link));
- ASSERTCMP(refcount_read(&conn->ref), ==, 0);
+ del_timer_sync(&conn->timer);
+ cancel_work_sync(&conn->processor); /* Processing may restart the timer */
+ del_timer_sync(&conn->timer);
- _net("DESTROY CONN %d", conn->debug_id);
+ write_lock(&rxnet->conn_lock);
+ list_del_init(&conn->proc_link);
+ write_unlock(&rxnet->conn_lock);
- del_timer_sync(&conn->timer);
rxrpc_purge_queue(&conn->rx_queue);
+ rxrpc_kill_client_conn(conn);
+
conn->security->clear(conn);
- key_put(conn->params.key);
- rxrpc_put_bundle(conn->bundle);
- rxrpc_put_peer(conn->params.peer);
+ key_put(conn->key);
+ rxrpc_put_bundle(conn->bundle, rxrpc_bundle_put_conn);
+ rxrpc_put_peer(conn->peer, rxrpc_peer_put_conn);
+ rxrpc_put_local(conn->local, rxrpc_local_put_kill_conn);
+
+ /* Drain the Rx queue. Note that even though we've unpublished, an
+ * incoming packet could still be being added to our Rx queue, so we
+ * will need to drain it again in the RCU cleanup handler.
+ */
+ rxrpc_purge_queue(&conn->rx_queue);
- if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns))
- wake_up_var(&conn->params.local->rxnet->nr_conns);
- rxrpc_put_local(conn->params.local);
+ call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
+}
- kfree(conn);
- _leave("");
+/*
+ * Drop a ref on a connection.
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
+{
+ unsigned int debug_id;
+ bool dead;
+ int r;
+
+ if (!conn)
+ return;
+
+ debug_id = conn->debug_id;
+ dead = __refcount_dec_and_test(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, r - 1, why);
+ if (dead) {
+ del_timer(&conn->timer);
+ cancel_work(&conn->processor);
+
+ if (in_softirq() || work_busy(&conn->processor) ||
+ timer_pending(&conn->timer))
+ /* Can't use the rxrpc workqueue as we need to cancel/flush
+ * something that may be running/waiting there.
+ */
+ schedule_work(&conn->destructor);
+ else
+ rxrpc_clean_up_connection(&conn->destructor);
+ }
}
/*
@@ -383,6 +366,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
struct rxrpc_net *rxnet =
container_of(work, struct rxrpc_net, service_conn_reaper);
unsigned long expire_at, earliest, idle_timestamp, now;
+ int active;
LIST_HEAD(graveyard);
@@ -393,20 +377,20 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
- ASSERTCMP(refcount_read(&conn->ref), >, 0);
- if (likely(refcount_read(&conn->ref) > 1))
+ ASSERTCMP(atomic_read(&conn->active), >=, 0);
+ if (likely(atomic_read(&conn->active) > 0))
continue;
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
continue;
- if (rxnet->live && !conn->params.local->dead) {
+ if (rxnet->live && !conn->local->dead) {
idle_timestamp = READ_ONCE(conn->idle_timestamp);
expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
- if (conn->params.local->service_closed)
+ if (conn->local->service_closed)
expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, refcount_read(&conn->ref),
+ _debug("reap CONN %d { a=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->active),
(long)expire_at - (long)now);
if (time_before(now, expire_at)) {
@@ -416,12 +400,13 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
}
}
- /* The usage count sits at 1 whilst the object is unused on the
- * list; we reduce that to 0 to make the object unavailable.
+ /* The activity count sits at 0 whilst the conn is unused on
+ * the list; we reduce that to -1 to make the conn unavailable.
*/
- if (!refcount_dec_if_one(&conn->ref))
+ active = 0;
+ if (!atomic_try_cmpxchg(&conn->active, &active, -1))
continue;
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
+ rxrpc_see_connection(conn, rxrpc_conn_see_reap_service);
if (rxrpc_conn_is_client(conn))
BUG();
@@ -443,8 +428,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
link);
list_del_init(&conn->link);
- ASSERTCMP(refcount_read(&conn->ref), ==, 0);
- rxrpc_kill_connection(conn);
+ ASSERTCMP(atomic_read(&conn->active), ==, -1);
+ rxrpc_put_connection(conn, rxrpc_conn_put_service_reaped);
}
_leave("");
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 6e6aa02c6f9e..2a55a88b2a5b 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -73,7 +73,7 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
struct rxrpc_conn_proto k = conn->proto;
struct rb_node **pp, *parent;
- write_seqlock_bh(&peer->service_conn_lock);
+ write_seqlock(&peer->service_conn_lock);
pp = &peer->service_conns.rb_node;
parent = NULL;
@@ -94,14 +94,14 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
rb_insert_color(&conn->service_node, &peer->service_conns);
conn_published:
set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
- write_sequnlock_bh(&peer->service_conn_lock);
+ write_sequnlock(&peer->service_conn_lock);
_leave(" = %d [new]", conn->debug_id);
return;
found_extant_conn:
if (refcount_read(&cursor->ref) == 0)
goto replace_old_connection;
- write_sequnlock_bh(&peer->service_conn_lock);
+ write_sequnlock(&peer->service_conn_lock);
/* We should not be able to get here. rxrpc_incoming_connection() is
* called in a non-reentrant context, so there can't be a race to
* insert a new connection.
@@ -125,7 +125,7 @@ replace_old_connection:
struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet,
gfp_t gfp)
{
- struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp);
+ struct rxrpc_connection *conn = rxrpc_alloc_connection(rxnet, gfp);
if (conn) {
/* We maintain an extra ref on the connection whilst it is on
@@ -133,7 +133,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
refcount_set(&conn->ref, 2);
- conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle);
+ conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle,
+ rxrpc_bundle_get_service_conn);
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
@@ -141,9 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- refcount_read(&conn->ref),
- __builtin_return_address(0));
+ rxrpc_see_connection(conn, rxrpc_conn_new_service);
}
return conn;
@@ -164,7 +163,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
conn->proto.epoch = sp->hdr.epoch;
conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
- conn->params.service_id = sp->hdr.serviceId;
+ conn->orig_service_id = sp->hdr.serviceId;
conn->service_id = sp->hdr.serviceId;
conn->security_ix = sp->hdr.securityIndex;
conn->out_clientflag = 0;
@@ -182,10 +181,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
conn->service_id == rx->service_upgrade.from)
conn->service_id = rx->service_upgrade.to;
- /* Make the connection a target for incoming packets. */
- rxrpc_publish_service_conn(conn->params.peer, conn);
+ atomic_set(&conn->active, 1);
- _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid);
+ /* Make the connection a target for incoming packets. */
+ rxrpc_publish_service_conn(conn->peer, conn);
}
/*
@@ -194,10 +193,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
*/
void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_peer *peer = conn->params.peer;
+ struct rxrpc_peer *peer = conn->peer;
- write_seqlock_bh(&peer->service_conn_lock);
+ write_seqlock(&peer->service_conn_lock);
if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
rb_erase(&conn->service_node, &peer->service_conns);
- write_sequnlock_bh(&peer->service_conn_lock);
+ write_sequnlock(&peer->service_conn_lock);
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index bdf70b81addc..d0e20e946e48 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* RxRPC packet reception
+/* Processing of received RxRPC packets
*
- * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
@@ -12,10 +12,8 @@
static void rxrpc_proto_abort(const char *why,
struct rxrpc_call *call, rxrpc_seq_t seq)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) {
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
- }
+ if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG))
+ rxrpc_send_abort_packet(call);
}
/*
@@ -58,25 +56,6 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
summary->cumulative_acks = cumulative_acks;
summary->dup_acks = call->cong_dup_acks;
- /* If we haven't transmitted anything for >1RTT, we should reset the
- * congestion management state.
- */
- if ((call->cong_mode == RXRPC_CALL_SLOW_START ||
- call->cong_mode == RXRPC_CALL_CONGEST_AVOIDANCE) &&
- ktime_before(ktime_add_us(call->tx_last_sent,
- call->peer->srtt_us >> 3),
- ktime_get_real())
- ) {
- change = rxrpc_cong_idle_reset;
- summary->mode = RXRPC_CALL_SLOW_START;
- if (RXRPC_TX_SMSS > 2190)
- summary->cwnd = 2;
- else if (RXRPC_TX_SMSS > 1095)
- summary->cwnd = 3;
- else
- summary->cwnd = 4;
- }
-
switch (call->cong_mode) {
case RXRPC_CALL_SLOW_START:
if (summary->saw_nacks)
@@ -174,8 +153,8 @@ out_no_clear_ca:
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
trace_rxrpc_congest(call, summary, acked_serial, change);
- if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
- rxrpc_queue_call(call);
+ if (resend)
+ rxrpc_resend(call, skb);
return;
packet_loss_detected:
@@ -197,6 +176,33 @@ send_extra_data:
}
/*
+ * Degrade the congestion window if we haven't transmitted a packet for >1RTT.
+ */
+void rxrpc_congestion_degrade(struct rxrpc_call *call)
+{
+ ktime_t rtt, now;
+
+ if (call->cong_mode != RXRPC_CALL_SLOW_START &&
+ call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
+ return;
+ if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ return;
+
+ rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
+ now = ktime_get_real();
+ if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now))
+ return;
+
+ trace_rxrpc_reset_cwnd(call, now);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset);
+ call->tx_last_sent = now;
+ call->cong_mode = RXRPC_CALL_SLOW_START;
+ call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh,
+ call->cong_cwnd * 3 / 4);
+ call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND);
+}
+
+/*
* Apply a hard ACK by advancing the Tx window.
*/
static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
@@ -338,7 +344,8 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
/*
* Process a DATA packet.
*/
-static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
+static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
+ bool *_notify)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
@@ -361,7 +368,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
seq + 1 != wtop) {
rxrpc_proto_abort("LSN", call, seq);
- goto err_free;
+ return;
}
} else {
if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
@@ -369,7 +376,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
call->debug_id, seq, window, wtop, wlimit);
rxrpc_proto_abort("LSA", call, seq);
- goto err_free;
+ return;
}
}
@@ -397,14 +404,18 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
+ else
+ atomic_inc_return(&call->ackr_nr_unacked);
window++;
if (after(window, wtop))
wtop = window;
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
+
spin_lock(&call->recvmsg_queue.lock);
rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue);
- skb = NULL;
+ *_notify = true;
while ((oos = skb_peek(&call->rx_oos_queue))) {
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
@@ -456,36 +467,26 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
if (after(osp->hdr.seq, seq)) {
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos);
__skb_queue_before(&call->rx_oos_queue, oos, skb);
goto oos_queued;
}
}
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos);
__skb_queue_tail(&call->rx_oos_queue, skb);
oos_queued:
trace_rxrpc_receive(call, last ? rxrpc_receive_oos_last : rxrpc_receive_oos,
sp->hdr.serial, sp->hdr.seq);
- skb = NULL;
}
send_ack:
- if (ack_reason < 0 &&
- atomic_inc_return(&call->ackr_nr_unacked) > 2 &&
- test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) {
- ack_reason = RXRPC_ACK_IDLE;
- } else if (ack_reason >= 0) {
- set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags);
- }
-
if (ack_reason >= 0)
rxrpc_send_ACK(call, ack_reason, serial,
rxrpc_propose_ack_input_data);
else
rxrpc_propose_delay_ACK(call, serial,
rxrpc_propose_ack_input_data);
-
-err_free:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
}
/*
@@ -498,6 +499,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
struct sk_buff *jskb;
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = skb->len - offset;
+ bool notify = false;
while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
if (len < RXRPC_JUMBO_SUBPKTLEN)
@@ -508,16 +510,17 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
&jhdr, sizeof(jhdr)) < 0)
goto protocol_error;
- jskb = skb_clone(skb, GFP_ATOMIC);
+ jskb = skb_clone(skb, GFP_NOFS);
if (!jskb) {
kdebug("couldn't clone");
return false;
}
- rxrpc_new_skb(jskb, rxrpc_skb_cloned_jumbo);
+ rxrpc_new_skb(jskb, rxrpc_skb_new_jumbo_subpacket);
jsp = rxrpc_skb(jskb);
jsp->offset = offset;
jsp->len = RXRPC_JUMBO_DATALEN;
- rxrpc_input_data_one(call, jskb);
+ rxrpc_input_data_one(call, jskb, &notify);
+ rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket);
sp->hdr.flags = jhdr.flags;
sp->hdr._rsvd = ntohs(jhdr._rsvd);
@@ -529,7 +532,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
sp->offset = offset;
sp->len = len;
- rxrpc_input_data_one(call, skb);
+ rxrpc_input_data_one(call, skb, &notify);
+ if (notify) {
+ trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
+ rxrpc_notify_socket(call);
+ }
return true;
protocol_error:
@@ -551,32 +558,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
atomic64_read(&call->ackr_window), call->rx_highest_seq,
skb->len, seq0);
- _proto("Rx DATA %%%u { #%u f=%02x }",
- sp->hdr.serial, seq0, sp->hdr.flags);
-
state = READ_ONCE(call->state);
- if (state >= RXRPC_CALL_COMPLETE) {
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ if (state >= RXRPC_CALL_COMPLETE)
return;
- }
-
- /* Unshare the packet so that it can be modified for in-place
- * decryption.
- */
- if (sp->hdr.securityIndex != 0) {
- struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC);
- if (!nskb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem);
- return;
- }
-
- if (nskb != skb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_received);
- skb = nskb;
- rxrpc_new_skb(skb, rxrpc_skb_unshared);
- sp = rxrpc_skb(skb);
- }
- }
if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
unsigned long timo = READ_ONCE(call->next_req_timo);
@@ -591,28 +575,23 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
}
}
- spin_lock(&call->input_lock);
-
/* Received data implicitly ACKs all of the request packets we sent
* when we're acting as a client.
*/
if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
!rxrpc_receiving_reply(call))
- goto out;
+ goto out_notify;
if (!rxrpc_input_split_jumbo(call, skb)) {
rxrpc_proto_abort("VLD", call, sp->hdr.seq);
- goto out;
+ goto out_notify;
}
skb = NULL;
-out:
+out_notify:
trace_rxrpc_notify_socket(call->debug_id, serial);
rxrpc_notify_socket(call);
-
- spin_unlock(&call->input_lock);
- rxrpc_free_skb(skb, rxrpc_skb_freed);
_leave(" [queued]");
}
@@ -671,32 +650,6 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
}
/*
- * Process the response to a ping that we sent to find out if we lost an ACK.
- *
- * If we got back a ping response that indicates a lower tx_top than what we
- * had at the time of the ping transmission, we adjudge all the DATA packets
- * sent between the response tx_top and the ping-time tx_top to have been lost.
- */
-static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call)
-{
- if (after(call->acks_lost_top, call->acks_prev_seq) &&
- !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
- rxrpc_queue_call(call);
-}
-
-/*
- * Process a ping response.
- */
-static void rxrpc_input_ping_response(struct rxrpc_call *call,
- ktime_t resp_time,
- rxrpc_serial_t acked_serial,
- rxrpc_serial_t ack_serial)
-{
- if (acked_serial == call->acks_lost_ping)
- rxrpc_input_check_for_lost_ack(call);
-}
-
-/*
* Process the extra information that may be appended to an ACK packet
*/
static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
@@ -708,11 +661,6 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
bool wake = false;
u32 rwind = ntohl(ackinfo->rwind);
- _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
- sp->hdr.serial,
- ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
- rwind, ntohl(ackinfo->jumbo_max));
-
if (rwind > RXRPC_TX_MAX_WINDOW)
rwind = RXRPC_TX_MAX_WINDOW;
if (call->tx_winsize != rwind) {
@@ -729,11 +677,10 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
peer = call->peer;
if (mtu < peer->maxdata) {
- spin_lock_bh(&peer->lock);
+ spin_lock(&peer->lock);
peer->maxdata = mtu;
peer->mtu = mtu + peer->hdrsize;
- spin_unlock_bh(&peer->lock);
- _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
+ spin_unlock(&peer->lock);
}
if (wake)
@@ -810,7 +757,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_ackinfo info;
- struct sk_buff *skb_old = NULL, *skb_put = skb;
rxrpc_serial_t ack_serial, acked_serial;
rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;
@@ -818,10 +764,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
_enter("");
offset = sizeof(struct rxrpc_wire_header);
- if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) {
- rxrpc_proto_abort("XAK", call, 0);
- goto out_not_locked;
- }
+ if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
+ return rxrpc_proto_abort("XAK", call, 0);
offset += sizeof(ack);
ack_serial = sp->hdr.serial;
@@ -855,7 +799,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
if (ack.reason == RXRPC_ACK_PING) {
- _proto("Rx ACK %%%u PING Request", ack_serial);
rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
rxrpc_propose_ack_respond_to_ping);
} else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
@@ -895,41 +838,25 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
first_soft_ack, call->acks_first_seq,
prev_pkt, call->acks_prev_seq);
- goto out_not_locked;
+ return;
}
info.rxMTU = 0;
ioffset = offset + nr_acks + 3;
if (skb->len >= ioffset + sizeof(info) &&
- skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) {
- rxrpc_proto_abort("XAI", call, 0);
- goto out_not_locked;
- }
+ skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
+ return rxrpc_proto_abort("XAI", call, 0);
if (nr_acks > 0)
skb_condense(skb);
- spin_lock(&call->input_lock);
-
- /* Discard any out-of-order or duplicate ACKs (inside lock). */
- if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
- trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->acks_first_seq,
- prev_pkt, call->acks_prev_seq);
- goto out;
- }
call->acks_latest_ts = skb->tstamp;
-
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
switch (ack.reason) {
case RXRPC_ACK_PING:
break;
- case RXRPC_ACK_PING_RESPONSE:
- rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
- ack_serial);
- fallthrough;
default:
if (after(acked_serial, call->acks_highest_serial))
call->acks_highest_serial = acked_serial;
@@ -940,10 +867,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (info.rxMTU)
rxrpc_input_ackinfo(call, skb, &info);
- if (first_soft_ack == 0) {
- rxrpc_proto_abort("AK0", call, 0);
- goto out;
- }
+ if (first_soft_ack == 0)
+ return rxrpc_proto_abort("AK0", call, 0);
/* Ignore ACKs unless we are or have just been transmitting. */
switch (READ_ONCE(call->state)) {
@@ -953,45 +878,27 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_CALL_SERVER_AWAIT_ACK:
break;
default:
- goto out;
+ return;
}
if (before(hard_ack, call->acks_hard_ack) ||
- after(hard_ack, call->tx_top)) {
- rxrpc_proto_abort("AKW", call, 0);
- goto out;
- }
- if (nr_acks > call->tx_top - hard_ack) {
- rxrpc_proto_abort("AKN", call, 0);
- goto out;
- }
+ after(hard_ack, call->tx_top))
+ return rxrpc_proto_abort("AKW", call, 0);
+ if (nr_acks > call->tx_top - hard_ack)
+ return rxrpc_proto_abort("AKN", call, 0);
if (after(hard_ack, call->acks_hard_ack)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
rxrpc_end_tx_phase(call, false, "ETA");
- goto out;
+ return;
}
}
if (nr_acks > 0) {
- if (offset > (int)skb->len - nr_acks) {
- rxrpc_proto_abort("XSA", call, 0);
- goto out;
- }
-
- spin_lock(&call->acks_ack_lock);
- skb_old = call->acks_soft_tbl;
- call->acks_soft_tbl = skb;
- spin_unlock(&call->acks_ack_lock);
-
+ if (offset > (int)skb->len - nr_acks)
+ return rxrpc_proto_abort("XSA", call, 0);
rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
nr_acks, &summary);
- skb_put = NULL;
- } else if (call->acks_soft_tbl) {
- spin_lock(&call->acks_ack_lock);
- skb_old = call->acks_soft_tbl;
- call->acks_soft_tbl = NULL;
- spin_unlock(&call->acks_ack_lock);
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
@@ -1001,11 +908,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_ping_for_lost_reply);
rxrpc_congestion_management(call, skb, &summary, acked_serial);
-out:
- spin_unlock(&call->input_lock);
-out_not_locked:
- rxrpc_free_skb(skb_put, rxrpc_skb_freed);
- rxrpc_free_skb(skb_old, rxrpc_skb_freed);
}
/*
@@ -1014,16 +916,9 @@ out_not_locked:
static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- _proto("Rx ACKALL %%%u", sp->hdr.serial);
-
- spin_lock(&call->input_lock);
if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
rxrpc_end_tx_phase(call, false, "ETL");
-
- spin_unlock(&call->input_lock);
}
/*
@@ -1032,35 +927,30 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- __be32 wtmp;
- u32 abort_code = RX_CALL_DEAD;
-
- _enter("");
-
- if (skb->len >= 4 &&
- skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &wtmp, sizeof(wtmp)) >= 0)
- abort_code = ntohl(wtmp);
- trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code);
-
- _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
+ trace_rxrpc_rx_abort(call, sp->hdr.serial, skb->priority);
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, -ECONNABORTED);
+ skb->priority, -ECONNABORTED);
}
/*
* Process an incoming call packet.
*/
-static void rxrpc_input_call_packet(struct rxrpc_call *call,
- struct sk_buff *skb)
+void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned long timo;
_enter("%p,%p", call, skb);
+ if (sp->hdr.serviceId != call->dest_srx.srx_service)
+ call->dest_srx.srx_service = sp->hdr.serviceId;
+ if ((int)sp->hdr.serial - (int)call->rx_serial > 0)
+ call->rx_serial = sp->hdr.serial;
+ if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags))
+ set_bit(RXRPC_CALL_RX_HEARD, &call->flags);
+
timo = READ_ONCE(call->next_rx_timo);
if (timo) {
unsigned long now = jiffies, expect_rx_by;
@@ -1074,15 +964,13 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
rxrpc_input_data(call, skb);
- goto no_free;
+ break;
case RXRPC_PACKET_TYPE_ACK:
rxrpc_input_ack(call, skb);
- goto no_free;
+ break;
case RXRPC_PACKET_TYPE_BUSY:
- _proto("Rx BUSY %%%u", sp->hdr.serial);
-
/* Just ignore BUSY packets from the server; the retry and
* lifespan timers will take care of business. BUSY packets
* from the client don't make sense.
@@ -1100,10 +988,6 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
default:
break;
}
-
- rxrpc_free_skb(skb, rxrpc_skb_freed);
-no_free:
- _leave("");
}
/*
@@ -1112,10 +996,10 @@ no_free:
*
* TODO: If callNumber > call_id + 1, renegotiate security.
*/
-static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx,
- struct rxrpc_connection *conn,
- struct rxrpc_call *call)
+void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
{
+ struct rxrpc_connection *conn = call->conn;
+
switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
@@ -1123,360 +1007,15 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx,
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) {
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
- }
+ if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN))
+ rxrpc_send_abort_packet(call);
trace_rxrpc_improper_term(call);
break;
}
- spin_lock(&rx->incoming_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&rx->incoming_lock);
-}
-
-/*
- * post connection-level events to the connection
- * - this includes challenges, responses, some aborts and call terminal packet
- * retransmission.
- */
-static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
- struct sk_buff *skb)
-{
- _enter("%p,%p", conn, skb);
-
- skb_queue_tail(&conn->rx_queue, skb);
- rxrpc_queue_conn(conn);
-}
-
-/*
- * post endpoint-level events to the local endpoint
- * - this includes debug and version messages
- */
-static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
- struct sk_buff *skb)
-{
- _enter("%p,%p", local, skb);
-
- if (rxrpc_get_local_maybe(local)) {
- skb_queue_tail(&local->event_queue, skb);
- rxrpc_queue_local(local);
- } else {
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- }
-}
-
-/*
- * put a packet up for transport-level abort
- */
-static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
-{
- if (rxrpc_get_local_maybe(local)) {
- skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_local(local);
- } else {
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- }
-}
-
-/*
- * Extract the wire header from a packet and translate the byte order.
- */
-static noinline
-int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
-{
- struct rxrpc_wire_header whdr;
-
- /* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_hdr"));
- return -EBADMSG;
- }
-
- memset(sp, 0, sizeof(*sp));
- sp->hdr.epoch = ntohl(whdr.epoch);
- sp->hdr.cid = ntohl(whdr.cid);
- sp->hdr.callNumber = ntohl(whdr.callNumber);
- sp->hdr.seq = ntohl(whdr.seq);
- sp->hdr.serial = ntohl(whdr.serial);
- sp->hdr.flags = whdr.flags;
- sp->hdr.type = whdr.type;
- sp->hdr.userStatus = whdr.userStatus;
- sp->hdr.securityIndex = whdr.securityIndex;
- sp->hdr._rsvd = ntohs(whdr._rsvd);
- sp->hdr.serviceId = ntohs(whdr.serviceId);
- return 0;
-}
-
-/*
- * handle data received on the local endpoint
- * - may be called in interrupt context
- *
- * [!] Note that as this is called from the encap_rcv hook, the socket is not
- * held locked by the caller and nothing prevents sk_user_data on the UDP from
- * being cleared in the middle of processing this function.
- *
- * Called with the RCU read lock held from the IP layer via UDP.
- */
-int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
-{
- struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
- struct rxrpc_connection *conn;
- struct rxrpc_channel *chan;
- struct rxrpc_call *call = NULL;
- struct rxrpc_skb_priv *sp;
- struct rxrpc_peer *peer = NULL;
- struct rxrpc_sock *rx = NULL;
- unsigned int channel;
-
- _enter("%p", udp_sk);
-
- if (unlikely(!local)) {
- kfree_skb(skb);
- return 0;
- }
- if (skb->tstamp == 0)
- skb->tstamp = ktime_get_real();
-
- rxrpc_new_skb(skb, rxrpc_skb_received);
-
- skb_pull(skb, sizeof(struct udphdr));
-
- /* The UDP protocol already released all skb resources;
- * we are free to add our own data there.
- */
- sp = rxrpc_skb(skb);
-
- /* dig out the RxRPC connection details */
- if (rxrpc_extract_header(sp, skb) < 0)
- goto bad_message;
-
- if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
- static int lose;
- if ((lose++ & 7) == 7) {
- trace_rxrpc_rx_lose(sp);
- rxrpc_free_skb(skb, rxrpc_skb_lost);
- return 0;
- }
- }
-
- if (skb->tstamp == 0)
- skb->tstamp = ktime_get_real();
- trace_rxrpc_rx_packet(sp);
-
- switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_VERSION:
- if (rxrpc_to_client(sp))
- goto discard;
- rxrpc_post_packet_to_local(local, skb);
- goto out;
-
- case RXRPC_PACKET_TYPE_BUSY:
- if (rxrpc_to_server(sp))
- goto discard;
- fallthrough;
- case RXRPC_PACKET_TYPE_ACK:
- case RXRPC_PACKET_TYPE_ACKALL:
- if (sp->hdr.callNumber == 0)
- goto bad_message;
- fallthrough;
- case RXRPC_PACKET_TYPE_ABORT:
- break;
-
- case RXRPC_PACKET_TYPE_DATA:
- if (sp->hdr.callNumber == 0 ||
- sp->hdr.seq == 0)
- goto bad_message;
-
- /* Unshare the packet so that it can be modified for in-place
- * decryption.
- */
- if (sp->hdr.securityIndex != 0) {
- struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC);
- if (!nskb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem);
- goto out;
- }
-
- if (nskb != skb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_received);
- skb = nskb;
- rxrpc_new_skb(skb, rxrpc_skb_unshared);
- sp = rxrpc_skb(skb);
- }
- }
- break;
-
- case RXRPC_PACKET_TYPE_CHALLENGE:
- if (rxrpc_to_server(sp))
- goto discard;
- break;
- case RXRPC_PACKET_TYPE_RESPONSE:
- if (rxrpc_to_client(sp))
- goto discard;
- break;
-
- /* Packet types 9-11 should just be ignored. */
- case RXRPC_PACKET_TYPE_PARAMS:
- case RXRPC_PACKET_TYPE_10:
- case RXRPC_PACKET_TYPE_11:
- goto discard;
-
- default:
- _proto("Rx Bad Packet Type %u", sp->hdr.type);
- goto bad_message;
- }
-
- if (sp->hdr.serviceId == 0)
- goto bad_message;
-
- if (rxrpc_to_server(sp)) {
- /* Weed out packets to services we're not offering. Packets
- * that would begin a call are explicitly rejected and the rest
- * are just discarded.
- */
- rx = rcu_dereference(local->service);
- if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
- sp->hdr.serviceId != rx->second_service)) {
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- sp->hdr.seq == 1)
- goto unsupported_service;
- goto discard;
- }
- }
-
- conn = rxrpc_find_connection_rcu(local, skb, &peer);
- if (conn) {
- if (sp->hdr.securityIndex != conn->security_ix)
- goto wrong_security;
-
- if (sp->hdr.serviceId != conn->service_id) {
- int old_id;
-
- if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
- goto reupgrade;
- old_id = cmpxchg(&conn->service_id, conn->params.service_id,
- sp->hdr.serviceId);
-
- if (old_id != conn->params.service_id &&
- old_id != sp->hdr.serviceId)
- goto reupgrade;
- }
-
- if (sp->hdr.callNumber == 0) {
- /* Connection-level packet */
- _debug("CONN %p {%d}", conn, conn->debug_id);
- rxrpc_post_packet_to_conn(conn, skb);
- goto out;
- }
-
- if ((int)sp->hdr.serial - (int)conn->hi_serial > 0)
- conn->hi_serial = sp->hdr.serial;
-
- /* Call-bound packets are routed by connection channel. */
- channel = sp->hdr.cid & RXRPC_CHANNELMASK;
- chan = &conn->channels[channel];
-
- /* Ignore really old calls */
- if (sp->hdr.callNumber < chan->last_call)
- goto discard;
-
- if (sp->hdr.callNumber == chan->last_call) {
- if (chan->call ||
- sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
- goto discard;
-
- /* For the previous service call, if completed
- * successfully, we discard all further packets.
- */
- if (rxrpc_conn_is_service(conn) &&
- chan->last_type == RXRPC_PACKET_TYPE_ACK)
- goto discard;
-
- /* But otherwise we need to retransmit the final packet
- * from data cached in the connection record.
- */
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
- trace_rxrpc_rx_data(chan->call_debug_id,
- sp->hdr.seq,
- sp->hdr.serial,
- sp->hdr.flags);
- rxrpc_post_packet_to_conn(conn, skb);
- goto out;
- }
-
- call = rcu_dereference(chan->call);
-
- if (sp->hdr.callNumber > chan->call_id) {
- if (rxrpc_to_client(sp))
- goto reject_packet;
- if (call)
- rxrpc_input_implicit_end_call(rx, conn, call);
- call = NULL;
- }
-
- if (call) {
- if (sp->hdr.serviceId != call->service_id)
- call->service_id = sp->hdr.serviceId;
- if ((int)sp->hdr.serial - (int)call->rx_serial > 0)
- call->rx_serial = sp->hdr.serial;
- if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags))
- set_bit(RXRPC_CALL_RX_HEARD, &call->flags);
- }
- }
-
- if (!call || refcount_read(&call->ref) == 0) {
- if (rxrpc_to_client(sp) ||
- sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
- goto bad_message;
- if (sp->hdr.seq != 1)
- goto discard;
- call = rxrpc_new_incoming_call(local, rx, skb);
- if (!call)
- goto reject_packet;
- }
-
- /* Process a call packet; this either discards or passes on the ref
- * elsewhere.
- */
- rxrpc_input_call_packet(call, skb);
- goto out;
+ rxrpc_input_call_event(call, skb);
-discard:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
-out:
- trace_rxrpc_rx_done(0, 0);
- return 0;
-
-wrong_security:
- trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- skb->priority = RXKADINCONSISTENCY;
- goto post_abort;
-
-unsupported_service:
- trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EOPNOTSUPP);
- skb->priority = RX_INVALID_OPERATION;
- goto post_abort;
-
-reupgrade:
- trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
- goto protocol_error;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
-post_abort:
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- trace_rxrpc_rx_done(skb->mark, skb->priority);
- rxrpc_reject_packet(local, skb);
- _leave(" [badmsg]");
- return 0;
+ spin_lock(&conn->bundle->channel_lock);
+ __rxrpc_disconnect_call(conn, call);
+ spin_unlock(&conn->bundle->channel_lock);
}
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
new file mode 100644
index 000000000000..d83ae3193032
--- /dev/null
+++ b/net/rxrpc/io_thread.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC packet reception
+ *
+ * Copyright (C) 2007, 2016, 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "ar-internal.h"
+
+static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb);
+
+/*
+ * handle data received on the local endpoint
+ * - may be called in interrupt context
+ *
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
+ *
+ * Called with the RCU read lock held from the IP layer via UDP.
+ */
+int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
+{
+ struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
+
+ if (unlikely(!local)) {
+ kfree_skb(skb);
+ return 0;
+ }
+ if (skb->tstamp == 0)
+ skb->tstamp = ktime_get_real();
+
+ skb->mark = RXRPC_SKB_MARK_PACKET;
+ rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv);
+ skb_queue_tail(&local->rx_queue, skb);
+ rxrpc_wake_up_io_thread(local);
+ return 0;
+}
+
+/*
+ * Handle an error received on the local endpoint.
+ */
+void rxrpc_error_report(struct sock *sk)
+{
+ struct rxrpc_local *local;
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+ local = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!local)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ while ((skb = skb_dequeue(&sk->sk_error_queue))) {
+ skb->mark = RXRPC_SKB_MARK_ERROR;
+ rxrpc_new_skb(skb, rxrpc_skb_new_error_report);
+ skb_queue_tail(&local->rx_queue, skb);
+ }
+
+ rxrpc_wake_up_io_thread(local);
+ rcu_read_unlock();
+}
+
+/*
+ * Process event packets targeted at a local endpoint.
+ */
+static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ char v;
+
+ _enter("");
+
+ rxrpc_see_skb(skb, rxrpc_skb_see_version);
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &v, 1) >= 0) {
+ if (v == 0)
+ rxrpc_send_version_request(local, &sp->hdr, skb);
+ }
+}
+
+/*
+ * Extract the wire header from a packet and translate the byte order.
+ */
+static noinline
+int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
+{
+ struct rxrpc_wire_header whdr;
+
+ /* dig out the RxRPC connection details */
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_hdr"));
+ return -EBADMSG;
+ }
+
+ memset(sp, 0, sizeof(*sp));
+ sp->hdr.epoch = ntohl(whdr.epoch);
+ sp->hdr.cid = ntohl(whdr.cid);
+ sp->hdr.callNumber = ntohl(whdr.callNumber);
+ sp->hdr.seq = ntohl(whdr.seq);
+ sp->hdr.serial = ntohl(whdr.serial);
+ sp->hdr.flags = whdr.flags;
+ sp->hdr.type = whdr.type;
+ sp->hdr.userStatus = whdr.userStatus;
+ sp->hdr.securityIndex = whdr.securityIndex;
+ sp->hdr._rsvd = ntohs(whdr._rsvd);
+ sp->hdr.serviceId = ntohs(whdr.serviceId);
+ return 0;
+}
+
+/*
+ * Extract the abort code from an ABORT packet and stash it in skb->priority.
+ */
+static bool rxrpc_extract_abort(struct sk_buff *skb)
+{
+ __be32 wtmp;
+
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &wtmp, sizeof(wtmp)) < 0)
+ return false;
+ skb->priority = ntohl(wtmp);
+ return true;
+}
+
+/*
+ * Process packets received on the local endpoint
+ */
+static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
+{
+ struct rxrpc_connection *conn;
+ struct sockaddr_rxrpc peer_srx;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_peer *peer = NULL;
+ struct sk_buff *skb = *_skb;
+ int ret = 0;
+
+ skb_pull(skb, sizeof(struct udphdr));
+
+ sp = rxrpc_skb(skb);
+
+ /* dig out the RxRPC connection details */
+ if (rxrpc_extract_header(sp, skb) < 0)
+ goto bad_message;
+
+ if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+ static int lose;
+ if ((lose++ & 7) == 7) {
+ trace_rxrpc_rx_lose(sp);
+ return 0;
+ }
+ }
+
+ trace_rxrpc_rx_packet(sp);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_VERSION:
+ if (rxrpc_to_client(sp))
+ return 0;
+ rxrpc_input_version(local, skb);
+ return 0;
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ if (rxrpc_to_server(sp))
+ return 0;
+ fallthrough;
+ case RXRPC_PACKET_TYPE_ACK:
+ case RXRPC_PACKET_TYPE_ACKALL:
+ if (sp->hdr.callNumber == 0)
+ goto bad_message;
+ break;
+ case RXRPC_PACKET_TYPE_ABORT:
+ if (!rxrpc_extract_abort(skb))
+ return 0; /* Just discard if malformed */
+ break;
+
+ case RXRPC_PACKET_TYPE_DATA:
+ if (sp->hdr.callNumber == 0 ||
+ sp->hdr.seq == 0)
+ goto bad_message;
+
+ /* Unshare the packet so that it can be modified for in-place
+ * decryption.
+ */
+ if (sp->hdr.securityIndex != 0) {
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb) {
+ rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem);
+ *_skb = NULL;
+ return 0;
+ }
+
+ if (skb != *_skb) {
+ rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare);
+ *_skb = skb;
+ rxrpc_new_skb(skb, rxrpc_skb_new_unshared);
+ sp = rxrpc_skb(skb);
+ }
+ }
+ break;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ if (rxrpc_to_server(sp))
+ return 0;
+ break;
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ if (rxrpc_to_client(sp))
+ return 0;
+ break;
+
+ /* Packet types 9-11 should just be ignored. */
+ case RXRPC_PACKET_TYPE_PARAMS:
+ case RXRPC_PACKET_TYPE_10:
+ case RXRPC_PACKET_TYPE_11:
+ return 0;
+
+ default:
+ goto bad_message;
+ }
+
+ if (sp->hdr.serviceId == 0)
+ goto bad_message;
+
+ if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0))
+ return true; /* Unsupported address type - discard. */
+
+ if (peer_srx.transport.family != local->srx.transport.family &&
+ (peer_srx.transport.family == AF_INET &&
+ local->srx.transport.family != AF_INET6)) {
+ pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
+ peer_srx.transport.family,
+ local->srx.transport.family);
+ return true; /* Wrong address type - discard. */
+ }
+
+ if (rxrpc_to_client(sp)) {
+ rcu_read_lock();
+ conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb);
+ conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
+ rcu_read_unlock();
+ if (!conn) {
+ trace_rxrpc_abort(0, "NCC", sp->hdr.cid,
+ sp->hdr.callNumber, sp->hdr.seq,
+ RXKADINCONSISTENCY, EBADMSG);
+ goto protocol_error;
+ }
+
+ ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
+ rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
+ return ret;
+ }
+
+ /* We need to look up service connections by the full protocol
+ * parameter set. We look up the peer first as an intermediate step
+ * and then the connection from the peer's tree.
+ */
+ rcu_read_lock();
+
+ peer = rxrpc_lookup_peer_rcu(local, &peer_srx);
+ if (!peer) {
+ rcu_read_unlock();
+ return rxrpc_new_incoming_call(local, NULL, NULL, &peer_srx, skb);
+ }
+
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
+ if (conn) {
+ rcu_read_unlock();
+ ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
+ rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
+ return ret;
+ }
+
+ peer = rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input);
+ rcu_read_unlock();
+
+ ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb);
+ rxrpc_put_peer(peer, rxrpc_peer_put_input);
+ if (ret < 0)
+ goto reject_packet;
+ return 0;
+
+bad_message:
+ trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
+protocol_error:
+ skb->priority = RX_PROTOCOL_ERROR;
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+reject_packet:
+ rxrpc_reject_packet(local, skb);
+ return ret;
+}
+
+/*
+ * Deal with a packet that's associated with an extant connection.
+ */
+static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_channel *chan;
+ struct rxrpc_call *call = NULL;
+ unsigned int channel;
+
+ if (sp->hdr.securityIndex != conn->security_ix)
+ goto wrong_security;
+
+ if (sp->hdr.serviceId != conn->service_id) {
+ int old_id;
+
+ if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
+ goto reupgrade;
+ old_id = cmpxchg(&conn->service_id, conn->orig_service_id,
+ sp->hdr.serviceId);
+
+ if (old_id != conn->orig_service_id &&
+ old_id != sp->hdr.serviceId)
+ goto reupgrade;
+ }
+
+ if (after(sp->hdr.serial, conn->hi_serial))
+ conn->hi_serial = sp->hdr.serial;
+
+ /* It's a connection-level packet if the call number is 0. */
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_input_conn_packet(conn, skb);
+
+ /* Call-bound packets are routed by connection channel. */
+ channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ chan = &conn->channels[channel];
+
+ /* Ignore really old calls */
+ if (sp->hdr.callNumber < chan->last_call)
+ return 0;
+
+ if (sp->hdr.callNumber == chan->last_call) {
+ if (chan->call ||
+ sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+ return 0;
+
+ /* For the previous service call, if completed successfully, we
+ * discard all further packets.
+ */
+ if (rxrpc_conn_is_service(conn) &&
+ chan->last_type == RXRPC_PACKET_TYPE_ACK)
+ return 0;
+
+ /* But otherwise we need to retransmit the final packet from
+ * data cached in the connection record.
+ */
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+ trace_rxrpc_rx_data(chan->call_debug_id,
+ sp->hdr.seq,
+ sp->hdr.serial,
+ sp->hdr.flags);
+ rxrpc_input_conn_packet(conn, skb);
+ return 0;
+ }
+
+ rcu_read_lock();
+ call = rxrpc_try_get_call(rcu_dereference(chan->call),
+ rxrpc_call_get_input);
+ rcu_read_unlock();
+
+ if (sp->hdr.callNumber > chan->call_id) {
+ if (rxrpc_to_client(sp)) {
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ goto reject_packet;
+ }
+
+ if (call) {
+ rxrpc_implicit_end_call(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ call = NULL;
+ }
+ }
+
+ if (!call) {
+ if (rxrpc_to_client(sp))
+ goto bad_message;
+ if (rxrpc_new_incoming_call(conn->local, conn->peer, conn,
+ peer_srx, skb))
+ return 0;
+ goto reject_packet;
+ }
+
+ rxrpc_input_call_event(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ return 0;
+
+wrong_security:
+ trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RXKADINCONSISTENCY, EBADMSG);
+ skb->priority = RXKADINCONSISTENCY;
+ goto post_abort;
+
+reupgrade:
+ trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
+ goto protocol_error;
+
+bad_message:
+ trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
+protocol_error:
+ skb->priority = RX_PROTOCOL_ERROR;
+post_abort:
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+reject_packet:
+ rxrpc_reject_packet(conn->local, skb);
+ return 0;
+}
+
+/*
+ * I/O and event handling thread.
+ */
+int rxrpc_io_thread(void *data)
+{
+ struct sk_buff_head rx_queue;
+ struct rxrpc_local *local = data;
+ struct rxrpc_call *call;
+ struct sk_buff *skb;
+
+ skb_queue_head_init(&rx_queue);
+
+ set_user_nice(current, MIN_NICE);
+
+ for (;;) {
+ rxrpc_inc_stat(local->rxnet, stat_io_loop);
+
+ /* Deal with calls that want immediate attention. */
+ if ((call = list_first_entry_or_null(&local->call_attend_q,
+ struct rxrpc_call,
+ attend_link))) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&call->attend_link);
+ spin_unlock_bh(&local->lock);
+
+ trace_rxrpc_call_poked(call);
+ rxrpc_input_call_event(call, NULL);
+ rxrpc_put_call(call, rxrpc_call_put_poke);
+ continue;
+ }
+
+ /* Process received packets and errors. */
+ if ((skb = __skb_dequeue(&rx_queue))) {
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_PACKET:
+ skb->priority = 0;
+ rxrpc_input_packet(local, &skb);
+ trace_rxrpc_rx_done(skb->mark, skb->priority);
+ rxrpc_free_skb(skb, rxrpc_skb_put_input);
+ break;
+ case RXRPC_SKB_MARK_ERROR:
+ rxrpc_input_error(local, skb);
+ rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
+ break;
+ }
+ continue;
+ }
+
+ if (!skb_queue_empty(&local->rx_queue)) {
+ spin_lock_irq(&local->rx_queue.lock);
+ skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
+ spin_unlock_irq(&local->rx_queue.lock);
+ continue;
+ }
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!skb_queue_empty(&local->rx_queue) ||
+ !list_empty(&local->call_attend_q)) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ if (kthread_should_stop())
+ break;
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+ rxrpc_see_local(local, rxrpc_local_stop);
+ rxrpc_destroy_local(local);
+ local->io_thread = NULL;
+ rxrpc_see_local(local, rxrpc_local_stopped);
+ return 0;
+}
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 8d2073e0e3da..8d53aded09c4 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -513,7 +513,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
if (ret < 0)
goto error;
- conn->params.key = key;
+ conn->key = key;
_leave(" = 0 [%d]", key_serial(key));
return 0;
@@ -602,7 +602,8 @@ static long rxrpc_read(const struct key *key,
}
_debug("token[%u]: toksize=%u", ntoks, toksize);
- ASSERTCMP(toksize, <=, AFSTOKEN_LENGTH_MAX);
+ if (WARN_ON(toksize > AFSTOKEN_LENGTH_MAX))
+ return -EIO;
toksizes[ntoks++] = toksize;
size += toksize + 4; /* each token has a length word */
@@ -679,8 +680,9 @@ static long rxrpc_read(const struct key *key,
return -ENOPKG;
}
- ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==,
- toksize);
+ if (WARN_ON((unsigned long)xdr - (unsigned long)oldxdr ==
+ toksize))
+ return -EIO;
}
#undef ENCODE_STR
@@ -688,8 +690,10 @@ static long rxrpc_read(const struct key *key,
#undef ENCODE64
#undef ENCODE
- ASSERTCMP(tok, ==, ntoks);
- ASSERTCMP((char __user *) xdr - buffer, ==, size);
+ if (WARN_ON(tok != ntoks))
+ return -EIO;
+ if (WARN_ON((unsigned long)xdr - (unsigned long)buffer != size))
+ return -EIO;
_leave(" = %zu", size);
return size;
}
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 19e929c7c38b..5e69ea6b233d 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -21,9 +21,9 @@ static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
/*
* Reply to a version request
*/
-static void rxrpc_send_version_request(struct rxrpc_local *local,
- struct rxrpc_host_header *hdr,
- struct sk_buff *skb)
+void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_host_header *hdr,
+ struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -63,8 +63,6 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
len = iov[0].iov_len + iov[1].iov_len;
- _proto("Tx VERSION (reply)");
-
ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
if (ret < 0)
trace_rxrpc_tx_fail(local->debug_id, 0, ret,
@@ -75,41 +73,3 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_leave("");
}
-
-/*
- * Process event packets targeted at a local endpoint.
- */
-void rxrpc_process_local_events(struct rxrpc_local *local)
-{
- struct sk_buff *skb;
- char v;
-
- _enter("");
-
- skb = skb_dequeue(&local->event_queue);
- if (skb) {
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- rxrpc_see_skb(skb, rxrpc_skb_seen);
- _debug("{%d},{%u}", local->debug_id, sp->hdr.type);
-
- switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_VERSION:
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &v, 1) < 0)
- return;
- _proto("Rx VERSION { %02x }", v);
- if (v == 0)
- rxrpc_send_version_request(local, &sp->hdr, skb);
- break;
-
- default:
- /* Just ignore anything we don't understand */
- break;
- }
-
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- }
-
- _leave("");
-}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a943fdf91e24..44222923c0d1 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -20,7 +20,6 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static void rxrpc_local_processor(struct work_struct *);
static void rxrpc_local_rcu(struct rcu_head *);
/*
@@ -97,12 +96,9 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
atomic_set(&local->active_users, 1);
local->rxnet = rxnet;
INIT_HLIST_NODE(&local->link);
- INIT_WORK(&local->processor, rxrpc_local_processor);
- INIT_LIST_HEAD(&local->ack_tx_queue);
- spin_lock_init(&local->ack_tx_lock);
init_rwsem(&local->defrag_sem);
- skb_queue_head_init(&local->reject_queue);
- skb_queue_head_init(&local->event_queue);
+ skb_queue_head_init(&local->rx_queue);
+ INIT_LIST_HEAD(&local->call_attend_q);
local->client_bundles = RB_ROOT;
spin_lock_init(&local->client_bundles_lock);
spin_lock_init(&local->lock);
@@ -110,7 +106,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
local->srx.srx_service = 0;
- trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
}
_leave(" = %p", local);
@@ -126,6 +122,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct sockaddr_rxrpc *srx = &local->srx;
struct udp_port_cfg udp_conf = {0};
+ struct task_struct *io_thread;
struct sock *usk;
int ret;
@@ -152,7 +149,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
}
tuncfg.encap_type = UDP_ENCAP_RXRPC;
- tuncfg.encap_rcv = rxrpc_input_packet;
+ tuncfg.encap_rcv = rxrpc_encap_rcv;
tuncfg.encap_err_rcv = rxrpc_encap_err_rcv;
tuncfg.sk_user_data = local;
setup_udp_tunnel_sock(net, local->socket, &tuncfg);
@@ -185,8 +182,23 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
BUG();
}
+ io_thread = kthread_run(rxrpc_io_thread, local,
+ "krxrpcio/%u", ntohs(udp_conf.local_udp_port));
+ if (IS_ERR(io_thread)) {
+ ret = PTR_ERR(io_thread);
+ goto error_sock;
+ }
+
+ local->io_thread = io_thread;
_leave(" = 0");
return 0;
+
+error_sock:
+ kernel_sock_shutdown(local->socket, SHUT_RDWR);
+ local->socket->sk->sk_user_data = NULL;
+ sock_release(local->socket);
+ local->socket = NULL;
+ return ret;
}
/*
@@ -198,7 +210,6 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
struct rxrpc_local *local;
struct rxrpc_net *rxnet = rxrpc_net(net);
struct hlist_node *cursor;
- const char *age;
long diff;
int ret;
@@ -229,10 +240,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
* we're attempting to use a local address that the dying
* object is still using.
*/
- if (!rxrpc_use_local(local))
+ if (!rxrpc_use_local(local, rxrpc_local_use_lookup))
break;
- age = "old";
goto found;
}
@@ -250,14 +260,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
} else {
hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
}
- age = "new";
found:
mutex_unlock(&rxnet->local_mutex);
-
- _net("LOCAL %s %d {%pISp}",
- age, local->debug_id, &local->srx.transport);
-
_leave(" = %p", local);
return local;
@@ -279,64 +284,49 @@ addr_in_use:
/*
* Get a ref on a local endpoint.
*/
-struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- const void *here = __builtin_return_address(0);
- int r;
+ int r, u;
+ u = atomic_read(&local->active_users);
__refcount_inc(&local->ref, &r);
- trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here);
+ trace_rxrpc_local(local->debug_id, why, r + 1, u);
return local;
}
/*
* Get a ref on a local endpoint unless its usage has already reached 0.
*/
-struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- const void *here = __builtin_return_address(0);
- int r;
+ int r, u;
- if (local) {
- if (__refcount_inc_not_zero(&local->ref, &r))
- trace_rxrpc_local(local->debug_id, rxrpc_local_got,
- r + 1, here);
- else
- local = NULL;
+ if (local && __refcount_inc_not_zero(&local->ref, &r)) {
+ u = atomic_read(&local->active_users);
+ trace_rxrpc_local(local->debug_id, why, r + 1, u);
+ return local;
}
- return local;
-}
-/*
- * Queue a local endpoint and pass the caller's reference to the work item.
- */
-void rxrpc_queue_local(struct rxrpc_local *local)
-{
- const void *here = __builtin_return_address(0);
- unsigned int debug_id = local->debug_id;
- int r = refcount_read(&local->ref);
-
- if (rxrpc_queue_work(&local->processor))
- trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here);
- else
- rxrpc_put_local(local);
+ return NULL;
}
/*
* Drop a ref on a local endpoint.
*/
-void rxrpc_put_local(struct rxrpc_local *local)
+void rxrpc_put_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
{
- const void *here = __builtin_return_address(0);
unsigned int debug_id;
bool dead;
- int r;
+ int r, u;
if (local) {
debug_id = local->debug_id;
+ u = atomic_read(&local->active_users);
dead = __refcount_dec_and_test(&local->ref, &r);
- trace_rxrpc_local(debug_id, rxrpc_local_put, r, here);
+ trace_rxrpc_local(debug_id, why, r, u);
if (dead)
call_rcu(&local->rcu, rxrpc_local_rcu);
@@ -346,14 +336,15 @@ void rxrpc_put_local(struct rxrpc_local *local)
/*
* Start using a local endpoint.
*/
-struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- local = rxrpc_get_local_maybe(local);
+ local = rxrpc_get_local_maybe(local, rxrpc_local_get_for_use);
if (!local)
return NULL;
- if (!__rxrpc_use_local(local)) {
- rxrpc_put_local(local);
+ if (!__rxrpc_use_local(local, why)) {
+ rxrpc_put_local(local, rxrpc_local_put_for_use);
return NULL;
}
@@ -362,15 +353,19 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
/*
* Cease using a local endpoint. Once the number of active users reaches 0, we
- * start the closure of the transport in the work processor.
+ * start the closure of the transport in the I/O thread..
*/
-void rxrpc_unuse_local(struct rxrpc_local *local)
+void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
{
+ unsigned int debug_id = local->debug_id;
+ int r, u;
+
if (local) {
- if (__rxrpc_unuse_local(local)) {
- rxrpc_get_local(local);
- rxrpc_queue_local(local);
- }
+ r = refcount_read(&local->ref);
+ u = atomic_dec_return(&local->active_users);
+ trace_rxrpc_local(debug_id, why, r, u);
+ if (u == 0)
+ kthread_stop(local->io_thread);
}
}
@@ -381,7 +376,7 @@ void rxrpc_unuse_local(struct rxrpc_local *local)
* Closing the socket cannot be done from bottom half context or RCU callback
* context because it might sleep.
*/
-static void rxrpc_local_destroyer(struct rxrpc_local *local)
+void rxrpc_destroy_local(struct rxrpc_local *local)
{
struct socket *socket = local->socket;
struct rxrpc_net *rxnet = local->rxnet;
@@ -408,52 +403,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
/* At this point, there should be no more packets coming in to the
* local endpoint.
*/
- rxrpc_purge_queue(&local->reject_queue);
- rxrpc_purge_queue(&local->event_queue);
-}
-
-/*
- * Process events on an endpoint. The work item carries a ref which
- * we must release.
- */
-static void rxrpc_local_processor(struct work_struct *work)
-{
- struct rxrpc_local *local =
- container_of(work, struct rxrpc_local, processor);
- bool again;
-
- if (local->dead)
- return;
-
- trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
- refcount_read(&local->ref), NULL);
-
- do {
- again = false;
- if (!__rxrpc_use_local(local)) {
- rxrpc_local_destroyer(local);
- break;
- }
-
- if (!list_empty(&local->ack_tx_queue)) {
- rxrpc_transmit_ack_packets(local);
- again = true;
- }
-
- if (!skb_queue_empty(&local->reject_queue)) {
- rxrpc_reject_packets(local);
- again = true;
- }
-
- if (!skb_queue_empty(&local->event_queue)) {
- rxrpc_process_local_events(local);
- again = true;
- }
-
- __rxrpc_unuse_local(local);
- } while (again);
-
- rxrpc_put_local(local);
+ rxrpc_purge_queue(&local->rx_queue);
}
/*
@@ -463,13 +413,8 @@ static void rxrpc_local_rcu(struct rcu_head *rcu)
{
struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu);
- _enter("%d", local->debug_id);
-
- ASSERT(!work_pending(&local->processor));
-
- _net("DESTROY LOCAL %d", local->debug_id);
+ rxrpc_see_local(local, rxrpc_local_free);
kfree(local);
- _leave("");
}
/*
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 84242c0e467c..5905530e2f33 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -65,7 +65,7 @@ static __net_init int rxrpc_init_net(struct net *net)
atomic_set(&rxnet->nr_client_conns, 0);
rxnet->kill_all_client_conns = false;
spin_lock_init(&rxnet->client_conn_cache_lock);
- spin_lock_init(&rxnet->client_conn_discard_lock);
+ mutex_init(&rxnet->client_conn_discard_lock);
INIT_LIST_HEAD(&rxnet->idle_client_conns);
INIT_WORK(&rxnet->client_conn_reaper,
rxrpc_discard_expired_client_conns);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index c5eed0e83e47..3d8c9f830ee0 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -142,8 +142,8 @@ retry:
txb->ack.reason = RXRPC_ACK_IDLE;
}
- mtu = conn->params.peer->if_mtu;
- mtu -= conn->params.peer->hdrsize;
+ mtu = conn->peer->if_mtu;
+ mtu -= conn->peer->hdrsize;
jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
@@ -203,12 +203,11 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call,
}
/*
- * Send an ACK call packet.
+ * Transmit an ACK packet.
*/
-static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *txb)
+int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
struct rxrpc_connection *conn;
- struct rxrpc_call *call = txb->call;
struct msghdr msg;
struct kvec iov[1];
rxrpc_serial_t serial;
@@ -229,11 +228,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
if (txb->ack.reason == RXRPC_ACK_PING)
txb->wire.flags |= RXRPC_REQUEST_ACK;
- if (txb->ack.reason == RXRPC_ACK_DELAY)
- clear_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags);
- if (txb->ack.reason == RXRPC_ACK_IDLE)
- clear_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags);
-
n = rxrpc_fill_out_ack(conn, call, txb);
if (n == 0)
return 0;
@@ -247,8 +241,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks);
- if (txb->ack_why == rxrpc_propose_ack_ping_for_lost_ack)
- call->acks_lost_ping = serial;
if (txb->ack.reason == RXRPC_ACK_PING)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
@@ -259,7 +251,7 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
txb->ack.previousPacket = htonl(call->rx_highest_seq);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
@@ -279,44 +271,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
}
/*
- * ACK transmitter for a local endpoint. The UDP socket locks around each
- * transmission, so we can only transmit one packet at a time, ACK, DATA or
- * otherwise.
- */
-void rxrpc_transmit_ack_packets(struct rxrpc_local *local)
-{
- LIST_HEAD(queue);
- int ret;
-
- trace_rxrpc_local(local->debug_id, rxrpc_local_tx_ack,
- refcount_read(&local->ref), NULL);
-
- if (list_empty(&local->ack_tx_queue))
- return;
-
- spin_lock_bh(&local->ack_tx_lock);
- list_splice_tail_init(&local->ack_tx_queue, &queue);
- spin_unlock_bh(&local->ack_tx_lock);
-
- while (!list_empty(&queue)) {
- struct rxrpc_txbuf *txb =
- list_entry(queue.next, struct rxrpc_txbuf, tx_link);
-
- ret = rxrpc_send_ack_packet(local, txb);
- if (ret < 0 && ret != -ECONNRESET) {
- spin_lock_bh(&local->ack_tx_lock);
- list_splice_init(&queue, &local->ack_tx_queue);
- spin_unlock_bh(&local->ack_tx_lock);
- break;
- }
-
- list_del_init(&txb->tx_link);
- rxrpc_put_call(txb->call, rxrpc_call_put);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
- }
-}
-
-/*
* Send an ABORT call packet.
*/
int rxrpc_send_abort_packet(struct rxrpc_call *call)
@@ -358,7 +312,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
pkt.whdr.userStatus = 0;
pkt.whdr.securityIndex = call->security_ix;
pkt.whdr._rsvd = 0;
- pkt.whdr.serviceId = htons(call->service_id);
+ pkt.whdr.serviceId = htons(call->dest_srx.srx_service);
pkt.abort_code = htonl(call->abort_code);
iov[0].iov_base = &pkt;
@@ -368,8 +322,8 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
pkt.whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, sizeof(pkt));
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt));
+ conn->peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_abort);
@@ -395,12 +349,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
_enter("%x,{%d}", txb->seq, txb->len);
- if (hlist_unhashed(&call->error_link)) {
- spin_lock_bh(&call->peer->lock);
- hlist_add_head_rcu(&call->error_link, &call->peer->error_targets);
- spin_unlock_bh(&call->peer->lock);
- }
-
/* Each transmission of a Tx packet needs a new serial number */
serial = atomic_inc_return(&conn->serial);
txb->wire.serial = htonl(serial);
@@ -466,6 +414,14 @@ dont_set_request_ack:
trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+
+ /* Track what we've attempted to transmit at least once so that the
+ * retransmission algorithm doesn't try to resend what we haven't sent
+ * yet. However, this can race as we can receive an ACK before we get
+ * to this point. But, OTOH, if we won't get an ACK mentioning this
+ * packet unless the far side received it (though it could have
+ * discarded it anyway and NAK'd it).
+ */
cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
/* send the packet with the don't fragment bit set if we currently
@@ -473,7 +429,7 @@ dont_set_request_ack:
if (txb->len >= call->peer->maxdata)
goto send_fragmentable;
- down_read(&conn->params.local->defrag_sem);
+ down_read(&conn->local->defrag_sem);
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
@@ -486,11 +442,12 @@ dont_set_request_ack:
* message and update the peer record
*/
rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
- up_read(&conn->params.local->defrag_sem);
+ up_read(&conn->local->defrag_sem);
if (ret < 0) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_data_nofrag);
@@ -549,22 +506,22 @@ send_fragmentable:
/* attempt to send this message with fragmentation enabled */
_debug("send fragment");
- down_write(&conn->params.local->defrag_sem);
+ down_write(&conn->local->defrag_sem);
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
- switch (conn->params.local->srx.transport.family) {
+ switch (conn->local->srx.transport.family) {
case AF_INET6:
case AF_INET:
- ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ ip_sock_set_mtu_discover(conn->local->socket->sk,
IP_PMTUDISC_DONT);
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
- ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ ip_sock_set_mtu_discover(conn->local->socket->sk,
IP_PMTUDISC_DO);
break;
@@ -573,6 +530,7 @@ send_fragmentable:
}
if (ret < 0) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_data_frag);
@@ -582,26 +540,25 @@ send_fragmentable:
}
rxrpc_tx_backoff(call, ret);
- up_write(&conn->params.local->defrag_sem);
+ up_write(&conn->local->defrag_sem);
goto done;
}
/*
- * reject packets through the local endpoint
+ * Reject a packet through the local endpoint.
*/
-void rxrpc_reject_packets(struct rxrpc_local *local)
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
- struct sockaddr_rxrpc srx;
- struct rxrpc_skb_priv *sp;
struct rxrpc_wire_header whdr;
- struct sk_buff *skb;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct msghdr msg;
struct kvec iov[2];
size_t size;
__be32 code;
int ret, ioc;
- _enter("%d", local->debug_id);
+ rxrpc_see_skb(skb, rxrpc_skb_see_reject);
iov[0].iov_base = &whdr;
iov[0].iov_len = sizeof(whdr);
@@ -615,52 +572,42 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
memset(&whdr, 0, sizeof(whdr));
- while ((skb = skb_dequeue(&local->reject_queue))) {
- rxrpc_see_skb(skb, rxrpc_skb_seen);
- sp = rxrpc_skb(skb);
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_REJECT_BUSY:
+ whdr.type = RXRPC_PACKET_TYPE_BUSY;
+ size = sizeof(whdr);
+ ioc = 1;
+ break;
+ case RXRPC_SKB_MARK_REJECT_ABORT:
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ code = htonl(skb->priority);
+ size = sizeof(whdr) + sizeof(code);
+ ioc = 2;
+ break;
+ default:
+ return;
+ }
- switch (skb->mark) {
- case RXRPC_SKB_MARK_REJECT_BUSY:
- whdr.type = RXRPC_PACKET_TYPE_BUSY;
- size = sizeof(whdr);
- ioc = 1;
- break;
- case RXRPC_SKB_MARK_REJECT_ABORT:
- whdr.type = RXRPC_PACKET_TYPE_ABORT;
- code = htonl(skb->priority);
- size = sizeof(whdr) + sizeof(code);
- ioc = 2;
- break;
- default:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- continue;
- }
+ if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+ msg.msg_namelen = srx.transport_len;
- if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
- msg.msg_namelen = srx.transport_len;
-
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.serviceId = htons(sp->hdr.serviceId);
- whdr.flags = sp->hdr.flags;
- whdr.flags ^= RXRPC_CLIENT_INITIATED;
- whdr.flags &= RXRPC_CLIENT_INITIATED;
-
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
- ret = do_udp_sendmsg(local->socket, &msg, size);
- if (ret < 0)
- trace_rxrpc_tx_fail(local->debug_id, 0, ret,
- rxrpc_tx_point_reject);
- else
- trace_rxrpc_tx_packet(local->debug_id, &whdr,
- rxrpc_tx_point_reject);
- }
+ whdr.epoch = htonl(sp->hdr.epoch);
+ whdr.cid = htonl(sp->hdr.cid);
+ whdr.callNumber = htonl(sp->hdr.callNumber);
+ whdr.serviceId = htons(sp->hdr.serviceId);
+ whdr.flags = sp->hdr.flags;
+ whdr.flags ^= RXRPC_CLIENT_INITIATED;
+ whdr.flags &= RXRPC_CLIENT_INITIATED;
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
+ ret = do_udp_sendmsg(local->socket, &msg, size);
+ if (ret < 0)
+ trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+ rxrpc_tx_point_reject);
+ else
+ trace_rxrpc_tx_packet(local->debug_id, &whdr,
+ rxrpc_tx_point_reject);
}
-
- _leave("");
}
/*
@@ -701,8 +648,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
len = iov[0].iov_len + iov[1].iov_len;
- _proto("Tx VERSION (keepalive)");
-
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
ret = do_udp_sendmsg(peer->local->socket, &msg, len);
if (ret < 0)
@@ -715,3 +660,43 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
peer->last_tx_at = ktime_get_seconds();
_leave("");
}
+
+/*
+ * Schedule an instant Tx resend.
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb)
+{
+ if (call->state < RXRPC_CALL_COMPLETE)
+ kdebug("resend");
+}
+
+/*
+ * Transmit one packet.
+ */
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ int ret;
+
+ ret = rxrpc_send_data_packet(call, txb);
+ if (ret < 0) {
+ switch (ret) {
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ 0, ret);
+ break;
+ default:
+ _debug("need instant resend %d", ret);
+ rxrpc_instant_resend(call, txb);
+ }
+ } else {
+ unsigned long now = jiffies;
+ unsigned long resend_at = now + call->peer->rto_j;
+
+ WRITE_ONCE(call->resend_at, resend_at);
+ rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_timer_set_for_send);
+ }
+}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index cda3890657a9..6685bf917aa6 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -18,9 +18,9 @@
#include <net/ip.h>
#include "ar-internal.h"
-static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
-static void rxrpc_distribute_error(struct rxrpc_peer *, int,
- enum rxrpc_call_completion);
+static void rxrpc_store_error(struct rxrpc_peer *, struct sk_buff *);
+static void rxrpc_distribute_error(struct rxrpc_peer *, struct sk_buff *,
+ enum rxrpc_call_completion, int);
/*
* Find the peer associated with a local error.
@@ -48,13 +48,11 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
srx->transport.sin.sin_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
- _net("Rx ICMP");
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
case SO_EE_ORIGIN_ICMP6:
- _net("Rx ICMP6 on v4 sock");
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset + 12,
sizeof(struct in_addr));
@@ -70,14 +68,12 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
case AF_INET6:
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
- _net("Rx ICMP6");
srx->transport.sin6.sin6_port = serr->port;
memcpy(&srx->transport.sin6.sin6_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in6_addr));
break;
case SO_EE_ORIGIN_ICMP:
- _net("Rx ICMP on v6 sock");
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
@@ -106,13 +102,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
*/
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
- _net("Rx ICMP Fragmentation Needed (%d)", mtu);
-
/* wind down the local interface MTU */
- if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
+ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu)
peer->if_mtu = mtu;
- _net("I/F MTU %u", mtu);
- }
if (mtu == 0) {
/* they didn't give us a size, estimate one */
@@ -129,63 +121,36 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
}
if (mtu < peer->mtu) {
- spin_lock_bh(&peer->lock);
+ spin_lock(&peer->lock);
peer->mtu = mtu;
peer->maxdata = peer->mtu - peer->hdrsize;
- spin_unlock_bh(&peer->lock);
- _net("Net MTU %u (maxdata %u)",
- peer->mtu, peer->maxdata);
+ spin_unlock(&peer->lock);
}
}
/*
* Handle an error received on the local endpoint.
*/
-void rxrpc_error_report(struct sock *sk)
+void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb)
{
- struct sock_exterr_skb *serr;
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
struct sockaddr_rxrpc srx;
- struct rxrpc_local *local;
struct rxrpc_peer *peer = NULL;
- struct sk_buff *skb;
- rcu_read_lock();
- local = rcu_dereference_sk_user_data(sk);
- if (unlikely(!local)) {
- rcu_read_unlock();
- return;
- }
- _enter("%p{%d}", sk, local->debug_id);
-
- /* Clear the outstanding error value on the socket so that it doesn't
- * cause kernel_sendmsg() to return it later.
- */
- sock_error(sk);
+ _enter("L=%x", local->debug_id);
- skb = sock_dequeue_err_skb(sk);
- if (!skb) {
- rcu_read_unlock();
- _leave("UDP socket errqueue empty");
- return;
- }
- rxrpc_new_skb(skb, rxrpc_skb_received);
- serr = SKB_EXT_ERR(skb);
if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
_leave("UDP empty message");
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
return;
}
+ rcu_read_lock();
peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input_error))
peer = NULL;
- if (!peer) {
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- _leave(" [no peer]");
+ rcu_read_unlock();
+ if (!peer)
return;
- }
trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
@@ -196,72 +161,26 @@ void rxrpc_error_report(struct sock *sk)
goto out;
}
- rxrpc_store_error(peer, serr);
+ rxrpc_store_error(peer, skb);
out:
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- rxrpc_put_peer(peer);
-
- _leave("");
+ rxrpc_put_peer(peer, rxrpc_peer_put_input_error);
}
/*
* Map an error report to error codes on the peer record.
*/
-static void rxrpc_store_error(struct rxrpc_peer *peer,
- struct sock_exterr_skb *serr)
+static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb)
{
enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
- struct sock_extended_err *ee;
- int err;
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ struct sock_extended_err *ee = &serr->ee;
+ int err = ee->ee_errno;
_enter("");
- ee = &serr->ee;
-
- err = ee->ee_errno;
-
switch (ee->ee_origin) {
- case SO_EE_ORIGIN_ICMP:
- switch (ee->ee_type) {
- case ICMP_DEST_UNREACH:
- switch (ee->ee_code) {
- case ICMP_NET_UNREACH:
- _net("Rx Received ICMP Network Unreachable");
- break;
- case ICMP_HOST_UNREACH:
- _net("Rx Received ICMP Host Unreachable");
- break;
- case ICMP_PORT_UNREACH:
- _net("Rx Received ICMP Port Unreachable");
- break;
- case ICMP_NET_UNKNOWN:
- _net("Rx Received ICMP Unknown Network");
- break;
- case ICMP_HOST_UNKNOWN:
- _net("Rx Received ICMP Unknown Host");
- break;
- default:
- _net("Rx Received ICMP DestUnreach code=%u",
- ee->ee_code);
- break;
- }
- break;
-
- case ICMP_TIME_EXCEEDED:
- _net("Rx Received ICMP TTL Exceeded");
- break;
-
- default:
- _proto("Rx Received ICMP error { type=%u code=%u }",
- ee->ee_type, ee->ee_code);
- break;
- }
- break;
-
case SO_EE_ORIGIN_NONE:
case SO_EE_ORIGIN_LOCAL:
- _proto("Rx Received local error { error=%d }", err);
compl = RXRPC_CALL_LOCAL_ERROR;
break;
@@ -269,26 +188,40 @@ static void rxrpc_store_error(struct rxrpc_peer *peer,
if (err == EACCES)
err = EHOSTUNREACH;
fallthrough;
+ case SO_EE_ORIGIN_ICMP:
default:
- _proto("Rx Received error report { orig=%u }", ee->ee_origin);
break;
}
- rxrpc_distribute_error(peer, err, compl);
+ rxrpc_distribute_error(peer, skb, compl, err);
}
/*
* Distribute an error that occurred on a peer.
*/
-static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
- enum rxrpc_call_completion compl)
+static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb,
+ enum rxrpc_call_completion compl, int err)
{
struct rxrpc_call *call;
+ HLIST_HEAD(error_targets);
- hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
- rxrpc_see_call(call);
- rxrpc_set_call_completion(call, compl, 0, -error);
+ spin_lock(&peer->lock);
+ hlist_move_list(&peer->error_targets, &error_targets);
+
+ while (!hlist_empty(&error_targets)) {
+ call = hlist_entry(error_targets.first,
+ struct rxrpc_call, error_link);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&peer->lock);
+
+ rxrpc_see_call(call, rxrpc_call_see_distribute_error);
+ rxrpc_set_call_completion(call, compl, 0, -err);
+ rxrpc_input_call_event(call, skb);
+
+ spin_lock(&peer->lock);
}
+
+ spin_unlock(&peer->lock);
}
/*
@@ -304,18 +237,18 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
time64_t keepalive_at;
int slot;
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
while (!list_empty(collector)) {
peer = list_entry(collector->next,
struct rxrpc_peer, keepalive_link);
list_del_init(&peer->keepalive_link);
- if (!rxrpc_get_peer_maybe(peer))
+ if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive))
continue;
- if (__rxrpc_use_local(peer->local)) {
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ if (__rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive)) {
+ spin_unlock(&rxnet->peer_hash_lock);
keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
slot = keepalive_at - base;
@@ -334,15 +267,15 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
*/
slot += cursor;
slot &= mask;
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
list_add_tail(&peer->keepalive_link,
&rxnet->peer_keepalive[slot & mask]);
- rxrpc_unuse_local(peer->local);
+ rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
}
- rxrpc_put_peer_locked(peer);
+ rxrpc_put_peer_locked(peer, rxrpc_peer_put_keepalive);
}
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
}
/*
@@ -372,7 +305,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
* second; the bucket at cursor + 1 goes at now + 1s and so
* on...
*/
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
list_splice_init(&rxnet->peer_keepalive_new, &collector);
stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
@@ -384,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
}
base = now;
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
rxnet->peer_keepalive_base = base;
rxnet->peer_keepalive_cursor = cursor;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 041a51225c5f..608946dcc505 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -138,10 +138,8 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
- if (peer) {
- _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
+ if (peer)
_leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
- }
return peer;
}
@@ -207,9 +205,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
/*
* Allocate a peer.
*/
-struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
+ enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
struct rxrpc_peer *peer;
_enter("");
@@ -217,7 +215,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
if (peer) {
refcount_set(&peer->ref, 1);
- peer->local = rxrpc_get_local(local);
+ peer->local = rxrpc_get_local(local, rxrpc_local_get_peer);
INIT_HLIST_HEAD(&peer->error_targets);
peer->service_conns = RB_ROOT;
seqlock_init(&peer->service_conn_lock);
@@ -228,7 +226,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
rxrpc_peer_init_rtt(peer);
peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
+ trace_rxrpc_peer(peer->debug_id, why, 1);
}
_leave(" = %p", peer);
@@ -284,7 +282,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
_enter("");
- peer = rxrpc_alloc_peer(local, gfp);
+ peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client);
if (peer) {
memcpy(&peer->srx, srx, sizeof(*srx));
rxrpc_init_peer(rx, peer, hash_key);
@@ -296,7 +294,8 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
static void rxrpc_free_peer(struct rxrpc_peer *peer)
{
- rxrpc_put_local(peer->local);
+ trace_rxrpc_peer(peer->debug_id, 0, rxrpc_peer_free);
+ rxrpc_put_local(peer->local, rxrpc_local_put_peer);
kfree_rcu(peer, rcu);
}
@@ -336,7 +335,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
/* search the peer list first */
rcu_read_lock();
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client))
peer = NULL;
rcu_read_unlock();
@@ -350,11 +349,11 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
return NULL;
}
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
/* Need to check that we aren't racing with someone else */
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client))
peer = NULL;
if (!peer) {
hash_add_rcu(rxnet->peer_hash,
@@ -363,7 +362,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
&rxnet->peer_keepalive_new);
}
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
if (peer)
rxrpc_free_peer(candidate);
@@ -371,8 +370,6 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
peer = candidate;
}
- _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
-
_leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
return peer;
}
@@ -380,27 +377,26 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
/*
* Get a ref on a peer record.
*/
-struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
__refcount_inc(&peer->ref, &r);
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
+ trace_rxrpc_peer(peer->debug_id, why, r + 1);
return peer;
}
/*
* Get a ref on a peer record unless its usage has already reached 0.
*/
-struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer,
+ enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
if (peer) {
if (__refcount_inc_not_zero(&peer->ref, &r))
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
+ trace_rxrpc_peer(peer->debug_id, r + 1, why);
else
peer = NULL;
}
@@ -416,10 +412,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
ASSERT(hlist_empty(&peer->error_targets));
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
rxrpc_free_peer(peer);
}
@@ -427,9 +423,8 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
/*
* Drop a ref on a peer record.
*/
-void rxrpc_put_peer(struct rxrpc_peer *peer)
+void rxrpc_put_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
unsigned int debug_id;
bool dead;
int r;
@@ -437,7 +432,7 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
if (peer) {
debug_id = peer->debug_id;
dead = __refcount_dec_and_test(&peer->ref, &r);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ trace_rxrpc_peer(debug_id, r - 1, why);
if (dead)
__rxrpc_put_peer(peer);
}
@@ -447,15 +442,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
* Drop a ref on a peer record where the caller already holds the
* peer_hash_lock.
*/
-void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
+void rxrpc_put_peer_locked(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
unsigned int debug_id = peer->debug_id;
bool dead;
int r;
dead = __refcount_dec_and_test(&peer->ref, &r);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ trace_rxrpc_peer(debug_id, r - 1, why);
if (dead) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index fae22a8b38d6..3a59591ec061 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -49,8 +49,6 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_local *local;
- struct rxrpc_sock *rx;
- struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
unsigned long timeout = 0;
@@ -63,28 +61,19 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
"Proto Local "
" Remote "
" SvID ConnID CallID End Use State Abort "
- " DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n");
+ " DebugId TxSeq TW RxSeq RW RxSerial CW RxTimo\n");
return 0;
}
call = list_entry(v, struct rxrpc_call, link);
- rx = rcu_dereference(call->socket);
- if (rx) {
- local = READ_ONCE(rx->local);
- if (local)
- sprintf(lbuff, "%pISpc", &local->srx.transport);
- else
- strcpy(lbuff, "no_local");
- } else {
- strcpy(lbuff, "no_socket");
- }
-
- peer = call->peer;
- if (peer)
- sprintf(rbuff, "%pISpc", &peer->srx.transport);
+ local = call->local;
+ if (local)
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
else
- strcpy(rbuff, "no_connection");
+ strcpy(lbuff, "no_local");
+
+ sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
if (call->state != RXRPC_CALL_SERVER_PREALLOC) {
timeout = READ_ONCE(call->expect_rx_by);
@@ -95,10 +84,10 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
- " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n",
+ " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
lbuff,
rbuff,
- call->service_id,
+ call->dest_srx.srx_service,
call->cid,
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
@@ -109,6 +98,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
call->rx_serial,
+ call->cong_cwnd,
timeout);
return 0;
@@ -159,7 +149,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Proto Local "
" Remote "
- " SvID ConnID End Use State Key "
+ " SvID ConnID End Ref Act State Key "
" Serial ISerial CallId0 CallId1 CallId2 CallId3\n"
);
return 0;
@@ -172,12 +162,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
goto print;
}
- sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport);
+ sprintf(lbuff, "%pISpc", &conn->local->srx.transport);
- sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport);
+ sprintf(rbuff, "%pISpc", &conn->peer->srx.transport);
print:
seq_printf(seq,
- "UDP %-47.47s %-47.47s %4x %08x %s %3u"
+ "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d"
" %s %08x %08x %08x %08x %08x %08x %08x\n",
lbuff,
rbuff,
@@ -185,8 +175,9 @@ print:
conn->proto.cid,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
refcount_read(&conn->ref),
+ atomic_read(&conn->active),
rxrpc_conn_states[conn->state],
- key_serial(conn->params.key),
+ key_serial(conn->key),
atomic_read(&conn->serial),
conn->hi_serial,
conn->channels[0].call_id,
@@ -341,7 +332,7 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Proto Local "
- " Use Act\n");
+ " Use Act RxQ\n");
return 0;
}
@@ -350,10 +341,11 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
sprintf(lbuff, "%pISpc", &local->srx.transport);
seq_printf(seq,
- "UDP %-47.47s %3u %3u\n",
+ "UDP %-47.47s %3u %3u %3u\n",
lbuff,
refcount_read(&local->ref),
- atomic_read(&local->active_users));
+ atomic_read(&local->active_users),
+ local->rx_queue.qlen);
return 0;
}
@@ -407,13 +399,16 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
seq_printf(seq,
- "Data : send=%u sendf=%u\n",
+ "Data : send=%u sendf=%u fail=%u\n",
atomic_read(&rxnet->stat_tx_data_send),
- atomic_read(&rxnet->stat_tx_data_send_frag));
+ atomic_read(&rxnet->stat_tx_data_send_frag),
+ atomic_read(&rxnet->stat_tx_data_send_fail));
seq_printf(seq,
- "Data-Tx : nr=%u retrans=%u\n",
+ "Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n",
atomic_read(&rxnet->stat_tx_data),
- atomic_read(&rxnet->stat_tx_data_retrans));
+ atomic_read(&rxnet->stat_tx_data_retrans),
+ atomic_read(&rxnet->stat_tx_data_underflow),
+ atomic_read(&rxnet->stat_tx_data_cwnd_reset));
seq_printf(seq,
"Data-Rx : nr=%u reqack=%u jumbo=%u\n",
atomic_read(&rxnet->stat_rx_data),
@@ -462,6 +457,9 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
"Buffers : txb=%u rxb=%u\n",
atomic_read(&rxrpc_nr_txbuf),
atomic_read(&rxrpc_n_rx_skbs));
+ seq_printf(seq,
+ "IO-thread: loops=%u\n",
+ atomic_read(&rxnet->stat_io_loop));
return 0;
}
@@ -478,8 +476,11 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
atomic_set(&rxnet->stat_tx_data, 0);
atomic_set(&rxnet->stat_tx_data_retrans, 0);
+ atomic_set(&rxnet->stat_tx_data_underflow, 0);
+ atomic_set(&rxnet->stat_tx_data_cwnd_reset, 0);
atomic_set(&rxnet->stat_tx_data_send, 0);
atomic_set(&rxnet->stat_tx_data_send_frag, 0);
+ atomic_set(&rxnet->stat_tx_data_send_fail, 0);
atomic_set(&rxnet->stat_rx_data, 0);
atomic_set(&rxnet->stat_rx_data_reqack, 0);
atomic_set(&rxnet->stat_rx_data_jumbo, 0);
@@ -491,5 +492,7 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks));
memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack));
+
+ atomic_set(&rxnet->stat_io_loop, 0);
return size;
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index efb85f983657..36b25d003cf0 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -36,16 +36,16 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
sk = &rx->sk;
if (rx && sk->sk_state < RXRPC_CLOSE) {
if (call->notify_rx) {
- spin_lock_bh(&call->notify_lock);
+ spin_lock(&call->notify_lock);
call->notify_rx(sk, call, call->user_call_ID);
- spin_unlock_bh(&call->notify_lock);
+ spin_unlock(&call->notify_lock);
} else {
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
- rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_get_call(call, rxrpc_call_get_notify_socket);
list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
}
- write_unlock_bh(&rx->recvmsg_lock);
+ write_unlock(&rx->recvmsg_lock);
if (!sock_flag(sk, SOCK_DEAD)) {
_debug("call %ps", sk->sk_data_ready);
@@ -87,9 +87,9 @@ bool rxrpc_set_call_completion(struct rxrpc_call *call,
bool ret = false;
if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
return ret;
}
@@ -107,9 +107,9 @@ bool rxrpc_call_completed(struct rxrpc_call *call)
bool ret = false;
if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
ret = __rxrpc_call_completed(call);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
return ret;
}
@@ -131,9 +131,9 @@ bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
{
bool ret;
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
return ret;
}
@@ -193,23 +193,23 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY)
rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_CLIENT_RECV_REPLY:
__rxrpc_call_completed(call);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
break;
case RXRPC_CALL_SERVER_RECV_REQUEST:
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
rxrpc_propose_delay_ACK(call, serial,
rxrpc_propose_ack_processing_op);
break;
default:
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
break;
}
}
@@ -228,9 +228,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
_enter("%d", call->debug_id);
-further_rotation:
skb = skb_dequeue(&call->recvmsg_queue);
- rxrpc_see_skb(skb, rxrpc_skb_rotated);
+ rxrpc_see_skb(skb, rxrpc_skb_see_rotate);
sp = rxrpc_skb(skb);
tseq = sp->hdr.seq;
@@ -241,7 +240,7 @@ further_rotation:
if (after(tseq, call->rx_consumed))
smp_store_release(&call->rx_consumed, tseq);
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_free_skb(skb, rxrpc_skb_put_rotate);
trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
serial, call->rx_consumed);
@@ -250,26 +249,12 @@ further_rotation:
return;
}
- /* The next packet on the queue might entirely overlap with the one we
- * just consumed; if so, rotate that away also.
- */
- skb = skb_peek(&call->recvmsg_queue);
- if (skb) {
- sp = rxrpc_skb(skb);
- if (sp->hdr.seq != call->rx_consumed &&
- after_eq(call->rx_consumed, sp->hdr.seq))
- goto further_rotation;
- }
-
/* Check to see if there's an ACK that needs sending. */
acked = atomic_add_return(call->rx_consumed - old_consumed,
&call->ackr_nr_consumed);
if (acked > 2 &&
- !test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) {
- rxrpc_send_ACK(call, RXRPC_ACK_IDLE, serial,
- rxrpc_propose_ack_rotate_rx);
- rxrpc_transmit_ack_packets(call->peer->local);
- }
+ !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
+ rxrpc_poke_call(call, rxrpc_call_poke_idle);
}
/*
@@ -314,15 +299,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
*/
skb = skb_peek(&call->recvmsg_queue);
while (skb) {
- rxrpc_see_skb(skb, rxrpc_skb_seen);
+ rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg);
sp = rxrpc_skb(skb);
seq = sp->hdr.seq;
- if (after_eq(call->rx_consumed, seq)) {
- kdebug("obsolete %x %x", call->rx_consumed, seq);
- goto skip_obsolete;
- }
-
if (!(flags & MSG_PEEK))
trace_rxrpc_receive(call, rxrpc_receive_front,
sp->hdr.serial, seq);
@@ -340,7 +320,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
ret = ret2;
goto out;
}
- rxrpc_transmit_ack_packets(call->peer->local);
} else {
trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
rx_pkt_offset, rx_pkt_len, 0);
@@ -373,7 +352,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
break;
}
- skip_obsolete:
/* The whole packet has been transferred. */
if (sp->hdr.flags & RXRPC_LAST_PACKET)
ret = 1;
@@ -395,7 +373,7 @@ done:
trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq,
rx_pkt_offset, rx_pkt_len, ret);
if (ret == -EAGAIN)
- set_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags);
+ set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
return ret;
}
@@ -463,14 +441,14 @@ try_again:
/* Find the next call and dequeue it if we're not just peeking. If we
* do dequeue it, that comes with a ref that we will need to release.
*/
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
l = rx->recvmsg_q.next;
call = list_entry(l, struct rxrpc_call, recvmsg_link);
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
- rxrpc_get_call(call, rxrpc_call_got);
- write_unlock_bh(&rx->recvmsg_lock);
+ rxrpc_get_call(call, rxrpc_call_get_recvmsg);
+ write_unlock(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0);
@@ -508,11 +486,9 @@ try_again:
}
if (msg->msg_name && call->peer) {
- struct sockaddr_rxrpc *srx = msg->msg_name;
- size_t len = sizeof(call->peer->srx);
+ size_t len = sizeof(call->dest_srx);
- memcpy(msg->msg_name, &call->peer->srx, len);
- srx->srx_service = call->service_id;
+ memcpy(msg->msg_name, &call->dest_srx, len);
msg->msg_namelen = len;
}
@@ -525,7 +501,6 @@ try_again:
if (ret == -EAGAIN)
ret = 0;
- rxrpc_transmit_ack_packets(call->peer->local);
if (!skb_queue_empty(&call->recvmsg_queue))
rxrpc_notify_socket(call);
break;
@@ -555,18 +530,18 @@ try_again:
error_unlock_call:
mutex_unlock(&call->user_mutex);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
return ret;
error_requeue_call:
if (!(flags & MSG_PEEK)) {
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
- write_unlock_bh(&rx->recvmsg_lock);
+ write_unlock(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0);
} else {
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
}
error_no_call:
release_sock(&rx->sk);
@@ -655,9 +630,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
- rxrpc_transmit_ack_packets(call->peer->local);
if (_service)
- *_service = call->service_id;
+ *_service = call->dest_srx.srx_service;
mutex_unlock(&call->user_mutex);
_leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort);
return ret;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 110a5550c0a6..d1233720e05f 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -103,7 +103,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn,
struct crypto_sync_skcipher *ci;
int ret;
- _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key));
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
conn->security_ix = token->security_index;
@@ -118,7 +118,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn,
sizeof(token->kad->session_key)) < 0)
BUG();
- switch (conn->params.security_level) {
+ switch (conn->security_level) {
case RXRPC_SECURITY_PLAIN:
case RXRPC_SECURITY_AUTH:
case RXRPC_SECURITY_ENCRYPT:
@@ -150,7 +150,7 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
{
size_t shdr, buf_size, chunk;
- switch (call->conn->params.security_level) {
+ switch (call->conn->security_level) {
default:
buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
shdr = 0;
@@ -192,7 +192,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn,
_enter("");
- if (!conn->params.key)
+ if (!conn->key)
return 0;
tmpbuf = kmalloc(tmpsize, GFP_KERNEL);
@@ -205,7 +205,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn,
return -ENOMEM;
}
- token = conn->params.key->payload.data[0];
+ token = conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
tmpbuf[0] = htonl(conn->proto.epoch);
@@ -317,7 +317,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
}
/* encrypt from the session key */
- token = call->conn->params.key->payload.data[0];
+ token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
sg_init_one(&sg, txb->data, txb->len);
@@ -344,13 +344,13 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
int ret;
_enter("{%d{%x}},{#%u},%u,",
- call->debug_id, key_serial(call->conn->params.key),
+ call->debug_id, key_serial(call->conn->key),
txb->seq, txb->len);
if (!call->conn->rxkad.cipher)
return 0;
- ret = key_validate(call->conn->params.key);
+ ret = key_validate(call->conn->key);
if (ret < 0)
return ret;
@@ -380,7 +380,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
y = 1; /* zero checksums are not permitted */
txb->wire.cksum = htons(y);
- switch (call->conn->params.security_level) {
+ switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
ret = 0;
break;
@@ -525,7 +525,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
}
/* decrypt from the session key */
- token = call->conn->params.key->payload.data[0];
+ token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
@@ -596,7 +596,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
u32 x, y;
_enter("{%d{%x}},{#%u}",
- call->debug_id, key_serial(call->conn->params.key), seq);
+ call->debug_id, key_serial(call->conn->key), seq);
if (!call->conn->rxkad.cipher)
return 0;
@@ -632,7 +632,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
goto protocol_error;
}
- switch (call->conn->params.security_level) {
+ switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
ret = 0;
break;
@@ -678,8 +678,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
challenge.min_level = htonl(0);
challenge.__padding = 0;
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -704,16 +704,15 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
serial = atomic_inc_return(&conn->serial);
whdr.serial = htonl(serial);
- _proto("Tx CHALLENGE %%%u", serial);
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_challenge);
return -EAGAIN;
}
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ conn->peer->last_tx_at = ktime_get_seconds();
trace_rxrpc_tx_packet(conn->debug_id, &whdr,
rxrpc_tx_point_rxkad_challenge);
_leave(" = 0");
@@ -737,8 +736,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
_enter("");
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -762,16 +761,15 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
serial = atomic_inc_return(&conn->serial);
whdr.serial = htonl(serial);
- _proto("Tx RESPONSE %%%u", serial);
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len);
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_response);
return -EAGAIN;
}
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ conn->peer->last_tx_at = ktime_get_seconds();
_leave(" = 0");
return 0;
}
@@ -834,15 +832,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
u32 version, nonce, min_level, abort_code;
int ret;
- _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
eproto = tracepoint_string("chall_no_key");
abort_code = RX_PROTOCOL_ERROR;
- if (!conn->params.key)
+ if (!conn->key)
goto protocol_error;
abort_code = RXKADEXPIRED;
- ret = key_validate(conn->params.key);
+ ret = key_validate(conn->key);
if (ret < 0)
goto other_error;
@@ -856,8 +854,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
nonce = ntohl(challenge.nonce);
min_level = ntohl(challenge.min_level);
- _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
- sp->hdr.serial, version, nonce, min_level);
+ trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
eproto = tracepoint_string("chall_ver");
abort_code = RXKADINCONSISTENCY;
@@ -866,10 +863,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
abort_code = RXKADLEVELFAIL;
ret = -EACCES;
- if (conn->params.security_level < min_level)
+ if (conn->security_level < min_level)
goto other_error;
- token = conn->params.key->payload.data[0];
+ token = conn->key->payload.data[0];
/* build the response packet */
resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
@@ -881,7 +878,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
resp->encrypted.cid = htonl(conn->proto.cid);
resp->encrypted.securityIndex = htonl(conn->security_ix);
resp->encrypted.inc_nonce = htonl(nonce + 1);
- resp->encrypted.level = htonl(conn->params.security_level);
+ resp->encrypted.level = htonl(conn->security_level);
resp->kvno = htonl(token->kad->kvno);
resp->ticket_len = htonl(token->kad->ticket_len);
resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
@@ -1139,8 +1136,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
version = ntohl(response->version);
ticket_len = ntohl(response->ticket_len);
kvno = ntohl(response->kvno);
- _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
- sp->hdr.serial, version, kvno, ticket_len);
+
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
eproto = tracepoint_string("rxkad_rsp_ver");
abort_code = RXKADINCONSISTENCY;
@@ -1229,7 +1226,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
level = ntohl(response->encrypted.level);
if (level > RXRPC_SECURITY_ENCRYPT)
goto protocol_error_free;
- conn->params.security_level = level;
+ conn->security_level = level;
/* create a key to hold the security data and expiration time - after
* this the connection security can be handled in exactly the same way
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
new file mode 100644
index 000000000000..66f5eea291ff
--- /dev/null
+++ b/net/rxrpc/rxperf.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* In-kernel rxperf server for testing purposes.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) "rxperf: " fmt
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+
+MODULE_DESCRIPTION("rxperf test server (afs)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+#define RXPERF_PORT 7009
+#define RX_PERF_SERVICE 147
+#define RX_PERF_VERSION 3
+#define RX_PERF_SEND 0
+#define RX_PERF_RECV 1
+#define RX_PERF_RPC 3
+#define RX_PERF_FILE 4
+#define RX_PERF_MAGIC_COOKIE 0x4711
+
+struct rxperf_proto_params {
+ __be32 version;
+ __be32 type;
+ __be32 rsize;
+ __be32 wsize;
+} __packed;
+
+static const u8 rxperf_magic_cookie[] = { 0x00, 0x00, 0x47, 0x11 };
+static const u8 secret[8] = { 0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+
+enum rxperf_call_state {
+ RXPERF_CALL_SV_AWAIT_PARAMS, /* Server: Awaiting parameter block */
+ RXPERF_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */
+ RXPERF_CALL_SV_REPLYING, /* Server: Replying */
+ RXPERF_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */
+ RXPERF_CALL_COMPLETE, /* Completed or failed */
+};
+
+struct rxperf_call {
+ struct rxrpc_call *rxcall;
+ struct iov_iter iter;
+ struct kvec kvec[1];
+ struct work_struct work;
+ const char *type;
+ size_t iov_len;
+ size_t req_len; /* Size of request blob */
+ size_t reply_len; /* Size of reply blob */
+ unsigned int debug_id;
+ unsigned int operation_id;
+ struct rxperf_proto_params params;
+ __be32 tmp[2];
+ s32 abort_code;
+ enum rxperf_call_state state;
+ short error;
+ unsigned short unmarshal;
+ u16 service_id;
+ int (*deliver)(struct rxperf_call *call);
+ void (*processor)(struct work_struct *work);
+};
+
+static struct socket *rxperf_socket;
+static struct key *rxperf_sec_keyring; /* Ring of security/crypto keys */
+static struct workqueue_struct *rxperf_workqueue;
+
+static void rxperf_deliver_to_call(struct work_struct *work);
+static int rxperf_deliver_param_block(struct rxperf_call *call);
+static int rxperf_deliver_request(struct rxperf_call *call);
+static int rxperf_process_call(struct rxperf_call *call);
+static void rxperf_charge_preallocation(struct work_struct *work);
+
+static DECLARE_WORK(rxperf_charge_preallocation_work,
+ rxperf_charge_preallocation);
+
+static inline void rxperf_set_call_state(struct rxperf_call *call,
+ enum rxperf_call_state to)
+{
+ call->state = to;
+}
+
+static inline void rxperf_set_call_complete(struct rxperf_call *call,
+ int error, s32 remote_abort)
+{
+ if (call->state != RXPERF_CALL_COMPLETE) {
+ call->abort_code = remote_abort;
+ call->error = error;
+ call->state = RXPERF_CALL_COMPLETE;
+ }
+}
+
+static void rxperf_rx_discard_new_call(struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ kfree((struct rxperf_call *)user_call_ID);
+}
+
+static void rxperf_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ queue_work(rxperf_workqueue, &rxperf_charge_preallocation_work);
+}
+
+static void rxperf_queue_call_work(struct rxperf_call *call)
+{
+ queue_work(rxperf_workqueue, &call->work);
+}
+
+static void rxperf_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ struct rxperf_call *call = (struct rxperf_call *)call_user_ID;
+
+ if (call->state != RXPERF_CALL_COMPLETE)
+ rxperf_queue_call_work(call);
+}
+
+static void rxperf_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
+{
+ struct rxperf_call *call = (struct rxperf_call *)user_call_ID;
+
+ call->rxcall = rxcall;
+}
+
+static void rxperf_notify_end_reply_tx(struct sock *sock,
+ struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ rxperf_set_call_state((struct rxperf_call *)call_user_ID,
+ RXPERF_CALL_SV_AWAIT_ACK);
+}
+
+/*
+ * Charge the incoming call preallocation.
+ */
+static void rxperf_charge_preallocation(struct work_struct *work)
+{
+ struct rxperf_call *call;
+
+ for (;;) {
+ call = kzalloc(sizeof(*call), GFP_KERNEL);
+ if (!call)
+ break;
+
+ call->type = "unset";
+ call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ call->deliver = rxperf_deliver_param_block;
+ call->state = RXPERF_CALL_SV_AWAIT_PARAMS;
+ call->service_id = RX_PERF_SERVICE;
+ call->iov_len = sizeof(call->params);
+ call->kvec[0].iov_len = sizeof(call->params);
+ call->kvec[0].iov_base = &call->params;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+ INIT_WORK(&call->work, rxperf_deliver_to_call);
+
+ if (rxrpc_kernel_charge_accept(rxperf_socket,
+ rxperf_notify_rx,
+ rxperf_rx_attach,
+ (unsigned long)call,
+ GFP_KERNEL,
+ call->debug_id) < 0)
+ break;
+ call = NULL;
+ }
+
+ kfree(call);
+}
+
+/*
+ * Open an rxrpc socket and bind it to be a server for callback notifications
+ * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
+ */
+static int rxperf_open_socket(void)
+{
+ struct sockaddr_rxrpc srx;
+ struct socket *socket;
+ int ret;
+
+ ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6,
+ &socket);
+ if (ret < 0)
+ goto error_1;
+
+ socket->sk->sk_allocation = GFP_NOFS;
+
+ /* bind the callback manager's address to make this a server socket */
+ memset(&srx, 0, sizeof(srx));
+ srx.srx_family = AF_RXRPC;
+ srx.srx_service = RX_PERF_SERVICE;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin6);
+ srx.transport.sin6.sin6_family = AF_INET6;
+ srx.transport.sin6.sin6_port = htons(RXPERF_PORT);
+
+ ret = rxrpc_sock_set_min_security_level(socket->sk,
+ RXRPC_SECURITY_ENCRYPT);
+ if (ret < 0)
+ goto error_2;
+
+ ret = rxrpc_sock_set_security_keyring(socket->sk, rxperf_sec_keyring);
+
+ ret = kernel_bind(socket, (struct sockaddr *)&srx, sizeof(srx));
+ if (ret < 0)
+ goto error_2;
+
+ rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call,
+ rxperf_rx_discard_new_call);
+
+ ret = kernel_listen(socket, INT_MAX);
+ if (ret < 0)
+ goto error_2;
+
+ rxperf_socket = socket;
+ rxperf_charge_preallocation(&rxperf_charge_preallocation_work);
+ return 0;
+
+error_2:
+ sock_release(socket);
+error_1:
+ pr_err("Can't set up rxperf socket: %d\n", ret);
+ return ret;
+}
+
+/*
+ * close the rxrpc socket rxperf was using
+ */
+static void rxperf_close_socket(void)
+{
+ kernel_listen(rxperf_socket, 0);
+ kernel_sock_shutdown(rxperf_socket, SHUT_RDWR);
+ flush_workqueue(rxperf_workqueue);
+ sock_release(rxperf_socket);
+}
+
+/*
+ * Log remote abort codes that indicate that we have a protocol disagreement
+ * with the server.
+ */
+static void rxperf_log_error(struct rxperf_call *call, s32 remote_abort)
+{
+ static int max = 0;
+ const char *msg;
+ int m;
+
+ switch (remote_abort) {
+ case RX_EOF: msg = "unexpected EOF"; break;
+ case RXGEN_CC_MARSHAL: msg = "client marshalling"; break;
+ case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling"; break;
+ case RXGEN_SS_MARSHAL: msg = "server marshalling"; break;
+ case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling"; break;
+ case RXGEN_DECODE: msg = "opcode decode"; break;
+ case RXGEN_SS_XDRFREE: msg = "server XDR cleanup"; break;
+ case RXGEN_CC_XDRFREE: msg = "client XDR cleanup"; break;
+ case -32: msg = "insufficient data"; break;
+ default:
+ return;
+ }
+
+ m = max;
+ if (m < 3) {
+ max = m + 1;
+ pr_info("Peer reported %s failure on %s\n", msg, call->type);
+ }
+}
+
+/*
+ * deliver messages to a call
+ */
+static void rxperf_deliver_to_call(struct work_struct *work)
+{
+ struct rxperf_call *call = container_of(work, struct rxperf_call, work);
+ enum rxperf_call_state state;
+ u32 abort_code, remote_abort = 0;
+ int ret;
+
+ if (call->state == RXPERF_CALL_COMPLETE)
+ return;
+
+ while (state = call->state,
+ state == RXPERF_CALL_SV_AWAIT_PARAMS ||
+ state == RXPERF_CALL_SV_AWAIT_REQUEST ||
+ state == RXPERF_CALL_SV_AWAIT_ACK
+ ) {
+ if (state == RXPERF_CALL_SV_AWAIT_ACK) {
+ if (!rxrpc_kernel_check_life(rxperf_socket, call->rxcall))
+ goto call_complete;
+ return;
+ }
+
+ ret = call->deliver(call);
+ if (ret == 0)
+ ret = rxperf_process_call(call);
+
+ switch (ret) {
+ case 0:
+ continue;
+ case -EINPROGRESS:
+ case -EAGAIN:
+ return;
+ case -ECONNABORTED:
+ rxperf_log_error(call, call->abort_code);
+ goto call_complete;
+ case -EOPNOTSUPP:
+ abort_code = RXGEN_OPCODE;
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ abort_code, ret, "GOP");
+ goto call_complete;
+ case -ENOTSUPP:
+ abort_code = RX_USER_ABORT;
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ abort_code, ret, "GUA");
+ goto call_complete;
+ case -EIO:
+ pr_err("Call %u in bad state %u\n",
+ call->debug_id, call->state);
+ fallthrough;
+ case -ENODATA:
+ case -EBADMSG:
+ case -EMSGSIZE:
+ case -ENOMEM:
+ case -EFAULT:
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ RXGEN_SS_UNMARSHAL, ret, "GUM");
+ goto call_complete;
+ default:
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ RX_CALL_DEAD, ret, "GER");
+ goto call_complete;
+ }
+ }
+
+call_complete:
+ rxperf_set_call_complete(call, ret, remote_abort);
+ /* The call may have been requeued */
+ rxrpc_kernel_end_call(rxperf_socket, call->rxcall);
+ cancel_work(&call->work);
+ kfree(call);
+}
+
+/*
+ * Extract a piece of data from the received data socket buffers.
+ */
+static int rxperf_extract_data(struct rxperf_call *call, bool want_more)
+{
+ u32 remote_abort = 0;
+ int ret;
+
+ ret = rxrpc_kernel_recv_data(rxperf_socket, call->rxcall, &call->iter,
+ &call->iov_len, want_more, &remote_abort,
+ &call->service_id);
+ pr_debug("Extract i=%zu l=%zu m=%u ret=%d\n",
+ iov_iter_count(&call->iter), call->iov_len, want_more, ret);
+ if (ret == 0 || ret == -EAGAIN)
+ return ret;
+
+ if (ret == 1) {
+ switch (call->state) {
+ case RXPERF_CALL_SV_AWAIT_REQUEST:
+ rxperf_set_call_state(call, RXPERF_CALL_SV_REPLYING);
+ break;
+ case RXPERF_CALL_COMPLETE:
+ pr_debug("premature completion %d", call->error);
+ return call->error;
+ default:
+ break;
+ }
+ return 0;
+ }
+
+ rxperf_set_call_complete(call, ret, remote_abort);
+ return ret;
+}
+
+/*
+ * Grab the operation ID from an incoming manager call.
+ */
+static int rxperf_deliver_param_block(struct rxperf_call *call)
+{
+ u32 version;
+ int ret;
+
+ /* Extract the parameter block */
+ ret = rxperf_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ version = ntohl(call->params.version);
+ call->operation_id = ntohl(call->params.type);
+ call->deliver = rxperf_deliver_request;
+
+ if (version != RX_PERF_VERSION) {
+ pr_info("Version mismatch %x\n", version);
+ return -ENOTSUPP;
+ }
+
+ switch (call->operation_id) {
+ case RX_PERF_SEND:
+ call->type = "send";
+ call->reply_len = 0;
+ call->iov_len = 4; /* Expect req size */
+ break;
+ case RX_PERF_RECV:
+ call->type = "recv";
+ call->req_len = 0;
+ call->iov_len = 4; /* Expect reply size */
+ break;
+ case RX_PERF_RPC:
+ call->type = "rpc";
+ call->iov_len = 8; /* Expect req size and reply size */
+ break;
+ case RX_PERF_FILE:
+ call->type = "file";
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ rxperf_set_call_state(call, RXPERF_CALL_SV_AWAIT_REQUEST);
+ return call->deliver(call);
+}
+
+/*
+ * Deliver the request data.
+ */
+static int rxperf_deliver_request(struct rxperf_call *call)
+{
+ int ret;
+
+ switch (call->unmarshal) {
+ case 0:
+ call->kvec[0].iov_len = call->iov_len;
+ call->kvec[0].iov_base = call->tmp;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+ call->unmarshal++;
+ fallthrough;
+ case 1:
+ ret = rxperf_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ switch (call->operation_id) {
+ case RX_PERF_SEND:
+ call->type = "send";
+ call->req_len = ntohl(call->tmp[0]);
+ call->reply_len = 0;
+ break;
+ case RX_PERF_RECV:
+ call->type = "recv";
+ call->req_len = 0;
+ call->reply_len = ntohl(call->tmp[0]);
+ break;
+ case RX_PERF_RPC:
+ call->type = "rpc";
+ call->req_len = ntohl(call->tmp[0]);
+ call->reply_len = ntohl(call->tmp[1]);
+ break;
+ default:
+ pr_info("Can't parse extra params\n");
+ return -EIO;
+ }
+
+ pr_debug("CALL op=%s rq=%zx rp=%zx\n",
+ call->type, call->req_len, call->reply_len);
+
+ call->iov_len = call->req_len;
+ iov_iter_discard(&call->iter, READ, call->req_len);
+ call->unmarshal++;
+ fallthrough;
+ case 2:
+ ret = rxperf_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+ call->unmarshal++;
+ fallthrough;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Process a call for which we've received the request.
+ */
+static int rxperf_process_call(struct rxperf_call *call)
+{
+ struct msghdr msg = {};
+ struct bio_vec bv[1];
+ struct kvec iov[1];
+ ssize_t n;
+ size_t reply_len = call->reply_len, len;
+
+ rxrpc_kernel_set_tx_length(rxperf_socket, call->rxcall,
+ reply_len + sizeof(rxperf_magic_cookie));
+
+ while (reply_len > 0) {
+ len = min_t(size_t, reply_len, PAGE_SIZE);
+ bv[0].bv_page = ZERO_PAGE(0);
+ bv[0].bv_offset = 0;
+ bv[0].bv_len = len;
+ iov_iter_bvec(&msg.msg_iter, WRITE, bv, 1, len);
+ msg.msg_flags = MSG_MORE;
+ n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg,
+ len, rxperf_notify_end_reply_tx);
+ if (n < 0)
+ return n;
+ if (n == 0)
+ return -EIO;
+ reply_len -= n;
+ }
+
+ len = sizeof(rxperf_magic_cookie);
+ iov[0].iov_base = (void *)rxperf_magic_cookie;
+ iov[0].iov_len = len;
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
+ msg.msg_flags = 0;
+ n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, len,
+ rxperf_notify_end_reply_tx);
+ if (n >= 0)
+ return 0; /* Success */
+
+ if (n == -ENOMEM)
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ RXGEN_SS_MARSHAL, -ENOMEM, "GOM");
+ return n;
+}
+
+/*
+ * Add a key to the security keyring.
+ */
+static int rxperf_add_key(struct key *keyring)
+{
+ key_ref_t kref;
+ int ret;
+
+ kref = key_create_or_update(make_key_ref(keyring, true),
+ "rxrpc_s",
+ __stringify(RX_PERF_SERVICE) ":2",
+ secret,
+ sizeof(secret),
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH
+ | KEY_USR_VIEW,
+ KEY_ALLOC_NOT_IN_QUOTA);
+
+ if (IS_ERR(kref)) {
+ pr_err("Can't allocate rxperf server key: %ld\n", PTR_ERR(kref));
+ return PTR_ERR(kref);
+ }
+
+ ret = key_link(keyring, key_ref_to_ptr(kref));
+ if (ret < 0)
+ pr_err("Can't link rxperf server key: %d\n", ret);
+ key_ref_put(kref);
+ return ret;
+}
+
+/*
+ * Initialise the rxperf server.
+ */
+static int __init rxperf_init(void)
+{
+ struct key *keyring;
+ int ret = -ENOMEM;
+
+ pr_info("Server registering\n");
+
+ rxperf_workqueue = alloc_workqueue("rxperf", 0, 0);
+ if (!rxperf_workqueue)
+ goto error_workqueue;
+
+ keyring = keyring_alloc("rxperf_server",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+ KEY_POS_WRITE |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
+ KEY_USR_WRITE |
+ KEY_OTH_VIEW | KEY_OTH_READ | KEY_OTH_SEARCH,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(keyring)) {
+ pr_err("Can't allocate rxperf server keyring: %ld\n",
+ PTR_ERR(keyring));
+ goto error_keyring;
+ }
+ rxperf_sec_keyring = keyring;
+ ret = rxperf_add_key(keyring);
+ if (ret < 0)
+ goto error_key;
+
+ ret = rxperf_open_socket();
+ if (ret < 0)
+ goto error_socket;
+ return 0;
+
+error_socket:
+error_key:
+ key_put(rxperf_sec_keyring);
+error_keyring:
+ destroy_workqueue(rxperf_workqueue);
+ rcu_barrier();
+error_workqueue:
+ pr_err("Failed to register: %d\n", ret);
+ return ret;
+}
+late_initcall(rxperf_init); /* Must be called after net/ to create socket */
+
+static void __exit rxperf_exit(void)
+{
+ pr_info("Server unregistering.\n");
+
+ rxperf_close_socket();
+ key_put(rxperf_sec_keyring);
+ destroy_workqueue(rxperf_workqueue);
+ rcu_barrier();
+}
+module_exit(rxperf_exit);
+
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 50cb5f1ee0c0..209f2c25a0da 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -63,13 +63,43 @@ const struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
}
/*
+ * Initialise the security on a client call.
+ */
+int rxrpc_init_client_call_security(struct rxrpc_call *call)
+{
+ const struct rxrpc_security *sec;
+ struct rxrpc_key_token *token;
+ struct key *key = call->key;
+ int ret;
+
+ if (!key)
+ return 0;
+
+ ret = key_validate(key);
+ if (ret < 0)
+ return ret;
+
+ for (token = key->payload.data[0]; token; token = token->next) {
+ sec = rxrpc_security_lookup(token->security_index);
+ if (sec)
+ goto found;
+ }
+ return -EKEYREJECTED;
+
+found:
+ call->security = sec;
+ _leave(" = 0");
+ return 0;
+}
+
+/*
* initialise the security on a client connection
*/
int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
{
const struct rxrpc_security *sec;
struct rxrpc_key_token *token;
- struct key *key = conn->params.key;
+ struct key *key = conn->key;
int ret;
_enter("{%d},{%x}", conn->debug_id, key_serial(key));
@@ -163,7 +193,7 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
rcu_read_lock();
- rx = rcu_dereference(conn->params.local->service);
+ rx = rcu_dereference(conn->local->service);
if (!rx)
goto out;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index e5fd8a95bf71..9fa7e37f7155 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -22,30 +22,9 @@
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
{
- unsigned int win_size;
- rxrpc_seq_t tx_win = smp_load_acquire(&call->acks_hard_ack);
-
- /* If we haven't transmitted anything for >1RTT, we should reset the
- * congestion management state.
- */
- if (ktime_before(ktime_add_us(call->tx_last_sent,
- call->peer->srtt_us >> 3),
- ktime_get_real())) {
- if (RXRPC_TX_SMSS > 2190)
- win_size = 2;
- else if (RXRPC_TX_SMSS > 1095)
- win_size = 3;
- else
- win_size = 4;
- win_size += call->cong_extra;
- } else {
- win_size = min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra);
- }
-
if (_tx_win)
- *_tx_win = tx_win;
- return call->tx_top - tx_win < win_size;
+ *_tx_win = call->tx_bottom;
+ return call->tx_prepared - call->tx_bottom < 256;
}
/*
@@ -66,11 +45,6 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
if (signal_pending(current))
return sock_intr_errno(*timeo);
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
@@ -107,11 +81,6 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
tx_win == tx_start && signal_pending(current))
return -EINTR;
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
if (tx_win != tx_start) {
timeout = rtt;
tx_start = tx_win;
@@ -137,11 +106,6 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
@@ -206,33 +170,32 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
{
unsigned long now;
rxrpc_seq_t seq = txb->seq;
- bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags);
- int ret;
+ bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
rxrpc_inc_stat(call->rxnet, stat_tx_data);
- ASSERTCMP(seq, ==, call->tx_top + 1);
+ ASSERTCMP(txb->seq, ==, call->tx_prepared + 1);
/* We have to set the timestamp before queueing as the retransmit
* algorithm can see the packet as soon as we queue it.
*/
txb->last_sent = ktime_get_real();
- /* Add the packet to the call's output buffer */
- rxrpc_get_txbuf(txb, rxrpc_txbuf_get_buffer);
- spin_lock(&call->tx_lock);
- list_add_tail(&txb->call_link, &call->tx_buffer);
- call->tx_top = seq;
- spin_unlock(&call->tx_lock);
-
if (last)
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last);
else
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue);
+ /* Add the packet to the call's output buffer */
+ spin_lock(&call->tx_lock);
+ poke = list_empty(&call->tx_sendmsg);
+ list_add_tail(&txb->call_link, &call->tx_sendmsg);
+ call->tx_prepared = seq;
+ spin_unlock(&call->tx_lock);
+
if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
_debug("________awaiting reply/ACK__________");
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
@@ -255,33 +218,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
default:
break;
}
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
- if (seq == 1 && rxrpc_is_client_call(call))
- rxrpc_expose_client_call(call);
-
- ret = rxrpc_send_data_packet(call, txb);
- if (ret < 0) {
- switch (ret) {
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- 0, ret);
- goto out;
- }
- } else {
- unsigned long now = jiffies;
- unsigned long resend_at = now + call->peer->rto_j;
-
- WRITE_ONCE(call->resend_at, resend_at);
- rxrpc_reduce_call_timer(call, resend_at, now,
- rxrpc_timer_set_for_send);
- }
-
-out:
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans);
+ if (poke)
+ rxrpc_poke_call(call, rxrpc_call_poke_start);
}
/*
@@ -335,8 +276,6 @@ reload:
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more);
do {
- rxrpc_transmit_ack_packets(call->peer->local);
-
if (!txb) {
size_t remain, bufsize, chunk, offset;
@@ -416,10 +355,10 @@ reload:
success:
ret = copied;
if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
- read_lock_bh(&call->state_lock);
+ read_lock(&call->state_lock);
if (call->error < 0)
ret = call->error;
- read_unlock_bh(&call->state_lock);
+ read_unlock(&call->state_lock);
}
out:
call->tx_pending = txb;
@@ -604,7 +543,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
- rxrpc_put_peer(cp.peer);
+ rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p\n", call);
return call;
}
@@ -667,7 +606,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
case RXRPC_CALL_CLIENT_AWAIT_CONN:
case RXRPC_CALL_SERVER_PREALLOC:
case RXRPC_CALL_SERVER_SECURING:
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_sendmsg);
ret = -EBUSY;
goto error_release_sock;
default:
@@ -737,7 +676,7 @@ out_put_unlock:
if (!dropped_lock)
mutex_unlock(&call->user_mutex);
error_put:
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_sendmsg);
_leave(" = %d", ret);
return ret;
@@ -784,9 +723,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
- read_lock_bh(&call->state_lock);
+ read_lock(&call->state_lock);
ret = call->error;
- read_unlock_bh(&call->state_lock);
+ read_unlock(&call->state_lock);
break;
default:
/* Request phase complete for this client call */
diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
index ee269e0e6ee8..e51940589ee5 100644
--- a/net/rxrpc/server_key.c
+++ b/net/rxrpc/server_key.c
@@ -144,3 +144,28 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
_leave(" = 0 [key %x]", key->serial);
return 0;
}
+
+/**
+ * rxrpc_sock_set_security_keyring - Set the security keyring for a kernel service
+ * @sk: The socket to set the keyring on
+ * @keyring: The keyring to set
+ *
+ * Set the server security keyring on an rxrpc socket. This is used to provide
+ * the encryption keys for a kernel service.
+ */
+int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret = 0;
+
+ lock_sock(sk);
+ if (rx->securities)
+ ret = -EINVAL;
+ else if (rx->sk.sk_state != RXRPC_UNBOUND)
+ ret = -EISCONN;
+ else
+ rx->securities = key_get(keyring);
+ release_sock(sk);
+ return ret;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_security_keyring);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 0c827d5bb2b8..ebe0c75e7b07 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* ar-skbuff.c: socket buffer destruction handling
+/* Socket buffer accounting
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
@@ -19,56 +19,50 @@
/*
* Note the allocation or reception of a socket buffer.
*/
-void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
}
/*
* Note the re-emergence of a socket buffer from a queue or buffer.
*/
-void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
if (skb) {
int n = atomic_read(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
}
}
/*
* Note the addition of a ref on a socket buffer.
*/
-void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
skb_get(skb);
}
/*
* Note the dropping of a ref on a socket buffer by the core.
*/
-void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
int n = atomic_inc_return(&rxrpc_n_rx_skbs);
- trace_rxrpc_skb(skb, op, 0, n, here);
+ trace_rxrpc_skb(skb, 0, n, why);
}
/*
* Note the destruction of a socket buffer.
*/
-void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
if (skb) {
- int n;
- n = atomic_dec_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ int n = atomic_dec_return(select_skb_count(skb));
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
kfree_skb(skb);
}
}
@@ -78,12 +72,12 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
*/
void rxrpc_purge_queue(struct sk_buff_head *list)
{
- const void *here = __builtin_return_address(0);
struct sk_buff *skb;
+
while ((skb = skb_dequeue((list))) != NULL) {
int n = atomic_dec_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, rxrpc_skb_purged,
- refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
+ rxrpc_skb_put_purge);
kfree_skb(skb);
}
}
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index 96bfee89927b..d2cf2aac3adb 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -26,7 +26,6 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
INIT_LIST_HEAD(&txb->call_link);
INIT_LIST_HEAD(&txb->tx_link);
refcount_set(&txb->ref, 1);
- txb->call = call;
txb->call_debug_id = call->debug_id;
txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
txb->space = sizeof(txb->data);
@@ -34,7 +33,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
txb->offset = 0;
txb->flags = 0;
txb->ack_why = 0;
- txb->seq = call->tx_top + 1;
+ txb->seq = call->tx_prepared + 1;
txb->wire.epoch = htonl(call->conn->proto.epoch);
txb->wire.cid = htonl(call->cid);
txb->wire.callNumber = htonl(call->call_id);
@@ -44,7 +43,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
txb->wire.userStatus = 0;
txb->wire.securityIndex = call->security_ix;
txb->wire._rsvd = 0;
- txb->wire.serviceId = htons(call->service_id);
+ txb->wire.serviceId = htons(call->dest_srx.srx_service);
trace_rxrpc_txbuf(txb->debug_id,
txb->call_debug_id, txb->seq, 1,
@@ -107,6 +106,7 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
{
struct rxrpc_txbuf *txb;
rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack);
+ bool wake = false;
_enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
@@ -120,8 +120,10 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
if (before(hard_ack, txb->seq))
break;
+ if (txb->seq != call->tx_bottom + 1)
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step);
ASSERTCMP(txb->seq, ==, call->tx_bottom + 1);
- call->tx_bottom++;
+ smp_store_release(&call->tx_bottom, call->tx_bottom + 1);
list_del_rcu(&txb->call_link);
trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
@@ -129,7 +131,12 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
spin_unlock(&call->tx_lock);
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
+ if (after(call->acks_hard_ack, call->tx_bottom + 128))
+ wake = true;
}
spin_unlock(&call->tx_lock);
+
+ if (wake)
+ wake_up(&call->waitq);
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1e8ab4749c6c..777d6b50505c 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -976,7 +976,8 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
+ depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE
+ select NF_NAT_OVS if NF_NAT
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9b31a10cc639..5b3c0ac495be 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -23,6 +23,7 @@
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/flow_offload.h>
+#include <net/tc_wrapper.h>
#ifdef CONFIG_INET
DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
@@ -1080,7 +1081,7 @@ restart_act_graph:
repeat_ttl = 32;
repeat:
- ret = a->ops->act(skb, a, res);
+ ret = tc_act(skb, a, res);
if (unlikely(ret == TC_ACT_REPEAT)) {
if (--repeat_ttl != 0)
goto repeat;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b79eee44e24e..b0455fda7d0b 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -18,6 +18,7 @@
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
+#include <net/tc_wrapper.h>
#define ACT_BPF_NAME_LEN 256
@@ -31,8 +32,9 @@ struct tcf_bpf_cfg {
static struct tc_action_ops act_bpf_ops;
-static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
+ const struct tc_action *act,
+ struct tcf_result *res)
{
bool at_ingress = skb_at_tc_ingress(skb);
struct tcf_bpf *prog = to_bpf(act);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 66b143bb04ac..7e63ff7e3ed7 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -20,6 +20,7 @@
#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_connmark.h>
#include <net/tc_act/tc_connmark.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -27,8 +28,9 @@
static struct tc_action_ops act_connmark_ops;
-static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
const struct nf_conntrack_tuple_hash *thash;
struct nf_conntrack_tuple tuple;
@@ -61,7 +63,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
c = nf_ct_get(skb, &ctinfo);
if (c) {
- skb->mark = c->mark;
+ skb->mark = READ_ONCE(c->mark);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
goto out;
@@ -81,7 +83,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
c = nf_ct_tuplehash_to_ctrack(thash);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
- skb->mark = c->mark;
+ skb->mark = READ_ONCE(c->mark);
nf_ct_put(c);
out:
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 1366adf9b909..95e9304024b7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -32,6 +32,7 @@
#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
+#include <net/tc_wrapper.h>
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
@@ -563,8 +564,9 @@ fail:
return 0;
}
-static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
bool orig_vlan_tag_present = false;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index da0b7f665277..0ca2bb8ed026 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -24,6 +24,7 @@
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
@@ -179,7 +180,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
entry = tcf_ct_flow_table_flow_action_get_next(action);
entry->id = FLOW_ACTION_CT_METADATA;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- entry->ct_metadata.mark = ct->mark;
+ entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
IP_CT_ESTABLISHED_REPLY;
@@ -863,90 +864,6 @@ static void tcf_ct_params_free_rcu(struct rcu_head *head)
tcf_ct_params_free(params);
}
-#if IS_ENABLED(CONFIG_NF_NAT)
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
-{
- __be16 proto = skb_protocol(skb, true);
- int hooknum, err = NF_ACCEPT;
-
- /* See HOOK2MANIP(). */
- if (maniptype == NF_NAT_MANIP_SRC)
- hooknum = NF_INET_LOCAL_IN; /* Source NAT */
- else
- hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- if (proto == htons(ETH_P_IP) &&
- ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
- err = NF_DROP;
- goto out;
- } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
- __be16 frag_off;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- int hdrlen = ipv6_skip_exthdr(skb,
- sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct,
- ctinfo,
- hooknum,
- hdrlen))
- err = NF_DROP;
- goto out;
- }
- }
- /* Non-ICMP, fall thru to initialize if needed. */
- fallthrough;
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- /* Initialize according to the NAT action. */
- err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
- /* Action is set up to establish a new
- * mapping.
- */
- ? nf_nat_setup_info(ct, range, maniptype)
- : nf_nat_alloc_null_binding(ct, hooknum);
- if (err != NF_ACCEPT)
- goto out;
- }
- break;
-
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
-
- default:
- err = NF_DROP;
- goto out;
- }
-
- err = nf_nat_packet(ct, ctinfo, hooknum, skb);
- if (err == NF_ACCEPT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- tc_skb_cb(skb)->post_ct_snat = 1;
- if (maniptype == NF_NAT_MANIP_DST)
- tc_skb_cb(skb)->post_ct_dnat = 1;
- }
-out:
- return err;
-}
-#endif /* CONFIG_NF_NAT */
-
static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
@@ -955,9 +872,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
if (!mask)
return;
- new_mark = mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
+ new_mark = mark | (READ_ONCE(ct->mark) & ~(mask));
+ if (READ_ONCE(ct->mark) != new_mark) {
+ WRITE_ONCE(ct->mark, new_mark);
if (nf_ct_is_confirmed(ct))
nf_conntrack_event_cache(IPCT_MARK, ct);
}
@@ -986,60 +903,30 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
- int err;
- enum nf_nat_manip_type maniptype;
+ int err, action = 0;
if (!(ct_action & TCA_CT_ACT_NAT))
return NF_ACCEPT;
+ if (ct_action & TCA_CT_ACT_NAT_SRC)
+ action |= BIT(NF_NAT_MANIP_SRC);
+ if (ct_action & TCA_CT_ACT_NAT_DST)
+ action |= BIT(NF_NAT_MANIP_DST);
- /* Add NAT extension if not confirmed yet. */
- if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
- return NF_DROP; /* Can't NAT. */
-
- if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
- (ctinfo != IP_CT_RELATED || commit)) {
- /* NAT an established or related connection like before. */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
- /* This is the REPLY direction for a connection
- * for which NAT was applied in the forward
- * direction. Do the reverse NAT.
- */
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
- else
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
- } else if (ct_action & TCA_CT_ACT_NAT_SRC) {
- maniptype = NF_NAT_MANIP_SRC;
- } else if (ct_action & TCA_CT_ACT_NAT_DST) {
- maniptype = NF_NAT_MANIP_DST;
- } else {
- return NF_ACCEPT;
- }
+ err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit);
+
+ if (action & BIT(NF_NAT_MANIP_SRC))
+ tc_skb_cb(skb)->post_ct_snat = 1;
+ if (action & BIT(NF_NAT_MANIP_DST))
+ tc_skb_cb(skb)->post_ct_dnat = 1;
- err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
- if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
- if (ct->status & IPS_SRC_NAT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- maniptype = NF_NAT_MANIP_DST;
- else
- maniptype = NF_NAT_MANIP_SRC;
-
- err = ct_nat_execute(skb, ct, ctinfo, range,
- maniptype);
- } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
- err = ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
- }
- }
return err;
#else
return NF_ACCEPT;
#endif
}
-static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
{
struct net *net = dev_net(skb->dev);
enum ip_conntrack_info ctinfo;
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index d4102f0a9abd..4b1b59da5c0b 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -18,6 +18,7 @@
#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_ctinfo.h>
#include <net/tc_act/tc_ctinfo.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -32,7 +33,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
{
u8 dscp, newdscp;
- newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
+ newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
~INET_ECN_MASK;
switch (proto) {
@@ -72,11 +73,12 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
struct sk_buff *skb)
{
ca->stats_cpmark_set++;
- skb->mark = ct->mark & cp->cpmarkmask;
+ skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
}
-static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
const struct nf_conntrack_tuple_hash *thash = NULL;
struct tcf_ctinfo *ca = to_ctinfo(a);
@@ -130,7 +132,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
}
if (cp->mode & CTINFO_MODE_DSCP)
- if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
+ if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask))
tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
if (cp->mode & CTINFO_MODE_CPMARK)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 62d682b96b88..54f1b13b2360 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -18,6 +18,7 @@
#include <net/pkt_cls.h>
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_gact_ops;
@@ -145,8 +146,9 @@ release_idr:
return err;
}
-static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_gact_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_gact *gact = to_gact(a);
int action = READ_ONCE(gact->tcf_action);
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 3049878e7315..9b8def0be41e 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -14,6 +14,7 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gate.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_gate_ops;
@@ -113,8 +114,9 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
return HRTIMER_RESTART;
}
-static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 41d63b33461d..bc7611b0744c 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -29,6 +29,7 @@
#include <net/tc_act/tc_ife.h>
#include <linux/etherdevice.h>
#include <net/ife.h>
+#include <net/tc_wrapper.h>
static int max_metacnt = IFE_META_MAX + 1;
static struct tc_action_ops act_ife_ops;
@@ -861,8 +862,9 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
return action;
}
-static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ife_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_ife_params *p;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 1625e1037416..5d96ffebd40f 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -20,6 +20,7 @@
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_ipt.h>
#include <net/tc_act/tc_ipt.h>
+#include <net/tc_wrapper.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -216,8 +217,9 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla,
a, &act_xt_ops, tp, flags);
}
-static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
int ret = 0, result = 0;
struct tcf_ipt *ipt = to_ipt(a);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b8ad6ae282c0..7284bcea7b0b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -24,6 +24,7 @@
#include <net/pkt_cls.h>
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
+#include <net/tc_wrapper.h>
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
@@ -217,8 +218,9 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
return err;
}
-static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
struct sk_buff *skb2 = skb;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 8ad25cc8ccd5..ff47ce4d3968 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -14,6 +14,7 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mpls.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_mpls_ops;
@@ -49,8 +50,9 @@ static __be32 tcf_mpls_get_lse(struct mpls_shim_hdr *lse,
return cpu_to_be32(new_lse);
}
-static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_mpls *m = to_mpls(a);
struct tcf_mpls_params *p;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9265145f1040..74c74be33048 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -24,7 +24,7 @@
#include <net/tc_act/tc_nat.h>
#include <net/tcp.h>
#include <net/udp.h>
-
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_nat_ops;
@@ -98,8 +98,9 @@ release_idr:
return err;
}
-static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_nat *p = to_tcf_nat(a);
struct iphdr *iph;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 94ed5857ce67..a0378e9f0121 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -20,6 +20,7 @@
#include <net/tc_act/tc_pedit.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_pedit_ops;
@@ -319,8 +320,9 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb,
return ret;
}
-static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
u32 max_offset;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0adb26e366a7..227cba58ce9f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -19,6 +19,7 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_police.h>
+#include <net/tc_wrapper.h>
/* Each policer is serialized by its individual spinlock */
@@ -242,8 +243,9 @@ static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
return len <= limit;
}
-static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_police_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_police *police = to_police(a);
s64 now, toks, ppstoks = 0, ptoks = 0;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 7a25477f5d99..98dea08c1764 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -20,6 +20,7 @@
#include <net/tc_act/tc_sample.h>
#include <net/psample.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/if_arp.h>
@@ -153,8 +154,9 @@ static bool tcf_sample_dev_ok_push(struct net_device *dev)
}
}
-static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 18d376135461..4b84514534f3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -14,6 +14,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
@@ -21,8 +22,9 @@
static struct tc_action_ops act_simp_ops;
#define SIMP_MAX_DATA 32
-static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_simp_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_defact *d = to_defact(a);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 1710780c908a..ce7008cf291c 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -16,6 +16,7 @@
#include <net/ipv6.h>
#include <net/dsfield.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
@@ -36,8 +37,9 @@ static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
return netdev_cap_txqueue(skb->dev, queue_mapping);
}
-static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_skbedit *d = to_skbedit(a);
struct tcf_skbedit_params *params;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index d98758a63934..dffa990a9629 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -15,14 +15,16 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
static struct tc_action_ops act_skbmod_ops;
-static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_skbmod *d = to_skbmod(a);
int action, max_edit_len, err;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2691a3d8e451..2d12d2626415 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -16,14 +16,16 @@
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
static struct tc_action_ops act_tunnel_key_ops;
-static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 7b24e898a3e6..0251442f5f29 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -12,14 +12,16 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
static struct tc_action_ops act_vlan_ops;
-static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_vlan *v = to_vlan(a);
struct tcf_vlan_params *p;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 23d1cfa4f58c..668130f08903 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -40,6 +40,7 @@
#include <net/tc_act/tc_mpls.h>
#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>
+#include <net/tc_wrapper.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -1564,7 +1565,7 @@ reclassify:
tp->protocol != htons(ETH_P_ALL))
continue;
- err = tp->classify(skb, tp, res);
+ err = tc_classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
first_tp = orig_tp;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d229ce99e554..1b92c33b5f81 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -18,6 +18,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
struct basic_head {
struct list_head flist;
@@ -36,8 +37,9 @@ struct basic_filter {
struct rcu_work rwork;
};
-static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int basic_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
int r;
struct basic_head *head = rcu_dereference_bh(tp->root);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index bc317b3eac12..466c26df853a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -19,6 +19,7 @@
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
+#include <net/tc_wrapper.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
@@ -77,8 +78,9 @@ static int cls_bpf_exec_opcode(int code)
}
}
-static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
bool at_ingress = skb_at_tc_ingress(skb);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index ed00001b528a..bd9322d71910 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -13,6 +13,7 @@
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/cls_cgroup.h>
+#include <net/tc_wrapper.h>
struct cls_cgroup_head {
u32 handle;
@@ -22,8 +23,9 @@ struct cls_cgroup_head {
struct rcu_work rwork;
};
-static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int cls_cgroup_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
u32 classid = task_get_classid(skb);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 014cd3de7b5d..535668e1f748 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -24,6 +24,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/flow_dissector.h>
+#include <net/tc_wrapper.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
@@ -292,8 +293,9 @@ static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
(1 << FLOW_KEY_NFCT_PROTO_SRC) | \
(1 << FLOW_KEY_NFCT_PROTO_DST))
-static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int flow_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct flow_head *head = rcu_dereference_bh(tp->root);
struct flow_filter *f;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 25bc57ee6ea1..0b15698b3531 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -27,6 +27,7 @@
#include <net/vxlan.h>
#include <net/erspan.h>
#include <net/gtp.h>
+#include <net/tc_wrapper.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -305,8 +306,9 @@ static u16 fl_ct_info_to_flower_map[] = {
TCA_FLOWER_KEY_CT_FLAGS_NEW,
};
-static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int fl_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
bool post_ct = tc_skb_cb(skb)->post_ct;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index a32351da968c..ae9439a6c56c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -21,6 +21,7 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+#include <net/tc_wrapper.h>
#define HTSIZE 256
@@ -47,8 +48,9 @@ static u32 fw_hash(u32 handle)
return handle % HTSIZE;
}
-static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct fw_head *head = rcu_dereference_bh(tp->root);
struct fw_filter *f;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 39a5d9c170de..705f63da2c21 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -12,6 +12,7 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
struct cls_mall_head {
struct tcf_exts exts;
@@ -24,8 +25,9 @@ struct cls_mall_head {
bool deleting;
};
-static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int mall_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_mall_head *head = rcu_dereference_bh(tp->root);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 9e43b929d4ca..d0c53724d3e8 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
/*
* 1. For now we assume that route tags < 256.
@@ -121,8 +122,9 @@ static inline int route4_hash_wild(void)
return 0; \
}
-static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int route4_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct route4_head *head = rcu_dereference_bh(tp->root);
struct dst_entry *dst;
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index de1c1d4da597..03d8619bd9c6 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -15,10 +15,12 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#define RSVP_DST_LEN 1
#define RSVP_ID "rsvp"
#define RSVP_OPS cls_rsvp_ops
+#define RSVP_CLS rsvp_classify
#include "cls_rsvp.h"
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b00a7dbd0587..869efba9f834 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -124,8 +124,8 @@ static inline unsigned int hash_src(__be32 *src)
return r; \
}
-static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int RSVP_CLS(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct rsvp_head *head = rcu_dereference_bh(tp->root);
struct rsvp_session *s;
@@ -738,7 +738,7 @@ static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
- .classify = rsvp_classify,
+ .classify = RSVP_CLS,
.init = rsvp_init,
.destroy = rsvp_destroy,
.get = rsvp_get,
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index 64078846000e..e627cc32d633 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -15,10 +15,12 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <net/netlink.h>
+#include <net/tc_wrapper.h>
#define RSVP_DST_LEN 4
#define RSVP_ID "rsvp6"
#define RSVP_OPS cls_rsvp6_ops
+#define RSVP_CLS rsvp6_classify
#include "cls_rsvp.h"
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 1c9eeb98d826..eb0e9458e722 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -16,6 +16,7 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+#include <net/tc_wrapper.h>
/*
* Passing parameters to the root seems to be done more awkwardly than really
@@ -98,9 +99,9 @@ static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
return NULL;
}
-
-static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcindex_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct tcindex_data *p = rcu_dereference_bh(tp->root);
struct tcindex_filter_result *f;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 34d25f7a0687..4e2e269f121f 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -39,6 +39,7 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/idr.h>
+#include <net/tc_wrapper.h>
struct tc_u_knode {
struct tc_u_knode __rcu *next;
@@ -100,8 +101,9 @@ static inline unsigned int u32_hash_fold(__be32 key,
return h;
}
-static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int u32_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct {
struct tc_u_knode *knode;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4a27dfb1ba0f..2317db02c764 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -31,6 +31,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <trace/events/qdisc.h>
@@ -2273,6 +2274,8 @@ static struct pernet_operations psched_net_ops = {
.exit = psched_net_exit,
};
+DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
+
static int __init pktsched_init(void)
{
int err;
@@ -2300,6 +2303,8 @@ static int __init pktsched_init(void)
rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
0);
+ tc_wrapper_init();
+
return 0;
}
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ef9fceadef8d..ee6514af830f 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt)
}
}
+static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid)
+{
+ struct sctp_sched_ops *sched;
+
+ if (!SCTP_SO(stream, sid)->ext)
+ return;
+
+ sched = sctp_sched_ops_from_stream(stream);
+ sched->free_sid(stream, sid);
+ kfree(SCTP_SO(stream, sid)->ext);
+ SCTP_SO(stream, sid)->ext = NULL;
+}
+
/* Migrates chunks from stream queues to new stream queues if needed,
* but not across associations. Also, removes those chunks to streams
* higher than the new max.
@@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
* sctp_stream_update will swap ->out pointers.
*/
for (i = 0; i < outcnt; i++) {
- kfree(SCTP_SO(new, i)->ext);
+ sctp_stream_free_ext(new, i);
SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext;
SCTP_SO(stream, i)->ext = NULL;
}
}
- for (i = outcnt; i < stream->outcnt; i++) {
- kfree(SCTP_SO(stream, i)->ext);
- SCTP_SO(stream, i)->ext = NULL;
- }
+ for (i = outcnt; i < stream->outcnt; i++)
+ sctp_stream_free_ext(stream, i);
}
static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
@@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream)
struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
int i;
- sched->free(stream);
+ sched->unsched_all(stream);
for (i = 0; i < stream->outcnt; i++)
- kfree(SCTP_SO(stream, i)->ext);
+ sctp_stream_free_ext(stream, i);
genradix_free(&stream->out);
genradix_free(&stream->in);
}
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 1ad565ed5627..330067002deb 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -46,7 +46,7 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
return 0;
}
-static void sctp_sched_fcfs_free(struct sctp_stream *stream)
+static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid)
{
}
@@ -96,7 +96,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
.get = sctp_sched_fcfs_get,
.init = sctp_sched_fcfs_init,
.init_sid = sctp_sched_fcfs_init_sid,
- .free = sctp_sched_fcfs_free,
+ .free_sid = sctp_sched_fcfs_free_sid,
.enqueue = sctp_sched_fcfs_enqueue,
.dequeue = sctp_sched_fcfs_dequeue,
.dequeue_done = sctp_sched_fcfs_dequeue_done,
@@ -126,6 +126,23 @@ void sctp_sched_ops_init(void)
sctp_sched_ops_rr_init();
}
+static void sctp_sched_free_sched(struct sctp_stream *stream)
+{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ struct sctp_stream_out_ext *soute;
+ int i;
+
+ sched->unsched_all(stream);
+ for (i = 0; i < stream->outcnt; i++) {
+ soute = SCTP_SO(stream, i)->ext;
+ if (!soute)
+ continue;
+ sched->free_sid(stream, i);
+ /* Give the next scheduler a clean slate. */
+ memset_after(soute, 0, outq);
+ }
+}
+
int sctp_sched_set_sched(struct sctp_association *asoc,
enum sctp_sched_type sched)
{
@@ -141,18 +158,8 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
if (sched > SCTP_SS_MAX)
return -EINVAL;
- if (old) {
- old->free(&asoc->stream);
-
- /* Give the next scheduler a clean slate. */
- for (i = 0; i < asoc->stream.outcnt; i++) {
- struct sctp_stream_out_ext *ext = SCTP_SO(&asoc->stream, i)->ext;
-
- if (!ext)
- continue;
- memset_after(ext, 0, outq);
- }
- }
+ if (old)
+ sctp_sched_free_sched(&asoc->stream);
asoc->outqueue.sched = n;
n->init(&asoc->stream);
@@ -176,7 +183,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
return ret;
err:
- n->free(&asoc->stream);
+ sctp_sched_free_sched(&asoc->stream);
asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */
return ret;
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 80b5a2c4cbc7..42d4800f263d 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -204,30 +204,22 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
return sctp_sched_prio_set(stream, sid, 0, gfp);
}
-static void sctp_sched_prio_free(struct sctp_stream *stream)
+static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid)
{
- struct sctp_stream_priorities *prio, *n;
- LIST_HEAD(list);
+ struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head;
int i;
- /* As we don't keep a list of priorities, to avoid multiple
- * frees we have to do it in 3 steps:
- * 1. unsched everyone, so the lists are free to use in 2.
- * 2. build the list of the priorities
- * 3. free the list
- */
- sctp_sched_prio_unsched_all(stream);
+ if (!prio)
+ return;
+
+ SCTP_SO(stream, sid)->ext->prio_head = NULL;
for (i = 0; i < stream->outcnt; i++) {
- if (!SCTP_SO(stream, i)->ext)
- continue;
- prio = SCTP_SO(stream, i)->ext->prio_head;
- if (prio && list_empty(&prio->prio_sched))
- list_add(&prio->prio_sched, &list);
- }
- list_for_each_entry_safe(prio, n, &list, prio_sched) {
- list_del_init(&prio->prio_sched);
- kfree(prio);
+ if (SCTP_SO(stream, i)->ext &&
+ SCTP_SO(stream, i)->ext->prio_head == prio)
+ return;
}
+
+ kfree(prio);
}
static void sctp_sched_prio_enqueue(struct sctp_outq *q,
@@ -323,7 +315,7 @@ static struct sctp_sched_ops sctp_sched_prio = {
.get = sctp_sched_prio_get,
.init = sctp_sched_prio_init,
.init_sid = sctp_sched_prio_init_sid,
- .free = sctp_sched_prio_free,
+ .free_sid = sctp_sched_prio_free_sid,
.enqueue = sctp_sched_prio_enqueue,
.dequeue = sctp_sched_prio_dequeue,
.dequeue_done = sctp_sched_prio_dequeue_done,
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index ff425aed62c7..1f235e7f643a 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -90,9 +90,8 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
return 0;
}
-static void sctp_sched_rr_free(struct sctp_stream *stream)
+static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid)
{
- sctp_sched_rr_unsched_all(stream);
}
static void sctp_sched_rr_enqueue(struct sctp_outq *q,
@@ -177,7 +176,7 @@ static struct sctp_sched_ops sctp_sched_rr = {
.get = sctp_sched_rr_get,
.init = sctp_sched_rr_init,
.init_sid = sctp_sched_rr_init_sid,
- .free = sctp_sched_rr_free,
+ .free_sid = sctp_sched_rr_free_sid,
.enqueue = sctp_sched_rr_enqueue,
.dequeue = sctp_sched_rr_dequeue,
.dequeue_done = sctp_sched_rr_dequeue_done,
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 7f40ed117fc7..a7a9136198fd 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -84,17 +84,18 @@ static struct ctl_table sctp_table[] = {
{ /* sentinel */ }
};
+/* The following index defines are used in sctp_sysctl_net_register().
+ * If you add new items to the sctp_net_table, please ensure that
+ * the index values of these defines hold the same meaning indicated by
+ * their macro names when they appear in sctp_net_table.
+ */
+#define SCTP_RTO_MIN_IDX 0
+#define SCTP_RTO_MAX_IDX 1
+#define SCTP_PF_RETRANS_IDX 2
+#define SCTP_PS_RETRANS_IDX 3
+
static struct ctl_table sctp_net_table[] = {
- {
- .procname = "rto_initial",
- .data = &init_net.sctp.rto_initial,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- .extra2 = &timer_max
- },
- {
+ [SCTP_RTO_MIN_IDX] = {
.procname = "rto_min",
.data = &init_net.sctp.rto_min,
.maxlen = sizeof(unsigned int),
@@ -103,7 +104,7 @@ static struct ctl_table sctp_net_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &init_net.sctp.rto_max
},
- {
+ [SCTP_RTO_MAX_IDX] = {
.procname = "rto_max",
.data = &init_net.sctp.rto_max,
.maxlen = sizeof(unsigned int),
@@ -112,6 +113,33 @@ static struct ctl_table sctp_net_table[] = {
.extra1 = &init_net.sctp.rto_min,
.extra2 = &timer_max
},
+ [SCTP_PF_RETRANS_IDX] = {
+ .procname = "pf_retrans",
+ .data = &init_net.sctp.pf_retrans,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &init_net.sctp.ps_retrans,
+ },
+ [SCTP_PS_RETRANS_IDX] = {
+ .procname = "ps_retrans",
+ .data = &init_net.sctp.ps_retrans,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.sctp.pf_retrans,
+ .extra2 = &ps_retrans_max,
+ },
+ {
+ .procname = "rto_initial",
+ .data = &init_net.sctp.rto_initial,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &timer_max
+ },
{
.procname = "rto_alpha_exp_divisor",
.data = &init_net.sctp.rto_alpha,
@@ -208,24 +236,6 @@ static struct ctl_table sctp_net_table[] = {
.extra2 = SYSCTL_INT_MAX,
},
{
- .procname = "pf_retrans",
- .data = &init_net.sctp.pf_retrans,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &init_net.sctp.ps_retrans,
- },
- {
- .procname = "ps_retrans",
- .data = &init_net.sctp.ps_retrans,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &init_net.sctp.pf_retrans,
- .extra2 = &ps_retrans_max,
- },
- {
.procname = "sndbuf_policy",
.data = &init_net.sctp.sndbuf_policy,
.maxlen = sizeof(int),
@@ -597,6 +607,11 @@ int sctp_sysctl_net_register(struct net *net)
for (i = 0; table[i].data; i++)
table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+ table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max;
+ table[SCTP_RTO_MAX_IDX].extra1 = &net->sctp.rto_min;
+ table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans;
+ table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans;
+
net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
if (net->sctp.sysctl_header == NULL) {
kfree(table);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index f09316a9035f..d67440de011e 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1971,6 +1971,9 @@ rcv:
/* Ok, everything's fine, try to synch own keys according to peers' */
tipc_crypto_key_synch(rx, *skb);
+ /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */
+ skb_cb = TIPC_SKB_CB(*skb);
+
/* Mark skb decrypted */
skb_cb->decrypted = 1;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index e8630707901e..e8dcdf267c0c 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -211,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
u32 self;
int err;
- skb_linearize(skb);
+ if (skb_linearize(skb)) {
+ kfree_skb(skb);
+ return;
+ }
hdr = buf_msg(skb);
if (caps & TIPC_NODE_ID128)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e260c0d557f5..b3ce24823f50 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2224,7 +2224,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
- skb_linearize(skb);
+ if (skb_linearize(skb))
+ goto exit;
+
hdr = buf_msg(skb);
data = msg_data(hdr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b48d97cbbe29..49ddc484c4fe 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1689,6 +1689,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
struct tipc_node *n;
struct sk_buff_head xmitq;
bool node_up = false;
+ struct net *peer_net;
int bearer_id;
int rc;
@@ -1705,18 +1706,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
return -EHOSTUNREACH;
}
+ rcu_read_lock();
tipc_node_read_lock(n);
node_up = node_is_up(n);
- if (node_up && n->peer_net && check_net(n->peer_net)) {
+ peer_net = n->peer_net;
+ tipc_node_read_unlock(n);
+ if (node_up && peer_net && check_net(peer_net)) {
/* xmit inner linux container */
- tipc_lxc_xmit(n->peer_net, list);
+ tipc_lxc_xmit(peer_net, list);
if (likely(skb_queue_empty(list))) {
- tipc_node_read_unlock(n);
+ rcu_read_unlock();
tipc_node_put(n);
return 0;
}
}
+ rcu_read_unlock();
+ tipc_node_read_lock(n);
bearer_id = n->active_links[selector & 1];
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index d92ec92f0b71..e3b427a70398 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con)
conn_put(con);
}
-static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock)
{
struct tipc_conn *con;
int ret;
@@ -202,10 +202,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
}
con->conid = ret;
s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
set_bit(CF_CONNECTED, &con->flags);
con->server = s;
+ con->sock = sock;
+ conn_get(con);
+ spin_unlock_bh(&s->idr_lock);
return con;
}
@@ -467,7 +469,7 @@ static void tipc_topsrv_accept(struct work_struct *work)
ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
if (ret < 0)
return;
- con = tipc_conn_alloc(srv);
+ con = tipc_conn_alloc(srv, newsock);
if (IS_ERR(con)) {
ret = PTR_ERR(con);
sock_release(newsock);
@@ -479,11 +481,11 @@ static void tipc_topsrv_accept(struct work_struct *work)
newsk->sk_data_ready = tipc_conn_data_ready;
newsk->sk_write_space = tipc_conn_write_space;
newsk->sk_user_data = con;
- con->sock = newsock;
write_unlock_bh(&newsk->sk_callback_lock);
/* Wake up receive process in case of 'SYN+' message */
newsk->sk_data_ready(newsk);
+ conn_put(con);
}
}
@@ -577,17 +579,17 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
sub.filter = filter;
*(u64 *)&sub.usr_handle = (u64)port;
- con = tipc_conn_alloc(tipc_topsrv(net));
+ con = tipc_conn_alloc(tipc_topsrv(net), NULL);
if (IS_ERR(con))
return false;
*conid = con->conid;
- con->sock = NULL;
rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
- if (rc >= 0)
- return true;
+ if (rc)
+ conn_put(con);
+
conn_put(con);
- return false;
+ return !rc;
}
void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 264cf367e265..9ed978634125 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -792,7 +792,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
struct sk_psock *psock;
struct sock *sk_redir;
struct tls_rec *rec;
- bool enospc, policy;
+ bool enospc, policy, redir_ingress;
int err = 0, send;
u32 delta = 0;
@@ -837,6 +837,7 @@ more_data:
}
break;
case __SK_REDIRECT:
+ redir_ingress = psock->redir_ingress;
sk_redir = psock->sk_redir;
memcpy(&msg_redir, msg, sizeof(*msg));
if (msg->apply_bytes < send)
@@ -846,7 +847,8 @@ more_data:
sk_msg_return_zero(sk, msg, send);
msg->sg.size -= send;
release_sock(sk);
- err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
+ err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
+ &msg_redir, send, flags);
lock_sock(sk);
if (err < 0) {
*copied -= sk_msg_free_nocharge(sk, &msg_redir);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 105f522a89fe..616b55c5b890 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -114,14 +114,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
}
-static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
+ struct user_namespace *user_ns)
{
- uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+ uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk));
return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 portid, u32 seq, u32 flags, int sk_ino)
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags, int sk_ino)
{
struct nlmsghdr *nlh;
struct unix_diag_msg *rep;
@@ -167,7 +169,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
goto out_nlmsg_trim;
if ((req->udiag_show & UDIAG_SHOW_UID) &&
- sk_diag_dump_uid(sk, skb))
+ sk_diag_dump_uid(sk, skb, user_ns))
goto out_nlmsg_trim;
nlmsg_end(skb, nlh);
@@ -179,7 +181,8 @@ out_nlmsg_trim:
}
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 portid, u32 seq, u32 flags)
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags)
{
int sk_ino;
@@ -190,7 +193,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
if (!sk_ino)
return 0;
- return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
+ return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino);
}
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -214,7 +217,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
if (!(req->udiag_states & (1 << sk->sk_state)))
goto next;
- if (sk_diag_dump(sk, skb, req,
+ if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) {
@@ -282,7 +285,8 @@ again:
if (!rep)
goto out;
- err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
+ err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk),
+ NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, req->udiag_ino);
if (err < 0) {
nlmsg_free(rep);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1ad0326ff4dc..33a82ecab9d5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3868,6 +3868,9 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
struct cfg80211_chan_def chandef = {};
int ret;
+ if (!link)
+ goto nla_put_failure;
+
if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
goto nla_put_failure;
if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index c3d950d29432..4f3f31244e8b 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -4311,8 +4311,10 @@ static int __init regulatory_init_db(void)
return -EINVAL;
err = load_builtin_regdb_keys();
- if (err)
+ if (err) {
+ platform_device_unregister(reg_pdev);
return err;
+ }
/* We always try to get an update for the static regdomain */
err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index e70302a30013..790bc31cf82e 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -158,9 +158,8 @@ static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
if (bss->pub.hidden_beacon_bss) {
struct cfg80211_internal_bss *hbss;
- hbss = container_of(bss->pub.hidden_beacon_bss,
- struct cfg80211_internal_bss,
- pub);
+
+ hbss = bss_from_pub(bss->pub.hidden_beacon_bss);
hbss->refcount--;
if (hbss->refcount == 0)
bss_free(hbss);
@@ -169,9 +168,7 @@ static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
if (bss->pub.transmitted_bss) {
struct cfg80211_internal_bss *tbss;
- tbss = container_of(bss->pub.transmitted_bss,
- struct cfg80211_internal_bss,
- pub);
+ tbss = bss_from_pub(bss->pub.transmitted_bss);
tbss->refcount--;
if (tbss->refcount == 0)
bss_free(tbss);
@@ -330,7 +327,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
* determine if they are the same ie.
*/
if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) {
- if (!memcmp(tmp_old + 2, tmp + 2, 5)) {
+ if (tmp_old[1] >= 5 && tmp[1] >= 5 &&
+ !memcmp(tmp_old + 2, tmp + 2, 5)) {
/* same vendor ie, copy from
* subelement
*/
@@ -1289,7 +1287,8 @@ static int cmp_bss(struct cfg80211_bss *a,
int i, r;
if (a->channel != b->channel)
- return b->channel->center_freq - a->channel->center_freq;
+ return (b->channel->center_freq * 1000 + b->channel->freq_offset) -
+ (a->channel->center_freq * 1000 + a->channel->freq_offset);
a_ies = rcu_access_pointer(a->ies);
if (!a_ies)
@@ -1790,13 +1789,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
/* This must be before the call to bss_ref_get */
if (tmp->pub.transmitted_bss) {
- struct cfg80211_internal_bss *pbss =
- container_of(tmp->pub.transmitted_bss,
- struct cfg80211_internal_bss,
- pub);
-
new->pub.transmitted_bss = tmp->pub.transmitted_bss;
- bss_ref_get(rdev, pbss);
+ bss_ref_get(rdev, bss_from_pub(tmp->pub.transmitted_bss));
}
list_add_tail(&new->list, &rdev->bss_list);
@@ -2526,10 +2520,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
const struct cfg80211_bss_ies *ies1, *ies2;
size_t ielen = len - offsetof(struct ieee80211_mgmt,
u.probe_resp.variable);
- struct cfg80211_non_tx_bss non_tx_data;
+ struct cfg80211_non_tx_bss non_tx_data = {};
res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
len, gfp);
+
+ /* don't do any further MBSSID handling for S1G */
+ if (ieee80211_is_s1g_beacon(mgmt->frame_control))
+ return res;
+
if (!res || !wiphy->support_mbssid ||
!cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
return res;
@@ -2569,15 +2568,12 @@ EXPORT_SYMBOL(cfg80211_inform_bss_frame_data);
void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- struct cfg80211_internal_bss *bss;
if (!pub)
return;
- bss = container_of(pub, struct cfg80211_internal_bss, pub);
-
spin_lock_bh(&rdev->bss_lock);
- bss_ref_get(rdev, bss);
+ bss_ref_get(rdev, bss_from_pub(pub));
spin_unlock_bh(&rdev->bss_lock);
}
EXPORT_SYMBOL(cfg80211_ref_bss);
@@ -2585,15 +2581,12 @@ EXPORT_SYMBOL(cfg80211_ref_bss);
void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- struct cfg80211_internal_bss *bss;
if (!pub)
return;
- bss = container_of(pub, struct cfg80211_internal_bss, pub);
-
spin_lock_bh(&rdev->bss_lock);
- bss_ref_put(rdev, bss);
+ bss_ref_put(rdev, bss_from_pub(pub));
spin_unlock_bh(&rdev->bss_lock);
}
EXPORT_SYMBOL(cfg80211_put_bss);
@@ -2607,7 +2600,7 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
if (WARN_ON(!pub))
return;
- bss = container_of(pub, struct cfg80211_internal_bss, pub);
+ bss = bss_from_pub(pub);
spin_lock_bh(&rdev->bss_lock);
if (list_empty(&bss->list))
@@ -2616,8 +2609,7 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
list_for_each_entry_safe(nontrans_bss, tmp,
&pub->nontrans_list,
nontrans_list) {
- tmp1 = container_of(nontrans_bss,
- struct cfg80211_internal_bss, pub);
+ tmp1 = bss_from_pub(nontrans_bss);
if (__cfg80211_unlink_bss(rdev, tmp1))
rdev->bss_generation++;
}
@@ -2674,9 +2666,7 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
/* use transmitting bss */
if (cbss->pub.transmitted_bss)
- cbss = container_of(cbss->pub.transmitted_bss,
- struct cfg80211_internal_bss,
- pub);
+ cbss = bss_from_pub(cbss->pub.transmitted_bss);
cbss->pub.channel = chan;
@@ -2705,8 +2695,7 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
list_for_each_entry_safe(nontrans_bss, tmp,
&new->pub.nontrans_list,
nontrans_list) {
- bss = container_of(nontrans_bss,
- struct cfg80211_internal_bss, pub);
+ bss = bss_from_pub(nontrans_bss);
if (__cfg80211_unlink_bss(rdev, bss))
rdev->bss_generation++;
}
@@ -2723,8 +2712,7 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
list_for_each_entry_safe(nontrans_bss, tmp,
&cbss->pub.nontrans_list,
nontrans_list) {
- bss = container_of(nontrans_bss,
- struct cfg80211_internal_bss, pub);
+ bss = bss_from_pub(nontrans_bss);
bss->pub.channel = chan;
rb_erase(&bss->rbn, &rdev->bss_tree);
rb_insert_bss(rdev, bss);
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index acc8e52a4f5f..771d0fa90ef5 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -231,9 +231,9 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
return 0;
}
-static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags)
{
- return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+ return __bpf_xdp_redirect_map(map, index, flags, 0,
__xsk_map_lookup_elem);
}
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 494aa744bfb9..cd47f88921f5 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -3,6 +3,14 @@
# Makefile for the XFRM subsystem.
#
+xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o
+
+ifeq ($(CONFIG_XFRM_INTERFACE),m)
+xfrm_interface-$(CONFIG_DEBUG_INFO_BTF_MODULES) += xfrm_interface_bpf.o
+else ifeq ($(CONFIG_XFRM_INTERFACE),y)
+xfrm_interface-$(CONFIG_DEBUG_INFO_BTF) += xfrm_interface_bpf.o
+endif
+
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
xfrm_sysctl.o xfrm_replay.o xfrm_device.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5f5aafd418af..4aff76c6f12e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb)
}
}
+static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ __u32 seq = xo->seq.low;
+
+ seq += skb_shinfo(skb)->gso_segs;
+ if (unlikely(seq < xo->seq.low))
+ return true;
+
+ return false;
+}
+
struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
{
int err;
@@ -120,6 +132,16 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN)
return skb;
+ /* The packet was sent to HW IPsec packet offload engine,
+ * but to wrong device. Drop the packet, so it won't skip
+ * XFRM stack.
+ */
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) {
+ kfree_skb(skb);
+ dev_core_stats_tx_dropped_inc(dev);
+ return NULL;
+ }
+
/* This skb was already validated on the upper/virtual dev */
if ((x->xso.dev != dev) && (x->xso.real_dev == dev))
return skb;
@@ -134,7 +156,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
return skb;
}
- if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) {
+ if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) ||
+ unlikely(xmit_xfrm_check_overflow(skb)))) {
struct sk_buff *segs;
/* Packet got rerouted, fixup features and segment it. */
@@ -216,6 +239,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_dev_offload *xso = &x->xso;
xfrm_address_t *saddr;
xfrm_address_t *daddr;
+ bool is_packet_offload;
if (!x->type_offload) {
NL_SET_ERR_MSG(extack, "Type doesn't support offload");
@@ -228,11 +252,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) {
+ if (xuo->flags &
+ ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) {
NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
return -EINVAL;
}
+ is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
@@ -247,7 +273,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
x->props.family,
xfrm_smark_get(0, x));
if (IS_ERR(dst))
- return 0;
+ return (is_packet_offload) ? -EINVAL : 0;
dev = dst->dev;
@@ -258,7 +284,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
xso->dev = NULL;
dev_put(dev);
- return 0;
+ return (is_packet_offload) ? -EINVAL : 0;
}
if (x->props.flags & XFRM_STATE_ESN &&
@@ -278,14 +304,28 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
else
xso->dir = XFRM_DEV_OFFLOAD_OUT;
+ if (is_packet_offload)
+ xso->type = XFRM_DEV_OFFLOAD_PACKET;
+ else
+ xso->type = XFRM_DEV_OFFLOAD_CRYPTO;
+
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
xso->dev = NULL;
xso->dir = 0;
xso->real_dev = NULL;
netdev_put(dev, &xso->dev_tracker);
-
- if (err != -EOPNOTSUPP) {
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+
+ /* User explicitly requested packet offload mode and configured
+ * policy in addition to the XFRM state. So be civil to users,
+ * and return an error instead of taking fallback path.
+ *
+ * This WARN_ON() can be seen as a documentation for driver
+ * authors to do not return -EOPNOTSUPP in packet offload mode.
+ */
+ WARN_ON(err == -EOPNOTSUPP && is_packet_offload);
+ if (err != -EOPNOTSUPP || is_packet_offload) {
NL_SET_ERR_MSG(extack, "Device failed to offload this state");
return err;
}
@@ -295,6 +335,69 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
EXPORT_SYMBOL_GPL(xfrm_dev_state_add);
+int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
+ struct xfrm_user_offload *xuo, u8 dir,
+ struct netlink_ext_ack *extack)
+{
+ struct xfrm_dev_offload *xdo = &xp->xdo;
+ struct net_device *dev;
+ int err;
+
+ if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) {
+ /* We support only packet offload mode and it means
+ * that user must set XFRM_OFFLOAD_PACKET bit.
+ */
+ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
+ return -EINVAL;
+ }
+
+ dev = dev_get_by_index(net, xuo->ifindex);
+ if (!dev)
+ return -EINVAL;
+
+ if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) {
+ xdo->dev = NULL;
+ dev_put(dev);
+ NL_SET_ERR_MSG(extack, "Policy offload is not supported");
+ return -EINVAL;
+ }
+
+ xdo->dev = dev;
+ netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC);
+ xdo->real_dev = dev;
+ xdo->type = XFRM_DEV_OFFLOAD_PACKET;
+ switch (dir) {
+ case XFRM_POLICY_IN:
+ xdo->dir = XFRM_DEV_OFFLOAD_IN;
+ break;
+ case XFRM_POLICY_OUT:
+ xdo->dir = XFRM_DEV_OFFLOAD_OUT;
+ break;
+ case XFRM_POLICY_FWD:
+ xdo->dir = XFRM_DEV_OFFLOAD_FWD;
+ break;
+ default:
+ xdo->dev = NULL;
+ dev_put(dev);
+ NL_SET_ERR_MSG(extack, "Unrecognized offload direction");
+ return -EINVAL;
+ }
+
+ err = dev->xfrmdev_ops->xdo_dev_policy_add(xp);
+ if (err) {
+ xdo->dev = NULL;
+ xdo->real_dev = NULL;
+ xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+ xdo->dir = 0;
+ netdev_put(dev, &xdo->dev_tracker);
+ NL_SET_ERR_MSG(extack, "Device failed to offload this policy");
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_policy_add);
+
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
{
int mtu;
@@ -305,8 +408,9 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
if (!x->type_offload || x->encap)
return false;
- if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
- (!xdst->child->xfrm)) {
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
+ ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
+ !xdst->child->xfrm)) {
mtu = xfrm_state_mtu(x, xdst->child_mtu_cached);
if (skb->len <= mtu)
goto ok;
@@ -397,8 +501,10 @@ static int xfrm_api_check(struct net_device *dev)
static int xfrm_dev_down(struct net_device *dev)
{
- if (dev->features & NETIF_F_HW_ESP)
+ if (dev->features & NETIF_F_HW_ESP) {
xfrm_dev_state_flush(dev_net(dev), dev, true);
+ xfrm_dev_policy_flush(dev_net(dev), dev, true);
+ }
return NOTIFY_DONE;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 97074f6f2bde..c06e54a10540 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -671,6 +671,7 @@ resume:
x->curlft.bytes += skb->len;
x->curlft.packets++;
+ x->lastused = ktime_get_real_seconds();
spin_unlock(&x->lock);
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
new file mode 100644
index 000000000000..1ef2162cebcf
--- /dev/null
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable XFRM Helpers for TC-BPF hook
+ *
+ * These are called from SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+
+#include <net/dst_metadata.h>
+#include <net/xfrm.h>
+
+/* bpf_xfrm_info - XFRM metadata information
+ *
+ * Members:
+ * @if_id - XFRM if_id:
+ * Transmit: if_id to be used in policy and state lookups
+ * Receive: if_id of the state matched for the incoming packet
+ * @link - Underlying device ifindex:
+ * Transmit: used as the underlying device in VRF routing
+ * Receive: the device on which the packet had been received
+ */
+struct bpf_xfrm_info {
+ u32 if_id;
+ int link;
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in xfrm_interface BTF");
+
+/* bpf_skb_get_xfrm_info - Get XFRM metadata
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @to - Pointer to memory to which the metadata will be copied
+ * Cannot be NULL
+ */
+__used noinline
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct xfrm_md_info *info;
+
+ info = skb_xfrm_md_info(skb);
+ if (!info)
+ return -EINVAL;
+
+ to->if_id = info->if_id;
+ to->link = info->link;
+ return 0;
+}
+
+/* bpf_skb_get_xfrm_info - Set XFRM metadata
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @from - Pointer to memory from which the metadata will be copied
+ * Cannot be NULL
+ */
+__used noinline
+int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
+ const struct bpf_xfrm_info *from)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct metadata_dst *md_dst;
+ struct xfrm_md_info *info;
+
+ if (unlikely(skb_metadata_dst(skb)))
+ return -EINVAL;
+
+ if (!xfrm_bpf_md_dst) {
+ struct metadata_dst __percpu *tmp;
+
+ tmp = metadata_dst_alloc_percpu(0, METADATA_XFRM, GFP_ATOMIC);
+ if (!tmp)
+ return -ENOMEM;
+ if (cmpxchg(&xfrm_bpf_md_dst, NULL, tmp))
+ metadata_dst_free_percpu(tmp);
+ }
+ md_dst = this_cpu_ptr(xfrm_bpf_md_dst);
+
+ info = &md_dst->u.xfrm_info;
+
+ info->if_id = from->if_id;
+ info->link = from->link;
+ skb_dst_force(skb);
+ info->dst_orig = skb_dst(skb);
+
+ dst_hold((struct dst_entry *)md_dst);
+ skb_dst_set(skb, (struct dst_entry *)md_dst);
+ return 0;
+}
+
+__diag_pop()
+
+BTF_SET8_START(xfrm_ifc_kfunc_set)
+BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
+BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
+BTF_SET8_END(xfrm_ifc_kfunc_set)
+
+static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &xfrm_ifc_kfunc_set,
+};
+
+int __init register_xfrm_interface_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &xfrm_interface_kfunc_set);
+}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface_core.c
index 5a67b120c4db..1f99dc469027 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -396,6 +396,14 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if_id = md_info->if_id;
fl->flowi_oif = md_info->link;
+ if (md_info->dst_orig) {
+ struct dst_entry *tmp_dst = dst;
+
+ dst = md_info->dst_orig;
+ skb_dst_set(skb, dst);
+ md_info->dst_orig = NULL;
+ dst_release(tmp_dst);
+ }
} else {
if_id = xi->p.if_id;
}
@@ -1162,12 +1170,18 @@ static int __init xfrmi_init(void)
if (err < 0)
goto rtnl_link_failed;
+ err = register_xfrm_interface_bpf();
+ if (err < 0)
+ goto kfunc_failed;
+
lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
xfrm_if_register_cb(&xfrm_if_cb);
return err;
+kfunc_failed:
+ rtnl_link_unregister(&xfrmi_link_ops);
rtnl_link_failed:
xfrmi6_fini();
xfrmi6_failed:
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 9a5e79a38c67..ff114d68cc43 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -209,8 +209,6 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
__skb_pull(skb, hdr_len);
memmove(ipv6_hdr(skb), iph, hdr_len);
- x->lastused = ktime_get_real_seconds();
-
return 0;
#else
WARN_ON_ONCE(1);
@@ -494,7 +492,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
struct xfrm_state *x = dst->xfrm;
struct net *net = xs_net(x);
- if (err <= 0)
+ if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
goto resume;
do {
@@ -534,6 +532,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
x->curlft.bytes += skb->len;
x->curlft.packets++;
+ x->lastused = ktime_get_real_seconds();
spin_unlock_bh(&x->lock);
@@ -718,6 +717,16 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
break;
}
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+ if (!xfrm_dev_offload_ok(skb, x)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+ }
+
+ return xfrm_output_resume(sk, skb, 0);
+ }
+
secpath_reset(skb);
if (xfrm_dev_offload_ok(skb, x)) {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e392d8d05e0c..e9eb82c5457d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -425,6 +425,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
BUG();
+ xfrm_dev_policy_free(policy);
call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
@@ -535,7 +536,7 @@ redo:
__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
pol->family, nhashmask, dbits, sbits);
- if (!entry0) {
+ if (!entry0 || pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
hlist_del_rcu(&pol->bydst);
hlist_add_head_rcu(&pol->bydst, ndsttable + h);
h0 = h;
@@ -605,7 +606,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
-static void xfrm_byidx_resize(struct net *net, int total)
+static void xfrm_byidx_resize(struct net *net)
{
unsigned int hmask = net->xfrm.policy_idx_hmask;
unsigned int nhashmask = xfrm_new_hash_mask(hmask);
@@ -683,7 +684,7 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_bydst_resize(net, dir);
}
if (xfrm_byidx_should_resize(net, total))
- xfrm_byidx_resize(net, total);
+ xfrm_byidx_resize(net);
mutex_unlock(&hash_resize_mutex);
}
@@ -866,7 +867,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
break;
}
- if (newpos)
+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst, newpos);
else
hlist_add_head_rcu(&policy->bydst, &n->hhead);
@@ -1347,7 +1348,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
else
break;
}
- if (newpos)
+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst, newpos);
else
hlist_add_head_rcu(&policy->bydst, chain);
@@ -1524,7 +1525,7 @@ static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
break;
}
- if (newpos)
+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
else
hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
@@ -1561,9 +1562,12 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
break;
}
- if (newpos)
+ if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
else
+ /* Packet offload policies enter to the head
+ * to speed-up lookups.
+ */
hlist_add_head_rcu(&policy->bydst, chain);
return delpol;
@@ -1769,12 +1773,41 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
}
return err;
}
+
+static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
+ struct net_device *dev,
+ bool task_valid)
+{
+ struct xfrm_policy *pol;
+ int err = 0;
+
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead ||
+ xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
+ pol->xdo.dev != dev)
+ continue;
+
+ err = security_xfrm_policy_delete(pol->security);
+ if (err) {
+ xfrm_audit_policy_delete(pol, 0, task_valid);
+ return err;
+ }
+ }
+ return err;
+}
#else
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
return 0;
}
+
+static inline int xfrm_dev_policy_flush_secctx_check(struct net *net,
+ struct net_device *dev,
+ bool task_valid)
+{
+ return 0;
+}
#endif
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
@@ -1814,6 +1847,44 @@ out:
}
EXPORT_SYMBOL(xfrm_policy_flush);
+int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
+ bool task_valid)
+{
+ int dir, err = 0, cnt = 0;
+ struct xfrm_policy *pol;
+
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+
+ err = xfrm_dev_policy_flush_secctx_check(net, dev, task_valid);
+ if (err)
+ goto out;
+
+again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ dir = xfrm_policy_id2dir(pol->index);
+ if (pol->walk.dead ||
+ dir >= XFRM_POLICY_MAX ||
+ pol->xdo.dev != dev)
+ continue;
+
+ __xfrm_policy_unlink(pol, dir);
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ cnt++;
+ xfrm_audit_policy_delete(pol, 1, task_valid);
+ xfrm_policy_kill(pol);
+ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ goto again;
+ }
+ if (cnt)
+ __xfrm_policy_inexact_flush(net);
+ else
+ err = -ESRCH;
+out:
+ spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_dev_policy_flush);
+
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
int (*func)(struct xfrm_policy *, int, int, void*),
void *data)
@@ -2113,6 +2184,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
break;
}
}
+ if (ret && ret->xdo.type == XFRM_DEV_OFFLOAD_PACKET)
+ goto skip_inexact;
+
bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
daddr))
@@ -2245,6 +2319,7 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
pol = __xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (pol) {
+ xfrm_dev_policy_delete(pol);
xfrm_policy_kill(pol);
return 0;
}
@@ -4333,7 +4408,8 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm
/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
- struct xfrm_migrate *m, int num_migrate)
+ struct xfrm_migrate *m, int num_migrate,
+ struct netlink_ext_ack *extack)
{
struct xfrm_migrate *mp;
int i, j, n = 0;
@@ -4341,6 +4417,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
write_lock_bh(&pol->lock);
if (unlikely(pol->walk.dead)) {
/* target policy has been deleted */
+ NL_SET_ERR_MSG(extack, "Target policy not found");
write_unlock_bh(&pol->lock);
return -ENOENT;
}
@@ -4372,17 +4449,22 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
return 0;
}
-static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
+static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate,
+ struct netlink_ext_ack *extack)
{
int i, j;
- if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
+ if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) {
+ NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)");
return -EINVAL;
+ }
for (i = 0; i < num_migrate; i++) {
if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
- xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
+ xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) {
+ NL_SET_ERR_MSG(extack, "Addresses in the MIGRATE attribute's list cannot be null");
return -EINVAL;
+ }
/* check if there is any duplicated entry */
for (j = i + 1; j < num_migrate; j++) {
@@ -4393,8 +4475,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
m[i].proto == m[j].proto &&
m[i].mode == m[j].mode &&
m[i].reqid == m[j].reqid &&
- m[i].old_family == m[j].old_family)
+ m[i].old_family == m[j].old_family) {
+ NL_SET_ERR_MSG(extack, "Entries in the MIGRATE attribute's list must be unique");
return -EINVAL;
+ }
}
}
@@ -4404,7 +4488,8 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_migrate,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap, u32 if_id)
+ struct xfrm_encap_tmpl *encap, u32 if_id,
+ struct netlink_ext_ack *extack)
{
int i, err, nx_cur = 0, nx_new = 0;
struct xfrm_policy *pol = NULL;
@@ -4414,16 +4499,20 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *mp;
/* Stage 0 - sanity checks */
- if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
+ err = xfrm_migrate_check(m, num_migrate, extack);
+ if (err < 0)
goto out;
if (dir >= XFRM_POLICY_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid policy direction");
err = -EINVAL;
goto out;
}
/* Stage 1 - find policy */
- if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) {
+ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
+ if (!pol) {
+ NL_SET_ERR_MSG(extack, "Target policy not found");
err = -ENOENT;
goto out;
}
@@ -4445,7 +4534,8 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 3 - update policy */
- if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
+ err = xfrm_policy_migrate(pol, m, num_migrate, extack);
+ if (err < 0)
goto restore_state;
/* Stage 4 - delete old state(s) */
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 9f4d42eb090f..ce56d659c55a 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -714,7 +714,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
oseq += skb_shinfo(skb)->gso_segs;
}
- if (unlikely(oseq < replay_esn->oseq)) {
+ if (unlikely(xo->seq.low < replay_esn->oseq)) {
XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
xo->seq.hi = oseq_hi;
replay_esn->oseq_hi = oseq_hi;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3d2fe7712ac5..cc1d0ea42672 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -84,6 +84,25 @@ static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
}
+#define XFRM_STATE_INSERT(by, _n, _h, _type) \
+ { \
+ struct xfrm_state *_x = NULL; \
+ \
+ if (_type != XFRM_DEV_OFFLOAD_PACKET) { \
+ hlist_for_each_entry_rcu(_x, _h, by) { \
+ if (_x->xso.type == XFRM_DEV_OFFLOAD_PACKET) \
+ continue; \
+ break; \
+ } \
+ } \
+ \
+ if (!_x || _x->xso.type == XFRM_DEV_OFFLOAD_PACKET) \
+ /* SAD is empty or consist from HW SAs only */ \
+ hlist_add_head_rcu(_n, _h); \
+ else \
+ hlist_add_before_rcu(_n, &_x->by); \
+ }
+
static void xfrm_hash_transfer(struct hlist_head *list,
struct hlist_head *ndsttable,
struct hlist_head *nsrctable,
@@ -100,23 +119,25 @@ static void xfrm_hash_transfer(struct hlist_head *list,
h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family,
nhashmask);
- hlist_add_head_rcu(&x->bydst, ndsttable + h);
+ XFRM_STATE_INSERT(bydst, &x->bydst, ndsttable + h, x->xso.type);
h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
x->props.family,
nhashmask);
- hlist_add_head_rcu(&x->bysrc, nsrctable + h);
+ XFRM_STATE_INSERT(bysrc, &x->bysrc, nsrctable + h, x->xso.type);
if (x->id.spi) {
h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
x->id.proto, x->props.family,
nhashmask);
- hlist_add_head_rcu(&x->byspi, nspitable + h);
+ XFRM_STATE_INSERT(byspi, &x->byspi, nspitable + h,
+ x->xso.type);
}
if (x->km.seq) {
h = __xfrm_seq_hash(x->km.seq, nhashmask);
- hlist_add_head_rcu(&x->byseq, nseqtable + h);
+ XFRM_STATE_INSERT(byseq, &x->byseq, nseqtable + h,
+ x->xso.type);
}
}
}
@@ -549,6 +570,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
int err = 0;
spin_lock(&x->lock);
+ xfrm_dev_state_update_curlft(x);
+
if (x->km.state == XFRM_STATE_DEAD)
goto out;
if (x->km.state == XFRM_STATE_EXPIRED)
@@ -951,6 +974,49 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
x->props.family = tmpl->encap_family;
}
+static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family,
+ struct xfrm_dev_offload *xdo)
+{
+ unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
+ struct xfrm_state *x;
+
+ hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
+#ifdef CONFIG_XFRM_OFFLOAD
+ if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ /* HW states are in the head of list, there is
+ * no need to iterate further.
+ */
+ break;
+
+ /* Packet offload: both policy and SA should
+ * have same device.
+ */
+ if (xdo->dev != x->xso.dev)
+ continue;
+ } else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+ /* Skip HW policy for SW lookups */
+ continue;
+#endif
+ if (x->props.family != family ||
+ x->id.spi != spi ||
+ x->id.proto != proto ||
+ !xfrm_addr_equal(&x->id.daddr, daddr, family))
+ continue;
+
+ if ((mark & x->mark.m) != x->mark.v)
+ continue;
+ if (!xfrm_state_hold_rcu(x))
+ continue;
+ return x;
+ }
+
+ return NULL;
+}
+
static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
@@ -1092,6 +1158,23 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
+#ifdef CONFIG_XFRM_OFFLOAD
+ if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ /* HW states are in the head of list, there is
+ * no need to iterate further.
+ */
+ break;
+
+ /* Packet offload: both policy and SA should
+ * have same device.
+ */
+ if (pol->xdo.dev != x->xso.dev)
+ continue;
+ } else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+ /* Skip HW policy for SW lookups */
+ continue;
+#endif
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
@@ -1109,6 +1192,23 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
+#ifdef CONFIG_XFRM_OFFLOAD
+ if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ /* HW states are in the head of list, there is
+ * no need to iterate further.
+ */
+ break;
+
+ /* Packet offload: both policy and SA should
+ * have same device.
+ */
+ if (pol->xdo.dev != x->xso.dev)
+ continue;
+ } else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+ /* Skip HW policy for SW lookups */
+ continue;
+#endif
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
@@ -1126,8 +1226,10 @@ found:
x = best;
if (!x && !error && !acquire_in_progress) {
if (tmpl->id.spi &&
- (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
- tmpl->id.proto, encap_family)) != NULL) {
+ (x0 = __xfrm_state_lookup_all(net, mark, daddr,
+ tmpl->id.spi, tmpl->id.proto,
+ encap_family,
+ &pol->xdo)) != NULL) {
to_put = x0;
error = -EEXIST;
goto out;
@@ -1161,21 +1263,53 @@ found:
x = NULL;
goto out;
}
-
+#ifdef CONFIG_XFRM_OFFLOAD
+ if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
+ struct xfrm_dev_offload *xdo = &pol->xdo;
+ struct xfrm_dev_offload *xso = &x->xso;
+
+ xso->type = XFRM_DEV_OFFLOAD_PACKET;
+ xso->dir = xdo->dir;
+ xso->dev = xdo->dev;
+ xso->real_dev = xdo->real_dev;
+ netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
+ GFP_ATOMIC);
+ error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
+ if (error) {
+ xso->dir = 0;
+ netdev_put(xso->dev, &xso->dev_tracker);
+ xso->dev = NULL;
+ xso->real_dev = NULL;
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+ x->km.state = XFRM_STATE_DEAD;
+ to_put = x;
+ x = NULL;
+ goto out;
+ }
+ }
+#endif
if (km_query(x, tmpl, pol) == 0) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ;
list_add(&x->km.all, &net->xfrm.state_all);
- hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
+ XFRM_STATE_INSERT(bydst, &x->bydst,
+ net->xfrm.state_bydst + h,
+ x->xso.type);
h = xfrm_src_hash(net, daddr, saddr, encap_family);
- hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
+ XFRM_STATE_INSERT(bysrc, &x->bysrc,
+ net->xfrm.state_bysrc + h,
+ x->xso.type);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
- hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
+ XFRM_STATE_INSERT(byspi, &x->byspi,
+ net->xfrm.state_byspi + h,
+ x->xso.type);
}
if (x->km.seq) {
h = xfrm_seq_hash(net, x->km.seq);
- hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+ XFRM_STATE_INSERT(byseq, &x->byseq,
+ net->xfrm.state_byseq + h,
+ x->xso.type);
}
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
hrtimer_start(&x->mtimer,
@@ -1185,6 +1319,18 @@ found:
xfrm_hash_grow_check(net, x->bydst.next != NULL);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
} else {
+#ifdef CONFIG_XFRM_OFFLOAD
+ struct xfrm_dev_offload *xso = &x->xso;
+
+ if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
+ xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
+ xso->dir = 0;
+ netdev_put(xso->dev, &xso->dev_tracker);
+ xso->dev = NULL;
+ xso->real_dev = NULL;
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+ }
+#endif
x->km.state = XFRM_STATE_DEAD;
to_put = x;
x = NULL;
@@ -1280,22 +1426,26 @@ static void __xfrm_state_insert(struct xfrm_state *x)
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
- hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
+ XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
+ x->xso.type);
h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
- hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
+ XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
+ x->xso.type);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
x->props.family);
- hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
+ XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
+ x->xso.type);
}
if (x->km.seq) {
h = xfrm_seq_hash(net, x->km.seq);
- hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+ XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h,
+ x->xso.type);
}
hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
@@ -1409,9 +1559,11 @@ static struct xfrm_state *__find_acq_core(struct net *net,
ktime_set(net->xfrm.sysctl_acq_expires, 0),
HRTIMER_MODE_REL_SOFT);
list_add(&x->km.all, &net->xfrm.state_all);
- hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
+ XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
+ x->xso.type);
h = xfrm_src_hash(net, daddr, saddr, family);
- hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
+ XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
+ x->xso.type);
net->xfrm.state_num++;
@@ -1786,6 +1938,8 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
+ xfrm_dev_state_update_curlft(x);
+
if (!x->curlft.use_time)
x->curlft.use_time = ktime_get_real_seconds();
@@ -2017,7 +2171,7 @@ u32 xfrm_get_acqseq(void)
}
EXPORT_SYMBOL(xfrm_get_acqseq);
-int verify_spi_info(u8 proto, u32 min, u32 max)
+int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack)
{
switch (proto) {
case IPPROTO_AH:
@@ -2026,22 +2180,28 @@ int verify_spi_info(u8 proto, u32 min, u32 max)
case IPPROTO_COMP:
/* IPCOMP spi is 16-bits. */
- if (max >= 0x10000)
+ if (max >= 0x10000) {
+ NL_SET_ERR_MSG(extack, "IPCOMP SPI must be <= 65535");
return -EINVAL;
+ }
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid protocol, must be one of AH, ESP, IPCOMP");
return -EINVAL;
}
- if (min > max)
+ if (min > max) {
+ NL_SET_ERR_MSG(extack, "Invalid SPI range: min > max");
return -EINVAL;
+ }
return 0;
}
EXPORT_SYMBOL(verify_spi_info);
-int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
+int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
+ struct netlink_ext_ack *extack)
{
struct net *net = xs_net(x);
unsigned int h;
@@ -2053,8 +2213,10 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
u32 mark = x->mark.v & x->mark.m;
spin_lock_bh(&x->lock);
- if (x->km.state == XFRM_STATE_DEAD)
+ if (x->km.state == XFRM_STATE_DEAD) {
+ NL_SET_ERR_MSG(extack, "Target ACQUIRE is in DEAD state");
goto unlock;
+ }
err = 0;
if (x->id.spi)
@@ -2065,6 +2227,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
if (minspi == maxspi) {
x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
if (x0) {
+ NL_SET_ERR_MSG(extack, "Requested SPI is already in use");
xfrm_state_put(x0);
goto unlock;
}
@@ -2085,10 +2248,13 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
- hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
+ XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
+ x->xso.type);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0;
+ } else {
+ NL_SET_ERR_MSG(extack, "No SPI available in the requested range");
}
unlock:
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e73f9efc54c1..cf5172d4ce68 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,7 +515,8 @@ static int attach_aead(struct xfrm_state *x, struct nlattr *rta,
}
static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn,
- struct nlattr *rp)
+ struct nlattr *rp,
+ struct netlink_ext_ack *extack)
{
struct xfrm_replay_state_esn *up;
unsigned int ulen;
@@ -528,13 +529,25 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
/* Check the overall length and the internal bitmap length to avoid
* potential overflow. */
- if (nla_len(rp) < (int)ulen ||
- xfrm_replay_state_esn_len(replay_esn) != ulen ||
- replay_esn->bmp_len != up->bmp_len)
+ if (nla_len(rp) < (int)ulen) {
+ NL_SET_ERR_MSG(extack, "ESN attribute is too short");
return -EINVAL;
+ }
+
+ if (xfrm_replay_state_esn_len(replay_esn) != ulen) {
+ NL_SET_ERR_MSG(extack, "New ESN size doesn't match the existing SA's ESN size");
+ return -EINVAL;
+ }
+
+ if (replay_esn->bmp_len != up->bmp_len) {
+ NL_SET_ERR_MSG(extack, "New ESN bitmap size doesn't match the existing SA's ESN bitmap");
+ return -EINVAL;
+ }
- if (up->replay_window > up->bmp_len * sizeof(__u32) * 8)
+ if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) {
+ NL_SET_ERR_MSG(extack, "ESN replay window is longer than the bitmap");
return -EINVAL;
+ }
return 0;
}
@@ -862,12 +875,12 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
if (xfrm_state_kern(x)) {
+ NL_SET_ERR_MSG(extack, "SA is in use by tunnels");
err = -EPERM;
goto out;
}
err = xfrm_state_delete(x);
-
if (err < 0)
goto out;
@@ -943,6 +956,8 @@ static int copy_user_offload(struct xfrm_dev_offload *xso, struct sk_buff *skb)
xuo->ifindex = xso->dev->ifindex;
if (xso->dir == XFRM_DEV_OFFLOAD_IN)
xuo->flags = XFRM_OFFLOAD_INBOUND;
+ if (xso->type == XFRM_DEV_OFFLOAD_PACKET)
+ xuo->flags |= XFRM_OFFLOAD_PACKET;
return 0;
}
@@ -1354,20 +1369,28 @@ static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_SPD_IPV4_HTHRESH]) {
struct nlattr *rta = attrs[XFRMA_SPD_IPV4_HTHRESH];
- if (nla_len(rta) < sizeof(*thresh4))
+ if (nla_len(rta) < sizeof(*thresh4)) {
+ NL_SET_ERR_MSG(extack, "Invalid SPD_IPV4_HTHRESH attribute length");
return -EINVAL;
+ }
thresh4 = nla_data(rta);
- if (thresh4->lbits > 32 || thresh4->rbits > 32)
+ if (thresh4->lbits > 32 || thresh4->rbits > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid hash threshold (must be <= 32 for IPv4)");
return -EINVAL;
+ }
}
if (attrs[XFRMA_SPD_IPV6_HTHRESH]) {
struct nlattr *rta = attrs[XFRMA_SPD_IPV6_HTHRESH];
- if (nla_len(rta) < sizeof(*thresh6))
+ if (nla_len(rta) < sizeof(*thresh6)) {
+ NL_SET_ERR_MSG(extack, "Invalid SPD_IPV6_HTHRESH attribute length");
return -EINVAL;
+ }
thresh6 = nla_data(rta);
- if (thresh6->lbits > 128 || thresh6->rbits > 128)
+ if (thresh6->lbits > 128 || thresh6->rbits > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid hash threshold (must be <= 128 for IPv6)");
return -EINVAL;
+ }
}
if (thresh4 || thresh6) {
@@ -1510,7 +1533,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 if_id = 0;
p = nlmsg_data(nlh);
- err = verify_spi_info(p->info.id.proto, p->min, p->max);
+ err = verify_spi_info(p->info.id.proto, p->min, p->max, extack);
if (err)
goto out_noput;
@@ -1538,10 +1561,12 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
&p->info.saddr, 1,
family);
err = -ENOENT;
- if (x == NULL)
+ if (!x) {
+ NL_SET_ERR_MSG(extack, "Target ACQUIRE not found");
goto out_noput;
+ }
- err = xfrm_alloc_spi(x, p->min, p->max);
+ err = xfrm_alloc_spi(x, p->min, p->max, extack);
if (err)
goto out;
@@ -1867,6 +1892,15 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net,
if (attrs[XFRMA_IF_ID])
xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ /* configure the hardware if offload is requested */
+ if (attrs[XFRMA_OFFLOAD_DEV]) {
+ err = xfrm_dev_policy_add(net, xp,
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]),
+ p->dir, extack);
+ if (err)
+ goto error;
+ }
+
return xp;
error:
*errp = err;
@@ -1906,6 +1940,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
xfrm_audit_policy_add(xp, err ? 0 : 1, true);
if (err) {
+ xfrm_dev_policy_delete(xp);
security_xfrm_policy_free(xp->security);
kfree(xp);
return err;
@@ -2018,6 +2053,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
+ if (!err && xp->xdo.dev)
+ err = copy_user_offload(&xp->xdo, skb);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
@@ -2433,12 +2470,16 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
- if (!lt && !rp && !re && !et && !rt)
+ if (!lt && !rp && !re && !et && !rt) {
+ NL_SET_ERR_MSG(extack, "Missing required attribute for AE");
return err;
+ }
/* pedantic mode - thou shalt sayeth replaceth */
- if (!(nlh->nlmsg_flags&NLM_F_REPLACE))
+ if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "NLM_F_REPLACE flag is required");
return err;
+ }
mark = xfrm_mark_get(attrs, &m);
@@ -2446,10 +2487,12 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
if (x == NULL)
return -ESRCH;
- if (x->km.state != XFRM_STATE_VALID)
+ if (x->km.state != XFRM_STATE_VALID) {
+ NL_SET_ERR_MSG(extack, "SA must be in VALID state");
goto out;
+ }
- err = xfrm_replay_verify_len(x->replay_esn, re);
+ err = xfrm_replay_verify_len(x->replay_esn, re, extack);
if (err)
goto out;
@@ -2584,8 +2627,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
spin_lock_bh(&x->lock);
err = -EINVAL;
- if (x->km.state != XFRM_STATE_VALID)
+ if (x->km.state != XFRM_STATE_VALID) {
+ NL_SET_ERR_MSG(extack, "SA must be in VALID state");
goto out;
+ }
+
km_state_expired(x, ue->hard, nlh->nlmsg_pid);
if (ue->hard) {
@@ -2665,7 +2711,8 @@ nomem:
#ifdef CONFIG_XFRM_MIGRATE
static int copy_from_user_migrate(struct xfrm_migrate *ma,
struct xfrm_kmaddress *k,
- struct nlattr **attrs, int *num)
+ struct nlattr **attrs, int *num,
+ struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_MIGRATE];
struct xfrm_user_migrate *um;
@@ -2684,8 +2731,10 @@ static int copy_from_user_migrate(struct xfrm_migrate *ma,
um = nla_data(rt);
num_migrate = nla_len(rt) / sizeof(*um);
- if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH)
+ if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) {
+ NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)");
return -EINVAL;
+ }
for (i = 0; i < num_migrate; i++, um++, ma++) {
memcpy(&ma->old_daddr, &um->old_daddr, sizeof(ma->old_daddr));
@@ -2718,8 +2767,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_encap_tmpl *encap = NULL;
u32 if_id = 0;
- if (attrs[XFRMA_MIGRATE] == NULL)
+ if (!attrs[XFRMA_MIGRATE]) {
+ NL_SET_ERR_MSG(extack, "Missing required MIGRATE attribute");
return -EINVAL;
+ }
kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL;
@@ -2727,7 +2778,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
- err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n);
+ err = copy_from_user_migrate(m, kmp, attrs, &n, extack);
if (err)
return err;
@@ -2744,7 +2795,8 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id);
+ err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap,
+ if_id, extack);
kfree(encap);
@@ -3341,6 +3393,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
+ if (!err && xp->xdo.dev)
+ err = copy_user_offload(&xp->xdo, skb);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
@@ -3459,6 +3513,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
+ if (!err && xp->xdo.dev)
+ err = copy_user_offload(&xp->xdo, skb);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
@@ -3542,6 +3598,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
err = xfrm_mark_put(skb, &xp->mark);
if (!err)
err = xfrm_if_id_put(skb, xp->if_id);
+ if (!err && xp->xdo.dev)
+ err = copy_user_offload(&xp->xdo, skb);
if (err)
goto out_free_skb;