summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c6
-rw-r--r--net/batman-adv/bat_iv_ogm.c5
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v_elp.c10
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c73
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/main.h4
-rw-r--r--net/batman-adv/network-coding.c4
-rw-r--r--net/batman-adv/routing.c4
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/tp_meter.c3
-rw-r--r--net/batman-adv/translation-table.c21
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bluetooth/ecdh_helper.c11
-rw-r--r--net/bluetooth/hci_core.c63
-rw-r--r--net/bluetooth/smp.c39
-rw-r--r--net/bridge/br.c19
-rw-r--r--net/bridge/br_fdb.c26
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/bridge/br_mdb.c4
-rw-r--r--net/bridge/br_multicast.c8
-rw-r--r--net/bridge/br_netlink.c89
-rw-r--r--net/bridge/br_private.h37
-rw-r--r--net/bridge/br_stp_if.c11
-rw-r--r--net/bridge/br_switchdev.c76
-rw-r--r--net/bridge/br_vlan.c8
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c5
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/core/datagram.c96
-rw-r--r--net/core/dev.c94
-rw-r--r--net/core/dev_ioctl.c1
-rw-r--r--net/core/filter.c120
-rw-r--r--net/core/flow_dissector.c69
-rw-r--r--net/core/lwt_bpf.c5
-rw-r--r--net/core/lwtunnel.c38
-rw-r--r--net/core/neighbour.c60
-rw-r--r--net/core/net-procfs.c13
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/net_namespace.c43
-rw-r--r--net/core/rtnetlink.c69
-rw-r--r--net/core/secure_seq.c9
-rw-r--r--net/core/skbuff.c148
-rw-r--r--net/core/sock.c24
-rw-r--r--net/dcb/dcbnl.c11
-rw-r--r--net/dccp/ccids/ccid2.c8
-rw-r--r--net/dccp/ccids/ccid2.h2
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dsa/Kconfig9
-rw-r--r--net/dsa/Makefile9
-rw-r--r--net/dsa/dsa.c46
-rw-r--r--net/dsa/dsa2.c170
-rw-r--r--net/dsa/dsa_priv.h117
-rw-r--r--net/dsa/legacy.c39
-rw-r--r--net/dsa/port.c259
-rw-r--r--net/dsa/slave.c448
-rw-r--r--net/dsa/switch.c184
-rw-r--r--net/dsa/tag_brcm.c24
-rw-r--r--net/dsa/tag_dsa.c23
-rw-r--r--net/dsa/tag_edsa.c23
-rw-r--r--net/dsa/tag_ksz.c99
-rw-r--r--net/dsa/tag_lan9303.c7
-rw-r--r--net/dsa/tag_mtk.c17
-rw-r--r--net/dsa/tag_qca.c22
-rw-r--r--net/dsa/tag_trailer.c20
-rw-r--r--net/ieee802154/socket.c10
-rw-r--r--net/ipv4/ah4.c8
-rw-r--r--net/ipv4/arp.c4
-rw-r--r--net/ipv4/devinet.c33
-rw-r--r--net/ipv4/esp4.c20
-rw-r--r--net/ipv4/fib_frontend.c31
-rw-r--r--net/ipv4/fib_lookup.h6
-rw-r--r--net/ipv4/fib_semantics.c153
-rw-r--r--net/ipv4/fib_trie.c39
-rw-r--r--net/ipv4/fou.c82
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/ip_tunnel_core.c11
-rw-r--r--net/ipv4/ipmr.c126
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c151
-rw-r--r--net/ipv4/syncookies.c25
-rw-r--r--net/ipv4/sysctl_net_ipv4.c42
-rw-r--r--net/ipv4/tcp.c46
-rw-r--r--net/ipv4/tcp_bbr.c43
-rw-r--r--net/ipv4/tcp_bic.c6
-rw-r--r--net/ipv4/tcp_cubic.c14
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_input.c207
-rw-r--r--net/ipv4/tcp_ipv4.c28
-rw-r--r--net/ipv4/tcp_lp.c17
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c12
-rw-r--r--net/ipv4/tcp_nv.c5
-rw-r--r--net/ipv4/tcp_output.c150
-rw-r--r--net/ipv4/tcp_rate.c16
-rw-r--r--net/ipv4/tcp_recovery.c24
-rw-r--r--net/ipv4/tcp_timer.c45
-rw-r--r--net/ipv4/tcp_westwood.c6
-rw-r--r--net/ipv4/udp.c272
-rw-r--r--net/ipv6/addrconf.c21
-rw-r--r--net/ipv6/addrconf_core.c19
-rw-r--r--net/ipv6/ah6.c8
-rw-r--r--net/ipv6/esp6.c20
-rw-r--r--net/ipv6/fou6.c14
-rw-r--r--net/ipv6/ila/ila_lwt.c7
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/ip6_output.c7
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c3
-rw-r--r--net/ipv6/route.c137
-rw-r--r--net/ipv6/seg6.c4
-rw-r--r--net/ipv6/seg6_iptunnel.c5
-rw-r--r--net/ipv6/syncookies.c9
-rw-r--r--net/ipv6/tcp_ipv6.c15
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/mac80211/agg-rx.c47
-rw-r--r--net/mac80211/cfg.c1
-rw-r--r--net/mac80211/debugfs_sta.c6
-rw-r--r--net/mac80211/ht.c12
-rw-r--r--net/mac80211/ibss.c1
-rw-r--r--net/mac80211/ieee80211_i.h16
-rw-r--r--net/mac80211/iface.c25
-rw-r--r--net/mac80211/mesh.c85
-rw-r--r--net/mac80211/mesh_plink.c3
-rw-r--r--net/mac80211/mlme.c1
-rw-r--r--net/mac80211/rate.c3
-rw-r--r--net/mac80211/rx.c4
-rw-r--r--net/mac80211/spectmgmt.c7
-rw-r--r--net/mac80211/sta_info.c30
-rw-r--r--net/mac80211/sta_info.h14
-rw-r--r--net/mac80211/status.c4
-rw-r--r--net/mac80211/trace.h11
-rw-r--r--net/mac80211/tx.c9
-rw-r--r--net/mac80211/util.c37
-rw-r--r--net/mpls/af_mpls.c266
-rw-r--r--net/mpls/internal.h4
-rw-r--r--net/mpls/mpls_iptunnel.c17
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/nfc/af_nfc.c2
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/packet/af_packet.c11
-rw-r--r--net/qrtr/qrtr.c104
-rw-r--r--net/rfkill/rfkill-gpio.c5
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c156
-rw-r--r--net/rxrpc/ar-internal.h87
-rw-r--r--net/rxrpc/call_accept.c20
-rw-r--r--net/rxrpc/call_object.c49
-rw-r--r--net/rxrpc/conn_client.c206
-rw-r--r--net/rxrpc/conn_event.c4
-rw-r--r--net/rxrpc/conn_object.c58
-rw-r--r--net/rxrpc/conn_service.c23
-rw-r--r--net/rxrpc/input.c17
-rw-r--r--net/rxrpc/local_object.c49
-rw-r--r--net/rxrpc/net_ns.c84
-rw-r--r--net/rxrpc/output.c4
-rw-r--r--net/rxrpc/peer_object.c33
-rw-r--r--net/rxrpc/proc.c42
-rw-r--r--net/rxrpc/recvmsg.c7
-rw-r--r--net/rxrpc/rxkad.c21
-rw-r--r--net/rxrpc/security.c5
-rw-r--r--net/rxrpc/sendmsg.c136
-rw-r--r--net/sched/Kconfig1
-rw-r--r--net/sched/act_api.c55
-rw-r--r--net/sched/act_csum.c1
-rw-r--r--net/sched/cls_api.c425
-rw-r--r--net/sched/cls_bpf.c2
-rw-r--r--net/sched/cls_flower.c62
-rw-r--r--net/sched/cls_matchall.c9
-rw-r--r--net/sched/cls_u32.c12
-rw-r--r--net/sched/sch_api.c50
-rw-r--r--net/sched/sch_atm.c30
-rw-r--r--net/sched/sch_cbq.c22
-rw-r--r--net/sched/sch_drr.c16
-rw-r--r--net/sched/sch_dsmark.c18
-rw-r--r--net/sched/sch_fq.c8
-rw-r--r--net/sched/sch_fq_codel.c18
-rw-r--r--net/sched/sch_hfsc.c22
-rw-r--r--net/sched/sch_htb.c29
-rw-r--r--net/sched/sch_ingress.c61
-rw-r--r--net/sched/sch_mqprio.c5
-rw-r--r--net/sched/sch_multiq.c17
-rw-r--r--net/sched/sch_prio.c20
-rw-r--r--net/sched/sch_qfq.c17
-rw-r--r--net/sched/sch_sfb.c18
-rw-r--r--net/sched/sch_sfq.c18
-rw-r--r--net/sctp/associola.c24
-rw-r--r--net/sctp/chunk.c4
-rw-r--r--net/sctp/offload.c7
-rw-r--r--net/sctp/output.c1
-rw-r--r--net/sctp/outqueue.c10
-rw-r--r--net/sctp/proc.c10
-rw-r--r--net/sctp/sm_make_chunk.c8
-rw-r--r--net/sctp/sm_statefuns.c8
-rw-r--r--net/sctp/socket.c20
-rw-r--r--net/sctp/stream.c93
-rw-r--r--net/sctp/ulpqueue.c8
-rw-r--r--net/socket.c49
-rw-r--r--net/switchdev/switchdev.c30
-rw-r--r--net/wireless/core.c5
-rw-r--r--net/wireless/mesh.c8
-rw-r--r--net/wireless/nl80211.c135
-rw-r--r--net/wireless/rdev-ops.h25
-rw-r--r--net/wireless/sme.c1
-rw-r--r--net/wireless/trace.h60
-rw-r--r--net/wireless/util.c11
207 files changed, 5492 insertions, 2505 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index abc5f400fc71..c1742322f7d2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -797,12 +797,6 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
- .ndo_fdb_add = switchdev_port_fdb_add,
- .ndo_fdb_del = switchdev_port_fdb_del,
- .ndo_fdb_dump = switchdev_port_fdb_dump,
- .ndo_bridge_setlink = switchdev_port_bridge_setlink,
- .ndo_bridge_getlink = switchdev_port_bridge_getlink,
- .ndo_bridge_dellink = switchdev_port_bridge_dellink,
.ndo_get_lock_subclass = vlan_dev_get_lock_subclass,
.ndo_get_iflink = vlan_dev_get_iflink,
};
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 495ba7cdcb04..fa8d6b475c06 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1022,7 +1022,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
u8 tq_avg;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "update_originator(): Searching and updating originator entry of received packet\n");
+ "%s(): Searching and updating originator entry of received packet\n",
+ __func__);
rcu_read_lock();
hlist_for_each_entry_rcu(tmp_neigh_node,
@@ -1944,7 +1945,7 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
batadv_iv_ogm_orig_print_neigh(orig_node, if_outgoing,
seq);
- seq_puts(seq, "\n");
+ seq_putc(seq, '\n');
batman_count++;
next:
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index a36c8e7291d6..4e2724c5b33d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -400,7 +400,7 @@ static void batadv_v_orig_print(struct batadv_priv *bat_priv,
neigh_node->if_incoming->net_dev->name);
batadv_v_orig_print_neigh(orig_node, if_outgoing, seq);
- seq_puts(seq, "\n");
+ seq_putc(seq, '\n');
batman_count++;
next:
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index b90c9903e246..b58007b79e3a 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -19,6 +19,7 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -29,6 +30,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/netdevice.h>
+#include <linux/nl80211.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -109,8 +111,12 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
*/
return 0;
}
- if (!ret)
- return sinfo.expected_throughput / 100;
+ if (ret)
+ goto default_throughput;
+ if (!(sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)))
+ goto default_throughput;
+
+ return sinfo.expected_throughput / 100;
}
/* if not a wifi interface, check if this device provides data via
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index d07e89ec8467..cdd8e8e4df0b 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -394,7 +394,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
*/
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_send_claim(): CLAIM %pM on vid %d\n", mac,
+ "%s(): CLAIM %pM on vid %d\n", __func__, mac,
batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_UNCLAIM:
@@ -403,7 +403,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
*/
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
+ "%s(): UNCLAIM %pM on vid %d\n", __func__, mac,
batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_ANNOUNCE:
@@ -412,7 +412,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
*/
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
+ "%s(): ANNOUNCE of %pM on vid %d\n", __func__,
ethhdr->h_source, batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_REQUEST:
@@ -423,15 +423,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(hw_src, mac);
ether_addr_copy(ethhdr->h_dest, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
+ "%s(): REQUEST of %pM to %pM on vid %d\n", __func__,
ethhdr->h_source, ethhdr->h_dest,
batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_LOOPDETECT:
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n",
- ethhdr->h_source, ethhdr->h_dest,
+ "%s(): LOOPDETECT of %pM to %pM on vid %d\n",
+ __func__, ethhdr->h_source, ethhdr->h_dest,
batadv_print_vid(vid));
break;
@@ -509,7 +509,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
return entry;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n",
+ "%s(): not found (%pM, %d), creating new entry\n", __func__,
orig, batadv_print_vid(vid));
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
@@ -605,7 +605,8 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
int i;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_answer_request(): received a claim request, send all of our own claims again\n");
+ "%s(): received a claim request, send all of our own claims again\n",
+ __func__);
backbone_gw = batadv_backbone_hash_find(bat_priv,
primary_if->net_dev->dev_addr,
@@ -718,8 +719,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
kref_init(&claim->refcount);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
- mac, batadv_print_vid(vid));
+ "%s(): adding new entry %pM, vid %d to hash ...\n",
+ __func__, mac, batadv_print_vid(vid));
kref_get(&claim->refcount);
hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
@@ -739,8 +740,9 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
goto claim_free_ref;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n",
- mac, batadv_print_vid(vid), backbone_gw->orig);
+ "%s(): changing ownership for %pM, vid %d to gw %pM\n",
+ __func__, mac, batadv_print_vid(vid),
+ backbone_gw->orig);
remove_crc = true;
}
@@ -808,7 +810,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
if (!claim)
return;
- batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__,
mac, batadv_print_vid(vid));
batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
@@ -848,8 +850,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
crc = ntohs(*((__be16 *)(&an_addr[4])));
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
- batadv_print_vid(vid), backbone_gw->orig, crc);
+ "%s(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
+ __func__, batadv_print_vid(vid), backbone_gw->orig, crc);
spin_lock_bh(&backbone_gw->crc_lock);
backbone_crc = backbone_gw->crc;
@@ -857,8 +859,8 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
if (backbone_crc != crc) {
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
- "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
- backbone_gw->orig,
+ "%s(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
+ __func__, backbone_gw->orig,
batadv_print_vid(backbone_gw->vid),
backbone_crc, crc);
@@ -903,8 +905,8 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
return true;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "handle_request(): REQUEST vid %d (sent by %pM)...\n",
- batadv_print_vid(vid), ethhdr->h_source);
+ "%s(): REQUEST vid %d (sent by %pM)...\n",
+ __func__, batadv_print_vid(vid), ethhdr->h_source);
batadv_bla_answer_request(bat_priv, primary_if, vid);
return true;
@@ -940,7 +942,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
/* this must be an UNCLAIM frame */
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n",
+ "%s(): UNCLAIM %pM on vid %d (sent by %pM)...\n", __func__,
claim_addr, batadv_print_vid(vid), backbone_gw->orig);
batadv_bla_del_claim(bat_priv, claim_addr, vid);
@@ -1160,9 +1162,9 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
ethhdr);
if (ret == 1)
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, batadv_print_vid(vid), hw_src,
- hw_dst);
+ "%s(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
+ __func__, ethhdr->h_source, batadv_print_vid(vid),
+ hw_src, hw_dst);
if (ret < 2)
return !!ret;
@@ -1196,8 +1198,9 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
}
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst);
+ "%s(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
+ __func__, ethhdr->h_source, batadv_print_vid(vid), hw_src,
+ hw_dst);
return true;
}
@@ -1237,8 +1240,8 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
continue;
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
- "bla_purge_backbone_gw(): backbone gw %pM timed out\n",
- backbone_gw->orig);
+ "%s(): backbone gw %pM timed out\n",
+ __func__, backbone_gw->orig);
purge_now:
/* don't wait for the pending request anymore */
@@ -1295,11 +1298,11 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
goto skip;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_purge_claims(): timed out.\n");
+ "%s(): timed out.\n", __func__);
purge_now:
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_purge_claims(): %pM, vid %d\n",
+ "%s(): %pM, vid %d\n", __func__,
claim->addr, claim->vid);
batadv_handle_unclaim(bat_priv, primary_if,
@@ -1851,8 +1854,8 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
*/
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
- ethhdr->h_source,
+ "%s(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
+ __func__, ethhdr->h_source,
batadv_is_my_client(bat_priv,
ethhdr->h_source, vid) ?
"yes" : "no");
@@ -1978,15 +1981,15 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
* older than 100 ms to make sure we really
* have a roaming client here.
*/
- batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n",
- ethhdr->h_source);
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): Roaming client %pM detected. Unclaim it.\n",
+ __func__, ethhdr->h_source);
batadv_handle_unclaim(bat_priv, primary_if,
primary_if->net_dev->dev_addr,
ethhdr->h_source, vid);
goto allow;
} else {
- batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n",
- ethhdr->h_source);
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): Race for claim %pM detected. Drop packet.\n",
+ __func__, ethhdr->h_source);
goto handled;
}
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 000ca2f113ab..6930d6b50f99 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -601,7 +601,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
BATADV_DAT_ADDR_MAX);
batadv_dbg(BATADV_DBG_DAT, bat_priv,
- "dat_select_candidates(): IP=%pI4 hash(IP)=%u\n", &ip_dst,
+ "%s(): IP=%pI4 hash(IP)=%u\n", __func__, &ip_dst,
ip_key);
for (select = 0; select < BATADV_DAT_CANDIDATES_NUM; select++)
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 810f7d026f54..2be8f1f46529 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.1"
+#define BATADV_SOURCE_VERSION "2017.2"
#endif
/* B.A.T.M.A.N. parameters */
@@ -168,7 +168,7 @@ enum batadv_uev_type {
/* Maximum number of fragments for one packet */
#define BATADV_FRAG_MAX_FRAGMENTS 16
/* Maxumim size of each fragment */
-#define BATADV_FRAG_MAX_FRAG_SIZE 1400
+#define BATADV_FRAG_MAX_FRAG_SIZE 1280
/* Time to keep fragments while waiting for rest of the fragments */
#define BATADV_FRAG_TIMEOUT 10000
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index e1f6fc72fe3e..3604d7899e2c 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1935,9 +1935,7 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
list)
seq_printf(seq, "%pM ",
nc_node->addr);
- seq_puts(seq, "\n");
-
- seq_puts(seq, " Outgoing: ");
+ seq_puts(seq, "\n Outgoing: ");
/* For out_nc_node to this orig_node */
list_for_each_entry_rcu(nc_node,
&orig_node->out_coding_list,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index ae9f4d37d34f..f10e3ff26f9d 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -985,8 +985,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
batadv_orig_node_put(orig_node_gw);
if (is_gw) {
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
- orig_addr_gw);
+ "%s(): Dropped unicast pkt received from another backbone gw %pM.\n",
+ __func__, orig_addr_gw);
goto free_skb;
}
}
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 403df596a73d..d239a9d72ac3 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -971,11 +971,11 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (hard_iface)
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "purge_outstanding_packets(): %s\n",
- hard_iface->net_dev->name);
+ "%s(): %s\n",
+ __func__, hard_iface->net_dev->name);
else
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "purge_outstanding_packets()\n");
+ "%s()\n", __func__);
/* claim bcast list for free() */
spin_lock_bh(&bat_priv->forw_bcast_list_lock);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 556f9a865ddf..e3e2585d0977 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -27,6 +27,7 @@
#include <linux/etherdevice.h>
#include <linux/fs.h>
#include <linux/if_ether.h>
+#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
@@ -1497,7 +1498,7 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
/**
* batadv_tp_meter_init - initialize global tp_meter structures
*/
-void batadv_tp_meter_init(void)
+void __init batadv_tp_meter_init(void)
{
get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom));
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index e75b4937b497..e1133bc634b5 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -2488,18 +2488,16 @@ static bool
_batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
struct batadv_tt_global_entry *tt_global_entry)
{
- bool ret = false;
-
if (tt_local_entry->common.flags & BATADV_TT_CLIENT_WIFI &&
tt_global_entry->common.flags & BATADV_TT_CLIENT_WIFI)
- ret = true;
+ return true;
/* check if the two clients are marked as isolated */
if (tt_local_entry->common.flags & BATADV_TT_CLIENT_ISOLA &&
tt_global_entry->common.flags & BATADV_TT_CLIENT_ISOLA)
- ret = true;
+ return true;
- return ret;
+ return false;
}
/**
@@ -4010,19 +4008,22 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
const unsigned char *addr,
unsigned short vid)
{
- bool ret = false;
+ /* ignore loop detect macs, they are not supposed to be in the tt local
+ * data as well.
+ */
+ if (batadv_bla_is_loopdetect_mac(addr))
+ return false;
if (!batadv_tt_global_add(bat_priv, orig_node, addr, vid,
BATADV_TT_CLIENT_TEMP,
atomic_read(&orig_node->last_ttvn)))
- goto out;
+ return false;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n",
addr, batadv_print_vid(vid), orig_node->orig);
- ret = true;
-out:
- return ret;
+
+ return true;
}
/**
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 42d0997e2fbb..8a8f77a247e6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -733,7 +733,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
EXPORT_SYMBOL(bt_procfs_init);
EXPORT_SYMBOL(bt_procfs_cleanup);
-static struct net_proto_family bt_sock_family_ops = {
+static const struct net_proto_family bt_sock_family_ops = {
.owner = THIS_MODULE,
.family = PF_BLUETOOTH,
.create = bt_sock_create,
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index 24d4e60f8c48..c7b1a9aee579 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -89,11 +89,9 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
p.curve_id = ECC_CURVE_NIST_P256;
buf_len = crypto_ecdh_key_len(&p);
buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf) {
- pr_err("alg: kpp: Failed to allocate %d bytes for buf\n",
- buf_len);
+ if (!buf)
goto free_req;
- }
+
crypto_ecdh_encode_key(buf, buf_len, &p);
/* Set A private Key */
@@ -170,11 +168,8 @@ bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32])
p.key_size = 32;
buf_len = crypto_ecdh_key_len(&p);
buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf) {
- pr_err("alg: kpp: Failed to allocate %d bytes for buf\n",
- buf_len);
+ if (!buf)
goto free_req;
- }
do {
if (tries++ >= max_tries)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 05686776a5fb..93806b959039 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -148,13 +148,13 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
return -EINVAL;
/* When the diagnostic flags are not persistent and the transport
- * is not active, then there is no need for the vendor callback.
- *
- * Instead just store the desired value. If needed the setting
- * will be programmed when the controller gets powered on.
+ * is not active or in user channel operation, then there is no need
+ * for the vendor callback. Instead just store the desired value and
+ * the setting will be programmed when the controller gets powered on.
*/
if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
- !test_bit(HCI_RUNNING, &hdev->flags))
+ (!test_bit(HCI_RUNNING, &hdev->flags) ||
+ hci_dev_test_flag(hdev, HCI_USER_CHANNEL)))
goto done;
hci_req_sync_lock(hdev);
@@ -548,6 +548,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ bool changed = false;
/* If Connectionless Slave Broadcast master role is supported
* enable all necessary events for it.
@@ -557,6 +558,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
events[1] |= 0x80; /* Synchronization Train Complete */
events[2] |= 0x10; /* Slave Page Response Timeout */
events[2] |= 0x20; /* CSB Channel Map Change */
+ changed = true;
}
/* If Connectionless Slave Broadcast slave role is supported
@@ -567,13 +569,24 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
events[2] |= 0x02; /* CSB Receive */
events[2] |= 0x04; /* CSB Timeout */
events[2] |= 0x08; /* Truncated Page Complete */
+ changed = true;
}
/* Enable Authenticated Payload Timeout Expired event if supported */
- if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING)
+ if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) {
events[2] |= 0x80;
+ changed = true;
+ }
- hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events);
+ /* Some Broadcom based controllers indicate support for Set Event
+ * Mask Page 2 command, but then actually do not support it. Since
+ * the default value is all bits set to zero, the command is only
+ * required if the event mask has to be changed. In case no change
+ * to the event mask is needed, skip this command.
+ */
+ if (changed)
+ hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2,
+ sizeof(events), events);
}
static int hci_init3_req(struct hci_request *req, unsigned long opt)
@@ -635,6 +648,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
* Report
*/
+ /* If the controller supports Channel Selection Algorithm #2
+ * feature, enable the corresponding event.
+ */
+ if (hdev->le_features[1] & HCI_LE_CHAN_SEL_ALG2)
+ events[2] |= 0x08; /* LE Channel Selection
+ * Algorithm
+ */
+
/* If the controller supports the LE Set Scan Enable command,
* enable the corresponding advertising report event.
*/
@@ -677,6 +698,12 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
if (hdev->commands[34] & 0x04)
events[1] |= 0x01; /* LE Generate DHKey Complete */
+ /* If the controller supports the LE Set Default PHY or
+ * LE Set PHY commands, enable the corresponding event.
+ */
+ if (hdev->commands[35] & (0x20 | 0x40))
+ events[1] |= 0x08; /* LE PHY Update Complete */
+
hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events),
events);
@@ -771,6 +798,27 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
sizeof(support), &support);
}
+ /* Set Suggested Default Data Length to maximum if supported */
+ if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
+ struct hci_cp_le_write_def_data_len cp;
+
+ cp.tx_len = hdev->le_max_tx_len;
+ cp.tx_time = hdev->le_max_tx_time;
+ hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp);
+ }
+
+ /* Set Default PHY parameters if command is supported */
+ if (hdev->commands[35] & 0x20) {
+ struct hci_cp_le_set_default_phy cp;
+
+ /* No transmitter PHY or receiver PHY preferences */
+ cp.all_phys = 0x03;
+ cp.tx_phys = 0;
+ cp.rx_phys = 0;
+
+ hci_req_add(req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp);
+ }
+
return 0;
}
@@ -1384,6 +1432,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
* completed.
*/
if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag)
ret = hdev->set_diag(hdev, true);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 14585edc9439..a0ef89772c36 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -23,6 +23,7 @@
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/crypto.h>
+#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/hash.h>
@@ -523,7 +524,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
if (err)
return false;
- return !memcmp(bdaddr->b, hash, 3);
+ return !crypto_memneq(bdaddr->b, hash, 3);
}
int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa)
@@ -579,7 +580,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (memcmp(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_sk, debug_sk, 32))
break;
}
smp->debug_key = false;
@@ -993,7 +994,7 @@ static u8 smp_random(struct smp_chan *smp)
if (ret)
return SMP_UNSPECIFIED;
- if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) {
+ if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) {
BT_ERR("Pairing failed (confirmation values mismatch)");
return SMP_CONFIRM_FAILED;
}
@@ -1512,7 +1513,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op)
smp->rrnd, r, cfm))
return SMP_UNSPECIFIED;
- if (memcmp(smp->pcnf, cfm, 16))
+ if (crypto_memneq(smp->pcnf, cfm, 16))
return SMP_CONFIRM_FAILED;
smp->passkey_round++;
@@ -1908,7 +1909,7 @@ static u8 sc_send_public_key(struct smp_chan *smp)
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (memcmp(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_sk, debug_sk, 32))
break;
}
}
@@ -2176,7 +2177,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
if (err)
return SMP_UNSPECIFIED;
- if (memcmp(smp->pcnf, cfm, 16))
+ if (crypto_memneq(smp->pcnf, cfm, 16))
return SMP_CONFIRM_FAILED;
} else {
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
@@ -2660,7 +2661,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
if (err)
return SMP_UNSPECIFIED;
- if (memcmp(cfm.confirm_val, smp->pcnf, 16))
+ if (crypto_memneq(cfm.confirm_val, smp->pcnf, 16))
return SMP_CONFIRM_FAILED;
}
@@ -2693,7 +2694,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
else
hcon->pending_sec_level = BT_SECURITY_FIPS;
- if (!memcmp(debug_pk, smp->remote_pk, 64))
+ if (!crypto_memneq(debug_pk, smp->remote_pk, 64))
set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
if (smp->method == DSP_PASSKEY) {
@@ -2792,7 +2793,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
if (err)
return SMP_UNSPECIFIED;
- if (memcmp(check->e, e, 16))
+ if (crypto_memneq(check->e, e, 16))
return SMP_DHKEY_CHECK_FAILED;
if (!hcon->out) {
@@ -3506,10 +3507,10 @@ static int __init test_debug_key(void)
if (!generate_ecdh_keys(pk, sk))
return -EINVAL;
- if (memcmp(sk, debug_sk, 32))
+ if (crypto_memneq(sk, debug_sk, 32))
return -EINVAL;
- if (memcmp(pk, debug_pk, 64))
+ if (crypto_memneq(pk, debug_pk, 64))
return -EINVAL;
return 0;
@@ -3529,7 +3530,7 @@ static int __init test_ah(struct crypto_cipher *tfm_aes)
if (err)
return err;
- if (memcmp(res, exp, 3))
+ if (crypto_memneq(res, exp, 3))
return -EINVAL;
return 0;
@@ -3559,7 +3560,7 @@ static int __init test_c1(struct crypto_cipher *tfm_aes)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3584,7 +3585,7 @@ static int __init test_s1(struct crypto_cipher *tfm_aes)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3616,7 +3617,7 @@ static int __init test_f4(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3650,10 +3651,10 @@ static int __init test_f5(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(mackey, exp_mackey, 16))
+ if (crypto_memneq(mackey, exp_mackey, 16))
return -EINVAL;
- if (memcmp(ltk, exp_ltk, 16))
+ if (crypto_memneq(ltk, exp_ltk, 16))
return -EINVAL;
return 0;
@@ -3686,7 +3687,7 @@ static int __init test_f6(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3740,7 +3741,7 @@ static int __init test_h6(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 889e5640455f..1407d1ba7577 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,7 +121,7 @@ static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
-/* called with RTNL */
+/* called with RTNL or RCU */
static int br_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -131,27 +131,36 @@ static int br_switchdev_event(struct notifier_block *unused,
struct switchdev_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
- p = br_port_get_rtnl(dev);
+ p = br_port_get_rtnl_rcu(dev);
if (!p)
goto out;
br = p->br;
switch (event) {
- case SWITCHDEV_FDB_ADD:
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
fdb_info->vid);
- if (err)
+ if (err) {
err = notifier_from_errno(err);
+ break;
+ }
+ br_fdb_offloaded_set(br, p, fdb_info->addr,
+ fdb_info->vid);
break;
- case SWITCHDEV_FDB_DEL:
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_del(br, p, fdb_info->addr,
fdb_info->vid);
if (err)
err = notifier_from_errno(err);
break;
+ case SWITCHDEV_FDB_OFFLOADED:
+ fdb_info = ptr;
+ br_fdb_offloaded_set(br, p, fdb_info->addr,
+ fdb_info->vid);
+ break;
}
out:
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ab0c7cc8448f..fef7872a320b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -511,6 +511,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
fdb->is_static = is_static;
fdb->added_by_user = 0;
fdb->added_by_external_learn = 0;
+ fdb->offloaded = 0;
fdb->updated = fdb->used = jiffies;
hlist_add_head_rcu(&fdb->hlist, head);
}
@@ -647,11 +648,16 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0;
+ ndm->ndm_flags = 0;
ndm->ndm_type = 0;
ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
ndm->ndm_state = fdb_to_nud(br, fdb);
+ if (fdb->offloaded)
+ ndm->ndm_flags |= NTF_OFFLOADED;
+ if (fdb->added_by_external_learn)
+ ndm->ndm_flags |= NTF_EXT_LEARNED;
+
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
goto nla_put_failure;
if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
@@ -690,6 +696,8 @@ static void fdb_notify(struct net_bridge *br,
struct sk_buff *skb;
int err = -ENOBUFS;
+ br_switchdev_fdb_notify(fdb, type);
+
skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
@@ -1075,7 +1083,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
struct net_bridge_fdb_entry *fdb;
int err = 0;
- ASSERT_RTNL();
spin_lock_bh(&br->hash_lock);
head = &br->hash[br_mac_hash(addr, vid)];
@@ -1110,7 +1117,6 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
struct net_bridge_fdb_entry *fdb;
int err = 0;
- ASSERT_RTNL();
spin_lock_bh(&br->hash_lock);
fdb = br_fdb_find(br, addr, vid);
@@ -1123,3 +1129,17 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
return err;
}
+
+void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
+ const unsigned char *addr, u16 vid)
+{
+ struct net_bridge_fdb_entry *fdb;
+
+ spin_lock_bh(&br->hash_lock);
+
+ fdb = br_fdb_find(br, addr, vid);
+ if (fdb)
+ fdb->offloaded = 1;
+
+ spin_unlock_bh(&br->hash_lock);
+}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 7f8d05cf9065..f3aef22931ab 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -138,7 +138,7 @@ void br_manage_promisc(struct net_bridge *br)
/* If vlan filtering is disabled or bridge interface is placed
* into promiscuous mode, place all ports in promiscuous mode.
*/
- if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
+ if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
set_all = true;
list_for_each_entry(p, &br->port_list, list) {
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b0845480a3ae..09dcdb9c0f3c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -599,7 +599,7 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
vg = nbp_vlan_group(p);
- if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+ if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
err = __br_mdb_add(net, br, entry);
@@ -694,7 +694,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
vg = nbp_vlan_group(p);
- if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+ if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
err = __br_mdb_del(br, entry);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index faa7261a992f..8dc5c8d69bcd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2176,6 +2176,14 @@ unlock:
return err;
}
+bool br_multicast_enabled(const struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ return !br->multicast_disabled;
+}
+EXPORT_SYMBOL_GPL(br_multicast_enabled);
+
int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
{
unsigned long max_delay;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 32bd3ead9ba1..63dca347b73b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -662,16 +662,26 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
}
/* Set/clear or port flags based on attribute */
-static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[],
- int attrtype, unsigned long mask)
+static int br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[],
+ int attrtype, unsigned long mask)
{
- if (tb[attrtype]) {
- u8 flag = nla_get_u8(tb[attrtype]);
- if (flag)
- p->flags |= mask;
- else
- p->flags &= ~mask;
- }
+ unsigned long flags;
+ int err;
+
+ if (!tb[attrtype])
+ return 0;
+
+ if (nla_get_u8(tb[attrtype]))
+ flags = p->flags | mask;
+ else
+ flags = p->flags & ~mask;
+
+ err = br_switchdev_set_port_flag(p, flags, mask);
+ if (err)
+ return err;
+
+ p->flags = flags;
+ return 0;
}
/* Process bridge protocol info on port */
@@ -681,20 +691,55 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
bool br_vlan_tunnel_old = false;
int err;
- br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
- br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
- br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE);
- br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
- br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
- br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
- br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
- br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST);
- br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD);
- br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
- br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
+ if (err)
+ return err;
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
+ if (err)
+ return err;
br_vlan_tunnel_old = (p->flags & BR_VLAN_TUNNEL) ? true : false;
- br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL);
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL);
+ if (err)
+ return err;
+
if (br_vlan_tunnel_old && !(p->flags & BR_VLAN_TUNNEL))
nbp_vlan_tunnel_info_flush(p);
@@ -1251,7 +1296,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
u32 ageing_time = jiffies_to_clock_t(br->ageing_time);
u32 stp_enabled = br->stp_enabled;
u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
- u8 vlan_enabled = br_vlan_enabled(br);
+ u8 vlan_enabled = br_vlan_enabled(br->dev);
u64 clockval;
clockval = br_timer_value(&br->hello_timer);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0d177280aa84..c18682f804a0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -169,7 +169,8 @@ struct net_bridge_fdb_entry {
unsigned char is_local:1,
is_static:1,
added_by_user:1,
- added_by_external_learn:1;
+ added_by_external_learn:1,
+ offloaded:1;
/* write-heavy members should not affect lookups */
unsigned long updated ____cacheline_aligned_in_smp;
@@ -284,6 +285,12 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *
rtnl_dereference(dev->rx_handler_data) : NULL;
}
+static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_device *dev)
+{
+ return br_port_exists(dev) ?
+ rcu_dereference_rtnl(dev->rx_handler_data) : NULL;
+}
+
struct net_bridge {
spinlock_t lock;
spinlock_t hash_lock;
@@ -530,6 +537,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
+void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
+ const unsigned char *addr, u16 vid);
/* br_forward.c */
enum br_pkt_type {
@@ -854,10 +863,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
return vg->pvid;
}
-static inline int br_vlan_enabled(struct net_bridge *br)
-{
- return br->vlan_enabled;
-}
#else
static inline bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
@@ -945,11 +950,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
return 0;
}
-static inline int br_vlan_enabled(struct net_bridge *br)
-{
- return 0;
-}
-
static inline int __br_vlan_filter_toggle(struct net_bridge *br,
unsigned long val)
{
@@ -1085,6 +1085,11 @@ void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb);
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
const struct sk_buff *skb);
+int br_switchdev_set_port_flag(struct net_bridge_port *p,
+ unsigned long flags,
+ unsigned long mask);
+void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
+ int type);
#else
static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
{
@@ -1101,6 +1106,18 @@ static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
{
return true;
}
+
+static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
+ unsigned long flags,
+ unsigned long mask)
+{
+ return 0;
+}
+
+static inline void
+br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+{
+}
#endif /* CONFIG_NET_SWITCHDEV */
#endif
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 6f12a5271219..89110319ef0f 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -150,7 +150,6 @@ static int br_stp_call_user(struct net_bridge *br, char *arg)
static void br_stp_start(struct net_bridge *br)
{
- struct net_bridge_port *p;
int err = -ENOENT;
if (net_eq(dev_net(br->dev), &init_net))
@@ -169,11 +168,6 @@ static void br_stp_start(struct net_bridge *br)
if (!err) {
br->stp_enabled = BR_USER_STP;
br_debug(br, "userspace STP started\n");
-
- /* Stop hello and hold timers */
- del_timer(&br->hello_timer);
- list_for_each_entry(p, &br->port_list, list)
- del_timer(&p->hold_timer);
} else {
br->stp_enabled = BR_KERNEL_STP;
br_debug(br, "using kernel STP\n");
@@ -189,7 +183,6 @@ static void br_stp_start(struct net_bridge *br)
static void br_stp_stop(struct net_bridge *br)
{
- struct net_bridge_port *p;
int err;
if (br->stp_enabled == BR_USER_STP) {
@@ -198,10 +191,6 @@ static void br_stp_stop(struct net_bridge *br)
br_err(br, "failed to stop userspace STP (%d)\n", err);
/* To start timers on any ports left in blocking */
- mod_timer(&br->hello_timer, jiffies + br->hello_time);
- list_for_each_entry(p, &br->port_list, list)
- mod_timer(&p->hold_timer,
- round_jiffies(jiffies + BR_HOLD_TIME));
spin_lock_bh(&br->lock);
br_port_state_selection(br);
spin_unlock_bh(&br->lock);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index f4097b900de1..181a44d0f1da 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -55,3 +55,79 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
return !skb->offload_fwd_mark ||
BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
}
+
+/* Flags that can be offloaded to hardware */
+#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
+ BR_MCAST_FLOOD | BR_BCAST_FLOOD)
+
+int br_switchdev_set_port_flag(struct net_bridge_port *p,
+ unsigned long flags,
+ unsigned long mask)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
+ };
+ int err;
+
+ if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
+ return 0;
+
+ err = switchdev_port_attr_get(p->dev, &attr);
+ if (err == -EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+
+ /* Check if specific bridge flag attribute offload is supported */
+ if (!(attr.u.brport_flags_support & mask)) {
+ br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
+ (unsigned int)p->port_no, p->dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
+ attr.flags = SWITCHDEV_F_DEFER;
+ attr.u.brport_flags = flags;
+ err = switchdev_port_attr_set(p->dev, &attr);
+ if (err) {
+ br_warn(p->br, "error setting offload flag on port %u(%s)\n",
+ (unsigned int)p->port_no, p->dev->name);
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
+ u16 vid, struct net_device *dev)
+{
+ struct switchdev_notifier_fdb_info info;
+ unsigned long notifier_type;
+
+ info.addr = mac;
+ info.vid = vid;
+ notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
+ call_switchdev_notifiers(notifier_type, dev, &info.info);
+}
+
+void
+br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+{
+ if (!fdb->added_by_user)
+ return;
+
+ switch (type) {
+ case RTM_DELNEIGH:
+ br_switchdev_fdb_call_notifiers(false, fdb->addr.addr,
+ fdb->vlan_id,
+ fdb->dst->dev);
+ break;
+ case RTM_NEWNEIGH:
+ br_switchdev_fdb_call_notifiers(true, fdb->addr.addr,
+ fdb->vlan_id,
+ fdb->dst->dev);
+ break;
+ }
+}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index b838213c408e..26a1a56639b2 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -706,6 +706,14 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
return __br_vlan_filter_toggle(br, val);
}
+bool br_vlan_enabled(const struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ return !!br->vlan_enabled;
+}
+EXPORT_SYMBOL_GPL(br_vlan_enabled);
+
int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
{
int err = 0;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 346ef6b00b8f..c16dd3a47fc6 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -111,7 +111,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
__wsum csum;
u8 proto;
- if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
+ if (!nft_bridge_iphdr_validate(oldskb))
return;
/* IP header checks: fragment. */
@@ -226,9 +226,6 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
__be16 fo;
u8 proto = ip6h->nexthdr;
- if (skb->csum_bad)
- return false;
-
if (skb_csum_unnecessary(skb))
return true;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 21f18ea2fce4..7506b853a84d 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1103,7 +1103,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
}
-static struct net_proto_family caif_family_ops = {
+static const struct net_proto_family caif_family_ops = {
.family = PF_CAIF,
.create = caif_create,
.owner = THIS_MODULE,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..e5311a7c70da 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -161,6 +161,45 @@ done:
return skb;
}
+struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
+ struct sk_buff_head *queue,
+ unsigned int flags,
+ void (*destructor)(struct sock *sk,
+ struct sk_buff *skb),
+ int *peeked, int *off, int *err,
+ struct sk_buff **last)
+{
+ struct sk_buff *skb;
+ int _off = *off;
+
+ *last = queue->prev;
+ skb_queue_walk(queue, skb) {
+ if (flags & MSG_PEEK) {
+ if (_off >= skb->len && (skb->len || _off ||
+ skb->peeked)) {
+ _off -= skb->len;
+ continue;
+ }
+ if (!skb->len) {
+ skb = skb_set_peeked(skb);
+ if (unlikely(IS_ERR(skb))) {
+ *err = PTR_ERR(skb);
+ return NULL;
+ }
+ }
+ *peeked = 1;
+ atomic_inc(&skb->users);
+ } else {
+ __skb_unlink(skb, queue);
+ if (destructor)
+ destructor(sk, skb);
+ }
+ *off = _off;
+ return skb;
+ }
+ return NULL;
+}
+
/**
* __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
@@ -222,40 +261,14 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
- int _off = *off;
-
- *last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
- skb_queue_walk(queue, skb) {
- *last = skb;
- if (flags & MSG_PEEK) {
- if (_off >= skb->len && (skb->len || _off ||
- skb->peeked)) {
- _off -= skb->len;
- continue;
- }
- if (!skb->len) {
- skb = skb_set_peeked(skb);
- if (IS_ERR(skb)) {
- error = PTR_ERR(skb);
- spin_unlock_irqrestore(&queue->lock,
- cpu_flags);
- goto no_packet;
- }
- }
- *peeked = 1;
- atomic_inc(&skb->users);
- } else {
- __skb_unlink(skb, queue);
- if (destructor)
- destructor(sk, skb);
- }
- spin_unlock_irqrestore(&queue->lock, cpu_flags);
- *off = _off;
- return skb;
- }
-
+ skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
+ peeked, off, &error, last);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ if (error)
+ goto no_packet;
+ if (skb)
+ return skb;
if (!sk_can_busy_loop(sk))
break;
@@ -317,9 +330,7 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
bool slow;
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users))) {
+ if (!skb_unref(skb)) {
sk_peek_offset_bwd(sk, len);
return;
}
@@ -335,8 +346,8 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
-int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
- unsigned int flags,
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
+ struct sk_buff *skb, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb))
{
@@ -344,15 +355,15 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
if (flags & MSG_PEEK) {
err = -ENOENT;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
+ spin_lock_bh(&sk_queue->lock);
+ if (skb == skb_peek(sk_queue)) {
+ __skb_unlink(skb, sk_queue);
atomic_dec(&skb->users);
if (destructor)
destructor(sk, skb);
err = 0;
}
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+ spin_unlock_bh(&sk_queue->lock);
}
atomic_inc(&sk->sk_drops);
@@ -383,7 +394,8 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
- int err = __sk_queue_drop_skb(sk, skb, flags, NULL);
+ int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
+ NULL);
kfree_skb(skb);
sk_mem_reclaim_partial(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6d60149287a1..8658074ecad6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -105,6 +105,7 @@
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
@@ -142,6 +143,7 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/crash_dump.h>
+#include <linux/sctp.h>
#include "net-sysfs.h"
@@ -161,6 +163,7 @@ static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct net_device *dev,
struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -865,6 +868,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL(dev_get_by_index);
/**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (napi_id < MIN_NAPI_ID)
+ return NULL;
+
+ napi = napi_by_id(napi_id);
+
+ return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
* @name: a pointer to the buffer where the name will be stored.
@@ -2612,6 +2640,47 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
+int skb_crc32c_csum_help(struct sk_buff *skb)
+{
+ __le32 crc32c_csum;
+ int ret = 0, offset, start;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ goto out;
+
+ if (unlikely(skb_is_gso(skb)))
+ goto out;
+
+ /* Before computing a checksum, we should make sure no frag could
+ * be modified by an external entity : checksum could be wrong.
+ */
+ if (unlikely(skb_has_shared_frag(skb))) {
+ ret = __skb_linearize(skb);
+ if (ret)
+ goto out;
+ }
+ start = skb_checksum_start_offset(skb);
+ offset = start + offsetof(struct sctphdr, checksum);
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb, offset + sizeof(__le32))) {
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (ret)
+ goto out;
+ }
+ crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
+ skb->len - start, ~(__u32)0,
+ crc32c_csum_stub));
+ *(__le32 *)(skb->data + offset) = crc32c_csum;
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_not_inet = 0;
+out:
+ return ret;
+}
+
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
@@ -2954,6 +3023,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
return skb;
}
+int skb_csum_hwoffload_help(struct sk_buff *skb,
+ const netdev_features_t features)
+{
+ if (unlikely(skb->csum_not_inet))
+ return !!(features & NETIF_F_SCTP_CRC) ? 0 :
+ skb_crc32c_csum_help(skb);
+
+ return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
+}
+EXPORT_SYMBOL(skb_csum_hwoffload_help);
+
static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
{
netdev_features_t features;
@@ -2992,8 +3072,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
- if (!(features & NETIF_F_CSUM_MASK) &&
- skb_checksum_help(skb))
+ if (skb_csum_hwoffload_help(skb, features))
goto out_kfree_skb;
}
}
@@ -3179,7 +3258,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3191,6 +3270,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
*ret = NET_XMIT_SUCCESS;
consume_skb(skb);
return NULL;
@@ -3949,7 +4029,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3960,6 +4040,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
consume_skb(skb);
return NULL;
case TC_ACT_REDIRECT:
@@ -4637,9 +4718,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (netif_elide_gro(skb->dev))
goto normal;
- if (skb->csum_bad)
- goto normal;
-
gro_list_prepare(napi, skb);
rcu_read_lock();
@@ -7015,7 +7093,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
+ skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
GFP_KERNEL);
/*
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..77f04e71100f 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -225,6 +225,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
rx_filter_valid = 1;
break;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index a6bb95fa87b2..60ed6f343a63 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -352,7 +352,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* bpf_convert_filter - convert filter program
* @prog: the user passed filter program
* @len: the length of the user passed filter program
- * @new_prog: buffer where converted program will be stored
+ * @new_prog: allocated 'struct bpf_prog' or NULL
* @new_len: pointer to store length of converted program
*
* Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
@@ -364,14 +364,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
*
* 2) 2nd pass to remap in two passes: 1st pass finds new
* jump offsets, 2nd pass remapping:
- * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
* bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
*/
static int bpf_convert_filter(struct sock_filter *prog, int len,
- struct bpf_insn *new_prog, int *new_len)
+ struct bpf_prog *new_prog, int *new_len)
{
- int new_flen = 0, pass = 0, target, i;
- struct bpf_insn *new_insn;
+ int new_flen = 0, pass = 0, target, i, stack_off;
+ struct bpf_insn *new_insn, *first_insn = NULL;
struct sock_filter *fp;
int *addrs = NULL;
u8 bpf_src;
@@ -383,6 +382,7 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
return -EINVAL;
if (new_prog) {
+ first_insn = new_prog->insnsi;
addrs = kcalloc(len, sizeof(*addrs),
GFP_KERNEL | __GFP_NOWARN);
if (!addrs)
@@ -390,11 +390,11 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
}
do_pass:
- new_insn = new_prog;
+ new_insn = first_insn;
fp = prog;
/* Classic BPF related prologue emission. */
- if (new_insn) {
+ if (new_prog) {
/* Classic BPF expects A and X to be reset first. These need
* to be guaranteed to be the first two instructions.
*/
@@ -415,7 +415,7 @@ do_pass:
struct bpf_insn *insn = tmp_insns;
if (addrs)
- addrs[i] = new_insn - new_prog;
+ addrs[i] = new_insn - first_insn;
switch (fp->code) {
/* All arithmetic insns and skb loads map as-is. */
@@ -561,17 +561,25 @@ do_pass:
/* Store to stack. */
case BPF_ST:
case BPF_STX:
+ stack_off = fp->k * 4 + 4;
*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
BPF_ST ? BPF_REG_A : BPF_REG_X,
- -(BPF_MEMWORDS - fp->k) * 4);
+ -stack_off);
+ /* check_load_and_stores() verifies that classic BPF can
+ * load from stack only after write, so tracking
+ * stack_depth for ST|STX insns is enough
+ */
+ if (new_prog && new_prog->aux->stack_depth < stack_off)
+ new_prog->aux->stack_depth = stack_off;
break;
/* Load from stack. */
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
+ stack_off = fp->k * 4 + 4;
*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
BPF_REG_A : BPF_REG_X, BPF_REG_FP,
- -(BPF_MEMWORDS - fp->k) * 4);
+ -stack_off);
break;
/* A = K or X = K */
@@ -619,13 +627,13 @@ do_pass:
if (!new_prog) {
/* Only calculating new length. */
- *new_len = new_insn - new_prog;
+ *new_len = new_insn - first_insn;
return 0;
}
pass++;
- if (new_flen != new_insn - new_prog) {
- new_flen = new_insn - new_prog;
+ if (new_flen != new_insn - first_insn) {
+ new_flen = new_insn - first_insn;
if (pass > 2)
goto err;
goto do_pass;
@@ -1017,7 +1025,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
fp->len = new_len;
/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
- err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
+ err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
if (err)
/* 2nd bpf_convert_filter() can fail only if it fails
* to allocate memory, remapping must succeed. Note,
@@ -1866,6 +1874,24 @@ static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
+{
+ /* Set user specified hash as L4(+), so that it gets returned
+ * on skb_get_hash() call unless BPF prog later on triggers a
+ * skb_clear_hash().
+ */
+ __skb_set_sw_hash(skb, hash, true);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_set_hash_proto = {
+ .func = bpf_set_hash,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
u16, vlan_tci)
{
@@ -2736,6 +2762,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_get_hash_recalc_proto;
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
+ case BPF_FUNC_set_hash:
+ return &bpf_set_hash_proto;
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
@@ -2767,12 +2795,6 @@ xdp_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-cg_skb_func_proto(enum bpf_func_id func_id)
-{
- return sk_filter_func_proto(func_id);
-}
-
-static const struct bpf_func_proto *
lwt_inout_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -2834,7 +2856,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
}
}
-static bool __is_valid_access(int off, int size)
+static bool __is_valid_access(int off, int size, enum bpf_access_type type,
+ int *ctx_field_size)
{
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
@@ -2850,9 +2873,27 @@ static bool __is_valid_access(int off, int size)
offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
return false;
break;
- default:
+ case offsetof(struct __sk_buff, data) ...
+ offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
+ case offsetof(struct __sk_buff, data_end) ...
+ offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
if (size != sizeof(__u32))
return false;
+ break;
+ default:
+ /* permit narrower load for not cb/data/data_end fields */
+ *ctx_field_size = 4;
+ if (type == BPF_WRITE) {
+ if (size != sizeof(__u32))
+ return false;
+ } else {
+ if (size != sizeof(__u32))
+#ifdef __LITTLE_ENDIAN
+ return (off & 0x3) == 0 && (size == 1 || size == 2);
+#else
+ return (off & 0x3) + size == 4 && (size == 1 || size == 2);
+#endif
+ }
}
return true;
@@ -2860,12 +2901,16 @@ static bool __is_valid_access(int off, int size)
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ enum bpf_reg_type *reg_type,
+ int *ctx_field_size)
{
switch (off) {
- case offsetof(struct __sk_buff, tc_classid):
- case offsetof(struct __sk_buff, data):
- case offsetof(struct __sk_buff, data_end):
+ case offsetof(struct __sk_buff, tc_classid) ...
+ offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
+ case offsetof(struct __sk_buff, data) ...
+ offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
+ case offsetof(struct __sk_buff, data_end) ...
+ offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
return false;
}
@@ -2879,15 +2924,17 @@ static bool sk_filter_is_valid_access(int off, int size,
}
}
- return __is_valid_access(off, size);
+ return __is_valid_access(off, size, type, ctx_field_size);
}
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ enum bpf_reg_type *reg_type,
+ int *ctx_field_size)
{
switch (off) {
- case offsetof(struct __sk_buff, tc_classid):
+ case offsetof(struct __sk_buff, tc_classid) ...
+ offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
return false;
}
@@ -2912,12 +2959,13 @@ static bool lwt_is_valid_access(int off, int size,
break;
}
- return __is_valid_access(off, size);
+ return __is_valid_access(off, size, type, ctx_field_size);
}
static bool sock_filter_is_valid_access(int off, int size,
enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ enum bpf_reg_type *reg_type,
+ int *ctx_field_size)
{
if (type == BPF_WRITE) {
switch (off) {
@@ -2980,7 +3028,8 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ enum bpf_reg_type *reg_type,
+ int *ctx_field_size)
{
if (type == BPF_WRITE) {
switch (off) {
@@ -3005,7 +3054,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
break;
}
- return __is_valid_access(off, size);
+ return __is_valid_access(off, size, type, ctx_field_size);
}
static bool __is_valid_xdp_access(int off, int size)
@@ -3022,7 +3071,8 @@ static bool __is_valid_xdp_access(int off, int size)
static bool xdp_is_valid_access(int off, int size,
enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ enum bpf_reg_type *reg_type,
+ int *ctx_field_size)
{
if (type == BPF_WRITE)
return false;
@@ -3336,7 +3386,7 @@ const struct bpf_verifier_ops xdp_prog_ops = {
};
const struct bpf_verifier_ops cg_skb_prog_ops = {
- .get_func_proto = cg_skb_func_proto,
+ .get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.test_run = bpf_prog_test_run_skb,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 28d94bce4df8..fc5fc4594c90 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -18,6 +18,7 @@
#include <linux/stddef.h>
#include <linux/if_ether.h>
#include <linux/mpls.h>
+#include <linux/tcp.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
@@ -342,6 +343,64 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
}
+static void
+__skb_flow_dissect_tcp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int thoff, int hlen)
+{
+ struct flow_dissector_key_tcp *key_tcp;
+ struct tcphdr *th, _th;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP))
+ return;
+
+ th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th);
+ if (!th)
+ return;
+
+ if (unlikely(__tcp_hdrlen(th) < sizeof(_th)))
+ return;
+
+ key_tcp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ target_container);
+ key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF));
+}
+
+static void
+__skb_flow_dissect_ipv4(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, const struct iphdr *iph)
+{
+ struct flow_dissector_key_ip *key_ip;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
+ return;
+
+ key_ip = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ target_container);
+ key_ip->tos = iph->tos;
+ key_ip->ttl = iph->ttl;
+}
+
+static void
+__skb_flow_dissect_ipv6(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, const struct ipv6hdr *iph)
+{
+ struct flow_dissector_key_ip *key_ip;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
+ return;
+
+ key_ip = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ target_container);
+ key_ip->tos = ipv6_get_dsfield(iph);
+ key_ip->ttl = iph->hop_limit;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -444,6 +503,9 @@ ip:
}
}
+ __skb_flow_dissect_ipv4(skb, flow_dissector,
+ target_container, data, iph);
+
if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
goto out_good;
@@ -489,6 +551,9 @@ ipv6:
goto out_good;
}
+ __skb_flow_dissect_ipv6(skb, flow_dissector,
+ target_container, data, iph);
+
if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
goto out_good;
@@ -683,6 +748,10 @@ ip_proto_again:
case IPPROTO_MPLS:
proto = htons(ETH_P_MPLS_UC);
goto mpls;
+ case IPPROTO_TCP:
+ __skb_flow_dissect_tcp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
default:
break;
}
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index b3bc0a31af9f..1307731ddfe4 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -240,7 +240,8 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
static int bpf_build_state(struct nlattr *nla,
unsigned int family, const void *cfg,
- struct lwtunnel_state **ts)
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[LWT_BPF_MAX + 1];
struct lwtunnel_state *newts;
@@ -250,7 +251,7 @@ static int bpf_build_state(struct nlattr *nla,
if (family != AF_INET && family != AF_INET6)
return -EAFNOSUPPORT;
- ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL);
+ ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack);
if (ret < 0)
return ret;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index cfae3d5fe11f..d9cb3532f1dd 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -103,37 +103,53 @@ EXPORT_SYMBOL(lwtunnel_encap_del_ops);
int lwtunnel_build_state(u16 encap_type,
struct nlattr *encap, unsigned int family,
- const void *cfg, struct lwtunnel_state **lws)
+ const void *cfg, struct lwtunnel_state **lws,
+ struct netlink_ext_ack *extack)
{
const struct lwtunnel_encap_ops *ops;
+ bool found = false;
int ret = -EINVAL;
if (encap_type == LWTUNNEL_ENCAP_NONE ||
- encap_type > LWTUNNEL_ENCAP_MAX)
+ encap_type > LWTUNNEL_ENCAP_MAX) {
+ NL_SET_ERR_MSG_ATTR(extack, encap,
+ "Unknown LWT encapsulation type");
return ret;
+ }
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
- ret = ops->build_state(encap, family, cfg, lws);
+ found = true;
+ ret = ops->build_state(encap, family, cfg, lws, extack);
if (ret)
module_put(ops->owner);
}
rcu_read_unlock();
+ /* don't rely on -EOPNOTSUPP to detect match as build_state
+ * handlers could return it
+ */
+ if (!found) {
+ NL_SET_ERR_MSG_ATTR(extack, encap,
+ "LWT encapsulation type not supported");
+ }
+
return ret;
}
EXPORT_SYMBOL(lwtunnel_build_state);
-int lwtunnel_valid_encap_type(u16 encap_type)
+int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
{
const struct lwtunnel_encap_ops *ops;
int ret = -EINVAL;
if (encap_type == LWTUNNEL_ENCAP_NONE ||
- encap_type > LWTUNNEL_ENCAP_MAX)
+ encap_type > LWTUNNEL_ENCAP_MAX) {
+ NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
return ret;
+ }
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
@@ -153,11 +169,16 @@ int lwtunnel_valid_encap_type(u16 encap_type)
}
}
#endif
- return ops ? 0 : -EOPNOTSUPP;
+ ret = ops ? 0 : -EOPNOTSUPP;
+ if (ret < 0)
+ NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");
+
+ return ret;
}
EXPORT_SYMBOL(lwtunnel_valid_encap_type);
-int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
+int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
+ struct netlink_ext_ack *extack)
{
struct rtnexthop *rtnh = (struct rtnexthop *)attr;
struct nlattr *nla_entype;
@@ -174,7 +195,8 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
if (nla_entype) {
encap_type = nla_get_u16(nla_entype);
- if (lwtunnel_valid_encap_type(encap_type) != 0)
+ if (lwtunnel_valid_encap_type(encap_type,
+ extack) != 0)
return -EOPNOTSUPP;
}
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d274f81fcc2c..dadb5eef91c3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -118,6 +118,50 @@ unsigned long neigh_rand_reach_time(unsigned long base)
EXPORT_SYMBOL(neigh_rand_reach_time);
+static bool neigh_del(struct neighbour *n, __u8 state,
+ struct neighbour __rcu **np, struct neigh_table *tbl)
+{
+ bool retval = false;
+
+ write_lock(&n->lock);
+ if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
+ struct neighbour *neigh;
+
+ neigh = rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock));
+ rcu_assign_pointer(*np, neigh);
+ n->dead = 1;
+ retval = true;
+ }
+ write_unlock(&n->lock);
+ if (retval)
+ neigh_cleanup_and_release(n);
+ return retval;
+}
+
+bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
+{
+ struct neigh_hash_table *nht;
+ void *pkey = ndel->primary_key;
+ u32 hash_val;
+ struct neighbour *n;
+ struct neighbour __rcu **np;
+
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
+ hash_val = hash_val >> (32 - nht->hash_shift);
+
+ np = &nht->hash_buckets[hash_val];
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock)))) {
+ if (n == ndel)
+ return neigh_del(n, 0, np, tbl);
+ np = &n->next;
+ }
+ return false;
+}
+
static int neigh_forced_gc(struct neigh_table *tbl)
{
int shrunk = 0;
@@ -140,19 +184,10 @@ static int neigh_forced_gc(struct neigh_table *tbl)
* - nobody refers to it.
* - it is not permanent
*/
- write_lock(&n->lock);
- if (atomic_read(&n->refcnt) == 1 &&
- !(n->nud_state & NUD_PERMANENT)) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
- n->dead = 1;
- shrunk = 1;
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
+ shrunk = 1;
continue;
}
- write_unlock(&n->lock);
np = &n->next;
}
}
@@ -1649,7 +1684,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
NEIGH_UPDATE_F_OVERRIDE |
NEIGH_UPDATE_F_ADMIN,
NETLINK_CB(skb).portid);
+ write_lock_bh(&tbl->lock);
neigh_release(neigh);
+ neigh_remove_one(neigh, tbl);
+ write_unlock_bh(&tbl->lock);
out:
return err;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 14d09345f00d..4847964931df 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -363,15 +363,10 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- int i;
-
- seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
- dev->name, ha->refcount, ha->global_use);
-
- for (i = 0; i < dev->addr_len; i++)
- seq_printf(seq, "%02x", ha->addr[i]);
-
- seq_putc(seq, '\n');
+ seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
+ dev->ifindex, dev->name,
+ ha->refcount, ha->global_use,
+ (int)dev->addr_len, ha->addr);
}
netif_addr_unlock_bh(dev);
return 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 65ea0ff4017c..58e6cc70500d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -323,7 +323,11 @@ NETDEVICE_SHOW_RW(flags, fmt_hex);
static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
{
- int res, orig_len = dev->tx_queue_len;
+ unsigned int orig_len = dev->tx_queue_len;
+ int res;
+
+ if (new_len != (unsigned int)new_len)
+ return -ERANGE;
if (new_len != orig_len) {
dev->tx_queue_len = new_len;
@@ -349,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev,
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
-NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
+NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 26bbfababff2..2178db8e47cd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -596,6 +596,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
+ struct nlattr *nla;
struct net *peer;
int nsid, err;
@@ -603,23 +604,35 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
rtnl_net_policy, extack);
if (err < 0)
return err;
- if (!tb[NETNSA_NSID])
+ if (!tb[NETNSA_NSID]) {
+ NL_SET_ERR_MSG(extack, "nsid is missing");
return -EINVAL;
+ }
nsid = nla_get_s32(tb[NETNSA_NSID]);
- if (tb[NETNSA_PID])
+ if (tb[NETNSA_PID]) {
peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
- else if (tb[NETNSA_FD])
+ nla = tb[NETNSA_PID];
+ } else if (tb[NETNSA_FD]) {
peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
- else
+ nla = tb[NETNSA_FD];
+ } else {
+ NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
return -EINVAL;
- if (IS_ERR(peer))
+ }
+ if (IS_ERR(peer)) {
+ NL_SET_BAD_ATTR(extack, nla);
+ NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
return PTR_ERR(peer);
+ }
spin_lock_bh(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
spin_unlock_bh(&net->nsid_lock);
err = -EEXIST;
+ NL_SET_BAD_ATTR(extack, nla);
+ NL_SET_ERR_MSG(extack,
+ "Peer netns already has a nsid assigned");
goto out;
}
@@ -628,6 +641,10 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
+ } else if (err == -ENOSPC && nsid >= 0) {
+ err = -EEXIST;
+ NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
+ NL_SET_ERR_MSG(extack, "The specified nsid is already used");
}
out:
put_net(peer);
@@ -670,6 +687,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
+ struct nlattr *nla;
struct sk_buff *msg;
struct net *peer;
int err, id;
@@ -678,15 +696,22 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
rtnl_net_policy, extack);
if (err < 0)
return err;
- if (tb[NETNSA_PID])
+ if (tb[NETNSA_PID]) {
peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
- else if (tb[NETNSA_FD])
+ nla = tb[NETNSA_PID];
+ } else if (tb[NETNSA_FD]) {
peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
- else
+ nla = tb[NETNSA_FD];
+ } else {
+ NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
return -EINVAL;
+ }
- if (IS_ERR(peer))
+ if (IS_ERR(peer)) {
+ NL_SET_BAD_ATTR(extack, nla);
+ NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
return PTR_ERR(peer);
+ }
msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
if (!msg) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5e61456f6bc7..2769ad9834d1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -941,6 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ + nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
@@ -1283,9 +1284,40 @@ err_cancel:
return err;
}
+static u32 rtnl_get_event(unsigned long event)
+{
+ u32 rtnl_event_type = IFLA_EVENT_NONE;
+
+ switch (event) {
+ case NETDEV_REBOOT:
+ rtnl_event_type = IFLA_EVENT_REBOOT;
+ break;
+ case NETDEV_FEAT_CHANGE:
+ rtnl_event_type = IFLA_EVENT_FEATURES;
+ break;
+ case NETDEV_BONDING_FAILOVER:
+ rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER;
+ break;
+ case NETDEV_NOTIFY_PEERS:
+ rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS;
+ break;
+ case NETDEV_RESEND_IGMP:
+ rtnl_event_type = IFLA_EVENT_IGMP_RESEND;
+ break;
+ case NETDEV_CHANGEINFODATA:
+ rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS;
+ break;
+ default:
+ break;
+ }
+
+ return rtnl_event_type;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
- unsigned int flags, u32 ext_filter_mask)
+ unsigned int flags, u32 ext_filter_mask,
+ u32 event)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
@@ -1334,6 +1366,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
goto nla_put_failure;
+ if (event != IFLA_EVENT_NONE) {
+ if (nla_put_u32(skb, IFLA_EVENT, event))
+ goto nla_put_failure;
+ }
+
if (rtnl_fill_link_ifmap(skb, dev))
goto nla_put_failure;
@@ -1468,6 +1505,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
[IFLA_XDP] = { .type = NLA_NESTED },
+ [IFLA_EVENT] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1627,7 +1665,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask);
+ ext_filter_mask, 0);
if (err < 0) {
if (likely(skb->len))
@@ -2049,8 +2087,8 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_TXQLEN]) {
- unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
- unsigned long orig_len = dev->tx_queue_len;
+ unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
+ unsigned int orig_len = dev->tx_queue_len;
if (dev->tx_queue_len ^ value) {
dev->tx_queue_len = value;
@@ -2737,7 +2775,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENOBUFS;
err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, 0, 0, ext_filter_mask);
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
@@ -2809,7 +2847,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
}
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
- unsigned int change, gfp_t flags)
+ unsigned int change,
+ u32 event, gfp_t flags)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2820,7 +2859,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
if (skb == NULL)
goto errout;
- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
+ err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2841,18 +2880,25 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
}
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
- gfp_t flags)
+static void rtmsg_ifinfo_event(int type, struct net_device *dev,
+ unsigned int change, u32 event,
+ gfp_t flags)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, flags);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags)
+{
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+}
EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
@@ -4169,7 +4215,8 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_NOTIFY_PEERS:
case NETDEV_RESEND_IGMP:
case NETDEV_CHANGEINFODATA:
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
+ rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
+ GFP_KERNEL);
break;
default:
break;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index ae35cce3a40d..7232274de334 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -51,7 +51,8 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+u32 secure_tcpv6_ts_off(const struct net *net,
+ const __be32 *saddr, const __be32 *daddr)
{
const struct {
struct in6_addr saddr;
@@ -61,7 +62,7 @@ u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
.daddr = *(struct in6_addr *)daddr,
};
- if (sysctl_tcp_timestamps != 1)
+ if (net->ipv4.sysctl_tcp_timestamps != 1)
return 0;
ts_secret_init();
@@ -113,9 +114,9 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
- if (sysctl_tcp_timestamps != 1)
+ if (net->ipv4.sysctl_tcp_timestamps != 1)
return 0;
ts_secret_init();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b1be7c01efe2..c4d2c1f824bb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -643,12 +643,10 @@ fastpath:
kmem_cache_free(skbuff_fclone_cache, fclones);
}
-static void skb_release_head_state(struct sk_buff *skb)
+void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
-#ifdef CONFIG_XFRM
- secpath_put(skb->sp);
-#endif
+ secpath_reset(skb);
if (skb->destructor) {
WARN_ON(in_irq());
skb->destructor(skb);
@@ -694,12 +692,9 @@ EXPORT_SYMBOL(__kfree_skb);
*/
void kfree_skb(struct sk_buff *skb)
{
- if (unlikely(!skb))
- return;
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ if (!skb_unref(skb))
return;
+
trace_kfree_skb(skb, __builtin_return_address(0));
__kfree_skb(skb);
}
@@ -746,17 +741,32 @@ EXPORT_SYMBOL(skb_tx_error);
*/
void consume_skb(struct sk_buff *skb)
{
- if (unlikely(!skb))
- return;
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ if (!skb_unref(skb))
return;
+
trace_consume_skb(skb);
__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
+/**
+ * consume_stateless_skb - free an skbuff, assuming it is stateless
+ * @skb: buffer to free
+ *
+ * Works like consume_skb(), but this variant assumes that all the head
+ * states have been already dropped.
+ */
+void consume_stateless_skb(struct sk_buff *skb)
+{
+ if (!skb_unref(skb))
+ return;
+
+ trace_consume_skb(skb);
+ if (likely(skb->head))
+ skb_release_data(skb);
+ kfree_skbmem(skb);
+}
+
void __kfree_skb_flush(void)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -807,10 +817,9 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ if (!skb_unref(skb))
return;
+
/* if reaching here SKB is ready to free */
trace_consume_skb(skb);
@@ -2243,6 +2252,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
+static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
+{
+ net_warn_ratelimited(
+ "%s: attempt to compute crc32c without libcrc32c.ko\n",
+ __func__);
+ return 0;
+}
+
+static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
+ int offset, int len)
+{
+ net_warn_ratelimited(
+ "%s: attempt to compute crc32c without libcrc32c.ko\n",
+ __func__);
+ return 0;
+}
+
+static const struct skb_checksum_ops default_crc32c_ops = {
+ .update = warn_crc32c_csum_update,
+ .combine = warn_crc32c_csum_combine,
+};
+
+const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
+ &default_crc32c_ops;
+EXPORT_SYMBOL(crc32c_csum_stub);
+
/**
* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
* @from: source buffer
@@ -2620,7 +2655,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);
- skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+ skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
+ SKBTX_SHARED_FRAG;
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */
@@ -3235,8 +3271,8 @@ normal:
skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
- SKBTX_SHARED_FRAG;
+ skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
+ SKBTX_SHARED_FRAG;
while (pos < offset + len) {
if (i >= nfrags) {
@@ -3482,24 +3518,18 @@ void __init skb_init(void)
NULL);
}
-/**
- * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
- * @skb: Socket buffer containing the buffers to be mapped
- * @sg: The scatter-gather list to map into
- * @offset: The offset into the buffer's contents to start mapping
- * @len: Length of buffer space to be mapped
- *
- * Fill the specified scatter-gather list with mappings/pointers into a
- * region of the buffer space attached to a socket buffer.
- */
static int
-__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
+ unsigned int recursion_level)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
struct sk_buff *frag_iter;
int elt = 0;
+ if (unlikely(recursion_level >= 24))
+ return -EMSGSIZE;
+
if (copy > 0) {
if (copy > len)
copy = len;
@@ -3518,6 +3548,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+ return -EMSGSIZE;
if (copy > len)
copy = len;
@@ -3532,16 +3564,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
}
skb_walk_frags(skb, frag_iter) {
- int end;
+ int end, ret;
WARN_ON(start > offset + len);
end = start + frag_iter->len;
if ((copy = end - offset) > 0) {
+ if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+ return -EMSGSIZE;
+
if (copy > len)
copy = len;
- elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
- copy);
+ ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
+ copy, recursion_level + 1);
+ if (unlikely(ret < 0))
+ return ret;
+ elt += ret;
if ((len -= copy) == 0)
return elt;
offset += copy;
@@ -3552,6 +3590,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+/**
+ * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ * @skb: Socket buffer containing the buffers to be mapped
+ * @sg: The scatter-gather list to map into
+ * @offset: The offset into the buffer's contents to start mapping
+ * @len: Length of buffer space to be mapped
+ *
+ * Fill the specified scatter-gather list with mappings/pointers into a
+ * region of the buffer space attached to a socket buffer. Returns either
+ * the number of scatterlist items used, or -EMSGSIZE if the contents
+ * could not fit.
+ */
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+ int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);
+
+ if (nsg <= 0)
+ return nsg;
+
+ sg_mark_end(&sg[nsg - 1]);
+
+ return nsg;
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+
/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given
* sglist without mark the sg which contain last skb data as the end.
* So the caller can mannipulate sg list as will when padding new data after
@@ -3574,19 +3637,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
int offset, int len)
{
- return __skb_to_sgvec(skb, sg, offset, len);
+ return __skb_to_sgvec(skb, sg, offset, len, 0);
}
EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
-int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
- int nsg = __skb_to_sgvec(skb, sg, offset, len);
- sg_mark_end(&sg[nsg - 1]);
-
- return nsg;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
/**
* skb_cow_data - Check that a socket buffer's data buffers are writable
@@ -3878,6 +3933,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
+ if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+ skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+ return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
@@ -3899,7 +3958,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
return;
if (tsonly) {
- skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
+ skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags &
+ SKBTX_ANY_TSTAMP;
skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 727f924b7f91..ad8a4bc84126 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1038,6 +1038,10 @@ set_rcvbuf:
#endif
case SO_MAX_PACING_RATE:
+ if (val != ~0U)
+ cmpxchg(&sk->sk_pacing_status,
+ SK_PACING_NONE,
+ SK_PACING_NEEDED);
sk->sk_max_pacing_rate = val;
sk->sk_pacing_rate = min(sk->sk_pacing_rate,
sk->sk_max_pacing_rate);
@@ -2072,6 +2076,26 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
}
EXPORT_SYMBOL(sock_cmsg_send);
+static void sk_enter_memory_pressure(struct sock *sk)
+{
+ if (!sk->sk_prot->enter_memory_pressure)
+ return;
+
+ sk->sk_prot->enter_memory_pressure(sk);
+}
+
+static void sk_leave_memory_pressure(struct sock *sk)
+{
+ if (sk->sk_prot->leave_memory_pressure) {
+ sk->sk_prot->leave_memory_pressure(sk);
+ } else {
+ unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
+
+ if (memory_pressure && *memory_pressure)
+ *memory_pressure = 0;
+ }
+}
+
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 93106120f987..733f523707ac 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -178,10 +178,6 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
[DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)},
};
-static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
- [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)},
-};
-
/* DCB number of traffic classes nested attributes. */
static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
[DCB_FEATCFG_ATTR_ALL] = {.type = NLA_FLAG},
@@ -1463,8 +1459,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
struct dcb_app *app_data;
+
if (nla_type(attr) != DCB_ATTR_IEEE_APP)
continue;
+
+ if (nla_len(attr) < sizeof(struct dcb_app)) {
+ err = -ERANGE;
+ goto err;
+ }
+
app_data = nla_data(attr);
if (ops->ieee_setapp)
err = ops->ieee_setapp(netdev, app_data);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 5e3a7302f774..e1295d5f2c56 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -233,7 +233,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- const u32 now = ccid2_time_stamp;
+ const u32 now = ccid2_jiffies32;
struct ccid2_seq *next;
/* slow-start after idle periods (RFC 2581, RFC 2861) */
@@ -466,7 +466,7 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
* The cleanest solution is to not use the ccid2s_sent field at all
* and instead use DCCP timestamps: requires changes in other places.
*/
- ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
+ ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent);
}
static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
@@ -478,7 +478,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
return;
}
- hc->tx_last_cong = ccid2_time_stamp;
+ hc->tx_last_cong = ccid2_jiffies32;
hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
@@ -731,7 +731,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_rto = DCCP_TIMEOUT_INIT;
hc->tx_rpdupack = -1;
- hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp;
+ hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
hc->tx_cwnd_used = 0;
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
(unsigned long)sk);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 18c97543e522..6e50ef2898fb 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -27,7 +27,7 @@
* CCID-2 timestamping faces the same issues as TCP timestamping.
* Hence we reuse/share as much of the code as possible.
*/
-#define ccid2_time_stamp tcp_time_stamp
+#define ccid2_jiffies32 ((u32)jiffies)
/* NUMDUPACK parameter from RFC 4341, p. 6 */
#define NUMDUPACK 3
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 405483a07efc..73a0399dc7a2 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -447,7 +447,7 @@ static void dn_destruct(struct sock *sk)
dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
}
-static int dn_memory_pressure;
+static unsigned long dn_memory_pressure;
static void dn_enter_memory_pressure(struct sock *sk)
{
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 81a0868edb1d..cc5f8f971689 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -25,16 +25,19 @@ config NET_DSA_TAG_DSA
config NET_DSA_TAG_EDSA
bool
-config NET_DSA_TAG_TRAILER
+config NET_DSA_TAG_KSZ
bool
-config NET_DSA_TAG_QCA
+config NET_DSA_TAG_LAN9303
bool
config NET_DSA_TAG_MTK
bool
-config NET_DSA_TAG_LAN9303
+config NET_DSA_TAG_TRAILER
+ bool
+
+config NET_DSA_TAG_QCA
bool
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 0b747d75e65a..fcce25da937c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,12 +1,13 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
+dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
-dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
-dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
-dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
+dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 90038d45a547..416ac4ef9ba9 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -24,7 +24,7 @@
#include <linux/phy_fixed.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
@@ -40,26 +40,29 @@ static const struct dsa_device_ops none_ops = {
};
const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+ [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#endif
#ifdef CONFIG_NET_DSA_TAG_DSA
[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
#endif
#ifdef CONFIG_NET_DSA_TAG_EDSA
[DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM
- [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_KSZ
+ [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_QCA
- [DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_LAN9303
+ [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
#endif
#ifdef CONFIG_NET_DSA_TAG_MTK
[DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_LAN9303
- [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_QCA
+ [DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+ [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
#endif
[DSA_TAG_PROTO_NONE] = &none_ops,
};
@@ -109,23 +112,22 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
return ops;
}
-int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds)
+int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
{
+ struct dsa_switch *ds = cpu_dp->ds;
struct net_device *master;
struct ethtool_ops *cpu_ops;
- master = ds->dst->master_netdev;
- if (ds->master_netdev)
- master = ds->master_netdev;
+ master = cpu_dp->netdev;
cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
if (!cpu_ops)
return -ENOMEM;
- memcpy(&ds->dst->master_ethtool_ops, master->ethtool_ops,
+ memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops,
sizeof(struct ethtool_ops));
- ds->dst->master_orig_ethtool_ops = master->ethtool_ops;
- memcpy(cpu_ops, &ds->dst->master_ethtool_ops,
+ cpu_dp->orig_ethtool_ops = master->ethtool_ops;
+ memcpy(cpu_ops, &cpu_dp->ethtool_ops,
sizeof(struct ethtool_ops));
dsa_cpu_port_ethtool_init(cpu_ops);
master->ethtool_ops = cpu_ops;
@@ -133,15 +135,9 @@ int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds)
return 0;
}
-void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
+void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
{
- struct net_device *master;
-
- master = ds->dst->master_netdev;
- if (ds->master_netdev)
- master = ds->master_netdev;
-
- master->ethtool_ops = ds->dst->master_orig_ethtool_ops;
+ cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops;
}
void dsa_cpu_dsa_destroy(struct dsa_port *port)
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 7796580e99ee..52af8401af07 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -18,7 +18,7 @@
#include <linux/rtnetlink.h>
#include <linux/of.h>
#include <linux/of_net.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
static LIST_HEAD(dsa_switch_trees);
@@ -214,66 +214,59 @@ static int dsa_dst_complete(struct dsa_switch_tree *dst)
return 0;
}
-static int dsa_dsa_port_apply(struct dsa_port *port, u32 index,
- struct dsa_switch *ds)
+static int dsa_dsa_port_apply(struct dsa_port *port)
{
+ struct dsa_switch *ds = port->ds;
int err;
- err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+ err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
if (err) {
dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
- index, err);
+ port->index, err);
return err;
}
- memset(&ds->ports[index].devlink_port, 0,
- sizeof(ds->ports[index].devlink_port));
+ memset(&port->devlink_port, 0, sizeof(port->devlink_port));
- return devlink_port_register(ds->devlink,
- &ds->ports[index].devlink_port,
- index);
+ return devlink_port_register(ds->devlink, &port->devlink_port,
+ port->index);
}
-static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index,
- struct dsa_switch *ds)
+static void dsa_dsa_port_unapply(struct dsa_port *port)
{
- devlink_port_unregister(&ds->ports[index].devlink_port);
+ devlink_port_unregister(&port->devlink_port);
dsa_cpu_dsa_destroy(port);
}
-static int dsa_cpu_port_apply(struct dsa_port *port, u32 index,
- struct dsa_switch *ds)
+static int dsa_cpu_port_apply(struct dsa_port *port)
{
+ struct dsa_switch *ds = port->ds;
int err;
- err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+ err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
if (err) {
dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
- index, err);
+ port->index, err);
return err;
}
- ds->cpu_port_mask |= BIT(index);
-
- memset(&ds->ports[index].devlink_port, 0,
- sizeof(ds->ports[index].devlink_port));
- err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
- index);
+ memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+ err = devlink_port_register(ds->devlink, &port->devlink_port,
+ port->index);
return err;
}
-static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index,
- struct dsa_switch *ds)
+static void dsa_cpu_port_unapply(struct dsa_port *port)
{
- devlink_port_unregister(&ds->ports[index].devlink_port);
+ devlink_port_unregister(&port->devlink_port);
dsa_cpu_dsa_destroy(port);
- ds->cpu_port_mask &= ~BIT(index);
+ port->ds->cpu_port_mask &= ~BIT(port->index);
}
-static int dsa_user_port_apply(struct dsa_port *port, u32 index,
- struct dsa_switch *ds)
+static int dsa_user_port_apply(struct dsa_port *port)
{
+ struct dsa_switch *ds = port->ds;
const char *name = port->name;
int err;
@@ -282,35 +275,32 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index,
if (!name)
name = "eth%d";
- err = dsa_slave_create(ds, ds->dev, index, name);
+ err = dsa_slave_create(ds, ds->dev, port->index, name);
if (err) {
dev_warn(ds->dev, "Failed to create slave %d: %d\n",
- index, err);
- ds->ports[index].netdev = NULL;
+ port->index, err);
+ port->netdev = NULL;
return err;
}
- memset(&ds->ports[index].devlink_port, 0,
- sizeof(ds->ports[index].devlink_port));
- err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
- index);
+ memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+ err = devlink_port_register(ds->devlink, &port->devlink_port,
+ port->index);
if (err)
return err;
- devlink_port_type_eth_set(&ds->ports[index].devlink_port,
- ds->ports[index].netdev);
+ devlink_port_type_eth_set(&port->devlink_port, port->netdev);
return 0;
}
-static void dsa_user_port_unapply(struct dsa_port *port, u32 index,
- struct dsa_switch *ds)
+static void dsa_user_port_unapply(struct dsa_port *port)
{
- devlink_port_unregister(&ds->ports[index].devlink_port);
- if (ds->ports[index].netdev) {
- dsa_slave_destroy(ds->ports[index].netdev);
- ds->ports[index].netdev = NULL;
- ds->enabled_port_mask &= ~(1 << index);
+ devlink_port_unregister(&port->devlink_port);
+ if (port->netdev) {
+ dsa_slave_destroy(port->netdev);
+ port->netdev = NULL;
+ port->ds->enabled_port_mask &= ~(1 << port->index);
}
}
@@ -347,7 +337,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
return err;
if (ds->ops->set_addr) {
- err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr);
+ err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr);
if (err < 0)
return err;
}
@@ -370,20 +360,20 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
continue;
if (dsa_port_is_dsa(port)) {
- err = dsa_dsa_port_apply(port, index, ds);
+ err = dsa_dsa_port_apply(port);
if (err)
return err;
continue;
}
if (dsa_port_is_cpu(port)) {
- err = dsa_cpu_port_apply(port, index, ds);
+ err = dsa_cpu_port_apply(port);
if (err)
return err;
continue;
}
- err = dsa_user_port_apply(port, index, ds);
+ err = dsa_user_port_apply(port);
if (err)
continue;
}
@@ -402,16 +392,16 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
continue;
if (dsa_port_is_dsa(port)) {
- dsa_dsa_port_unapply(port, index, ds);
+ dsa_dsa_port_unapply(port);
continue;
}
if (dsa_port_is_cpu(port)) {
- dsa_cpu_port_unapply(port, index, ds);
+ dsa_cpu_port_unapply(port);
continue;
}
- dsa_user_port_unapply(port, index, ds);
+ dsa_user_port_unapply(port);
}
if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -443,8 +433,8 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
return err;
}
- if (dst->cpu_switch) {
- err = dsa_cpu_port_ethtool_setup(dst->cpu_switch);
+ if (dst->cpu_dp) {
+ err = dsa_cpu_port_ethtool_setup(dst->cpu_dp);
if (err)
return err;
}
@@ -454,7 +444,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
* sent to the tag format's receive function.
*/
wmb();
- dst->master_netdev->dsa_ptr = (void *)dst;
+ dst->cpu_dp->netdev->dsa_ptr = dst;
dst->applied = true;
return 0;
@@ -468,7 +458,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
if (!dst->applied)
return;
- dst->master_netdev->dsa_ptr = NULL;
+ dst->cpu_dp->netdev->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point get sent
@@ -484,9 +474,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
dsa_ds_unapply(dst, ds);
}
- if (dst->cpu_switch) {
- dsa_cpu_port_ethtool_restore(dst->cpu_switch);
- dst->cpu_switch = NULL;
+ if (dst->cpu_dp) {
+ dsa_cpu_port_ethtool_restore(dst->cpu_dp);
+ dst->cpu_dp = NULL;
}
pr_info("DSA: tree %d unapplied\n", dst->tree);
@@ -500,6 +490,8 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
enum dsa_tag_protocol tag_protocol;
struct net_device *ethernet_dev;
struct device_node *ethernet;
+ struct dsa_port *p;
+ unsigned int i;
if (port->dn) {
ethernet = of_parse_phandle(port->dn, "ethernet", 0);
@@ -514,15 +506,18 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
if (!ethernet_dev)
return -EPROBE_DEFER;
- if (!ds->master_netdev)
- ds->master_netdev = ethernet_dev;
+ if (!dst->cpu_dp) {
+ dst->cpu_dp = port;
+ dst->cpu_dp->netdev = ethernet_dev;
- if (!dst->master_netdev)
- dst->master_netdev = ethernet_dev;
+ for (i = 0; i < ds->num_ports; i++) {
+ p = &ds->ports[i];
+ if (!dsa_port_is_valid(p) ||
+ i == index)
+ continue;
- if (!dst->cpu_switch) {
- dst->cpu_switch = ds;
- dst->cpu_port = index;
+ p->cpu_dp = port;
+ }
}
tag_protocol = ds->ops->get_tag_protocol(ds);
@@ -534,6 +529,12 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
dst->rcv = dst->tag_ops->rcv;
+ /* Initialize cpu_port_mask now for ops->setup()
+ * to have access to a correct value, just like what
+ * net/dsa/legacy.c::dsa_switch_setup_one does.
+ */
+ ds->cpu_port_mask |= BIT(index);
+
return 0;
}
@@ -545,14 +546,22 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
for (index = 0; index < ds->num_ports; index++) {
port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
+ if (!dsa_port_is_valid(port) ||
+ dsa_port_is_dsa(port))
continue;
if (dsa_port_is_cpu(port)) {
err = dsa_cpu_parse(port, index, dst, ds);
if (err)
return err;
+ } else {
+ /* Initialize enabled_port_mask now for ops->setup()
+ * to have access to a correct value, just like what
+ * net/dsa/legacy.c::dsa_switch_setup_one does.
+ */
+ ds->enabled_port_mask |= BIT(index);
}
+
}
pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index);
@@ -576,7 +585,7 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst)
return err;
}
- if (!dst->master_netdev) {
+ if (!dst->cpu_dp->netdev) {
pr_warn("Tree has no master device\n");
return -EINVAL;
}
@@ -601,13 +610,6 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
return -EINVAL;
ds->ports[reg].dn = port;
-
- /* Initialize enabled_port_mask now for ops->setup()
- * to have access to a correct value, just like what
- * net/dsa/dsa.c::dsa_switch_setup_one does.
- */
- if (!dsa_port_is_cpu(&ds->ports[reg]))
- ds->enabled_port_mask |= 1 << reg;
}
return 0;
@@ -623,14 +625,6 @@ static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds)
continue;
ds->ports[i].name = cd->port_names[i];
-
- /* Initialize enabled_port_mask now for drv->setup()
- * to have access to a correct value, just like what
- * net/dsa/dsa.c::dsa_switch_setup_one does.
- */
- if (!dsa_port_is_cpu(&ds->ports[i]))
- ds->enabled_port_mask |= 1 << i;
-
valid_name_found = true;
}
@@ -690,10 +684,10 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds,
return ports;
}
-static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev)
+static int _dsa_register_switch(struct dsa_switch *ds)
{
- struct dsa_chip_data *pdata = dev->platform_data;
- struct device_node *np = dev->of_node;
+ struct dsa_chip_data *pdata = ds->dev->platform_data;
+ struct device_node *np = ds->dev->of_node;
struct dsa_switch_tree *dst;
struct device_node *ports;
u32 tree, index;
@@ -807,12 +801,12 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
}
EXPORT_SYMBOL_GPL(dsa_switch_alloc);
-int dsa_register_switch(struct dsa_switch *ds, struct device *dev)
+int dsa_register_switch(struct dsa_switch *ds)
{
int err;
mutex_lock(&dsa2_mutex);
- err = _dsa_register_switch(ds, dev);
+ err = _dsa_register_switch(ds);
mutex_unlock(&dsa2_mutex);
return err;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index f4a88e485213..55982cc39b24 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -14,6 +14,56 @@
#include <linux/phy.h>
#include <linux/netdevice.h>
#include <linux/netpoll.h>
+#include <net/dsa.h>
+
+enum {
+ DSA_NOTIFIER_AGEING_TIME,
+ DSA_NOTIFIER_BRIDGE_JOIN,
+ DSA_NOTIFIER_BRIDGE_LEAVE,
+ DSA_NOTIFIER_FDB_ADD,
+ DSA_NOTIFIER_FDB_DEL,
+ DSA_NOTIFIER_MDB_ADD,
+ DSA_NOTIFIER_MDB_DEL,
+ DSA_NOTIFIER_VLAN_ADD,
+ DSA_NOTIFIER_VLAN_DEL,
+};
+
+/* DSA_NOTIFIER_AGEING_TIME */
+struct dsa_notifier_ageing_time_info {
+ struct switchdev_trans *trans;
+ unsigned int ageing_time;
+};
+
+/* DSA_NOTIFIER_BRIDGE_* */
+struct dsa_notifier_bridge_info {
+ struct net_device *br;
+ int sw_index;
+ int port;
+};
+
+/* DSA_NOTIFIER_FDB_* */
+struct dsa_notifier_fdb_info {
+ const struct switchdev_obj_port_fdb *fdb;
+ struct switchdev_trans *trans;
+ int sw_index;
+ int port;
+};
+
+/* DSA_NOTIFIER_MDB_* */
+struct dsa_notifier_mdb_info {
+ const struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_trans *trans;
+ int sw_index;
+ int port;
+};
+
+/* DSA_NOTIFIER_VLAN_* */
+struct dsa_notifier_vlan_info {
+ const struct switchdev_obj_port_vlan *vlan;
+ struct switchdev_trans *trans;
+ int sw_index;
+ int port;
+};
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -23,6 +73,7 @@ struct dsa_device_ops {
};
struct dsa_slave_priv {
+ /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
@@ -52,13 +103,46 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
struct dsa_port *dport, int port);
void dsa_cpu_dsa_destroy(struct dsa_port *dport);
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
-int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds);
-void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
+int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
+void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
/* legacy.c */
int dsa_legacy_register(void);
void dsa_legacy_unregister(void);
+/* port.c */
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+ struct switchdev_trans *trans);
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+ struct switchdev_trans *trans);
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+ struct switchdev_trans *trans);
+int dsa_port_fdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb,
+ struct switchdev_trans *trans);
+int dsa_port_fdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb);
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+ switchdev_obj_dump_cb_t *cb);
+int dsa_port_mdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans);
+int dsa_port_mdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+ switchdev_obj_dump_cb_t *cb);
+int dsa_port_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans);
+int dsa_port_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_vlan_dump(struct dsa_port *dp,
+ struct switchdev_obj_port_vlan *vlan,
+ switchdev_obj_dump_cb_t *cb);
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
@@ -75,25 +159,38 @@ void dsa_slave_unregister_notifier(void);
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
+/* tag_brcm.c */
+extern const struct dsa_device_ops brcm_netdev_ops;
+
/* tag_dsa.c */
extern const struct dsa_device_ops dsa_netdev_ops;
/* tag_edsa.c */
extern const struct dsa_device_ops edsa_netdev_ops;
-/* tag_trailer.c */
-extern const struct dsa_device_ops trailer_netdev_ops;
+/* tag_ksz.c */
+extern const struct dsa_device_ops ksz_netdev_ops;
-/* tag_brcm.c */
-extern const struct dsa_device_ops brcm_netdev_ops;
+/* tag_lan9303.c */
+extern const struct dsa_device_ops lan9303_netdev_ops;
+
+/* tag_mtk.c */
+extern const struct dsa_device_ops mtk_netdev_ops;
/* tag_qca.c */
extern const struct dsa_device_ops qca_netdev_ops;
-/* tag_mtk.c */
-extern const struct dsa_device_ops mtk_netdev_ops;
+/* tag_trailer.c */
+extern const struct dsa_device_ops trailer_netdev_ops;
-/* tag_lan9303.c */
-extern const struct dsa_device_ops lan9303_netdev_ops;
+static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
+{
+ return p->dp->cpu_dp->netdev;
+}
+
+static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst)
+{
+ return dst->cpu_dp;
+}
#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 7281098df04e..e60906125375 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -22,7 +22,7 @@
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
/* switch driver registration ***********************************************/
@@ -101,9 +101,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
struct dsa_switch_tree *dst = ds->dst;
struct dsa_chip_data *cd = ds->cd;
bool valid_name_found = false;
+ struct net_device *master;
int index = ds->index;
int i, ret;
+ master = dst->cpu_dp->netdev;
+
/*
* Validate supplied switch configuration.
*/
@@ -115,18 +118,18 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
continue;
if (!strcmp(name, "cpu")) {
- if (dst->cpu_switch) {
- netdev_err(dst->master_netdev,
+ if (dst->cpu_dp) {
+ netdev_err(master,
"multiple cpu ports?!\n");
return -EINVAL;
}
- dst->cpu_switch = ds;
- dst->cpu_port = i;
+ dst->cpu_dp = &ds->ports[i];
ds->cpu_port_mask |= 1 << i;
} else if (!strcmp(name, "dsa")) {
ds->dsa_port_mask |= 1 << i;
} else {
ds->enabled_port_mask |= 1 << i;
+ ds->ports[i].cpu_dp = dst->cpu_dp;
}
valid_name_found = true;
}
@@ -144,7 +147,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
* tagging protocol to the preferred tagging format of this
* switch.
*/
- if (dst->cpu_switch == ds) {
+ if (dst->cpu_dp->ds == ds) {
enum dsa_tag_protocol tag_protocol;
tag_protocol = ops->get_tag_protocol(ds);
@@ -169,7 +172,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
return ret;
if (ops->set_addr) {
- ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
+ ret = ops->set_addr(ds, master->dev_addr);
if (ret < 0)
return ret;
}
@@ -196,17 +199,17 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
if (ret < 0)
- netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
+ netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
index, i, cd->port_names[i], ret);
}
/* Perform configuration of the CPU and DSA ports */
ret = dsa_cpu_dsa_setups(ds, parent);
if (ret < 0)
- netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
+ netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
index);
- ret = dsa_cpu_port_ethtool_setup(ds);
+ ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
if (ret)
return ret;
@@ -218,6 +221,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
struct device *parent, struct device *host_dev)
{
struct dsa_chip_data *cd = dst->pd->chip + index;
+ struct net_device *master = dst->cpu_dp->netdev;
const struct dsa_switch_ops *ops;
struct dsa_switch *ds;
int ret;
@@ -229,11 +233,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
*/
ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
if (!ops) {
- netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
+ netdev_err(master, "[%d]: could not detect attached switch\n",
index);
return ERR_PTR(-EINVAL);
}
- netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
+ netdev_info(master, "[%d]: detected a %s switch\n",
index, name);
@@ -576,8 +580,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
unsigned configured = 0;
dst->pd = pd;
- dst->master_netdev = dev;
- dst->cpu_port = -1;
+ dst->cpu_dp->netdev = dev;
for (i = 0; i < pd->nr_chips; i++) {
struct dsa_switch *ds;
@@ -606,7 +609,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
* sent to the tag format's receive function.
*/
wmb();
- dev->dsa_ptr = (void *)dst;
+ dev->dsa_ptr = dst;
return 0;
}
@@ -673,7 +676,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
- dst->master_netdev->dsa_ptr = NULL;
+ dst->cpu_dp->netdev->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point get sent
@@ -688,9 +691,9 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
- dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+ dsa_cpu_port_ethtool_restore(dst->cpu_dp);
- dev_put(dst->master_netdev);
+ dev_put(dst->cpu_dp->netdev);
}
static int dsa_remove(struct platform_device *pdev)
diff --git a/net/dsa/port.c b/net/dsa/port.c
new file mode 100644
index 000000000000..efc3bce3a89d
--- /dev/null
+++ b/net/dsa/port.c
@@ -0,0 +1,259 @@
+/*
+ * Handling of a single switch port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/if_bridge.h>
+#include <linux/notifier.h>
+
+#include "dsa_priv.h"
+
+static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+{
+ struct raw_notifier_head *nh = &dp->ds->dst->nh;
+ int err;
+
+ err = raw_notifier_call_chain(nh, e, v);
+
+ return notifier_to_errno(err);
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+ struct switchdev_trans *trans)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
+
+ if (ds->ops->port_stp_state_set)
+ ds->ops->port_stp_state_set(ds, port, state);
+
+ if (ds->ops->port_fast_age) {
+ /* Fast age FDB entries or flush appropriate forwarding database
+ * for the given port, if we are moving it from Learning or
+ * Forwarding state, to Disabled or Blocking or Listening state.
+ */
+
+ if ((dp->stp_state == BR_STATE_LEARNING ||
+ dp->stp_state == BR_STATE_FORWARDING) &&
+ (state == BR_STATE_DISABLED ||
+ state == BR_STATE_BLOCKING ||
+ state == BR_STATE_LISTENING))
+ ds->ops->port_fast_age(ds, port);
+ }
+
+ dp->stp_state = state;
+
+ return 0;
+}
+
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+{
+ int err;
+
+ err = dsa_port_set_state(dp, state, NULL);
+ if (err)
+ pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
+}
+
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
+{
+ struct dsa_notifier_bridge_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .br = br,
+ };
+ int err;
+
+ /* Here the port is already bridged. Reflect the current configuration
+ * so that drivers can program their chips accordingly.
+ */
+ dp->bridge_dev = br;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
+
+ /* The bridging is rolled back on error */
+ if (err)
+ dp->bridge_dev = NULL;
+
+ return err;
+}
+
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
+{
+ struct dsa_notifier_bridge_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .br = br,
+ };
+ int err;
+
+ /* Here the port is already unbridged. Reflect the current configuration
+ * so that drivers can program their chips accordingly.
+ */
+ dp->bridge_dev = NULL;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+ if (err)
+ pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+
+ /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
+ * so allow it to be in BR_STATE_FORWARDING to be kept functional
+ */
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+}
+
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+ struct switchdev_trans *trans)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ if (ds->ops->port_vlan_filtering)
+ return ds->ops->port_vlan_filtering(ds, dp->index,
+ vlan_filtering);
+
+ return 0;
+}
+
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+ struct switchdev_trans *trans)
+{
+ unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
+ unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
+ struct dsa_notifier_ageing_time_info info = {
+ .ageing_time = ageing_time,
+ .trans = trans,
+ };
+
+ if (switchdev_trans_ph_prepare(trans))
+ return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+
+ dp->ageing_time = ageing_time;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+}
+
+int dsa_port_fdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb,
+ struct switchdev_trans *trans)
+{
+ struct dsa_notifier_fdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .trans = trans,
+ .fdb = fdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
+}
+
+int dsa_port_fdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb)
+{
+ struct dsa_notifier_fdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .fdb = fdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
+}
+
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+ switchdev_obj_dump_cb_t *cb)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_fdb_dump)
+ return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
+
+ return -EOPNOTSUPP;
+}
+
+int dsa_port_mdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
+{
+ struct dsa_notifier_mdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .trans = trans,
+ .mdb = mdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
+}
+
+int dsa_port_mdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_notifier_mdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mdb = mdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
+}
+
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+ switchdev_obj_dump_cb_t *cb)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_mdb_dump)
+ return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
+
+ return -EOPNOTSUPP;
+}
+
+int dsa_port_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
+{
+ struct dsa_notifier_vlan_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .trans = trans,
+ .vlan = vlan,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+}
+
+int dsa_port_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct dsa_notifier_vlan_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vlan = vlan,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+}
+
+int dsa_port_vlan_dump(struct dsa_port *dp,
+ struct switchdev_obj_port_vlan *vlan,
+ switchdev_obj_dump_cb_t *cb)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_vlan_dump)
+ return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
+
+ return -EOPNOTSUPP;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7693182df81e..9507bd38cf04 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,28 +17,16 @@
#include <linux/of_mdio.h>
#include <linux/mdio.h>
#include <linux/list.h>
-#include <net/dsa.h>
#include <net/rtnetlink.h>
-#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+
#include "dsa_priv.h"
static bool dsa_slave_dev_check(struct net_device *dev);
-static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct raw_notifier_head *nh = &p->dp->ds->dst->nh;
- int err;
-
- err = raw_notifier_call_chain(nh, e, v);
-
- return notifier_to_errno(err);
-}
-
/* slave mii_bus handling ***************************************************/
static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
@@ -78,48 +66,16 @@ static int dsa_slave_get_iflink(const struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- return p->dp->ds->dst->master_netdev->ifindex;
-}
-
-static inline bool dsa_port_is_bridged(struct dsa_port *dp)
-{
- return !!dp->bridge_dev;
+ return dsa_master_netdev(p)->ifindex;
}
-static void dsa_slave_set_state(struct net_device *dev, u8 state)
+static int dsa_slave_open(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_port *dp = p->dp;
struct dsa_switch *ds = dp->ds;
- int port = dp->index;
-
- if (ds->ops->port_stp_state_set)
- ds->ops->port_stp_state_set(ds, port, state);
-
- if (ds->ops->port_fast_age) {
- /* Fast age FDB entries or flush appropriate forwarding database
- * for the given port, if we are moving it from Learning or
- * Forwarding state, to Disabled or Blocking or Listening state.
- */
-
- if ((dp->stp_state == BR_STATE_LEARNING ||
- dp->stp_state == BR_STATE_FORWARDING) &&
- (state == BR_STATE_DISABLED ||
- state == BR_STATE_BLOCKING ||
- state == BR_STATE_LISTENING))
- ds->ops->port_fast_age(ds, port);
- }
-
- dp->stp_state = state;
-}
-
-static int dsa_slave_open(struct net_device *dev)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = p->dp->ds->dst->master_netdev;
- struct dsa_switch *ds = p->dp->ds;
- u8 stp_state = dsa_port_is_bridged(p->dp) ?
- BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+ struct net_device *master = dsa_master_netdev(p);
+ u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
int err;
if (!(master->flags & IFF_UP))
@@ -148,7 +104,7 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_promisc;
}
- dsa_slave_set_state(dev, stp_state);
+ dsa_port_set_state_now(p->dp, stp_state);
if (p->phy)
phy_start(p->phy);
@@ -171,7 +127,7 @@ out:
static int dsa_slave_close(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = p->dp->ds->dst->master_netdev;
+ struct net_device *master = dsa_master_netdev(p);
struct dsa_switch *ds = p->dp->ds;
if (p->phy)
@@ -190,7 +146,7 @@ static int dsa_slave_close(struct net_device *dev)
if (ds->ops->port_disable)
ds->ops->port_disable(ds, p->dp->index, p->phy);
- dsa_slave_set_state(dev, BR_STATE_DISABLED);
+ dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
return 0;
}
@@ -198,7 +154,7 @@ static int dsa_slave_close(struct net_device *dev)
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = p->dp->ds->dst->master_netdev;
+ struct net_device *master = dsa_master_netdev(p);
if (change & IFF_ALLMULTI)
dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -209,7 +165,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
static void dsa_slave_set_rx_mode(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = p->dp->ds->dst->master_netdev;
+ struct net_device *master = dsa_master_netdev(p);
dev_mc_sync(master, dev);
dev_uc_sync(master, dev);
@@ -218,7 +174,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = p->dp->ds->dst->master_netdev;
+ struct net_device *master = dsa_master_netdev(p);
struct sockaddr *addr = a;
int err;
@@ -243,140 +199,6 @@ out:
return 0;
}
-static int dsa_slave_port_vlan_add(struct net_device *dev,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans);
- }
-
- ds->ops->port_vlan_add(ds, dp->index, vlan, trans);
-
- return 0;
-}
-
-static int dsa_slave_port_vlan_del(struct net_device *dev,
- const struct switchdev_obj_port_vlan *vlan)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (!ds->ops->port_vlan_del)
- return -EOPNOTSUPP;
-
- return ds->ops->port_vlan_del(ds, p->dp->index, vlan);
-}
-
-static int dsa_slave_port_vlan_dump(struct net_device *dev,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_vlan_dump)
- return ds->ops->port_vlan_dump(ds, p->dp->index, vlan, cb);
-
- return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_fdb_add(struct net_device *dev,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_fdb_prepare(ds, p->dp->index, fdb, trans);
- }
-
- ds->ops->port_fdb_add(ds, p->dp->index, fdb, trans);
-
- return 0;
-}
-
-static int dsa_slave_port_fdb_del(struct net_device *dev,
- const struct switchdev_obj_port_fdb *fdb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
- int ret = -EOPNOTSUPP;
-
- if (ds->ops->port_fdb_del)
- ret = ds->ops->port_fdb_del(ds, p->dp->index, fdb);
-
- return ret;
-}
-
-static int dsa_slave_port_fdb_dump(struct net_device *dev,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_fdb_dump)
- return ds->ops->port_fdb_dump(ds, p->dp->index, fdb, cb);
-
- return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_mdb_add(struct net_device *dev,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_mdb_prepare(ds, p->dp->index, mdb, trans);
- }
-
- ds->ops->port_mdb_add(ds, p->dp->index, mdb, trans);
-
- return 0;
-}
-
-static int dsa_slave_port_mdb_del(struct net_device *dev,
- const struct switchdev_obj_port_mdb *mdb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_mdb_del)
- return ds->ops->port_mdb_del(ds, p->dp->index, mdb);
-
- return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_mdb_dump(struct net_device *dev,
- struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_mdb_dump)
- return ds->ops->port_mdb_dump(ds, p->dp->index, mdb, cb);
-
- return -EOPNOTSUPP;
-}
-
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -387,96 +209,24 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EOPNOTSUPP;
}
-static int dsa_slave_stp_state_set(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (switchdev_trans_ph_prepare(trans))
- return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
-
- dsa_slave_set_state(dev, attr->u.stp_state);
-
- return 0;
-}
-
-static int dsa_slave_vlan_filtering(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
- if (switchdev_trans_ph_prepare(trans))
- return 0;
-
- if (ds->ops->port_vlan_filtering)
- return ds->ops->port_vlan_filtering(ds, p->dp->index,
- attr->u.vlan_filtering);
-
- return 0;
-}
-
-static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
- unsigned int ageing_time)
-{
- int i;
-
- for (i = 0; i < ds->num_ports; ++i) {
- struct dsa_port *dp = &ds->ports[i];
-
- if (dp && dp->ageing_time && dp->ageing_time < ageing_time)
- ageing_time = dp->ageing_time;
- }
-
- return ageing_time;
-}
-
-static int dsa_slave_ageing_time(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
- unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
- unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
- return -ERANGE;
- if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
- return -ERANGE;
- return 0;
- }
-
- /* Keep the fastest ageing time in case of multiple bridges */
- p->dp->ageing_time = ageing_time;
- ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
-
- if (ds->ops->set_ageing_time)
- return ds->ops->set_ageing_time(ds, ageing_time);
-
- return 0;
-}
-
static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int ret;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
- ret = dsa_slave_stp_state_set(dev, attr, trans);
+ ret = dsa_port_set_state(dp, attr->u.stp_state, trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
- ret = dsa_slave_vlan_filtering(dev, attr, trans);
+ ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
+ trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
- ret = dsa_slave_ageing_time(dev, attr, trans);
+ ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
break;
default:
ret = -EOPNOTSUPP;
@@ -490,6 +240,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err;
/* For the prepare phase, ensure the full set of changes is feasable in
@@ -499,18 +251,14 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_slave_port_fdb_add(dev,
- SWITCHDEV_OBJ_PORT_FDB(obj),
- trans);
+ err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- trans);
+ err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_slave_port_vlan_add(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj),
- trans);
+ err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
+ trans);
break;
default:
err = -EOPNOTSUPP;
@@ -523,19 +271,19 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
static int dsa_slave_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err;
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_slave_port_fdb_del(dev,
- SWITCHDEV_OBJ_PORT_FDB(obj));
+ err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_slave_port_vlan_del(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj));
+ err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
break;
default:
err = -EOPNOTSUPP;
@@ -549,22 +297,19 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
struct switchdev_obj *obj,
switchdev_obj_dump_cb_t *cb)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err;
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_slave_port_fdb_dump(dev,
- SWITCHDEV_OBJ_PORT_FDB(obj),
- cb);
+ err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- cb);
+ err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_slave_port_vlan_dump(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj),
- cb);
+ err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
break;
default:
err = -EOPNOTSUPP;
@@ -574,57 +319,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
return err;
}
-static int dsa_slave_bridge_port_join(struct net_device *dev,
- struct net_device *br)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_notifier_bridge_info info = {
- .sw_index = p->dp->ds->index,
- .port = p->dp->index,
- .br = br,
- };
- int err;
-
- /* Here the port is already bridged. Reflect the current configuration
- * so that drivers can program their chips accordingly.
- */
- p->dp->bridge_dev = br;
-
- err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, &info);
-
- /* The bridging is rolled back on error */
- if (err)
- p->dp->bridge_dev = NULL;
-
- return err;
-}
-
-static void dsa_slave_bridge_port_leave(struct net_device *dev,
- struct net_device *br)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_notifier_bridge_info info = {
- .sw_index = p->dp->ds->index,
- .port = p->dp->index,
- .br = br,
- };
- int err;
-
- /* Here the port is already unbridged. Reflect the current configuration
- * so that drivers can program their chips accordingly.
- */
- p->dp->bridge_dev = NULL;
-
- err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
- if (err)
- netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
-
- /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
- * so allow it to be in BR_STATE_FORWARDING to be kept functional
- */
- dsa_slave_set_state(dev, BR_STATE_FORWARDING);
-}
-
static int dsa_slave_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
@@ -663,10 +357,14 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
- /* Transmit function may have to reallocate the original SKB */
+ /* Transmit function may have to reallocate the original SKB,
+ * in which case it must have freed it. Only free it here on error.
+ */
nskb = p->xmit(skb, dev);
- if (!nskb)
+ if (!nskb) {
+ kfree_skb(skb);
return NETDEV_TX_OK;
+ }
/* SKB for netpoll still need to be mangled with the protocol-specific
* tag to be successfully transmitted
@@ -677,7 +375,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
/* Queue the SKB for transmission on the parent interface, but
* do not modify its EtherType
*/
- nskb->dev = p->dp->ds->dst->master_netdev;
+ nskb->dev = dsa_master_netdev(p);
dev_queue_xmit(nskb);
return NETDEV_TX_OK;
@@ -689,12 +387,13 @@ dsa_slave_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- int err = -EOPNOTSUPP;
- if (p->phy != NULL)
- err = phy_ethtool_ksettings_get(p->phy, cmd);
+ if (!p->phy)
+ return -EOPNOTSUPP;
- return err;
+ phy_ethtool_ksettings_get(p->phy, cmd);
+
+ return 0;
}
static int
@@ -821,14 +520,14 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
uint64_t *data)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds = dst->cpu_switch;
- s8 cpu_port = dst->cpu_port;
+ struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+ struct dsa_switch *ds = cpu_dp->ds;
+ s8 cpu_port = cpu_dp->index;
int count = 0;
- if (dst->master_ethtool_ops.get_sset_count) {
- count = dst->master_ethtool_ops.get_sset_count(dev,
- ETH_SS_STATS);
- dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data);
+ if (cpu_dp->ethtool_ops.get_sset_count) {
+ count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
+ cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data);
}
if (ds->ops->get_ethtool_stats)
@@ -838,11 +537,12 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds = dst->cpu_switch;
+ struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+ struct dsa_switch *ds = cpu_dp->ds;
int count = 0;
- if (dst->master_ethtool_ops.get_sset_count)
- count += dst->master_ethtool_ops.get_sset_count(dev, sset);
+ if (cpu_dp->ethtool_ops.get_sset_count)
+ count += cpu_dp->ethtool_ops.get_sset_count(dev, sset);
if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
count += ds->ops->get_sset_count(ds);
@@ -854,8 +554,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev,
uint32_t stringset, uint8_t *data)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds = dst->cpu_switch;
- s8 cpu_port = dst->cpu_port;
+ struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+ struct dsa_switch *ds = cpu_dp->ds;
+ s8 cpu_port = cpu_dp->index;
int len = ETH_GSTRING_LEN;
int mcount = 0, count;
unsigned int i;
@@ -866,10 +567,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev,
/* We do not want to be NULL-terminated, since this is a prefix */
pfx[sizeof(pfx) - 1] = '_';
- if (dst->master_ethtool_ops.get_sset_count) {
- mcount = dst->master_ethtool_ops.get_sset_count(dev,
- ETH_SS_STATS);
- dst->master_ethtool_ops.get_strings(dev, stringset, data);
+ if (cpu_dp->ethtool_ops.get_sset_count) {
+ mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
+ cpu_dp->ethtool_ops.get_strings(dev, stringset, data);
}
if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
@@ -985,8 +685,7 @@ static int dsa_slave_netpoll_setup(struct net_device *dev,
struct netpoll_info *ni)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
- struct net_device *master = ds->dst->master_netdev;
+ struct net_device *master = dsa_master_netdev(p);
struct netpoll *netpoll;
int err = 0;
@@ -1138,10 +837,13 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
}
static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
- __be16 protocol, struct tc_to_netdev *tc)
+ u32 chain_index, __be16 protocol,
+ struct tc_to_netdev *tc)
{
bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
- int ret = -EOPNOTSUPP;
+
+ if (chain_index)
+ return -EOPNOTSUPP;
switch (tc->type) {
case TC_SETUP_MATCHALL:
@@ -1155,10 +857,8 @@ static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
return 0;
}
default:
- break;
+ return -EOPNOTSUPP;
}
-
- return ret;
}
void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
@@ -1441,11 +1141,11 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
struct net_device *master;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
+ struct dsa_port *cpu_dp;
int ret;
- master = ds->dst->master_netdev;
- if (ds->master_netdev)
- master = ds->master_netdev;
+ cpu_dp = ds->dst->cpu_dp;
+ master = cpu_dp->netdev;
slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
NET_NAME_UNKNOWN, ether_setup);
@@ -1528,14 +1228,16 @@ static bool dsa_slave_dev_check(struct net_device *dev)
static int dsa_slave_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err = NOTIFY_DONE;
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking) {
- err = dsa_slave_bridge_port_join(dev, info->upper_dev);
+ err = dsa_port_bridge_join(dp, info->upper_dev);
err = notifier_from_errno(err);
} else {
- dsa_slave_bridge_port_leave(dev, info->upper_dev);
+ dsa_port_bridge_leave(dp, info->upper_dev);
err = NOTIFY_OK;
}
}
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index ca6e26e514f0..f1029a8d0e20 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -12,7 +12,47 @@
#include <linux/netdevice.h>
#include <linux/notifier.h>
-#include <net/dsa.h>
+#include <net/switchdev.h>
+
+#include "dsa_priv.h"
+
+/* Return the smallest ageing time among @ageing_time and the non-zero
+ * ageing_time values stored on the ports of @ds.
+ *
+ * Ports whose ageing_time is zero are skipped (presumably "not set" --
+ * confirm against where dp->ageing_time is written).
+ */
+static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
+						   unsigned int ageing_time)
+{
+	int port;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		const struct dsa_port *dp = &ds->ports[port];
+
+		if (!dp->ageing_time)
+			continue;
+
+		if (dp->ageing_time < ageing_time)
+			ageing_time = dp->ageing_time;
+	}
+
+	return ageing_time;
+}
+
+/* Handle a DSA_NOTIFIER_AGEING_TIME event for switch @ds.
+ *
+ * Prepare phase: only range-check the requested value against the
+ * switch's ageing_time_min/ageing_time_max (a zero bound disables that
+ * check) and return -ERANGE when it falls outside, 0 otherwise.
+ *
+ * Commit phase: program the fastest ageing time found across the ports of
+ * the switch through the driver's set_ageing_time operation and return its
+ * result; return 0 when the driver has no such operation.
+ */
+static int dsa_switch_ageing_time(struct dsa_switch *ds,
+				  struct dsa_notifier_ageing_time_info *info)
+{
+	unsigned int requested = info->ageing_time;
+
+	if (switchdev_trans_ph_prepare(info->trans)) {
+		if ((ds->ageing_time_min && requested < ds->ageing_time_min) ||
+		    (ds->ageing_time_max && requested > ds->ageing_time_max))
+			return -ERANGE;
+
+		return 0;
+	}
+
+	if (!ds->ops->set_ageing_time)
+		return 0;
+
+	/* Several bridges may span this switch: keep the fastest setting */
+	requested = dsa_switch_fastest_ageing_time(ds, requested);
+
+	return ds->ops->set_ageing_time(ds, requested);
+}
static int dsa_switch_bridge_join(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
@@ -40,6 +80,127 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
return 0;
}
+/* Handle a DSA_NOTIFIER_FDB_ADD event for switch @ds.
+ *
+ * Events targeting another switch of the fabric are ignored (return 0).
+ *
+ * Prepare phase: return -EOPNOTSUPP unless the driver implements both
+ * port_fdb_prepare and port_fdb_add, otherwise return the result of
+ * port_fdb_prepare.
+ *
+ * Commit phase: call port_fdb_add and report success.
+ */
+static int dsa_switch_fdb_add(struct dsa_switch *ds,
+			      struct dsa_notifier_fdb_info *info)
+{
+	/* Other switch chips of the fabric are not handled yet */
+	if (info->sw_index != ds->index)
+		return 0;
+
+	if (!switchdev_trans_ph_prepare(info->trans)) {
+		/* Not NULL-checked here; presumably the prepare phase has
+		 * already rejected drivers lacking port_fdb_add.
+		 */
+		ds->ops->port_fdb_add(ds, info->port, info->fdb, info->trans);
+		return 0;
+	}
+
+	if (!(ds->ops->port_fdb_prepare && ds->ops->port_fdb_add))
+		return -EOPNOTSUPP;
+
+	return ds->ops->port_fdb_prepare(ds, info->port, info->fdb,
+					 info->trans);
+}
+
+/* Handle a DSA_NOTIFIER_FDB_DEL event for switch @ds.
+ *
+ * Events targeting another switch of the fabric are ignored (return 0).
+ * Otherwise return the result of the driver's port_fdb_del operation, or
+ * -EOPNOTSUPP when the driver does not provide it.
+ */
+static int dsa_switch_fdb_del(struct dsa_switch *ds,
+			      struct dsa_notifier_fdb_info *info)
+{
+	/* Other switch chips of the fabric are not handled yet */
+	if (info->sw_index != ds->index)
+		return 0;
+
+	if (ds->ops->port_fdb_del)
+		return ds->ops->port_fdb_del(ds, info->port, info->fdb);
+
+	return -EOPNOTSUPP;
+}
+
+/* Handle a DSA_NOTIFIER_MDB_ADD event for switch @ds.
+ *
+ * Events targeting another switch of the fabric are ignored (return 0).
+ *
+ * Prepare phase: return -EOPNOTSUPP unless the driver implements both
+ * port_mdb_prepare and port_mdb_add, otherwise return the result of
+ * port_mdb_prepare.
+ *
+ * Commit phase: call port_mdb_add and report success.
+ */
+static int dsa_switch_mdb_add(struct dsa_switch *ds,
+			      struct dsa_notifier_mdb_info *info)
+{
+	/* Other switch chips of the fabric are not handled yet */
+	if (info->sw_index != ds->index)
+		return 0;
+
+	if (!switchdev_trans_ph_prepare(info->trans)) {
+		/* Not NULL-checked here; presumably the prepare phase has
+		 * already rejected drivers lacking port_mdb_add.
+		 */
+		ds->ops->port_mdb_add(ds, info->port, info->mdb, info->trans);
+		return 0;
+	}
+
+	if (!(ds->ops->port_mdb_prepare && ds->ops->port_mdb_add))
+		return -EOPNOTSUPP;
+
+	return ds->ops->port_mdb_prepare(ds, info->port, info->mdb,
+					 info->trans);
+}
+
+/* Handle a DSA_NOTIFIER_MDB_DEL event for switch @ds.
+ *
+ * Events targeting another switch of the fabric are ignored (return 0).
+ * Otherwise return the result of the driver's port_mdb_del operation, or
+ * -EOPNOTSUPP when the driver does not provide it.
+ */
+static int dsa_switch_mdb_del(struct dsa_switch *ds,
+			      struct dsa_notifier_mdb_info *info)
+{
+	/* Other switch chips of the fabric are not handled yet */
+	if (info->sw_index != ds->index)
+		return 0;
+
+	if (ds->ops->port_mdb_del)
+		return ds->ops->port_mdb_del(ds, info->port, info->mdb);
+
+	return -EOPNOTSUPP;
+}
+
+/* Handle a DSA_NOTIFIER_VLAN_ADD event for switch @ds.
+ *
+ * The VLAN is applied to the target port (when it belongs to this switch)
+ * and to every CPU and DSA port of this switch.
+ *
+ * Prepare phase: return -EOPNOTSUPP unless the driver implements both
+ * port_vlan_prepare and port_vlan_add; otherwise call port_vlan_prepare on
+ * each member port, stop at the first error, and return without
+ * programming anything.
+ *
+ * Commit phase: call port_vlan_add on each member port and report success.
+ */
+static int dsa_switch_vlan_add(struct dsa_switch *ds,
+			       struct dsa_notifier_vlan_info *info)
+{
+	const struct switchdev_obj_port_vlan *vlan = info->vlan;
+	struct switchdev_trans *trans = info->trans;
+	DECLARE_BITMAP(members, ds->num_ports);
+	int port, err;
+
+	/* Build a mask of VLAN members */
+	bitmap_zero(members, ds->num_ports);
+	if (ds->index == info->sw_index)
+		set_bit(info->port, members);
+	for (port = 0; port < ds->num_ports; port++)
+		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+			set_bit(port, members);
+
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+			return -EOPNOTSUPP;
+
+		for_each_set_bit(port, members, ds->num_ports) {
+			err = ds->ops->port_vlan_prepare(ds, port, vlan, trans);
+			if (err)
+				return err;
+		}
+
+		/* The prepare phase must not touch the hardware: stop here
+		 * instead of falling through to the commit loop below.
+		 */
+		return 0;
+	}
+
+	for_each_set_bit(port, members, ds->num_ports)
+		ds->ops->port_vlan_add(ds, port, vlan, trans);
+
+	return 0;
+}
+
+/* Handle a DSA_NOTIFIER_VLAN_DEL event for switch @ds.
+ *
+ * Returns -EOPNOTSUPP when the driver lacks port_vlan_del; this check is
+ * made before looking at which switch the event targets. Events for
+ * another switch of the fabric then return 0; events for this switch
+ * return the result of port_vlan_del on the target port.
+ */
+static int dsa_switch_vlan_del(struct dsa_switch *ds,
+			       struct dsa_notifier_vlan_info *info)
+{
+	if (!ds->ops->port_vlan_del)
+		return -EOPNOTSUPP;
+
+	if (info->sw_index != ds->index)
+		return 0;
+
+	return ds->ops->port_vlan_del(ds, info->port, info->vlan);
+}
+
static int dsa_switch_event(struct notifier_block *nb,
unsigned long event, void *info)
{
@@ -47,12 +208,33 @@ static int dsa_switch_event(struct notifier_block *nb,
int err;
switch (event) {
+ case DSA_NOTIFIER_AGEING_TIME:
+ err = dsa_switch_ageing_time(ds, info);
+ break;
case DSA_NOTIFIER_BRIDGE_JOIN:
err = dsa_switch_bridge_join(ds, info);
break;
case DSA_NOTIFIER_BRIDGE_LEAVE:
err = dsa_switch_bridge_leave(ds, info);
break;
+ case DSA_NOTIFIER_FDB_ADD:
+ err = dsa_switch_fdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_FDB_DEL:
+ err = dsa_switch_fdb_del(ds, info);
+ break;
+ case DSA_NOTIFIER_MDB_ADD:
+ err = dsa_switch_mdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_MDB_DEL:
+ err = dsa_switch_mdb_del(ds, info);
+ break;
+ case DSA_NOTIFIER_VLAN_ADD:
+ err = dsa_switch_vlan_add(ds, info);
+ break;
+ case DSA_NOTIFIER_VLAN_DEL:
+ err = dsa_switch_vlan_del(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 2a9b52c5af86..c697d9815177 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,7 +12,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
/* This tag length is 4 bytes, older ones were 6 bytes, we do not
@@ -65,7 +65,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
u8 *brcm_tag;
if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
- goto out_free;
+ return NULL;
skb_push(skb, BRCM_TAG_LEN);
@@ -86,10 +86,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK;
return skb;
-
-out_free:
- kfree_skb(skb);
- return NULL;
}
static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -97,34 +93,33 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
+ struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+ struct dsa_switch *ds = cpu_dp->ds;
int source_port;
u8 *brcm_tag;
- ds = dst->cpu_switch;
-
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
- goto out_drop;
+ return NULL;
/* skb->data points to the EtherType, the tag is right before it */
brcm_tag = skb->data - 2;
/* The opcode should never be different than 0b000 */
if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
- goto out_drop;
+ return NULL;
/* We should never see a reserved reason code without knowing how to
* handle it
*/
if (unlikely(brcm_tag[2] & BRCM_EG_RC_RSVD))
- goto out_drop;
+ return NULL;
/* Locate which port this is coming from */
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
/* Validate port against switch setup, either the port is totally */
if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
- goto out_drop;
+ return NULL;
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
@@ -137,9 +132,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->dev = ds->ports[source_port].netdev;
return skb;
-
-out_drop:
- return NULL;
}
const struct dsa_device_ops brcm_netdev_ops = {
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 1c6633f0de01..12867a4b458f 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -11,7 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -28,7 +28,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
*/
if (skb->protocol == htons(ETH_P_8021Q)) {
if (skb_cow_head(skb, 0) < 0)
- goto out_free;
+ return NULL;
/*
* Construct tagged FROM_CPU DSA tag from 802.1q tag.
@@ -46,7 +46,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
} else {
if (skb_cow_head(skb, DSA_HLEN) < 0)
- goto out_free;
+ return NULL;
skb_push(skb, DSA_HLEN);
memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
@@ -62,10 +62,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
return skb;
-
-out_free:
- kfree_skb(skb);
- return NULL;
}
static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -79,7 +75,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_port;
if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
- goto out_drop;
+ return NULL;
/*
* The ethertype field is part of the DSA header.
@@ -90,7 +86,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
* Check that frame type is either TO_CPU or FORWARD.
*/
if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0)
- goto out_drop;
+ return NULL;
/*
* Determine source device and port.
@@ -103,14 +99,14 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
* port is a registered DSA port.
*/
if (source_device >= DSA_MAX_SWITCHES)
- goto out_drop;
+ return NULL;
ds = dst->ds[source_device];
if (!ds)
- goto out_drop;
+ return NULL;
if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
- goto out_drop;
+ return NULL;
/*
* Convert the DSA header to an 802.1q header if the 'tagged'
@@ -161,9 +157,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
skb->dev = ds->ports[source_port].netdev;
return skb;
-
-out_drop:
- return NULL;
}
const struct dsa_device_ops dsa_netdev_ops = {
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index d9c668aa5e54..67a9d26f9075 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -11,7 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -30,7 +30,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
*/
if (skb->protocol == htons(ETH_P_8021Q)) {
if (skb_cow_head(skb, DSA_HLEN) < 0)
- goto out_free;
+ return NULL;
skb_push(skb, DSA_HLEN);
memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
@@ -55,7 +55,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
} else {
if (skb_cow_head(skb, EDSA_HLEN) < 0)
- goto out_free;
+ return NULL;
skb_push(skb, EDSA_HLEN);
memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN);
@@ -75,10 +75,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
return skb;
-
-out_free:
- kfree_skb(skb);
- return NULL;
}
static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -92,7 +88,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_port;
if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
- goto out_drop;
+ return NULL;
/*
* Skip the two null bytes after the ethertype.
@@ -103,7 +99,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
* Check that frame type is either TO_CPU or FORWARD.
*/
if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0)
- goto out_drop;
+ return NULL;
/*
* Determine source device and port.
@@ -116,14 +112,14 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
* port is a registered DSA port.
*/
if (source_device >= DSA_MAX_SWITCHES)
- goto out_drop;
+ return NULL;
ds = dst->ds[source_device];
if (!ds)
- goto out_drop;
+ return NULL;
if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
- goto out_drop;
+ return NULL;
/*
* If the 'tagged' bit is set, convert the DSA tag to a 802.1q
@@ -180,9 +176,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
skb->dev = ds->ports[source_port].netdev;
return skb;
-
-out_drop:
- return NULL;
}
const struct dsa_device_ops edsa_netdev_ops = {
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
new file mode 100644
index 000000000000..fab41de8e983
--- /dev/null
+++ b/net/dsa/tag_ksz.c
@@ -0,0 +1,99 @@
+/*
+ * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling
+ * Copyright (c) 2017 Microchip Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : Prioritization (not used now)
+ * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ_INGRESS_TAG_LEN 2
+#define KSZ_EGRESS_TAG_LEN 1
+
+/* Append the two-byte KSZ ingress tag (plus any minimum-frame padding) to an
+ * outgoing frame.
+ *
+ * Returns the skb to hand to the master device - either @skb itself or a
+ * freshly allocated copy - or NULL on failure.
+ *
+ * Ownership: whenever NULL is returned, @skb has NOT been freed here. The
+ * sibling taggers touched by this change drop their kfree_skb() on the error
+ * path, so the caller is expected to release the original skb; freeing it in
+ * here as well would be a double free. When a copy is returned, @skb has
+ * been freed.
+ */
+static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct sk_buff *nskb;
+	int padlen;
+	u8 *tag;
+
+	padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
+
+	if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN &&
+	    !(padlen && skb_cloned(skb))) {
+		/* The tailroom is large enough and, if padding has to be
+		 * written, it is not shared with a clone. Zero the padding by
+		 * hand rather than calling skb_put_padto(): that helper frees
+		 * the skb when it fails, which would leave the caller freeing
+		 * it a second time.
+		 */
+		nskb = skb;
+		if (padlen)
+			memset(skb_put(nskb, padlen), 0, padlen);
+	} else {
+		nskb = alloc_skb(NET_IP_ALIGN + skb->len +
+				 padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
+		if (!nskb)
+			return NULL;
+		skb_reserve(nskb, NET_IP_ALIGN);
+
+		skb_reset_mac_header(nskb);
+		skb_set_network_header(nskb,
+				       skb_network_header(skb) - skb->head);
+		skb_set_transport_header(nskb,
+					 skb_transport_header(skb) - skb->head);
+		skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
+
+		/* skb_put_padto() frees nskb when it fails; the original skb
+		 * must still be alive at that point so the caller can drop it.
+		 */
+		if (skb_put_padto(nskb, nskb->len + padlen))
+			return NULL;
+
+		/* The copy is complete, the original is no longer needed. */
+		kfree_skb(skb);
+	}
+
+	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
+	tag[0] = 0;
+	tag[1] = 1 << p->dp->index; /* destination port */
+
+	return nskb;
+}
+
+/* Strip the one-byte KSZ egress tag from a received frame and steer the frame
+ * to the slave netdev of the port it arrived on.
+ *
+ * The tag is the last byte of the frame data; its low three bits carry the
+ * zero-based source port.
+ *
+ * Returns @skb with skb->dev set to the source port's netdev, or NULL when
+ * the port number is out of range, the port has no netdev, or the tag could
+ * not be trimmed. @skb is not freed here on a NULL return.
+ */
+static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
+			       struct packet_type *pt,
+			       struct net_device *orig_dev)
+{
+	struct dsa_switch_tree *dst = dev->dsa_ptr;
+	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+	struct dsa_switch *ds = cpu_dp->ds;
+	u8 *tag;
+	int source_port;
+
+	tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+
+	source_port = tag[0] & 7;
+	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+		return NULL;
+
+	/* Do not deliver the frame with the tag byte still attached if the
+	 * trim fails.
+	 */
+	if (pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN))
+		return NULL;
+
+	skb->dev = ds->ports[source_port].netdev;
+
+	return skb;
+}
+
+/* Tagger operations for the Microchip KSZ tail tag: ksz_xmit() adds the
+ * ingress tag on transmit, ksz_rcv() strips the egress tag on receive.
+ */
+const struct dsa_device_ops ksz_netdev_ops = {
+ .xmit = ksz_xmit,
+ .rcv = ksz_rcv,
+};
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 70130ed5c21a..247774d149f9 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -14,7 +14,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
/* To define the outgoing port and to discover the incoming port a regular
@@ -52,7 +52,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) {
dev_dbg(&dev->dev,
"Cannot make room for the special tag. Dropping packet\n");
- goto out_free;
+ return NULL;
}
/* provide 'LAN9303_TAG_LEN' bytes additional space */
@@ -66,9 +66,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
lan9303_tag[1] = htons(p->dp->index | BIT(4));
return skb;
-out_free:
- kfree_skb(skb);
- return NULL;
}
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 837cdddb53f0..2f32b7ea3365 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,7 +13,7 @@
*/
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define MTK_HDR_LEN 4
@@ -27,7 +27,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
u8 *mtk_tag;
if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
- goto out_free;
+ return NULL;
skb_push(skb, MTK_HDR_LEN);
@@ -41,10 +41,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
mtk_tag[3] = 0;
return skb;
-
-out_free:
- kfree_skb(skb);
- return NULL;
}
static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -57,7 +53,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
__be16 *phdr, hdr;
if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
- goto out_drop;
+ return NULL;
/* The MTK header is added by the switch between src addr
* and ethertype at this point, skb->data points to 2 bytes
@@ -79,19 +75,16 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
*/
ds = dst->ds[0];
if (!ds)
- goto out_drop;
+ return NULL;
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
if (!ds->ports[port].netdev)
- goto out_drop;
+ return NULL;
skb->dev = ds->ports[port].netdev;
return skb;
-
-out_drop:
- return NULL;
}
const struct dsa_device_ops mtk_netdev_ops = {
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 3ba3f59f7a34..1867a3d11f28 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -12,7 +12,7 @@
*/
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define QCA_HDR_LEN 2
@@ -45,7 +45,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_bytes += skb->len;
if (skb_cow_head(skb, 0) < 0)
- goto out_free;
+ return NULL;
skb_push(skb, QCA_HDR_LEN);
@@ -60,10 +60,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
*phdr = htons(hdr);
return skb;
-
-out_free:
- kfree_skb(skb);
- return NULL;
}
static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -71,13 +67,14 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
struct dsa_switch *ds;
u8 ver;
int port;
__be16 *phdr, hdr;
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
- goto out_drop;
+ return NULL;
/* The QCA header is added by the switch between src addr and Ethertype
* At this point, skb->data points to ethertype so header should be
@@ -89,7 +86,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Make sure the version is correct */
ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
if (unlikely(ver != QCA_HDR_VERSION))
- goto out_drop;
+ return NULL;
/* Remove QCA tag and recalculate checksum */
skb_pull_rcsum(skb, QCA_HDR_LEN);
@@ -99,22 +96,19 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* This protocol doesn't support cascading multiple switches so it's
* safe to assume the switch is first in the tree
*/
- ds = dst->cpu_switch;
+ ds = cpu_dp->ds;
if (!ds)
- goto out_drop;
+ return NULL;
/* Get source port information */
port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
if (!ds->ports[port].netdev)
- goto out_drop;
+ return NULL;
/* Update skb & forward the frame accordingly */
skb->dev = ds->ports[port].netdev;
return skb;
-
-out_drop:
- return NULL;
}
const struct dsa_device_ops qca_netdev_ops = {
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index aafc2fc74c30..172f13167896 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -11,7 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -32,10 +32,8 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
padlen = 60 - skb->len;
nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC);
- if (nskb == NULL) {
- kfree_skb(skb);
+ if (!nskb)
return NULL;
- }
skb_reserve(nskb, NET_IP_ALIGN);
skb_reset_mac_header(nskb);
@@ -63,32 +61,28 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
+ struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+ struct dsa_switch *ds = cpu_dp->ds;
u8 *trailer;
int source_port;
- ds = dst->cpu_switch;
-
if (skb_linearize(skb))
- goto out_drop;
+ return NULL;
trailer = skb_tail_pointer(skb) - 4;
if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 ||
(trailer[2] & 0xef) != 0x00 || trailer[3] != 0x00)
- goto out_drop;
+ return NULL;
source_port = trailer[1] & 7;
if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
- goto out_drop;
+ return NULL;
pskb_trim_rcsum(skb, skb->len - 4);
skb->dev = ds->ports[source_port].netdev;
return skb;
-
-out_drop:
- return NULL;
}
const struct dsa_device_ops trailer_netdev_ops = {
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index eedba7670b51..a60658c85a9a 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -301,15 +301,14 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto out_skb;
skb->dev = dev;
- skb->sk = sk;
skb->protocol = htons(ETH_P_IEEE802154);
- dev_put(dev);
-
err = dev_queue_xmit(skb);
if (err > 0)
err = net_xmit_errno(err);
+ dev_put(dev);
+
return err ?: size;
out_skb:
@@ -690,15 +689,14 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto out_skb;
skb->dev = dev;
- skb->sk = sk;
skb->protocol = htons(ETH_P_IEEE802154);
- dev_put(dev);
-
err = dev_queue_xmit(skb);
if (err > 0)
err = net_xmit_errno(err);
+ dev_put(dev);
+
return err ?: size;
out_skb:
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 22377c8ff14b..e8f862358518 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -220,7 +220,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
sg_init_table(sg, nfrags + sglists);
- skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ if (unlikely(err < 0))
+ goto out_free;
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
@@ -393,7 +395,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, ihl);
sg_init_table(sg, nfrags + sglists);
- skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ if (unlikely(err < 0))
+ goto out_free;
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index e9f3386a528b..a651c53260ec 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1113,13 +1113,17 @@ static int arp_invalidate(struct net_device *dev, __be32 ip)
{
struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
int err = -ENXIO;
+ struct neigh_table *tbl = &arp_tbl;
if (neigh) {
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_ADMIN, 0);
+ write_lock_bh(&tbl->lock);
neigh_release(neigh);
+ neigh_remove_one(neigh, tbl);
+ write_unlock_bh(&tbl->lock);
}
return err;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df14815a3b8c..a7dd088d5fc9 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -176,6 +176,7 @@ EXPORT_SYMBOL(__ip_dev_find);
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
+static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
int destroy);
#ifdef CONFIG_SYSCTL
@@ -441,6 +442,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
+ struct in_validator_info ivi;
+ int ret;
ASSERT_RTNL();
@@ -471,6 +474,23 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
}
}
+ /* Allow any devices that wish to register ifaddr validators to weigh
+ * in now, before changes are committed. The rtnl lock is serializing
+ * access here, so the state should not change between a validator call
+ * and a final notify on commit. This isn't invoked on promotion under
+ * the assumption that validators are checking the address itself, and
+ * not the flags.
+ */
+ ivi.ivi_addr = ifa->ifa_address;
+ ivi.ivi_dev = ifa->ifa_dev;
+ ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
+ NETDEV_UP, &ivi);
+ ret = notifier_to_errno(ret);
+ if (ret) {
+ inet_free_ifa(ifa);
+ return ret;
+ }
+
if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
prandom_seed((__force u32) ifa->ifa_local);
ifap = last_primary;
@@ -1356,6 +1376,19 @@ int unregister_inetaddr_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
+/* Add @nb to the IPv4 address validator chain. That chain is called from
+ * __inet_insert_ifa() with NETDEV_UP and a struct in_validator_info before a
+ * new address is committed; an error from the chain makes the insert fail.
+ *
+ * Returns the result of blocking_notifier_chain_register().
+ */
+int register_inetaddr_validator_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
+}
+EXPORT_SYMBOL(register_inetaddr_validator_notifier);
+
+/* Remove @nb from the IPv4 address validator chain.
+ *
+ * Returns the result of blocking_notifier_chain_unregister().
+ */
+int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
+ nb);
+}
+EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
+
/* Rename ifa_labels for a device name change. Make some effort to preserve
* existing alias numbering and to create unique labels if possible.
*/
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 93322f895eab..d815d1755473 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -377,9 +377,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
esp->esph = esph;
sg_init_table(sg, esp->nfrags);
- skb_to_sgvec(skb, sg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + esp->clen + alen);
+ err = skb_to_sgvec(skb, sg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ if (unlikely(err < 0))
+ goto error;
if (!esp->inplace) {
int allocsize;
@@ -403,9 +405,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
spin_unlock_bh(&x->lock);
sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
- skb_to_sgvec(skb, dsg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + esp->clen + alen);
+ err = skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ if (unlikely(err < 0))
+ goto error;
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -690,7 +694,9 @@ skip_cow:
esp_input_set_header(skb, seqhi);
sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ err = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(err < 0))
+ goto out;
skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 83e3ed258467..4e678fa892dd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -588,13 +588,15 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (cmd == SIOCDELRT) {
tb = fib_get_table(net, cfg.fc_table);
if (tb)
- err = fib_table_delete(net, tb, &cfg);
+ err = fib_table_delete(net, tb, &cfg,
+ NULL);
else
err = -ESRCH;
} else {
tb = fib_new_table(net, cfg.fc_table);
if (tb)
- err = fib_table_insert(net, tb, &cfg);
+ err = fib_table_insert(net, tb,
+ &cfg, NULL);
else
err = -ENOBUFS;
}
@@ -626,14 +628,15 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct fib_config *cfg)
+ struct nlmsghdr *nlh, struct fib_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct nlattr *attr;
int err, remaining;
struct rtmsg *rtm;
err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
- NULL);
+ extack);
if (err < 0)
goto errout;
@@ -654,6 +657,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
cfg->fc_nlinfo.nl_net = net;
if (cfg->fc_type > RTN_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid route type");
err = -EINVAL;
goto errout;
}
@@ -681,7 +685,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
break;
case RTA_MULTIPATH:
err = lwtunnel_valid_encap_type_attr(nla_data(attr),
- nla_len(attr));
+ nla_len(attr),
+ extack);
if (err < 0)
goto errout;
cfg->fc_mp = nla_data(attr);
@@ -698,7 +703,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
break;
case RTA_ENCAP_TYPE:
cfg->fc_encap_type = nla_get_u16(attr);
- err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type,
+ extack);
if (err < 0)
goto errout;
break;
@@ -718,17 +724,18 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib_table *tb;
int err;
- err = rtm_to_fib_config(net, skb, nlh, &cfg);
+ err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
if (err < 0)
goto errout;
tb = fib_get_table(net, cfg.fc_table);
if (!tb) {
+ NL_SET_ERR_MSG(extack, "FIB table does not exist");
err = -ESRCH;
goto errout;
}
- err = fib_table_delete(net, tb, &cfg);
+ err = fib_table_delete(net, tb, &cfg, extack);
errout:
return err;
}
@@ -741,7 +748,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib_table *tb;
int err;
- err = rtm_to_fib_config(net, skb, nlh, &cfg);
+ err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
if (err < 0)
goto errout;
@@ -751,7 +758,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- err = fib_table_insert(net, tb, &cfg);
+ err = fib_table_insert(net, tb, &cfg, extack);
errout:
return err;
}
@@ -845,9 +852,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
cfg.fc_scope = RT_SCOPE_HOST;
if (cmd == RTM_NEWROUTE)
- fib_table_insert(net, tb, &cfg);
+ fib_table_insert(net, tb, &cfg, NULL);
else
- fib_table_delete(net, tb, &cfg);
+ fib_table_delete(net, tb, &cfg, NULL);
}
void fib_add_ifaddr(struct in_ifaddr *ifa)
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 9c02920725db..769ab87ebc4b 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -28,8 +28,10 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
/* Exported by fib_semantics.c */
void fib_release_info(struct fib_info *);
-struct fib_info *fib_create_info(struct fib_config *cfg);
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
+struct fib_info *fib_create_info(struct fib_config *cfg,
+ struct netlink_ext_ack *extack);
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+ struct netlink_ext_ack *extack);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ad9ad4aab5da..2157dc08c407 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -32,6 +32,7 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/netlink.h>
#include <net/arp.h>
#include <net/ip.h>
@@ -456,7 +457,8 @@ static int fib_detect_death(struct fib_info *fi, int order,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
+ struct netlink_ext_ack *extack)
{
int nhs = 0;
@@ -466,22 +468,35 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
}
/* leftover implies invalid nexthop configuration, discard it */
- return remaining > 0 ? 0 : nhs;
+ if (remaining > 0) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop configuration - extra data after nexthops");
+ nhs = 0;
+ }
+
+ return nhs;
}
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
- int remaining, struct fib_config *cfg)
+ int remaining, struct fib_config *cfg,
+ struct netlink_ext_ack *extack)
{
int ret;
change_nexthops(fi) {
int attrlen;
- if (!rtnh_ok(rtnh, remaining))
+ if (!rtnh_ok(rtnh, remaining)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop configuration - extra data after nexthop");
return -EINVAL;
+ }
- if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
return -EINVAL;
+ }
nexthop_nh->nh_flags =
(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
@@ -507,13 +522,17 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
nla_entype = nla_find(attrs, attrlen,
RTA_ENCAP_TYPE);
- if (!nla_entype)
+ if (!nla_entype) {
+ NL_SET_BAD_ATTR(extack, nla);
+ NL_SET_ERR_MSG(extack,
+ "Encap type is missing");
goto err_inval;
+ }
ret = lwtunnel_build_state(nla_get_u16(
nla_entype),
nla, AF_INET, cfg,
- &lwtstate);
+ &lwtstate, extack);
if (ret)
goto errout;
nexthop_nh->nh_lwtstate =
@@ -595,7 +614,8 @@ static inline void fib_add_weight(struct fib_info *fi,
static int fib_encap_match(u16 encap_type,
struct nlattr *encap,
const struct fib_nh *nh,
- const struct fib_config *cfg)
+ const struct fib_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct lwtunnel_state *lwtstate;
int ret, result = 0;
@@ -603,8 +623,8 @@ static int fib_encap_match(u16 encap_type,
if (encap_type == LWTUNNEL_ENCAP_NONE)
return 0;
- ret = lwtunnel_build_state(encap_type, encap,
- AF_INET, cfg, &lwtstate);
+ ret = lwtunnel_build_state(encap_type, encap, AF_INET,
+ cfg, &lwtstate, extack);
if (!ret) {
result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
lwtstate_free(lwtstate);
@@ -613,7 +633,8 @@ static int fib_encap_match(u16 encap_type,
return result;
}
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
struct rtnexthop *rtnh;
@@ -625,9 +646,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
if (cfg->fc_oif || cfg->fc_gw) {
if (cfg->fc_encap) {
- if (fib_encap_match(cfg->fc_encap_type,
- cfg->fc_encap, fi->fib_nh, cfg))
- return 1;
+ if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
+ fi->fib_nh, cfg, extack))
+ return 1;
}
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
@@ -716,7 +737,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
* |-> {local prefix} (terminal node)
*/
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
- struct fib_nh *nh)
+ struct fib_nh *nh, struct netlink_ext_ack *extack)
{
int err = 0;
struct net *net;
@@ -729,16 +750,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
if (nh->nh_flags & RTNH_F_ONLINK) {
unsigned int addr_type;
- if (cfg->fc_scope >= RT_SCOPE_LINK)
+ if (cfg->fc_scope >= RT_SCOPE_LINK) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid scope");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, nh->nh_oif);
if (!dev)
return -ENODEV;
- if (!(dev->flags & IFF_UP))
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device is not up");
return -ENETDOWN;
+ }
addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
- if (addr_type != RTN_UNICAST)
+ if (addr_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid gateway");
return -EINVAL;
+ }
if (!netif_carrier_ok(dev))
nh->nh_flags |= RTNH_F_LINKDOWN;
nh->nh_dev = dev;
@@ -778,18 +808,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
}
if (err) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid gateway");
rcu_read_unlock();
return err;
}
}
err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
goto out;
+ }
nh->nh_scope = res.scope;
nh->nh_oif = FIB_RES_OIF(res);
nh->nh_dev = dev = FIB_RES_DEV(res);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "No egress device for nexthop gateway");
goto out;
+ }
dev_hold(dev);
if (!netif_carrier_ok(dev))
nh->nh_flags |= RTNH_F_LINKDOWN;
@@ -797,17 +834,21 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
} else {
struct in_device *in_dev;
- if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
+ if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
return -EINVAL;
-
+ }
rcu_read_lock();
err = -ENODEV;
in_dev = inetdev_by_index(net, nh->nh_oif);
if (!in_dev)
goto out;
err = -ENETDOWN;
- if (!(in_dev->dev->flags & IFF_UP))
+ if (!(in_dev->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
goto out;
+ }
nh->nh_dev = in_dev->dev;
dev_hold(nh->nh_dev);
nh->nh_scope = RT_SCOPE_HOST;
@@ -982,7 +1023,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
return 0;
}
-struct fib_info *fib_create_info(struct fib_config *cfg)
+struct fib_info *fib_create_info(struct fib_config *cfg,
+ struct netlink_ext_ack *extack)
{
int err;
struct fib_info *fi = NULL;
@@ -994,15 +1036,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto err_inval;
/* Fast check to catch the most weird cases */
- if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
+ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
+ NL_SET_ERR_MSG(extack, "Invalid scope");
goto err_inval;
+ }
- if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
goto err_inval;
+ }
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
- nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
+ nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
if (nhs == 0)
goto err_inval;
}
@@ -1065,18 +1112,29 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+ err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
if (err != 0)
goto failure;
- if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
+ if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device index does not match RTA_OIF");
goto err_inval;
- if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
+ }
+ if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop gateway does not match RTA_GATEWAY");
goto err_inval;
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
+ if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop class id does not match RTA_FLOW");
goto err_inval;
+ }
#endif
#else
+ NL_SET_ERR_MSG(extack,
+ "Multipath support not enabled in kernel");
goto err_inval;
#endif
} else {
@@ -1085,11 +1143,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (cfg->fc_encap) {
struct lwtunnel_state *lwtstate;
- if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
+ if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
+ NL_SET_ERR_MSG(extack,
+ "LWT encap type not specified");
goto err_inval;
+ }
err = lwtunnel_build_state(cfg->fc_encap_type,
cfg->fc_encap, AF_INET, cfg,
- &lwtstate);
+ &lwtstate, extack);
if (err)
goto failure;
@@ -1109,8 +1170,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
}
if (fib_props[cfg->fc_type].error) {
- if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
+ if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+ NL_SET_ERR_MSG(extack,
+ "Gateway, device and multipath can not be specified for this route type");
goto err_inval;
+ }
goto link_it;
} else {
switch (cfg->fc_type) {
@@ -1121,19 +1185,30 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
case RTN_MULTICAST:
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid route type");
goto err_inval;
}
}
- if (cfg->fc_scope > RT_SCOPE_HOST)
+ if (cfg->fc_scope > RT_SCOPE_HOST) {
+ NL_SET_ERR_MSG(extack, "Invalid scope");
goto err_inval;
+ }
if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
- if (nhs != 1 || nh->nh_gw)
+ if (nhs != 1) {
+ NL_SET_ERR_MSG(extack,
+ "Route with host scope can not have multiple nexthops");
+ goto err_inval;
+ }
+ if (nh->nh_gw) {
+ NL_SET_ERR_MSG(extack,
+ "Route with host scope can not have a gateway");
goto err_inval;
+ }
nh->nh_scope = RT_SCOPE_NOWHERE;
nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
err = -ENODEV;
@@ -1143,7 +1218,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
int linkdown = 0;
change_nexthops(fi) {
- err = fib_check_nh(cfg, fi, nexthop_nh);
+ err = fib_check_nh(cfg, fi, nexthop_nh, extack);
if (err != 0)
goto failure;
if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1153,8 +1228,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_flags |= RTNH_F_LINKDOWN;
}
- if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc))
+ if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
+ NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
goto err_inval;
+ }
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 51182ff2b441..d56659e97a6e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1099,9 +1099,25 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
return 0;
}
+/* Check that a route key is consistent with its prefix length.
+ *
+ * @key:    destination key (callers pass the address in host byte order)
+ * @plen:   prefix length
+ * @extack: receives a human-readable message when the check fails
+ *
+ * Returns true when @plen does not exceed KEYLENGTH and @key has no bits left
+ * over after being shifted left by @plen; otherwise sets an error message in
+ * @extack and returns false.
+ */
+static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
+{
+ if (plen > KEYLENGTH) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ return false;
+ }
+
+ /* The shift is only evaluated for plen below KEYLENGTH. */
+ if ((plen < KEYLENGTH) && (key << plen)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid prefix for given prefix length");
+ return false;
+ }
+
+ return true;
+}
+
/* Caller must hold RTNL. */
int fib_table_insert(struct net *net, struct fib_table *tb,
- struct fib_config *cfg)
+ struct fib_config *cfg, struct netlink_ext_ack *extack)
{
enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
struct trie *t = (struct trie *)tb->tb_data;
@@ -1115,17 +1131,14 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
u32 key;
int err;
- if (plen > KEYLENGTH)
- return -EINVAL;
-
key = ntohl(cfg->fc_dst);
- pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
-
- if ((plen < KEYLENGTH) && (key << plen))
+ if (!fib_valid_key_len(key, plen, extack))
return -EINVAL;
- fi = fib_create_info(cfg);
+ pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
+
+ fi = fib_create_info(cfg, extack);
if (IS_ERR(fi)) {
err = PTR_ERR(fi);
goto err;
@@ -1452,6 +1465,7 @@ found:
if (!(fib_flags & FIB_LOOKUP_NOREF))
atomic_inc(&fi->fib_clntref);
+ res->prefix = htonl(n->key);
res->prefixlen = KEYLENGTH - fa->fa_slen;
res->nh_sel = nhsel;
res->type = fa->fa_type;
@@ -1507,7 +1521,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp,
/* Caller must hold RTNL. */
int fib_table_delete(struct net *net, struct fib_table *tb,
- struct fib_config *cfg)
+ struct fib_config *cfg, struct netlink_ext_ack *extack)
{
struct trie *t = (struct trie *) tb->tb_data;
struct fib_alias *fa, *fa_to_delete;
@@ -1517,12 +1531,9 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
u8 tos = cfg->fc_tos;
u32 key;
- if (plen > KEYLENGTH)
- return -EINVAL;
-
key = ntohl(cfg->fc_dst);
- if ((plen < KEYLENGTH) && (key << plen))
+ if (!fib_valid_key_len(key, plen, extack))
return -EINVAL;
l = fib_find_node(t, &tp, key);
@@ -1551,7 +1562,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
fi->fib_prefsrc == cfg->fc_prefsrc) &&
(!cfg->fc_protocol ||
fi->fib_protocol == cfg->fc_protocol) &&
- fib_nh_match(cfg, fi) == 0) {
+ fib_nh_match(cfg, fi, extack) == 0) {
fa_to_delete = fa;
break;
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 805f6607f8d9..8e0257d01200 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <net/genetlink.h>
#include <net/gue.h>
+#include <net/fou.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
@@ -859,25 +860,6 @@ size_t gue_encap_hlen(struct ip_tunnel_encap *e)
}
EXPORT_SYMBOL(gue_encap_hlen);
-static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
- struct flowi4 *fl4, u8 *protocol, __be16 sport)
-{
- struct udphdr *uh;
-
- skb_push(skb, sizeof(struct udphdr));
- skb_reset_transport_header(skb);
-
- uh = udp_hdr(skb);
-
- uh->dest = e->dport;
- uh->source = sport;
- uh->len = htons(skb->len);
- udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
- fl4->saddr, fl4->daddr, skb->len);
-
- *protocol = IPPROTO_UDP;
-}
-
int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, __be16 *sport, int type)
{
@@ -894,24 +876,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
}
EXPORT_SYMBOL(__fou_build_header);
-int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi4 *fl4)
-{
- int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
- SKB_GSO_UDP_TUNNEL;
- __be16 sport;
- int err;
-
- err = __fou_build_header(skb, e, protocol, &sport, type);
- if (err)
- return err;
-
- fou_build_udp(skb, e, fl4, protocol, sport);
-
- return 0;
-}
-EXPORT_SYMBOL(fou_build_header);
-
int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, __be16 *sport, int type)
{
@@ -985,8 +949,46 @@ int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
}
EXPORT_SYMBOL(__gue_build_header);
-int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi4 *fl4)
+#ifdef CONFIG_NET_FOU_IP_TUNNELS
+
+static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ struct flowi4 *fl4, u8 *protocol, __be16 sport)
+{
+ struct udphdr *uh;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+}
+
+static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
+ SKB_GSO_UDP_TUNNEL;
+ __be16 sport;
+ int err;
+
+ err = __fou_build_header(skb, e, protocol, &sport, type);
+ if (err)
+ return err;
+
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+
+static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
{
int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
SKB_GSO_UDP_TUNNEL;
@@ -1001,9 +1003,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
-EXPORT_SYMBOL(gue_build_header);
-#ifdef CONFIG_NET_FOU_IP_TUNNELS
static const struct ip_tunnel_encap_ops fou_iptun_ops = {
.encap_hlen = fou_encap_hlen,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 9144fa7df2ad..c2be26b98b5f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -489,7 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
- rt = __ip_route_output_key_hash(net, fl4, skb_in);
+ rt = ip_route_output_key_hash(net, fl4, skb_in);
if (IS_ERR(rt))
return rt;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1054d330bf9d..a3fa1a5b6d98 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -25,6 +25,7 @@
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
+#include <net/addrconf.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -789,7 +790,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
- newsk->sk_write_space = sk_stream_write_space;
/* listeners have SOCK_RCU_FREE, not the children */
sock_reset_flag(newsk, SOCK_RCU_FREE);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index baf196eaf1d8..90e11479c725 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -228,14 +228,16 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
static int ip_tun_build_state(struct nlattr *attr,
unsigned int family, const void *cfg,
- struct lwtunnel_state **ts)
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
{
struct ip_tunnel_info *tun_info;
struct lwtunnel_state *new_state;
struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
int err;
- err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL);
+ err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy,
+ extack);
if (err < 0)
return err;
@@ -325,7 +327,8 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
static int ip6_tun_build_state(struct nlattr *attr,
unsigned int family, const void *cfg,
- struct lwtunnel_state **ts)
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
{
struct ip_tunnel_info *tun_info;
struct lwtunnel_state *new_state;
@@ -333,7 +336,7 @@ static int ip6_tun_build_state(struct nlattr *attr,
int err;
err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy,
- NULL);
+ extack);
if (err < 0)
return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8ae425cad818..a1199895b8a6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2526,6 +2526,129 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
return ipmr_mfc_delete(tbl, &mfcc, parent);
}
+static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
+{
+ u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
+
+ if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
+ nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
+ nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
+ mrt->mroute_reg_vif_num) ||
+ nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
+ mrt->mroute_do_assert) ||
+ nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
+ return false;
+
+ return true;
+}
+
+static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
+{
+ struct nlattr *vif_nest;
+ struct vif_device *vif;
+
+ /* if the VIF doesn't exist just continue */
+ if (!VIF_EXISTS(mrt, vifid))
+ return true;
+
+ vif = &mrt->vif_table[vifid];
+ vif_nest = nla_nest_start(skb, IPMRA_VIF);
+ if (!vif_nest)
+ return false;
+ if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
+ nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
+ nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
+ nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
+ IPMRA_VIFA_PAD) ||
+ nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out,
+ IPMRA_VIFA_PAD) ||
+ nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in,
+ IPMRA_VIFA_PAD) ||
+ nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out,
+ IPMRA_VIFA_PAD) ||
+ nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
+ nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
+ nla_nest_cancel(skb, vif_nest);
+ return false;
+ }
+ nla_nest_end(skb, vif_nest);
+
+ return true;
+}
+
+static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlmsghdr *nlh = NULL;
+ unsigned int t = 0, s_t;
+ unsigned int e = 0, s_e;
+ struct mr_table *mrt;
+
+ s_t = cb->args[0];
+ s_e = cb->args[1];
+
+ ipmr_for_each_table(mrt, net) {
+ struct nlattr *vifs, *af;
+ struct ifinfomsg *hdr;
+ u32 i;
+
+ if (t < s_t)
+ goto skip_table;
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWLINK,
+ sizeof(*hdr), NLM_F_MULTI);
+ if (!nlh)
+ break;
+
+ hdr = nlmsg_data(nlh);
+ memset(hdr, 0, sizeof(*hdr));
+ hdr->ifi_family = RTNL_FAMILY_IPMR;
+
+ af = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!af) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ if (!ipmr_fill_table(mrt, skb)) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS);
+ if (!vifs) {
+ nla_nest_end(skb, af);
+ nlmsg_end(skb, nlh);
+ goto out;
+ }
+ for (i = 0; i < mrt->maxvif; i++) {
+ if (e < s_e)
+ goto skip_entry;
+ if (!ipmr_fill_vif(mrt, i, skb)) {
+ nla_nest_end(skb, vifs);
+ nla_nest_end(skb, af);
+ nlmsg_end(skb, nlh);
+ goto out;
+ }
+skip_entry:
+ e++;
+ }
+ s_e = 0;
+ e = 0;
+ nla_nest_end(skb, vifs);
+ nla_nest_end(skb, af);
+ nlmsg_end(skb, nlh);
+skip_table:
+ t++;
+ }
+
+out:
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+
#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
@@ -2868,6 +2991,9 @@ int __init ip_mr_init(void)
ipmr_rtm_route, NULL, NULL);
rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
ipmr_rtm_route, NULL, NULL);
+
+ rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
+ NULL, ipmr_rtm_dumplink, NULL);
return 0;
#ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 7cd8d0d918f8..6f8d9e5e062b 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -172,7 +172,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
struct iphdr *iph = ip_hdr(skb_in);
u8 proto;
- if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+ if (iph->frag_off & htons(IP_OFFSET))
return;
if (skb_csum_unnecessary(skb_in)) {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index fa44e752a9a3..43eb6567b3a0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -250,6 +250,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER),
SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED),
SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES),
+ SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO),
SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD),
SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD),
SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6883b3d4ba8f..9b38cf18144e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -114,6 +114,8 @@
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
+#include "fib_lookup.h"
+
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -1860,9 +1862,9 @@ static int ip_mkroute_input(struct sk_buff *skb,
*/
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+ u8 tos, struct net_device *dev,
+ struct fib_result *res)
{
- struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct ip_tunnel_info *tun_info;
struct flowi4 fl4;
@@ -1892,8 +1894,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
- res.fi = NULL;
- res.table = NULL;
+ res->fi = NULL;
+ res->table = NULL;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
goto brd_input;
@@ -1929,17 +1931,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- err = fib_lookup(net, &fl4, &res, 0);
+ err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
err = -EHOSTUNREACH;
goto no_route;
}
- if (res.type == RTN_BROADCAST)
+ if (res->type == RTN_BROADCAST)
goto brd_input;
- if (res.type == RTN_LOCAL) {
+ if (res->type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
0, dev, in_dev, &itag);
if (err < 0)
@@ -1951,10 +1953,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
- if (res.type != RTN_UNICAST)
+ if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
out: return err;
brd_input:
@@ -1968,14 +1970,14 @@ brd_input:
goto martian_source;
}
flags |= RTCF_BROADCAST;
- res.type = RTN_BROADCAST;
+ res->type = RTN_BROADCAST;
RT_CACHE_STAT_INC(in_brd);
local_input:
do_cache = false;
- if (res.fi) {
+ if (res->fi) {
if (!itag) {
- rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
err = 0;
@@ -1986,7 +1988,7 @@ local_input:
}
rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
- flags | RTCF_LOCAL, res.type,
+ flags | RTCF_LOCAL, res->type,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
if (!rth)
goto e_nobufs;
@@ -1996,18 +1998,18 @@ local_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res.table)
- rth->rt_table_id = res.table->tb_id;
+ if (res->table)
+ rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
- if (res.type == RTN_UNREACHABLE) {
+ if (res->type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
if (do_cache) {
- struct fib_nh *nh = &FIB_RES_NH(res);
+ struct fib_nh *nh = &FIB_RES_NH(*res);
rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@ -2027,9 +2029,9 @@ local_input:
no_route:
RT_CACHE_STAT_INC(in_no_route);
- res.type = RTN_UNREACHABLE;
- res.fi = NULL;
- res.table = NULL;
+ res->type = RTN_UNREACHABLE;
+ res->fi = NULL;
+ res->table = NULL;
goto local_input;
/*
@@ -2059,11 +2061,22 @@ martian_source:
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev)
{
- int res;
+ struct fib_result res;
+ int err;
tos &= IPTOS_RT_MASK;
rcu_read_lock();
+ err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(ip_route_input_noref);
+
+/* called with rcu_read_lock held */
+int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, struct fib_result *res)
+{
/* Multicast recognition logic is moved from route cache to here.
The problem was that too many Ethernet cards have broken/missing
hardware multicast filters :-( As result the host on multicasting
@@ -2078,6 +2091,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(daddr)) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
+ int err = -EINVAL;
if (in_dev)
our = ip_check_mc_rcu(in_dev, daddr, saddr,
@@ -2093,7 +2107,6 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
ip_hdr(skb)->protocol);
}
- res = -EINVAL;
if (our
#ifdef CONFIG_IP_MROUTE
||
@@ -2101,17 +2114,14 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
- res = ip_route_input_mc(skb, daddr, saddr,
+ err = ip_route_input_mc(skb, daddr, saddr,
tos, dev, our);
}
- rcu_read_unlock();
- return res;
+ return err;
}
- res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
- rcu_read_unlock();
- return res;
+
+ return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}
-EXPORT_SYMBOL(ip_route_input_noref);
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
@@ -2254,29 +2264,40 @@ add:
* Major route resolver routine.
*/
-struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- const struct sk_buff *skb)
+struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+ const struct sk_buff *skb)
{
- struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
- unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
- int orig_oif;
- int err = -ENETUNREACH;
res.tclassid = 0;
res.fi = NULL;
res.table = NULL;
- orig_oif = fl4->flowi4_oif;
-
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
rcu_read_lock();
+ rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
+ rcu_read_unlock();
+
+ return rth;
+}
+EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
+
+struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
+ struct fib_result *res,
+ const struct sk_buff *skb)
+{
+ struct net_device *dev_out = NULL;
+ int orig_oif = fl4->flowi4_oif;
+ unsigned int flags = 0;
+ struct rtable *rth;
+ int err = -ENETUNREACH;
+
if (fl4->saddr) {
rth = ERR_PTR(-EINVAL);
if (ipv4_is_multicast(fl4->saddr) ||
@@ -2362,15 +2383,15 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
dev_out = net->loopback_dev;
fl4->flowi4_oif = LOOPBACK_IFINDEX;
- res.type = RTN_LOCAL;
+ res->type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
}
- err = fib_lookup(net, fl4, &res, 0);
+ err = fib_lookup(net, fl4, res, 0);
if (err) {
- res.fi = NULL;
- res.table = NULL;
+ res->fi = NULL;
+ res->table = NULL;
if (fl4->flowi4_oif &&
(ipv4_is_multicast(fl4->daddr) ||
!netif_index_is_l3_master(net, fl4->flowi4_oif))) {
@@ -2395,43 +2416,41 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
if (fl4->saddr == 0)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_LINK);
- res.type = RTN_UNICAST;
+ res->type = RTN_UNICAST;
goto make_route;
}
rth = ERR_PTR(err);
goto out;
}
- if (res.type == RTN_LOCAL) {
+ if (res->type == RTN_LOCAL) {
if (!fl4->saddr) {
- if (res.fi->fib_prefsrc)
- fl4->saddr = res.fi->fib_prefsrc;
+ if (res->fi->fib_prefsrc)
+ fl4->saddr = res->fi->fib_prefsrc;
else
fl4->saddr = fl4->daddr;
}
/* L3 master device is the loopback for that domain */
- dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? :
+ dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
}
- fib_select_path(net, &res, fl4, skb);
+ fib_select_path(net, res, fl4, skb);
- dev_out = FIB_RES_DEV(res);
+ dev_out = FIB_RES_DEV(*res);
fl4->flowi4_oif = dev_out->ifindex;
make_route:
- rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
+ rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
out:
- rcu_read_unlock();
return rth;
}
-EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
@@ -2525,9 +2544,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
+/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
- u32 seq, int event)
+ u32 seq)
{
struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
@@ -2536,7 +2556,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
u32 error;
u32 metrics[RTAX_MAX];
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0);
+ nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
if (!nlh)
return -EMSGSIZE;
@@ -2644,6 +2664,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
+ struct fib_result res = {};
struct rtable *rt = NULL;
struct flowi4 fl4;
__be32 dst = 0;
@@ -2700,10 +2721,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
+ rcu_read_lock();
+
if (iif) {
struct net_device *dev;
- dev = __dev_get_by_index(net, iif);
+ dev = dev_get_by_index_rcu(net, iif);
if (!dev) {
err = -ENODEV;
goto errout_free;
@@ -2712,14 +2735,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->protocol = htons(ETH_P_IP);
skb->dev = dev;
skb->mark = mark;
- err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+ err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
+ dev, &res);
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
err = -rt->dst.error;
} else {
- rt = ip_route_output_key(net, &fl4);
-
+ rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
err = 0;
if (IS_ERR(rt))
err = PTR_ERR(rt);
@@ -2735,17 +2758,25 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
table_id = rt->rt_table_id;
- err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- RTM_NEWROUTE);
+ if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+ err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
+ rt->rt_type, res.prefix, res.prefixlen,
+ fl4.flowi4_tos, res.fi, 0);
+ else
+ err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
+ NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
if (err < 0)
goto errout_free;
+ rcu_read_unlock();
+
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
return err;
errout_free:
+ rcu_read_unlock();
kfree_skb(skb);
goto errout;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0257d965f111..7835bb4a1fab 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -66,10 +66,10 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
* Since subsequent timestamps use the normal tcp_time_stamp value, we
* must make sure that the resulting initial timestamp is <= tcp_time_stamp.
*/
-__u32 cookie_init_timestamp(struct request_sock *req)
+u64 cookie_init_timestamp(struct request_sock *req)
{
struct inet_request_sock *ireq;
- u32 ts, ts_now = tcp_time_stamp;
+ u32 ts, ts_now = tcp_time_stamp_raw();
u32 options = 0;
ireq = inet_rsk(req);
@@ -88,7 +88,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
ts <<= TSBITS;
ts |= options;
}
- return ts;
+ return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
}
@@ -232,7 +232,8 @@ EXPORT_SYMBOL(tcp_get_cookie_sock);
* return false if we decode a tcp option that is disabled
* on the host.
*/
-bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
+bool cookie_timestamp_decode(const struct net *net,
+ struct tcp_options_received *tcp_opt)
{
/* echoed timestamp, lowest bits contain options */
u32 options = tcp_opt->rcv_tsecr;
@@ -242,12 +243,12 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
return true;
}
- if (!sysctl_tcp_timestamps)
+ if (!net->ipv4.sysctl_tcp_timestamps)
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
- if (tcp_opt->sack_ok && !sysctl_tcp_sack)
+ if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -256,7 +257,7 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
- return sysctl_tcp_window_scaling != 0;
+ return net->ipv4.sysctl_tcp_window_scaling != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
@@ -312,14 +313,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+ tsoff = secure_tcp_ts_off(sock_net(sk),
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(&tcp_opt))
+ if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
ret = NULL;
@@ -343,7 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack.v64 = 0;
+ treq->snt_synack = 0;
treq->tfo_listener = false;
ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 86957e9cd6c6..7065234a89a5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -365,27 +365,6 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
static struct ctl_table ipv4_table[] = {
{
- .procname = "tcp_timestamps",
- .data = &sysctl_tcp_timestamps,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_window_scaling",
- .data = &sysctl_tcp_window_scaling,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_sack",
- .data = &sysctl_tcp_sack,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_retrans_collapse",
.data = &sysctl_tcp_retrans_collapse,
.maxlen = sizeof(int),
@@ -1116,6 +1095,27 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = &one,
},
#endif
+ {
+ .procname = "tcp_sack",
+ .data = &init_net.ipv4.sysctl_tcp_sack,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_window_scaling",
+ .data = &init_net.ipv4.sysctl_tcp_window_scaling,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_timestamps",
+ .data = &init_net.ipv4.sysctl_tcp_timestamps,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b5ea036ca781..cc8fd8b747a4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -320,17 +320,36 @@ struct tcp_splice_state {
* All the __sk_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
-int tcp_memory_pressure __read_mostly;
-EXPORT_SYMBOL(tcp_memory_pressure);
+unsigned long tcp_memory_pressure __read_mostly;
+EXPORT_SYMBOL_GPL(tcp_memory_pressure);
void tcp_enter_memory_pressure(struct sock *sk)
{
- if (!tcp_memory_pressure) {
+ unsigned long val;
+
+ if (tcp_memory_pressure)
+ return;
+ val = jiffies;
+
+ if (!val)
+ val--;
+ if (!cmpxchg(&tcp_memory_pressure, 0, val))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
- tcp_memory_pressure = 1;
- }
}
-EXPORT_SYMBOL(tcp_enter_memory_pressure);
+EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure);
+
+void tcp_leave_memory_pressure(struct sock *sk)
+{
+ unsigned long val;
+
+ if (!tcp_memory_pressure)
+ return;
+ val = xchg(&tcp_memory_pressure, 0);
+ if (val)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO,
+ jiffies_to_msecs(jiffies - val));
+}
+EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure);
/* Convert seconds to retransmits based on initial and max timeout */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
@@ -386,7 +405,7 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
- minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U);
+ minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -2186,7 +2205,7 @@ adjudge_to_death:
/* Now socket is owned by kernel and we acquire BH lock
- to finish close. No need to check for user refs.
+ * to finish close. No need to check for user refs.
*/
local_bh_disable();
bh_lock_sock(sk);
@@ -2480,7 +2499,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_MAXSEG:
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
- * know which interface is going to be used */
+ * know which interface is going to be used
+ */
if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
@@ -2715,7 +2735,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (!tp->repair)
err = -EPERM;
else
- tp->tsoffset = val - tcp_time_stamp;
+ tp->tsoffset = val - tcp_time_stamp_raw();
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
@@ -2766,7 +2786,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
stats[i] = tp->chrono_stat[i - 1];
if (i == tp->chrono_type)
- stats[i] += tcp_time_stamp - tp->chrono_start;
+ stats[i] += tcp_jiffies32 - tp->chrono_start;
stats[i] *= USEC_PER_SEC / HZ;
total += stats[i];
}
@@ -2850,7 +2870,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_retrans = tp->retrans_out;
info->tcpi_fackets = tp->fackets_out;
- now = tcp_time_stamp;
+ now = tcp_jiffies32;
info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
@@ -3081,7 +3101,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp + tp->tsoffset;
+ val = tcp_time_stamp_raw() + tp->tsoffset;
break;
case TCP_NOTSENT_LOWAT:
val = tp->notsent_lowat;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index b89bce4c721e..dbcc9352a48f 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -52,10 +52,9 @@
* There is a public e-mail list for discussing BBR development and testing:
* https://groups.google.com/forum/#!forum/bbr-dev
*
- * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
- * since pacing is integral to the BBR design and implementation.
- * BBR without pacing would not function properly, and may incur unnecessary
- * high packet loss rates.
+ * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
+ * otherwise TCP stack falls back to an internal pacing using one high
+ * resolution timer per TCP socket and may use more resources.
*/
#include <linux/module.h>
#include <net/tcp.h>
@@ -92,7 +91,7 @@ struct bbr {
struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */
u32 rtt_cnt; /* count of packet-timed rounds elapsed */
u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
- struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */
+ u64 cycle_mstamp; /* time of this cycle phase start */
u32 mode:3, /* current bbr_mode in state machine */
prev_ca_state:3, /* CA state on previous ACK */
packet_conservation:1, /* use packet conservation? */
@@ -412,7 +411,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool is_full_length =
- skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
+ tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
bbr->min_rtt_us;
u32 inflight, bw;
@@ -498,7 +497,7 @@ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
+ bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
bbr->lt_last_delivered = tp->delivered;
bbr->lt_last_lost = tp->lost;
bbr->lt_rtt_cnt = 0;
@@ -552,7 +551,7 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
struct bbr *bbr = inet_csk_ca(sk);
u32 lost, delivered;
u64 bw;
- s32 t;
+ u32 t;
if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */
if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
@@ -604,15 +603,15 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
return;
/* Find average delivery rate in this sampling interval. */
- t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
- if (t < 1)
- return; /* interval is less than one jiffy, so wait */
- t = jiffies_to_usecs(t);
- /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
- if (t < 1) {
+ t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
+ if ((s32)t < 1)
+ return; /* interval is less than one ms, so wait */
+ /* Check if can multiply without overflow */
+ if (t >= ~0U / USEC_PER_MSEC) {
bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */
return;
}
+ t *= USEC_PER_MSEC;
bw = (u64)delivered * BW_UNIT;
do_div(bw, t);
bbr_lt_bw_interval_done(sk, bw);
@@ -731,12 +730,12 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
bool filter_expired;
/* Track min RTT seen in the min_rtt_win_sec filter window: */
- filter_expired = after(tcp_time_stamp,
+ filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
(rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
bbr->min_rtt_us = rs->rtt_us;
- bbr->min_rtt_stamp = tcp_time_stamp;
+ bbr->min_rtt_stamp = tcp_jiffies32;
}
if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
@@ -755,7 +754,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
/* Maintain min packets in flight for max(200 ms, 1 round). */
if (!bbr->probe_rtt_done_stamp &&
tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
- bbr->probe_rtt_done_stamp = tcp_time_stamp +
+ bbr->probe_rtt_done_stamp = tcp_jiffies32 +
msecs_to_jiffies(bbr_probe_rtt_mode_ms);
bbr->probe_rtt_round_done = 0;
bbr->next_rtt_delivered = tp->delivered;
@@ -763,8 +762,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
if (bbr->round_start)
bbr->probe_rtt_round_done = 1;
if (bbr->probe_rtt_round_done &&
- after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) {
- bbr->min_rtt_stamp = tcp_time_stamp;
+ after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
+ bbr->min_rtt_stamp = tcp_jiffies32;
bbr->restore_cwnd = 1; /* snap to prior_cwnd */
bbr_reset_mode(sk);
}
@@ -811,7 +810,7 @@ static void bbr_init(struct sock *sk)
bbr->probe_rtt_done_stamp = 0;
bbr->probe_rtt_round_done = 0;
bbr->min_rtt_us = tcp_min_rtt(tp);
- bbr->min_rtt_stamp = tcp_time_stamp;
+ bbr->min_rtt_stamp = tcp_jiffies32;
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
@@ -826,10 +825,12 @@ static void bbr_init(struct sock *sk)
bbr->idle_restart = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
- bbr->cycle_mstamp.v64 = 0;
+ bbr->cycle_mstamp = 0;
bbr->cycle_idx = 0;
bbr_reset_lt_bw_sampling(sk);
bbr_reset_startup_mode(sk);
+
+ cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}
static u32 bbr_sndbuf_expand(struct sock *sk)
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 36087bca9f48..609965f0e298 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -84,14 +84,14 @@ static void bictcp_init(struct sock *sk)
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
if (ca->last_cwnd == cwnd &&
- (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+ (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
return;
ca->last_cwnd = cwnd;
- ca->last_time = tcp_time_stamp;
+ ca->last_time = tcp_jiffies32;
if (ca->epoch_start == 0) /* record the beginning of an epoch */
- ca->epoch_start = tcp_time_stamp;
+ ca->epoch_start = tcp_jiffies32;
/* start off normal */
if (cwnd <= low_window) {
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0683ba447d77..57ae5b5ae643 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -155,7 +155,7 @@ static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
- u32 now = tcp_time_stamp;
+ u32 now = tcp_jiffies32;
s32 delta;
delta = now - tcp_sk(sk)->lsndtime;
@@ -231,21 +231,21 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
ca->ack_cnt += acked; /* count the number of ACKed packets */
if (ca->last_cwnd == cwnd &&
- (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+ (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
return;
/* The CUBIC function can update ca->cnt at most once per jiffy.
* On all cwnd reduction events, ca->epoch_start is set to 0,
* which will force a recalculation of ca->cnt.
*/
- if (ca->epoch_start && tcp_time_stamp == ca->last_time)
+ if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
goto tcp_friendliness;
ca->last_cwnd = cwnd;
- ca->last_time = tcp_time_stamp;
+ ca->last_time = tcp_jiffies32;
if (ca->epoch_start == 0) {
- ca->epoch_start = tcp_time_stamp; /* record beginning */
+ ca->epoch_start = tcp_jiffies32; /* record beginning */
ca->ack_cnt = acked; /* start counting */
ca->tcp_cwnd = cwnd; /* syn with cubic */
@@ -276,7 +276,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
* if the cwnd < 1 million packets !!!
*/
- t = (s32)(tcp_time_stamp - ca->epoch_start);
+ t = (s32)(tcp_jiffies32 - ca->epoch_start);
t += msecs_to_jiffies(ca->delay_min >> 3);
/* change the unit from HZ to bictcp_HZ */
t <<= BICTCP_HZ;
@@ -448,7 +448,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
return;
/* Discard delay samples right after fast recovery */
- if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+ if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
return;
delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a4d8e76738f..3eb78cde6ff0 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -104,7 +104,7 @@ static void measure_achieved_throughput(struct sock *sk,
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
- u32 now = tcp_time_stamp;
+ u32 now = tcp_jiffies32;
if (icsk->icsk_ca_state == TCP_CA_Open)
ca->pkts_acked = sample->pkts_acked;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 174d4376baa5..2ab7e2fa9bb9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -76,9 +76,6 @@
#include <asm/unaligned.h>
#include <linux/errqueue.h>
-int sysctl_tcp_timestamps __read_mostly = 1;
-int sysctl_tcp_window_scaling __read_mostly = 1;
-int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly;
int sysctl_tcp_max_reordering __read_mostly = 300;
int sysctl_tcp_dsack __read_mostly = 1;
@@ -112,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
+#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -441,7 +439,7 @@ void tcp_init_buffer_space(struct sock *sk)
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
@@ -463,7 +461,7 @@ void tcp_init_buffer_space(struct sock *sk)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
/* 5. Recalculate window clamp after socket hit its memory bounds. */
@@ -555,11 +553,11 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
u32 delta_us;
- if (tp->rcv_rtt_est.time.v64 == 0)
+ if (tp->rcv_rtt_est.time == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
- delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+ delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
@@ -571,13 +569,15 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+
if (tp->rx_opt.rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
- TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
- tcp_rcv_rtt_update(tp,
- jiffies_to_usecs(tcp_time_stamp -
- tp->rx_opt.rcv_tsecr),
- 0);
+ TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+ u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
/*
@@ -590,7 +590,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
int time;
int copied;
- time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+ time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
@@ -672,7 +672,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_rcv_rtt_measure(tp);
- now = tcp_time_stamp;
+ now = tcp_jiffies32;
if (!icsk->icsk_ack.ato) {
/* The _first_ data packet received, initialize
@@ -885,6 +885,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
struct tcp_sock *tp = tcp_sk(sk);
int mib_idx;
+ if (WARN_ON_ONCE(metric < 0))
+ return;
+
if (metric > tp->reordering) {
tp->reordering = min(sysctl_tcp_max_reordering, metric);
@@ -1134,8 +1137,8 @@ struct tcp_sacktag_state {
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
*/
- struct skb_mstamp first_sackt;
- struct skb_mstamp last_sackt;
+ u64 first_sackt;
+ u64 last_sackt;
struct rate_sample *rate;
int flag;
};
@@ -1200,7 +1203,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
struct tcp_sacktag_state *state, u8 sacked,
u32 start_seq, u32 end_seq,
int dup_sack, int pcount,
- const struct skb_mstamp *xmit_time)
+ u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
int fack_count = state->fack_count;
@@ -1242,9 +1245,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
- if (state->first_sackt.v64 == 0)
- state->first_sackt = *xmit_time;
- state->last_sackt = *xmit_time;
+ if (state->first_sackt == 0)
+ state->first_sackt = xmit_time;
+ state->last_sackt = xmit_time;
}
if (sacked & TCPCB_LOST) {
@@ -1304,7 +1307,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
*/
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
- &skb->skb_mstamp);
+ skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
if (skb == tp->lost_skb_hint)
@@ -1356,8 +1359,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tcp_advance_highest_sack(sk, skb);
tcp_skb_collapse_tstamp(prev, skb);
- if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
- TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+ if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
+ TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
@@ -1587,7 +1590,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
TCP_SKB_CB(skb)->end_seq,
dup_sack,
tcp_skb_pcount(skb),
- &skb->skb_mstamp);
+ skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
if (!before(TCP_SKB_CB(skb)->seq,
@@ -1954,7 +1957,7 @@ void tcp_enter_loss(struct sock *sk)
}
tp->snd_cwnd = 1;
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->retrans_out = 0;
tp->lost_out = 0;
@@ -2383,7 +2386,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
tcp_ecn_withdraw_cwr(tp);
}
}
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->undo_marker = 0;
}
@@ -2520,7 +2523,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
(tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
tp->snd_cwnd = tp->snd_ssthresh;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
@@ -2590,7 +2593,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
tcp_mss_to_mtu(sk, tp->mss_cache) /
icsk->icsk_mtup.probe_size;
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
@@ -2911,13 +2914,13 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
struct tcp_sock *tp = tcp_sk(sk);
u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
- minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+ minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
}
-static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
- long seq_rtt_us, long sack_rtt_us,
- long ca_rtt_us)
+static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+ long seq_rtt_us, long sack_rtt_us,
+ long ca_rtt_us, struct rate_sample *rs)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -2936,9 +2939,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
- flag & FLAG_ACKED)
- seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
- tp->rx_opt.rcv_tsecr);
+ flag & FLAG_ACKED) {
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+ u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+ seq_rtt_us = ca_rtt_us = delta_us;
+ }
+ rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
return false;
@@ -2958,16 +2965,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
{
+ struct rate_sample rs;
long rtt_us = -1L;
- if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
- struct skb_mstamp now;
+ if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
+ rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
- skb_mstamp_get(&now);
- rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
- }
-
- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
}
@@ -2976,7 +2980,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
const struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
- tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
+ tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
}
/* Restart timer after forward progress on connection.
@@ -3001,14 +3005,14 @@ void tcp_rearm_rto(struct sock *sk)
if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
struct sk_buff *skb = tcp_write_queue_head(sk);
- const u32 rto_time_stamp =
- tcp_skb_timestamp(skb) + rto;
- s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
- /* delta may not be positive if the socket is locked
+ u64 rto_time_stamp = skb->skb_mstamp +
+ jiffies_to_usecs(rto);
+ s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+ /* delta_us may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta > 0)
- rto = delta;
+ if (delta_us > 0)
+ rto = usecs_to_jiffies(delta_us);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
@@ -3060,9 +3064,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct skb_mstamp first_ackt, last_ackt;
+ u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
- struct skb_mstamp *now = &tp->tcp_mstamp;
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
@@ -3075,7 +3078,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
bool rtt_update;
int flag = 0;
- first_ackt.v64 = 0;
+ first_ackt = 0;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3106,8 +3109,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
flag |= FLAG_RETRANS_DATA_ACKED;
} else if (!(sacked & TCPCB_SACKED_ACKED)) {
last_ackt = skb->skb_mstamp;
- WARN_ON_ONCE(last_ackt.v64 == 0);
- if (!first_ackt.v64)
+ WARN_ON_ONCE(last_ackt == 0);
+ if (!first_ackt)
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
@@ -3122,7 +3125,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
- &skb->skb_mstamp);
+ skb->skb_mstamp);
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@ -3165,17 +3168,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
- seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
- ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
+ if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
+ seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
+ ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
}
- if (sack->first_sackt.v64) {
- sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
- ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
+ if (sack->first_sackt) {
+ sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
+ ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
}
- sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
- ca_rtt_us);
+ ca_rtt_us, sack->rate);
if (flag & FLAG_ACKED) {
tcp_rearm_rto(sk);
@@ -3201,7 +3203,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
- sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
+ sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
@@ -3211,7 +3213,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (icsk->icsk_ca_ops->pkts_acked) {
struct ack_sample sample = { .pkts_acked = pkts_acked,
- .rtt_us = ca_rtt_us,
+ .rtt_us = sack->rate->rtt_us,
.in_flight = last_in_flight };
icsk->icsk_ca_ops->pkts_acked(sk, &sample);
@@ -3390,7 +3392,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
u32 *last_oow_ack_time)
{
if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+ s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx);
@@ -3398,7 +3400,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
}
}
- *last_oow_ack_time = tcp_time_stamp;
+ *last_oow_ack_time = tcp_jiffies32;
return false; /* not rate-limited: go ahead, send dupack now! */
}
@@ -3553,7 +3555,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
- sack_state.first_sackt.v64 = 0;
+ sack_state.first_sackt = 0;
sack_state.rate = &rs;
/* We very likely will need to access write queue head. */
@@ -3565,7 +3567,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una)) {
/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
if (before(ack, prior_snd_una - tp->max_window)) {
- tcp_send_challenge_ack(sk, skb);
+ if (!(flag & FLAG_NO_CHALLENGE_ACK))
+ tcp_send_challenge_ack(sk, skb);
return -1;
}
goto old_ack;
@@ -3636,7 +3639,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
*/
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
- tp->rcv_tstamp = tcp_time_stamp;
+ tp->rcv_tstamp = tcp_jiffies32;
if (!prior_packets)
goto no_queue;
@@ -3718,7 +3721,8 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
-void tcp_parse_options(const struct sk_buff *skb,
+void tcp_parse_options(const struct net *net,
+ const struct sk_buff *skb,
struct tcp_options_received *opt_rx, int estab,
struct tcp_fastopen_cookie *foc)
{
@@ -3759,7 +3763,7 @@ void tcp_parse_options(const struct sk_buff *skb,
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && sysctl_tcp_window_scaling) {
+ !estab && net->ipv4.sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
@@ -3775,7 +3779,7 @@ void tcp_parse_options(const struct sk_buff *skb,
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
- (!estab && sysctl_tcp_timestamps))) {
+ (!estab && net->ipv4.sysctl_tcp_timestamps))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3783,7 +3787,7 @@ void tcp_parse_options(const struct sk_buff *skb,
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && sysctl_tcp_sack) {
+ !estab && net->ipv4.sysctl_tcp_sack) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
@@ -3852,7 +3856,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
/* Fast parse options. This hopes to only see timestamps.
* If it is wrong it falls back on tcp_parse_options().
*/
-static bool tcp_fast_parse_options(const struct sk_buff *skb,
+static bool tcp_fast_parse_options(const struct net *net,
+ const struct sk_buff *skb,
const struct tcphdr *th, struct tcp_sock *tp)
{
/* In the spirit of fast parsing, compare doff directly to constant
@@ -3867,7 +3872,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
return true;
}
- tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
+ tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@@ -5019,7 +5024,7 @@ static void tcp_new_space(struct sock *sk)
if (tcp_should_expand_sndbuf(sk)) {
tcp_sndbuf_expand(sk);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
sk->sk_write_space(sk);
@@ -5228,7 +5233,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
bool rst_seq_match = false;
/* RFC1323: H1. Apply PAWS check first. */
- if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+ if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
+ tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5356,7 +5362,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
@@ -5554,7 +5560,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_set_state(sk, TCP_ESTABLISHED);
- icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ icsk->icsk_ack.lrcvtime = tcp_jiffies32;
if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5571,7 +5577,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
- tp->lsndtime = tcp_time_stamp;
+ tp->lsndtime = tcp_jiffies32;
tcp_init_buffer_space(sk);
@@ -5599,7 +5605,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
tcp_clear_options(&opt);
opt.user_mss = opt.mss_clamp = 0;
- tcp_parse_options(synack, &opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
mss = opt.mss_clamp;
}
@@ -5653,7 +5659,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
- tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
+ tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@@ -5672,7 +5678,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
- tcp_time_stamp)) {
+ tcp_time_stamp(tp))) {
NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
@@ -5917,7 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_SYN_SENT:
tp->rx_opt.saw_tstamp = 0;
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
queued = tcp_rcv_synsent_state_process(sk, skb, th);
if (queued >= 0)
return queued;
@@ -5929,7 +5935,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
}
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
@@ -5948,13 +5954,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
/* step 5: check the ACK field */
acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
- FLAG_UPDATE_TS_RECENT) > 0;
+ FLAG_UPDATE_TS_RECENT |
+ FLAG_NO_CHALLENGE_ACK) > 0;
+ if (!acceptable) {
+ if (sk->sk_state == TCP_SYN_RECV)
+ return 1; /* send one RST */
+ tcp_send_challenge_ack(sk, skb);
+ goto discard;
+ }
switch (sk->sk_state) {
case TCP_SYN_RECV:
- if (!acceptable)
- return 1;
-
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
@@ -6008,7 +6018,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_update_pacing_rate(sk);
/* Prevent spurious tcp_cwnd_restart() on first data packet */
- tp->lsndtime = tcp_time_stamp;
+ tp->lsndtime = tcp_jiffies32;
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
@@ -6023,14 +6033,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
* our SYNACK so stop the SYNACK timer.
*/
if (req) {
- /* Return RST if ack_seq is invalid.
- * Note that RFC793 only says to generate a
- * DUPACK for it but for TCP Fast Open it seems
- * better to treat this case like TCP_SYN_RECV
- * above.
- */
- if (!acceptable)
- return 1;
/* We no longer need the request sock. */
reqsk_fastopen_remove(sk, req, false);
tcp_rearm_rto(sk);
@@ -6202,7 +6204,7 @@ static void tcp_openreq_init(struct request_sock *req,
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- skb_mstamp_get(&tcp_rsk(req)->snt_synack);
+ tcp_rsk(req)->snt_synack = tcp_clock_us();
tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
@@ -6330,7 +6332,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
+ want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -6348,7 +6351,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_free;
if (tmp_opt.tstamp_ok)
- tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb);
+ tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
if (!want_cookie && !isn) {
/* Kill the following clause, if you dislike this way. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5ab2aac5ca19..1dc8c449e16a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -102,10 +102,9 @@ static u32 tcp_v4_init_seq(const struct sk_buff *skb)
tcp_hdr(skb)->source);
}
-static u32 tcp_v4_init_ts_off(const struct sk_buff *skb)
+static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
- return secure_tcp_ts_off(ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr);
+ return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -242,7 +241,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
usin->sin_port);
- tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr,
+ tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
+ inet->inet_saddr,
inet->inet_daddr);
}
@@ -376,8 +376,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
struct sock *sk;
struct sk_buff *skb;
struct request_sock *fastopen;
- __u32 seq, snd_una;
- __u32 remaining;
+ u32 seq, snd_una;
+ s32 remaining;
+ u32 delta_us;
int err;
struct net *net = dev_net(icmp_skb->dev);
@@ -483,11 +484,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
skb = tcp_write_queue_head(sk);
BUG_ON(!skb);
+ tcp_mstamp_refresh(tp);
+ delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
remaining = icsk->icsk_rto -
- min(icsk->icsk_rto,
- tcp_time_stamp - tcp_skb_timestamp(skb));
+ usecs_to_jiffies(delta_us);
- if (remaining) {
+ if (remaining > 0) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
} else {
@@ -811,7 +813,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcp_time_stamp + tcptw->tw_ts_offset,
+ tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
@@ -839,7 +841,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp + tcp_rsk(req)->ts_off,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -2385,6 +2387,7 @@ struct proto tcp_prot = {
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
+ .leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
@@ -2463,6 +2466,9 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
+ net->ipv4.sysctl_tcp_sack = 1;
+ net->ipv4.sysctl_tcp_window_scaling = 1;
+ net->ipv4.sysctl_tcp_timestamps = 1;
return 0;
fail:
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index d6fb6c067af4..ae10ed64fe13 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -37,7 +37,7 @@
#include <net/tcp.h>
/* resolution of owd */
-#define LP_RESOL 1000
+#define LP_RESOL TCP_TS_HZ
/**
* enum tcp_lp_state
@@ -147,9 +147,9 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
tp->rx_opt.rcv_tsecr == lp->local_ref_time)
goto out;
- m = HZ * (tp->rx_opt.rcv_tsval -
- lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr -
- lp->local_ref_time);
+ m = TCP_TS_HZ *
+ (tp->rx_opt.rcv_tsval - lp->remote_ref_time) /
+ (tp->rx_opt.rcv_tsecr - lp->local_ref_time);
if (m < 0)
m = -m;
@@ -194,7 +194,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
if (lp->flag & LP_VALID_RHZ) {
owd =
tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
- tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ);
+ tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ);
if (owd < 0)
owd = -owd;
}
@@ -264,18 +264,19 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ u32 now = tcp_time_stamp(tp);
u32 delta;
if (sample->rtt_us > 0)
tcp_lp_rtt_sample(sk, sample->rtt_us);
/* calc inference */
- delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ delta = now - tp->rx_opt.rcv_tsecr;
if ((s32)delta > 0)
lp->inference = 3 * delta;
/* test if within inference */
- if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
+ if (lp->last_drop && (now - lp->last_drop < lp->inference))
lp->flag |= LP_WITHIN_INF;
else
lp->flag &= ~LP_WITHIN_INF;
@@ -312,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
/* record this drop time */
- lp->last_drop = tcp_time_stamp;
+ lp->last_drop = now;
}
static struct tcp_congestion_ops tcp_lp __read_mostly = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 653bbd67e3a3..102b2c90bb80 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -524,7 +524,7 @@ reset:
tp->snd_cwnd = 1;
else
tp->snd_cwnd = tcp_init_cwnd(tp, dst);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 717be4de5324..d30ee31e94eb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
- tcp_parse_options(skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
if (tmp_opt.rcv_tsecr)
@@ -445,9 +445,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->srtt_us = 0;
newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
- minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
+ minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
- newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
newtp->packets_out = 0;
newtp->retrans_out = 0;
@@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fackets_out = 0;
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
newtp->tlp_high_seq = 0;
- newtp->lsndtime = treq->snt_synack.stamp_jiffies;
+ newtp->lsndtime = tcp_jiffies32;
newsk->sk_txhash = treq->txhash;
newtp->last_oow_ack_time = 0;
newtp->total_retrans = req->num_retrans;
@@ -526,7 +526,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
- newtp->rack.mstamp.v64 = 0;
+ newtp->rack.mstamp = 0;
newtp->rack.advanced = 0;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
@@ -559,7 +559,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
- tcp_parse_options(skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 5de82a8d4d87..6d650ed3cb59 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -424,8 +424,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
}
/* Extract info for Tcp socket info provided via netlink */
-size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
- union tcp_cc_info *info)
+static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
{
const struct tcpnv *ca = inet_csk_ca(sk);
@@ -440,7 +440,6 @@ size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
}
return 0;
}
-EXPORT_SYMBOL_GPL(tcpnv_get_info);
static struct tcp_congestion_ops tcpnv __read_mostly = {
.init = tcpnv_init,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4858e190f6ac..9a9c395b6235 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
cwnd >>= 1;
tp->snd_cwnd = max(cwnd, restart_cwnd);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_cwnd_used = 0;
}
@@ -160,7 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- const u32 now = tcp_time_stamp;
+ const u32 now = tcp_jiffies32;
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
@@ -569,18 +569,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sysctl_tcp_timestamps && !*md5)) {
+ if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
- if (likely(sysctl_tcp_window_scaling)) {
+ if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
- if (likely(sysctl_tcp_sack)) {
+ if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -904,6 +904,72 @@ out:
sk_free(sk);
}
+/* Note: Called under hard irq.
+ * We can not call TCP stack right away.
+ */
+enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
+{
+ struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
+ struct sock *sk = (struct sock *)tp;
+ unsigned long nval, oval;
+
+ for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
+ struct tsq_tasklet *tsq;
+ bool empty;
+
+ if (oval & TSQF_QUEUED)
+ break;
+
+ nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+ nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+ if (nval != oval)
+ continue;
+
+ if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+ break;
+ /* queue this socket to tasklet queue */
+ tsq = this_cpu_ptr(&tsq_tasklet);
+ empty = list_empty(&tsq->head);
+ list_add(&tp->tsq_node, &tsq->head);
+ if (empty)
+ tasklet_schedule(&tsq->tasklet);
+ break;
+ }
+ return HRTIMER_NORESTART;
+}
+
+/* BBR congestion control needs pacing.
+ * Same remark for SO_MAX_PACING_RATE.
+ * sch_fq packet scheduler is efficiently handling pacing,
+ * but is not always installed/used.
+ * Return true if TCP stack should pace packets itself.
+ */
+static bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+ return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
+static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+{
+ u64 len_ns;
+ u32 rate;
+
+ if (!tcp_needs_internal_pacing(sk))
+ return;
+ rate = sk->sk_pacing_rate;
+ if (!rate || rate == ~0U)
+ return;
+
+ /* Should account for header sizes as sch_fq does,
+ * but let's make things simple.
+ */
+ len_ns = (u64)skb->len * NSEC_PER_SEC;
+ do_div(len_ns, rate);
+ hrtimer_start(&tcp_sk(sk)->pacing_timer,
+ ktime_add_ns(ktime_get(), len_ns),
+ HRTIMER_MODE_ABS_PINNED);
+}
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@@ -931,8 +997,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb));
tp = tcp_sk(sk);
+ skb->skb_mstamp = tp->tcp_mstamp;
if (clone_it) {
- skb_mstamp_get(&skb->skb_mstamp);
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
tcp_rate_skb_sent(sk, skb);
@@ -1034,6 +1100,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
tp->data_segs_out += tcp_skb_pcount(skb);
+ tcp_internal_pacing(sk, skb);
}
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -1261,9 +1328,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
return 0;
}
-/* This is similar to __pskb_pull_head() (it will go to core/skbuff.c
- * eventually). The difference is that pulled data not copied, but
- * immediately discarded.
+/* This is similar to __pskb_pull_tail(). The difference is that pulled
+ * data is not copied, but immediately discarded.
*/
static int __pskb_trim_head(struct sk_buff *skb, int len)
{
@@ -1298,7 +1364,6 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
}
shinfo->nr_frags = k;
- skb_reset_tail_pointer(skb);
skb->data_len -= len;
skb->len = skb->data_len;
return len;
@@ -1408,7 +1473,7 @@ void tcp_mtup_init(struct sock *sk)
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
- icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
}
EXPORT_SYMBOL(tcp_mtup_init);
@@ -1509,7 +1574,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
}
tp->snd_cwnd_used = 0;
}
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
@@ -1530,14 +1595,14 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tcp_is_cwnd_limited(sk)) {
/* Network is feed fully. */
tp->snd_cwnd_used = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
} else {
/* Network starves. */
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
if (sysctl_tcp_slow_start_after_idle &&
- (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
+ (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1839,7 +1904,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 age, send_win, cong_win, limit, in_flight;
struct tcp_sock *tp = tcp_sk(sk);
- struct skb_mstamp now;
struct sk_buff *head;
int win_divisor;
@@ -1852,7 +1916,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
/* Avoid bursty behavior by allowing defer
* only if the last write was recent.
*/
- if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
+ if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
@@ -1895,8 +1959,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
}
head = tcp_write_queue_head(sk);
- skb_mstamp_get(&now);
- age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
+
+ age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
goto send_now;
@@ -1921,7 +1985,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
s32 delta;
interval = net->ipv4.sysctl_tcp_probe_interval;
- delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+ delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -1933,7 +1997,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
/* Update probe time stamp */
- icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
}
}
@@ -2086,6 +2150,12 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
}
+static bool tcp_pacing_check(const struct sock *sk)
+{
+ return tcp_needs_internal_pacing(sk) &&
+ hrtimer_active(&tcp_sk(sk)->pacing_timer);
+}
+
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* (These limits are doubled for retransmits)
@@ -2130,7 +2200,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
{
- const u32 now = tcp_time_stamp;
+ const u32 now = tcp_jiffies32;
if (tp->chrono_type > TCP_CHRONO_UNSPEC)
tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
@@ -2207,15 +2277,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
max_segs = tcp_tso_segs(sk, mss_now);
+ tcp_mstamp_refresh(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
+ if (tcp_pacing_check(sk))
+ break;
+
tso_segs = tcp_init_tso_segs(skb, mss_now);
BUG_ON(!tso_segs);
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- skb_mstamp_get(&skb->skb_mstamp);
+ skb->skb_mstamp = tp->tcp_mstamp;
goto repair; /* Skip network transmission */
}
@@ -2342,10 +2416,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
timeout = max_t(u32, timeout, msecs_to_jiffies(10));
/* If RTO is shorter, just schedule TLP in its place. */
- tlp_time_stamp = tcp_time_stamp + timeout;
+ tlp_time_stamp = tcp_jiffies32 + timeout;
rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
- s32 delta = rto_time_stamp - tcp_time_stamp;
+ s32 delta = rto_time_stamp - tcp_jiffies32;
if (delta > 0)
timeout = delta;
}
@@ -2803,7 +2877,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- skb_mstamp_get(&skb->skb_mstamp);
+ skb->skb_mstamp = tp->tcp_mstamp;
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
@@ -2878,6 +2952,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (skb == tcp_send_head(sk))
break;
+
+ if (tcp_pacing_check(sk))
+ break;
+
/* we could do better than to assign each time */
if (!hole)
tp->retransmit_skb_hint = skb;
@@ -3015,7 +3093,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
skb_reserve(skb, MAX_TCP_HEADER);
tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
TCPHDR_ACK | TCPHDR_RST);
- skb_mstamp_get(&skb->skb_mstamp);
+ tcp_mstamp_refresh(tcp_sk(sk));
/* Send it off. */
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
@@ -3111,10 +3189,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
- skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
+ skb->skb_mstamp = cookie_init_timestamp(req);
else
#endif
- skb_mstamp_get(&skb->skb_mstamp);
+ skb->skb_mstamp = tcp_clock_us();
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
@@ -3193,8 +3271,9 @@ static void tcp_connect_init(struct sock *sk)
/* We'll fix this up when we get a response from the other end.
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
- tp->tcp_header_len = sizeof(struct tcphdr) +
- (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
+ tp->tcp_header_len = sizeof(struct tcphdr);
+ if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+ tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
if (tp->af_specific->md5_lookup(sk, sk))
@@ -3225,7 +3304,7 @@ static void tcp_connect_init(struct sock *sk)
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
- sysctl_tcp_window_scaling,
+ sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
&rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
@@ -3244,7 +3323,7 @@ static void tcp_connect_init(struct sock *sk)
if (likely(!tp->repair))
tp->rcv_nxt = 0;
else
- tp->rcv_tstamp = tcp_time_stamp;
+ tp->rcv_tstamp = tcp_jiffies32;
tp->rcv_wup = tp->rcv_nxt;
tp->copied_seq = tp->rcv_nxt;
@@ -3373,7 +3452,8 @@ int tcp_connect(struct sock *sk)
return -ENOBUFS;
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
- tp->retrans_stamp = tcp_time_stamp;
+ tcp_mstamp_refresh(tp);
+ tp->retrans_stamp = tcp_time_stamp(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
@@ -3492,7 +3572,6 @@ void tcp_send_ack(struct sock *sk)
skb_set_tcp_pure_ack(buff);
/* Send it off, this clears delayed acks for us. */
- skb_mstamp_get(&buff->skb_mstamp);
tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
}
EXPORT_SYMBOL_GPL(tcp_send_ack);
@@ -3526,15 +3605,16 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
* send it.
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
- skb_mstamp_get(&skb->skb_mstamp);
NET_INC_STATS(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
}
+/* Called from setsockopt( ... TCP_REPAIR ) */
void tcp_send_window_probe(struct sock *sk)
{
if (sk->sk_state == TCP_ESTABLISHED) {
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+ tcp_mstamp_refresh(tcp_sk(sk));
tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
}
}
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c6a9fa894646..ad99569d4c1e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -78,7 +78,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
- if (!scb->tx.delivered_mstamp.v64)
+ if (!scb->tx.delivered_mstamp)
return;
if (!rs->prior_delivered ||
@@ -89,9 +89,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
/* Find the duration of the "send phase" of this window: */
- rs->interval_us = skb_mstamp_us_delta(
- &skb->skb_mstamp,
- &scb->tx.first_tx_mstamp);
+ rs->interval_us = tcp_stamp_us_delta(
+ skb->skb_mstamp,
+ scb->tx.first_tx_mstamp);
/* Record send time of most recently ACKed packet: */
tp->first_tx_mstamp = skb->skb_mstamp;
@@ -101,7 +101,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
* we don't need to reset since it'll be freed soon.
*/
if (scb->sacked & TCPCB_SACKED_ACKED)
- scb->tx.delivered_mstamp.v64 = 0;
+ scb->tx.delivered_mstamp = 0;
}
/* Update the connection delivery information and generate a rate sample. */
@@ -125,7 +125,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
/* Return an invalid sample if no timing information is available. */
- if (!rs->prior_mstamp.v64) {
+ if (!rs->prior_mstamp) {
rs->delivered = -1;
rs->interval_us = -1;
return;
@@ -138,8 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* longer phase.
*/
snd_us = rs->interval_us; /* send phase */
- ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp,
- &rs->prior_mstamp); /* ack phase */
+ ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
+ rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);
/* Normally we expect interval_us >= min-rtt.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 362b8c75bfab..fe9a493d0208 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -17,12 +17,9 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
}
}
-static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
- const struct skb_mstamp *t2,
- u32 seq1, u32 seq2)
+static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
{
- return skb_mstamp_after(t1, t2) ||
- (t1->v64 == t2->v64 && after(seq1, seq2));
+ return t1 > t2 || (t1 == t2 && after(seq1, seq2));
}
/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
@@ -72,14 +69,14 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
scb->sacked & TCPCB_SACKED_ACKED)
continue;
- if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
+ if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
tp->rack.end_seq, scb->end_seq)) {
/* Step 3 in draft-cheng-tcpm-rack-00.txt:
* A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window.
*/
- u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp,
- &skb->skb_mstamp);
+ u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
+ skb->skb_mstamp);
s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
if (remaining < 0) {
@@ -127,16 +124,16 @@ void tcp_rack_mark_lost(struct sock *sk)
* draft-cheng-tcpm-rack-00.txt
*/
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
- const struct skb_mstamp *xmit_time)
+ u64 xmit_time)
{
u32 rtt_us;
- if (tp->rack.mstamp.v64 &&
- !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
+ if (tp->rack.mstamp &&
+ !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
end_seq, tp->rack.end_seq))
return;
- rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time);
+ rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
if (sacked & TCPCB_RETRANS) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
@@ -152,7 +149,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
return;
}
tp->rack.rtt_us = rtt_us;
- tp->rack.mstamp = *xmit_time;
+ tp->rack.mstamp = xmit_time;
tp->rack.end_seq = end_seq;
tp->rack.advanced = 1;
}
@@ -166,7 +163,6 @@ void tcp_rack_reo_timeout(struct sock *sk)
u32 timeout, prior_inflight;
prior_inflight = tcp_packets_in_flight(tp);
- skb_mstamp_get(&tp->tcp_mstamp);
tcp_rack_detect_loss(sk, &timeout);
if (prior_inflight != tcp_packets_in_flight(tp)) {
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 14672543cf0b..c0feeeef962a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -63,7 +63,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
/* If peer does not open window for long time, or did not transmit
* anything for long time, penalize it. */
- if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+ if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
shift++;
/* If some dubious ICMP arrived, penalize even more. */
@@ -73,7 +73,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
if (tcp_check_oom(sk, shift)) {
/* Catch exceptional cases, when connection requires reset.
* 1. Last segment was sent recently. */
- if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
+ if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
(!tp->snd_wnd && !tp->packets_out))
do_reset = true;
@@ -115,7 +115,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
if (net->ipv4.sysctl_tcp_mtu_probing) {
if (!icsk->icsk_mtup.enabled) {
icsk->icsk_mtup.enabled = 1;
- icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
struct net *net = sock_net(sk);
@@ -139,22 +139,18 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
* @timeout: A custom timeout value.
* If set to 0 the default timeout is calculated and used.
* Using TCP_RTO_MIN and the number of unsuccessful retransmits.
- * @syn_set: true if the SYN Bit was set.
*
* The default "timeout" value this function can calculate and use
* is equivalent to the timeout of a TCP Connection
* after "boundary" unsuccessful, exponentially backed-off
- * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
- * syn_set flag is set.
- *
+ * retransmissions with an initial RTO of TCP_RTO_MIN.
*/
static bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
- unsigned int timeout,
- bool syn_set)
+ unsigned int timeout)
{
+ const unsigned int rto_base = TCP_RTO_MIN;
unsigned int linear_backoff_thresh, start_ts;
- unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
if (!inet_csk(sk)->icsk_retransmits)
return false;
@@ -172,7 +168,7 @@ static bool retransmits_timed_out(struct sock *sk,
timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
}
- return (tcp_time_stamp - start_ts) >= timeout;
+ return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout);
}
/* A write timeout has occurred. Process the after effects. */
@@ -181,8 +177,8 @@ static int tcp_write_timeout(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
+ bool expired, do_reset;
int retry_until;
- bool do_reset, syn_set = false;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits) {
@@ -196,9 +192,9 @@ static int tcp_write_timeout(struct sock *sk)
sk_rethink_txhash(sk);
}
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
- syn_set = true;
+ expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
+ if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
/* Some middle-boxes may black-hole Fast Open _after_
* the handshake. Therefore we conservatively disable
* Fast Open on this path on recurring timeouts after
@@ -224,15 +220,15 @@ static int tcp_write_timeout(struct sock *sk)
retry_until = tcp_orphan_retries(sk, alive);
do_reset = alive ||
- !retransmits_timed_out(sk, retry_until, 0, 0);
+ !retransmits_timed_out(sk, retry_until, 0);
if (tcp_out_of_resources(sk, do_reset))
return 1;
}
+ expired = retransmits_timed_out(sk, retry_until,
+ icsk->icsk_user_timeout);
}
-
- if (retransmits_timed_out(sk, retry_until,
- syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
+ if (expired) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
@@ -339,9 +335,10 @@ static void tcp_probe_timer(struct sock *sk)
*/
start_ts = tcp_skb_timestamp(tcp_send_head(sk));
if (!start_ts)
- skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
+ tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
else if (icsk->icsk_user_timeout &&
- (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
+ (s32)(tcp_time_stamp(tp) - start_ts) >
+ jiffies_to_msecs(icsk->icsk_user_timeout))
goto abort;
max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
@@ -451,7 +448,7 @@ void tcp_retransmit_timer(struct sock *sk)
tp->snd_una, tp->snd_nxt);
}
#endif
- if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
+ if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
tcp_write_err(sk);
goto out;
}
@@ -539,7 +536,7 @@ out_reset_timer:
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0))
+ if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
__sk_dst_reset(sk);
out:;
@@ -561,6 +558,7 @@ void tcp_write_timer_handler(struct sock *sk)
goto out;
}
+ tcp_mstamp_refresh(tcp_sk(sk));
event = icsk->icsk_pending;
switch (event) {
@@ -710,4 +708,7 @@ void tcp_init_xmit_timers(struct sock *sk)
{
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
&tcp_keepalive_timer);
+ hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS_PINNED);
+ tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
}
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 9775453b8d17..bec9cafbe3f9 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -68,7 +68,7 @@ static void tcp_westwood_init(struct sock *sk)
w->cumul_ack = 0;
w->reset_rtt_min = 1;
w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
- w->rtt_win_sx = tcp_time_stamp;
+ w->rtt_win_sx = tcp_jiffies32;
w->snd_una = tcp_sk(sk)->snd_una;
w->first_ack = 1;
}
@@ -116,7 +116,7 @@ static void tcp_westwood_pkts_acked(struct sock *sk,
static void westwood_update_window(struct sock *sk)
{
struct westwood *w = inet_csk_ca(sk);
- s32 delta = tcp_time_stamp - w->rtt_win_sx;
+ s32 delta = tcp_jiffies32 - w->rtt_win_sx;
/* Initialize w->snd_una with the first acked sequence number in order
* to fix mismatch between tp->snd_una and w->snd_una for the first
@@ -140,7 +140,7 @@ static void westwood_update_window(struct sock *sk)
westwood_filter(w, delta);
w->bk = 0;
- w->rtt_win_sx = tcp_time_stamp;
+ w->rtt_win_sx = tcp_jiffies32;
}
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1d6219bf2d6b..2bc638c48b86 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1163,23 +1163,110 @@ out:
return ret;
}
+/* Copy as much information as possible into skb->dev_scratch to avoid
+ * possibly multiple cache misses on dequeue();
+ */
+#if BITS_PER_LONG == 64
+
+/* We can store several pieces of info here: truesize, len and the bit
+ * needed to compute skb_csum_unnecessary would otherwise be on cold cache
+ * lines at recvmsg time.
+ * skb->len can be stored on 16 bits since the udp header has been already
+ * validated and pulled.
+ */
+struct udp_dev_scratch {
+ u32 truesize;
+ u16 len;
+ bool is_linear;
+ bool csum_unnecessary;
+};
+
+static void udp_set_dev_scratch(struct sk_buff *skb)
+{
+ struct udp_dev_scratch *scratch;
+
+ BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
+ scratch = (struct udp_dev_scratch *)&skb->dev_scratch;
+ scratch->truesize = skb->truesize;
+ scratch->len = skb->len;
+ scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
+ scratch->is_linear = !skb_is_nonlinear(skb);
+}
+
+static int udp_skb_truesize(struct sk_buff *skb)
+{
+ return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
+}
+
+static unsigned int udp_skb_len(struct sk_buff *skb)
+{
+ return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
+}
+
+static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+ return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
+}
+
+static bool udp_skb_is_linear(struct sk_buff *skb)
+{
+ return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
+}
+
+#else
+static void udp_set_dev_scratch(struct sk_buff *skb)
+{
+ skb->dev_scratch = skb->truesize;
+}
+
+static int udp_skb_truesize(struct sk_buff *skb)
+{
+ return skb->dev_scratch;
+}
+
+static unsigned int udp_skb_len(struct sk_buff *skb)
+{
+ return skb->len;
+}
+
+static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+ return skb_csum_unnecessary(skb);
+}
+
+static bool udp_skb_is_linear(struct sk_buff *skb)
+{
+ return !skb_is_nonlinear(skb);
+}
+#endif
+
/* fully reclaim rmem/fwd memory allocated for skb */
-static void udp_rmem_release(struct sock *sk, int size, int partial)
+static void udp_rmem_release(struct sock *sk, int size, int partial,
+ bool rx_queue_lock_held)
{
struct udp_sock *up = udp_sk(sk);
+ struct sk_buff_head *sk_queue;
int amt;
if (likely(partial)) {
up->forward_deficit += size;
size = up->forward_deficit;
if (size < (sk->sk_rcvbuf >> 2) &&
- !skb_queue_empty(&sk->sk_receive_queue))
+ !skb_queue_empty(&up->reader_queue))
return;
} else {
size += up->forward_deficit;
}
up->forward_deficit = 0;
+ /* acquire the sk_receive_queue for fwd allocated memory scheduling,
+ * if the caller doesn't hold it already
+ */
+ sk_queue = &sk->sk_receive_queue;
+ if (!rx_queue_lock_held)
+ spin_lock(&sk_queue->lock);
+
+
sk->sk_forward_alloc += size;
amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
sk->sk_forward_alloc -= amt;
@@ -1188,19 +1275,33 @@ static void udp_rmem_release(struct sock *sk, int size, int partial)
__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
atomic_sub(size, &sk->sk_rmem_alloc);
+
+ /* this can save us from acquiring the rx queue lock on next receive */
+ skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
+
+ if (!rx_queue_lock_held)
+ spin_unlock(&sk_queue->lock);
}
-/* Note: called with sk_receive_queue.lock held.
+/* Note: called with reader_queue.lock held.
* Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
* This avoids a cache line miss while receive_queue lock is held.
* Look at __udp_enqueue_schedule_skb() to find where this copy is done.
*/
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
{
- udp_rmem_release(sk, skb->dev_scratch, 1);
+ prefetch(&skb->data);
+ udp_rmem_release(sk, udp_skb_truesize(skb), 1, false);
}
EXPORT_SYMBOL(udp_skb_destructor);
+/* as above, but the caller held the rx queue lock, too */
+static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
+{
+ prefetch(&skb->data);
+ udp_rmem_release(sk, udp_skb_truesize(skb), 1, true);
+}
+
/* Idea of busylocks is to let producers grab an extra spinlock
* to relieve pressure on the receive_queue spinlock shared by consumer.
* Under flood, this means that only one producer can be in line
@@ -1252,10 +1353,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
busy = busylock_acquire(sk);
}
size = skb->truesize;
- /* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss
- * in udp_skb_destructor()
- */
- skb->dev_scratch = size;
+ udp_set_dev_scratch(skb);
/* we drop only if the receive buf is full and the receive
* queue contains some other skb
@@ -1306,14 +1404,16 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
void udp_destruct_sock(struct sock *sk)
{
/* reclaim completely the forward allocated memory */
+ struct udp_sock *up = udp_sk(sk);
unsigned int total = 0;
struct sk_buff *skb;
- while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
+ while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
total += skb->truesize;
kfree_skb(skb);
}
- udp_rmem_release(sk, total, 0);
+ udp_rmem_release(sk, total, 0, true);
inet_sock_destruct(sk);
}
@@ -1321,6 +1421,7 @@ EXPORT_SYMBOL_GPL(udp_destruct_sock);
int udp_init_sock(struct sock *sk)
{
+ skb_queue_head_init(&udp_sk(sk)->reader_queue);
sk->sk_destruct = udp_destruct_sock;
return 0;
}
@@ -1334,10 +1435,31 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
sk_peek_offset_bwd(sk, len);
unlock_sock_fast(sk, slow);
}
- consume_skb(skb);
+
+ consume_stateless_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_consume_udp);
+static struct sk_buff *__first_packet_length(struct sock *sk,
+ struct sk_buff_head *rcvq,
+ int *total)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_peek(rcvq)) != NULL &&
+ udp_lib_checksum_complete(skb)) {
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+ IS_UDPLITE(sk));
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ atomic_inc(&sk->sk_drops);
+ __skb_unlink(skb, rcvq);
+ *total += skb->truesize;
+ kfree_skb(skb);
+ }
+ return skb;
+}
+
/**
* first_packet_length - return length of first packet in receive queue
* @sk: socket
@@ -1347,26 +1469,24 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
*/
static int first_packet_length(struct sock *sk)
{
- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
+ struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
+ struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
struct sk_buff *skb;
int total = 0;
int res;
spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL &&
- udp_lib_checksum_complete(skb)) {
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
- IS_UDPLITE(sk));
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- atomic_inc(&sk->sk_drops);
- __skb_unlink(skb, rcvq);
- total += skb->truesize;
- kfree_skb(skb);
+ skb = __first_packet_length(sk, rcvq, &total);
+ if (!skb && !skb_queue_empty(sk_queue)) {
+ spin_lock(&sk_queue->lock);
+ skb_queue_splice_tail_init(sk_queue, rcvq);
+ spin_unlock(&sk_queue->lock);
+
+ skb = __first_packet_length(sk, rcvq, &total);
}
res = skb ? skb->len : -1;
if (total)
- udp_rmem_release(sk, total, 1);
+ udp_rmem_release(sk, total, 1, false);
spin_unlock_bh(&rcvq->lock);
return res;
}
@@ -1400,6 +1520,89 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
EXPORT_SYMBOL(udp_ioctl);
+struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
+ int noblock, int *peeked, int *off, int *err)
+{
+ struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
+ struct sk_buff_head *queue;
+ struct sk_buff *last;
+ long timeo;
+ int error;
+
+ queue = &udp_sk(sk)->reader_queue;
+ flags |= noblock ? MSG_DONTWAIT : 0;
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ do {
+ struct sk_buff *skb;
+
+ error = sock_error(sk);
+ if (error)
+ break;
+
+ error = -EAGAIN;
+ *peeked = 0;
+ do {
+ spin_lock_bh(&queue->lock);
+ skb = __skb_try_recv_from_queue(sk, queue, flags,
+ udp_skb_destructor,
+ peeked, off, err,
+ &last);
+ if (skb) {
+ spin_unlock_bh(&queue->lock);
+ return skb;
+ }
+
+ if (skb_queue_empty(sk_queue)) {
+ spin_unlock_bh(&queue->lock);
+ goto busy_check;
+ }
+
+ /* refill the reader queue and walk it again;
+ * keep both queues locked to avoid re-acquiring
+ * the sk_receive_queue lock if fwd memory scheduling
+ * is needed.
+ */
+ spin_lock(&sk_queue->lock);
+ skb_queue_splice_tail_init(sk_queue, queue);
+
+ skb = __skb_try_recv_from_queue(sk, queue, flags,
+ udp_skb_dtor_locked,
+ peeked, off, err,
+ &last);
+ spin_unlock(&sk_queue->lock);
+ spin_unlock_bh(&queue->lock);
+ if (skb)
+ return skb;
+
+busy_check:
+ if (!sk_can_busy_loop(sk))
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+ } while (!skb_queue_empty(sk_queue));
+
+ /* sk_queue is empty, reader_queue may contain peeked packets */
+ } while (timeo &&
+ !__skb_wait_for_more_packets(sk, &error, &timeo,
+ (struct sk_buff *)sk_queue));
+
+ *err = error;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__skb_recv_udp);
+
+static int copy_linear_skb(struct sk_buff *skb, int len, int off,
+ struct iov_iter *to)
+{
+ int n, copy = len - off;
+
+ n = copy_to_iter(skb->data + off, copy, to);
+ if (n == copy)
+ return 0;
+
+ return -EFAULT;
+}
+
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
@@ -1426,7 +1629,7 @@ try_again:
if (!skb)
return err;
- ulen = skb->len;
+ ulen = udp_skb_len(skb);
copied = len;
if (copied > ulen - off)
copied = ulen - off;
@@ -1441,14 +1644,18 @@ try_again:
if (copied < ulen || peeking ||
(is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
- checksum_valid = !udp_lib_checksum_complete(skb);
+ checksum_valid = udp_skb_csum_unnecessary(skb) ||
+ !__udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
}
- if (checksum_valid || skb_csum_unnecessary(skb))
- err = skb_copy_datagram_msg(skb, off, msg, copied);
- else {
+ if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
+ if (udp_skb_is_linear(skb))
+ err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
+ else
+ err = skb_copy_datagram_msg(skb, off, msg, copied);
+ } else {
err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL)
@@ -1490,7 +1697,8 @@ try_again:
return err;
csum_copy_err:
- if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+ if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+ udp_skb_destructor)) {
UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
}
@@ -1624,6 +1832,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id_once(sk, skb);
}
+ /* clear all pending head states while they are hot in the cache */
+ skb_release_head_state(skb);
+
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -2325,6 +2536,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
unsigned int mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
+ if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
+ mask |= POLLIN | POLLRDNORM;
+
sock_rps_record_flow(sk);
/* Check for false positives due to checksum errors */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6a4fb1e629fb..0aa36b093013 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -963,6 +963,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
struct rt6_info *rt;
+ struct in6_validator_info i6vi;
unsigned int hash;
int err = 0;
int addr_type = ipv6_addr_type(addr);
@@ -974,6 +975,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
return ERR_PTR(-EADDRNOTAVAIL);
rcu_read_lock_bh();
+
+ in6_dev_hold(idev);
+
if (idev->dead) {
err = -ENODEV; /*XXX*/
goto out2;
@@ -984,6 +988,17 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out2;
}
+ i6vi.i6vi_addr = *addr;
+ i6vi.i6vi_dev = idev;
+ rcu_read_unlock_bh();
+
+ err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
+
+ rcu_read_lock_bh();
+ err = notifier_to_errno(err);
+ if (err)
+ goto out2;
+
spin_lock(&addrconf_hash_lock);
/* Ignore adding duplicate addresses on an interface */
@@ -1034,7 +1049,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa->rt = rt;
ifa->idev = idev;
- in6_dev_hold(idev);
/* For caller */
in6_ifa_hold(ifa);
@@ -1062,6 +1076,7 @@ out2:
inet6addr_notifier_call_chain(NETDEV_UP, ifa);
else {
kfree(ifa);
+ in6_dev_put(idev);
ifa = ERR_PTR(err);
}
@@ -2280,7 +2295,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
cfg.fc_flags |= RTF_NONEXTHOP;
#endif
- ip6_route_add(&cfg);
+ ip6_route_add(&cfg, NULL);
}
@@ -2335,7 +2350,7 @@ static void addrconf_add_mroute(struct net_device *dev)
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
- ip6_route_add(&cfg);
+ ip6_route_add(&cfg, NULL);
}
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index bfa941fc1165..9e3488d50b15 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,6 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
EXPORT_SYMBOL(__ipv6_addr_type);
static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
+static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain);
int register_inet6addr_notifier(struct notifier_block *nb)
{
@@ -107,6 +108,24 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
}
EXPORT_SYMBOL(inet6addr_notifier_call_chain);
+int register_inet6addr_validator_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&inet6addr_validator_chain, nb);
+}
+EXPORT_SYMBOL(register_inet6addr_validator_notifier);
+
+int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb);
+}
+EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
+
+int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
+{
+ return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v);
+}
+EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
+
static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
struct dst_entry **u2,
struct flowi6 *u3)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dda6035e3b84..755f38271dd5 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -423,7 +423,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
sg_init_table(sg, nfrags + sglists);
- skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ if (unlikely(err < 0))
+ goto out_free;
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
@@ -606,7 +608,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->hop_limit = 0;
sg_init_table(sg, nfrags + sglists);
- skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+ if (unlikely(err < 0))
+ goto out_free;
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1fe99ba8066c..2ede4e459c4e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -346,9 +346,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi);
sg_init_table(sg, esp->nfrags);
- skb_to_sgvec(skb, sg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + esp->clen + alen);
+ err = skb_to_sgvec(skb, sg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ if (unlikely(err < 0))
+ goto error;
if (!esp->inplace) {
int allocsize;
@@ -372,9 +374,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
spin_unlock_bh(&x->lock);
sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
- skb_to_sgvec(skb, dsg,
- (unsigned char *)esph - skb->data,
- assoclen + ivlen + esp->clen + alen);
+ err = skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + esp->clen + alen);
+ if (unlikely(err < 0))
+ goto error;
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -618,7 +622,9 @@ skip_cow:
esp_input_set_header(skb, seqhi);
sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ ret = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(ret < 0))
+ goto out;
skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 9ea249b9451e..6de3c04b0f30 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -14,6 +14,8 @@
#include <net/udp.h>
#include <net/udp_tunnel.h>
+#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
+
static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
struct flowi6 *fl6, u8 *protocol, __be16 sport)
{
@@ -33,8 +35,8 @@ static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
*protocol = IPPROTO_UDP;
}
-int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi6 *fl6)
+static int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi6 *fl6)
{
__be16 sport;
int err;
@@ -49,10 +51,9 @@ int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
-EXPORT_SYMBOL(fou6_build_header);
-int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi6 *fl6)
+static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi6 *fl6)
{
__be16 sport;
int err;
@@ -67,9 +68,6 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
-EXPORT_SYMBOL(gue6_build_header);
-
-#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
.encap_hlen = fou_encap_hlen,
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index b3df03e3faa0..0c02a09bc351 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -91,7 +91,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
drop:
kfree_skb(skb);
- return -EINVAL;
+ return err;
}
static int ila_input(struct sk_buff *skb)
@@ -117,7 +117,8 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
static int ila_build_state(struct nlattr *nla,
unsigned int family, const void *cfg,
- struct lwtunnel_state **ts)
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
{
struct ila_lwt *ilwt;
struct ila_params *p;
@@ -146,7 +147,7 @@ static int ila_build_state(struct nlattr *nla,
return -EINVAL;
}
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL);
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
if (ret < 0)
return ret;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d4bf2c68a545..deea901746c8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -473,7 +473,8 @@ out:
static struct fib6_node *fib6_add_1(struct fib6_node *root,
struct in6_addr *addr, int plen,
int offset, int allow_create,
- int replace_required, int sernum)
+ int replace_required, int sernum,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -497,6 +498,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
if (!allow_create) {
if (replace_required) {
+ NL_SET_ERR_MSG(extack,
+ "Can not replace route - no match found");
pr_warn("Can't replace route, no match found\n");
return ERR_PTR(-ENOENT);
}
@@ -543,6 +546,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
* That would keep IPv6 consistent with IPv4
*/
if (replace_required) {
+ NL_SET_ERR_MSG(extack,
+ "Can not replace route - no match found");
pr_warn("Can't replace route, no match found\n");
return ERR_PTR(-ENOENT);
}
@@ -964,7 +969,8 @@ void fib6_force_start_gc(struct net *net)
*/
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc)
+ struct nl_info *info, struct mx6_config *mxc,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
@@ -987,7 +993,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
- replace_required, sernum);
+ replace_required, sernum, extack);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
fn = NULL;
@@ -1028,7 +1034,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum);
+ allow_create, replace_required, sernum,
+ extack);
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
@@ -1047,7 +1054,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum);
+ allow_create, replace_required, sernum,
+ extack);
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bf8a58a1c32d..0d6f3b6345de 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -67,9 +67,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct in6_addr *nexthop;
int ret;
- skb->protocol = htons(ETH_P_IPV6);
- skb->dev = dev;
-
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -154,6 +151,9 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+
if (unlikely(idev->cnf.disable_ipv6)) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
@@ -869,7 +869,6 @@ fail_toobig:
if (skb->sk && dst_allfrag(skb_dst(skb)))
sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
- skb->dev = skb_dst(skb)->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
err = -EMSGSIZE;
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index eedee5d108d9..f63b18e05c69 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -220,9 +220,6 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
__be16 fo;
u8 proto;
- if (skb->csum_bad)
- return false;
-
if (skb_csum_unnecessary(skb))
return true;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7cebd954d5bb..18fe6e2b88d5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -938,14 +938,15 @@ EXPORT_SYMBOL(rt6_lookup);
*/
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
- struct mx6_config *mxc)
+ struct mx6_config *mxc,
+ struct netlink_ext_ack *extack)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info, mxc);
+ err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -956,7 +957,7 @@ int ip6_ins_rt(struct rt6_info *rt)
struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
struct mx6_config mxc = { .mx = NULL, };
- return __ip6_ins_rt(rt, &info, &mxc);
+ return __ip6_ins_rt(rt, &info, &mxc, NULL);
}
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
@@ -1844,7 +1845,8 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
@@ -1855,14 +1857,25 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
int err = -EINVAL;
/* RTF_PCPU is an internal flag; can not be set by userspace */
- if (cfg->fc_flags & RTF_PCPU)
+ if (cfg->fc_flags & RTF_PCPU) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
goto out;
+ }
- if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+ if (cfg->fc_dst_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ goto out;
+ }
+ if (cfg->fc_src_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid source address length");
goto out;
+ }
#ifndef CONFIG_IPV6_SUBTREES
- if (cfg->fc_src_len)
+ if (cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack,
+ "Specifying source address requires IPV6_SUBTREES to be enabled");
goto out;
+ }
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
@@ -1926,7 +1939,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
err = lwtunnel_build_state(cfg->fc_encap_type,
cfg->fc_encap, AF_INET6, cfg,
- &lwtstate);
+ &lwtstate, extack);
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
@@ -2013,9 +2026,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
err = -EINVAL;
if (ipv6_chk_addr_and_flags(net, gw_addr,
gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0))
+ dev : NULL, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
goto out;
-
+ }
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -2031,8 +2045,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
addressing
*/
if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED)))
+ IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid gateway address");
goto out;
+ }
if (cfg->fc_table) {
grt = ip6_nh_lookup_table(net, cfg, gw_addr);
@@ -2072,8 +2089,14 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
goto out;
}
err = -EINVAL;
- if (!dev || (dev->flags & IFF_LOOPBACK))
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
goto out;
+ }
}
err = -ENODEV;
@@ -2082,6 +2105,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
+ NL_SET_ERR_MSG(extack, "Invalid source address");
err = -EINVAL;
goto out;
}
@@ -2111,13 +2135,14 @@ out:
return ERR_PTR(err);
}
-int ip6_route_add(struct fib6_config *cfg)
+int ip6_route_add(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct mx6_config mxc = { .mx = NULL, };
struct rt6_info *rt;
int err;
- rt = ip6_route_info_create(cfg);
+ rt = ip6_route_info_create(cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
@@ -2128,7 +2153,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (err)
goto out;
- err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+ err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
kfree(mxc.mx);
@@ -2222,7 +2247,8 @@ out_put:
return err;
}
-static int ip6_route_del(struct fib6_config *cfg)
+static int ip6_route_del(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct fib6_table *table;
struct fib6_node *fn;
@@ -2230,8 +2256,10 @@ static int ip6_route_del(struct fib6_config *cfg)
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
- if (!table)
+ if (!table) {
+ NL_SET_ERR_MSG(extack, "FIB table does not exist");
return err;
+ }
read_lock_bh(&table->tb6_lock);
@@ -2483,7 +2511,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
if (!prefixlen)
cfg.fc_flags |= RTF_DEFAULT;
- ip6_route_add(&cfg);
+ ip6_route_add(&cfg, NULL);
return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
@@ -2529,7 +2557,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
cfg.fc_gateway = *gwaddr;
- if (!ip6_route_add(&cfg)) {
+ if (!ip6_route_add(&cfg, NULL)) {
struct fib6_table *table;
table = fib6_get_table(dev_net(dev), cfg.fc_table);
@@ -2622,10 +2650,10 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
- err = ip6_route_add(&cfg);
+ err = ip6_route_add(&cfg, NULL);
break;
case SIOCDELRT:
- err = ip6_route_del(&cfg);
+ err = ip6_route_del(&cfg, NULL);
break;
default:
err = -EINVAL;
@@ -2904,7 +2932,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct fib6_config *cfg)
+ struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
@@ -2988,7 +3017,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
- cfg->fc_mp_len);
+ cfg->fc_mp_len, extack);
if (err < 0)
goto errout;
}
@@ -3007,7 +3036,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_ENCAP_TYPE]) {
cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
- err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
if (err < 0)
goto errout;
}
@@ -3098,7 +3127,8 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
-static int ip6_route_multipath_add(struct fib6_config *cfg)
+static int ip6_route_multipath_add(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct rt6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
@@ -3146,7 +3176,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- rt = ip6_route_info_create(&r_cfg);
+ rt = ip6_route_info_create(&r_cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
@@ -3171,7 +3201,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
rt_last = nh->rt6_info;
- err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
+ err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
/* save reference to first route for notification */
if (!rt_notif && !err)
rt_notif = nh->rt6_info;
@@ -3213,7 +3243,7 @@ add_errout:
list_for_each_entry(nh, &rt6_nh_list, next) {
if (err_nh == nh)
break;
- ip6_route_del(&nh->r_cfg);
+ ip6_route_del(&nh->r_cfg, extack);
}
cleanup:
@@ -3228,7 +3258,8 @@ cleanup:
return err;
}
-static int ip6_route_multipath_del(struct fib6_config *cfg)
+static int ip6_route_multipath_del(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
@@ -3255,7 +3286,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg)
r_cfg.fc_flags |= RTF_GATEWAY;
}
}
- err = ip6_route_del(&r_cfg);
+ err = ip6_route_del(&r_cfg, extack);
if (err)
last_err = err;
@@ -3271,15 +3302,15 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config cfg;
int err;
- err = rtm_to_fib6_config(skb, nlh, &cfg);
+ err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
if (err < 0)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath_del(&cfg);
+ return ip6_route_multipath_del(&cfg, extack);
else {
cfg.fc_delete_all_nh = 1;
- return ip6_route_del(&cfg);
+ return ip6_route_del(&cfg, extack);
}
}
@@ -3289,14 +3320,14 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config cfg;
int err;
- err = rtm_to_fib6_config(skb, nlh, &cfg);
+ err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
if (err < 0)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath_add(&cfg);
+ return ip6_route_multipath_add(&cfg, extack);
else
- return ip6_route_add(&cfg);
+ return ip6_route_add(&cfg, extack);
}
static size_t rt6_nlmsg_size(struct rt6_info *rt)
@@ -3577,11 +3608,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
+ int err, iif = 0, oif = 0;
+ struct dst_entry *dst;
struct rt6_info *rt;
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6;
- int err, iif = 0, oif = 0;
+ bool fibmatch;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
extack);
@@ -3592,6 +3625,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
memset(&fl6, 0, sizeof(fl6));
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
+ fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
@@ -3637,12 +3671,23 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
- flags);
+ if (!fibmatch)
+ dst = ip6_route_input_lookup(net, dev, &fl6, flags);
} else {
fl6.flowi6_oif = oif;
- rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
+ if (!fibmatch)
+ dst = ip6_route_output(net, NULL, &fl6);
+ }
+
+ if (fibmatch)
+ dst = ip6_route_lookup(net, &fl6, 0);
+
+ rt = container_of(dst, struct rt6_info, dst);
+ if (rt->dst.error) {
+ err = rt->dst.error;
+ ip6_rt_put(rt);
+ goto errout;
}
if (rt == net->ipv6.ip6_null_entry) {
@@ -3659,10 +3704,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
skb_dst_set(skb, &rt->dst);
-
- err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
+ if (fibmatch)
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
+ RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ else
+ err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
+ RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 5f44ffed2576..15fba55e3da8 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -303,13 +303,9 @@ static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
{
struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
- struct net *net = sock_net(skb->sk);
- struct seg6_pernet_data *sdata;
struct seg6_hmac_info *hinfo;
int ret;
- sdata = seg6_pernet(net);
-
ret = rhashtable_walk_start(iter);
if (ret && ret != -EAGAIN)
goto done;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 6a495490d43e..264d772d3c7d 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -326,7 +326,8 @@ drop:
static int seg6_build_state(struct nlattr *nla,
unsigned int family, const void *cfg,
- struct lwtunnel_state **ts)
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
struct seg6_iptunnel_encap *tuninfo;
@@ -336,7 +337,7 @@ static int seg6_build_state(struct nlattr *nla,
int err;
err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
- seg6_iptunnel_policy, NULL);
+ seg6_iptunnel_policy, extack);
if (err < 0)
return err;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5abc3692b901..2f7e99af67db 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -162,15 +162,16 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+ tsoff = secure_tcpv6_ts_off(sock_net(sk),
+ ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(&tcp_opt))
+ if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
ret = NULL;
@@ -211,7 +212,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack.v64 = 0;
+ treq->snt_synack = 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
treq->ts_off = 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4f4310a36a04..84ad50218255 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -109,9 +109,9 @@ static u32 tcp_v6_init_seq(const struct sk_buff *skb)
tcp_hdr(skb)->source);
}
-static u32 tcp_v6_init_ts_off(const struct sk_buff *skb)
+static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
- return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+ return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
}
@@ -292,7 +292,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
- tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32,
+ tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
+ np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
}
@@ -949,7 +950,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcp_time_stamp + tcptw->tw_ts_offset,
+ tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
@@ -971,7 +972,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp + tcp_rsk(req)->ts_off,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
@@ -1248,9 +1249,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
- if (tcp_filter(sk, skb))
- goto discard;
-
/*
* socket locking is here for SMP purposes as backlog rcv
* is currently called with bh processing disabled.
@@ -1909,6 +1907,7 @@ struct proto tcpv6_prot = {
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
+ .leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 06ec39b79609..2e9b52bded2d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -455,7 +455,8 @@ try_again:
return err;
csum_copy_err:
- if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+ if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+ udp_skb_destructor)) {
if (is_udp4) {
UDP_INC_STATS(sock_net(sk),
UDP_MIB_CSUMERRORS, is_udplite);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index deca20fb2ce2..da49191f7ad0 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1985,7 +1985,7 @@ static int kcm_create(struct net *net, struct socket *sock,
return 0;
}
-static struct net_proto_family kcm_family_ops = {
+static const struct net_proto_family kcm_family_ops = {
.family = PF_KCM,
.create = kcm_create,
.owner = THIS_MODULE,
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 1b7a4daf283c..3a0282188ad6 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -449,44 +449,21 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
buf_size, true, false);
}
-void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif,
- const u8 *addr, u16 tid)
+void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
+ const u8 *addr, unsigned int bit)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
- struct ieee80211_rx_agg *rx_agg;
- struct sk_buff *skb = dev_alloc_skb(0);
-
- if (unlikely(!skb))
- return;
-
- rx_agg = (struct ieee80211_rx_agg *) &skb->cb;
- memcpy(&rx_agg->addr, addr, ETH_ALEN);
- rx_agg->tid = tid;
-
- skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_START;
- skb_queue_tail(&sdata->skb_queue, skb);
- ieee80211_queue_work(&local->hw, &sdata->work);
-}
-EXPORT_SYMBOL(ieee80211_start_rx_ba_session_offl);
-
-void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif,
- const u8 *addr, u16 tid)
-{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_rx_agg *rx_agg;
- struct sk_buff *skb = dev_alloc_skb(0);
-
- if (unlikely(!skb))
- return;
+ struct sta_info *sta;
- rx_agg = (struct ieee80211_rx_agg *) &skb->cb;
- memcpy(&rx_agg->addr, addr, ETH_ALEN);
- rx_agg->tid = tid;
+ rcu_read_lock();
+ sta = sta_info_get_bss(sdata, addr);
+ if (!sta)
+ goto unlock;
- skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_STOP;
- skb_queue_tail(&sdata->skb_queue, skb);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl);
+ ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ unlock:
+ rcu_read_unlock();
}
-EXPORT_SYMBOL(ieee80211_stop_rx_ba_session_offl);
+EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4a388fe8c2d1..f9eb2486d550 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1876,6 +1876,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
ifmsh->user_mpm = setup->user_mpm;
ifmsh->mesh_auth_id = setup->auth_id;
ifmsh->security = IEEE80211_MESH_SEC_NONE;
+ ifmsh->userspace_handles_dfs = setup->userspace_handles_dfs;
if (setup->is_authenticated)
ifmsh->security |= IEEE80211_MESH_SEC_AUTHED;
if (setup->is_secure)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 42601820db20..b15412c21ac9 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -154,6 +154,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
p += scnprintf(p,
bufsz+buf-p,
+ "target %uus interval %uus ecn %s\n",
+ codel_time_to_us(sta->cparams.target),
+ codel_time_to_us(sta->cparams.interval),
+ sta->cparams.ecn ? "yes" : "no");
+ p += scnprintf(p,
+ bufsz+buf-p,
"tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 6ca5442b1e03..9e71226c2d25 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -331,6 +331,18 @@ void ieee80211_ba_session_work(struct work_struct *work)
sta, tid, WLAN_BACK_RECIPIENT,
WLAN_REASON_UNSPECIFIED, true);
+ if (test_and_clear_bit(tid,
+ sta->ampdu_mlme.tid_rx_manage_offl))
+ __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
+ IEEE80211_MAX_AMPDU_BUF,
+ false, true);
+
+ if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
+ sta->ampdu_mlme.tid_rx_manage_offl))
+ ___ieee80211_stop_rx_ba_session(
+ sta, tid, WLAN_BACK_RECIPIENT,
+ 0, false);
+
spin_lock_bh(&sta->lock);
tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 364d4e137649..660ac6a426f4 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -808,7 +808,6 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
memset(&params, 0, sizeof(params));
- memset(&csa_ie, 0, sizeof(csa_ie));
err = ieee80211_parse_ch_switch_ie(sdata, elems,
ifibss->chandef.chan->band,
sta_flags, ifibss->bssid, &csa_ie);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5e002f62c235..2197c62a0a6e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -643,6 +643,8 @@ struct ieee80211_if_mesh {
unsigned long wrkq_flags;
unsigned long mbss_changed;
+ bool userspace_handles_dfs;
+
u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
size_t mesh_id_len;
/* Active Path Selection Protocol Identifier */
@@ -1029,17 +1031,6 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif)
return shift;
}
-struct ieee80211_rx_agg {
- u8 addr[ETH_ALEN];
- u16 tid;
-};
-
-enum sdata_queue_type {
- IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0,
- IEEE80211_SDATA_QUEUE_RX_AGG_START = 3,
- IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4,
-};
-
enum {
IEEE80211_RX_MSG = 1,
IEEE80211_TX_STATUS_MSG = 2,
@@ -1432,6 +1423,7 @@ struct ieee80211_csa_ie {
u8 count;
u8 ttl;
u16 pre_value;
+ u16 reason_code;
};
/* Parsed Information Elements */
@@ -2057,6 +2049,8 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
const struct cfg80211_chan_def *chandef,
u16 prot_mode, bool rifs_mode);
+void ieee80211_ie_build_wide_bw_cs(u8 *pos,
+ const struct cfg80211_chan_def *chandef);
u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u32 cap);
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f5f50150ba1c..9228ac73c429 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1237,7 +1237,6 @@ static void ieee80211_iface_work(struct work_struct *work)
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct sta_info *sta;
- struct ieee80211_rx_agg *rx_agg;
if (!ieee80211_sdata_running(sdata))
return;
@@ -1252,28 +1251,8 @@ static void ieee80211_iface_work(struct work_struct *work)
while ((skb = skb_dequeue(&sdata->skb_queue))) {
struct ieee80211_mgmt *mgmt = (void *)skb->data;
- if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
- rx_agg = (void *)&skb->cb;
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, rx_agg->addr);
- if (sta)
- __ieee80211_start_rx_ba_session(sta,
- 0, 0, 0, 1, rx_agg->tid,
- IEEE80211_MAX_AMPDU_BUF,
- false, true);
- mutex_unlock(&local->sta_mtx);
- } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_STOP) {
- rx_agg = (void *)&skb->cb;
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, rx_agg->addr);
- if (sta)
- __ieee80211_stop_rx_ba_session(sta,
- rx_agg->tid,
- WLAN_BACK_RECIPIENT, 0,
- false);
- mutex_unlock(&local->sta_mtx);
- } else if (ieee80211_is_action(mgmt->frame_control) &&
- mgmt->u.action.category == WLAN_CATEGORY_BACK) {
+ if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_BACK) {
int len = skb->len;
mutex_lock(&local->sta_mtx);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 737e1f082b0d..ad5d1cf39190 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -690,6 +690,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
2 + sizeof(struct ieee80211_channel_sw_ie) +
/* Mesh Channel Switch Parameters */
2 + sizeof(struct ieee80211_mesh_chansw_params_ie) +
+ /* Channel Switch Wrapper + Wide Bandwidth CSA IE */
+ 2 + 2 + sizeof(struct ieee80211_wide_bw_chansw_ie) +
+ 2 + sizeof(struct ieee80211_sec_chan_offs_ie) +
2 + 8 + /* supported rates */
2 + 3; /* DS params */
tail_len = 2 + (IEEE80211_MAX_SUPP_RATES - 8) +
@@ -736,8 +739,12 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
rcu_read_lock();
csa = rcu_dereference(ifmsh->csa);
if (csa) {
- pos = skb_put(skb, 13);
- memset(pos, 0, 13);
+ enum nl80211_channel_type ct;
+ struct cfg80211_chan_def *chandef;
+ int ie_len = 2 + sizeof(struct ieee80211_channel_sw_ie) +
+ 2 + sizeof(struct ieee80211_mesh_chansw_params_ie);
+
+ pos = skb_put_zero(skb, ie_len);
*pos++ = WLAN_EID_CHANNEL_SWITCH;
*pos++ = 3;
*pos++ = 0x0;
@@ -760,6 +767,37 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
pos += 2;
put_unaligned_le16(ifmsh->pre_value, pos);
pos += 2;
+
+ switch (csa->settings.chandef.width) {
+ case NL80211_CHAN_WIDTH_40:
+ ie_len = 2 + sizeof(struct ieee80211_sec_chan_offs_ie);
+ pos = skb_put_zero(skb, ie_len);
+
+ *pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; /* EID */
+ *pos++ = 1; /* len */
+ ct = cfg80211_get_chandef_type(&csa->settings.chandef);
+ if (ct == NL80211_CHAN_HT40PLUS)
+ *pos++ = IEEE80211_HT_PARAM_CHA_SEC_ABOVE;
+ else
+ *pos++ = IEEE80211_HT_PARAM_CHA_SEC_BELOW;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ /* Channel Switch Wrapper + Wide Bandwidth CSA IE */
+ ie_len = 2 + 2 +
+ sizeof(struct ieee80211_wide_bw_chansw_ie);
+ pos = skb_put_zero(skb, ie_len);
+
+ *pos++ = WLAN_EID_CHANNEL_SWITCH_WRAPPER; /* EID */
+ *pos++ = 5; /* len */
+ /* put sub IE */
+ chandef = &csa->settings.chandef;
+ ieee80211_ie_build_wide_bw_cs(pos, chandef);
+ break;
+ default:
+ break;
+ }
}
rcu_read_unlock();
@@ -916,6 +954,21 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
ieee80211_configure_filter(local);
}
+static void ieee80211_mesh_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
+{
+ int err;
+
+ /* if the current channel is a DFS channel, mark the channel as
+ * unavailable.
+ */
+ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
+ &sdata->vif.bss_conf.chandef,
+ NL80211_IFTYPE_MESH_POINT);
+ if (err > 0)
+ cfg80211_radar_event(sdata->local->hw.wiphy,
+ &sdata->vif.bss_conf.chandef, GFP_ATOMIC);
+}
+
static bool
ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems, bool beacon)
@@ -933,19 +986,20 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
if (!sband)
return false;
- sta_flags = IEEE80211_STA_DISABLE_VHT;
+ sta_flags = 0;
switch (sdata->vif.bss_conf.chandef.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
sta_flags |= IEEE80211_STA_DISABLE_HT;
case NL80211_CHAN_WIDTH_20:
sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
+ case NL80211_CHAN_WIDTH_40:
+ sta_flags |= IEEE80211_STA_DISABLE_VHT;
break;
default:
break;
}
memset(&params, 0, sizeof(params));
- memset(&csa_ie, 0, sizeof(csa_ie));
err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
sta_flags, sdata->vif.addr,
&csa_ie);
@@ -954,11 +1008,19 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
if (err)
return false;
+ /* Mark the channel unavailable if the reason for the switch is
+ * regulatory.
+ */
+ if (csa_ie.reason_code == WLAN_REASON_MESH_CHAN_REGULATORY)
+ ieee80211_mesh_csa_mark_radar(sdata);
+
params.chandef = csa_ie.chandef;
params.count = csa_ie.count;
if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef,
- IEEE80211_CHAN_DISABLED)) {
+ IEEE80211_CHAN_DISABLED) ||
+ !cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef,
+ NL80211_IFTYPE_MESH_POINT)) {
sdata_info(sdata,
"mesh STA %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n",
sdata->vif.addr,
@@ -974,9 +1036,16 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
NL80211_IFTYPE_MESH_POINT);
if (err < 0)
return false;
- if (err > 0)
- /* TODO: DFS not (yet) supported */
+ if (err > 0 && !ifmsh->userspace_handles_dfs) {
+ sdata_info(sdata,
+ "mesh STA %pM switches to channel requiring DFS (%d MHz, width:%d, CF1/2: %d/%d MHz), aborting\n",
+ sdata->vif.addr,
+ params.chandef.chan->center_freq,
+ params.chandef.width,
+ params.chandef.center_freq1,
+ params.chandef.center_freq2);
return false;
+ }
params.radar_required = err;
@@ -1233,7 +1302,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
pos = mgmt->u.action.u.chan_switch.variable;
baselen = offsetof(struct ieee80211_mgmt,
u.action.u.chan_switch.variable);
- ieee802_11_parse_elems(pos, len - baselen, false, &elems);
+ ieee802_11_parse_elems(pos, len - baselen, true, &elems);
ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl;
if (!--ifmsh->chsw_ttl)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 1131cd504a15..82cfd232a25e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -264,8 +264,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
band = sband->band;
/* capability info */
- pos = skb_put(skb, 2);
- memset(pos, 0, 2);
+ pos = skb_put_zero(skb, 2);
if (action == WLAN_SP_MESH_PEERING_CONFIRM) {
/* AID */
pos = skb_put(skb, 2);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cc8e6ea1b27e..1929bce8e518 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1122,7 +1122,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return;
current_band = cbss->channel->band;
- memset(&csa_ie, 0, sizeof(csa_ie));
res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
ifmgd->flags,
ifmgd->associated->bssid, &csa_ie);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index ea1f4315c521..76f303fda3ed 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -943,6 +943,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
+ ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta));
+
return 0;
}
EXPORT_SYMBOL(rate_control_set_rates);
@@ -991,4 +993,3 @@ void rate_control_deinitialize(struct ieee80211_local *local)
local->rate_ctrl = NULL;
rate_control_free(local, ref);
}
-
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3674fe3d67dc..004a2283c5d9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -237,7 +237,6 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
if (!skb)
return;
- skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
skb_queue_tail(&sdata->skb_queue, skb);
ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
@@ -1217,7 +1216,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
/* if this mpdu is fragmented - terminate rx aggregation session */
sc = le16_to_cpu(hdr->seq_ctrl);
if (sc & IEEE80211_SCTL_FRAG) {
- skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
skb_queue_tail(&rx->sdata->skb_queue, skb);
ieee80211_queue_work(&local->hw, &rx->sdata->work);
return;
@@ -3104,7 +3102,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
return RX_QUEUED;
queue:
- rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
skb_queue_tail(&sdata->skb_queue, rx->skb);
ieee80211_queue_work(&local->hw, &sdata->work);
if (rx->sta)
@@ -3250,7 +3247,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
}
/* queue up frame and kick off work to process it */
- rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
skb_queue_tail(&sdata->skb_queue, rx->skb);
ieee80211_queue_work(&rx->local->hw, &sdata->work);
if (rx->sta)
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 0782e486fe89..bf8f5dcea1c4 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -36,6 +36,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
int secondary_channel_offset = -1;
+ memset(csa_ie, 0, sizeof(*csa_ie));
+
sec_chan_offs = elems->sec_chan_offs;
wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
@@ -76,6 +78,11 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags;
csa_ie->pre_value = le16_to_cpu(
elems->mesh_chansw_params_ie->mesh_pre_value);
+
+ if (elems->mesh_chansw_params_ie->mesh_flags &
+ WLAN_EID_CHAN_SWITCH_PARAM_REASON)
+ csa_ie->reason_code = le16_to_cpu(
+ elems->mesh_chansw_params_ie->mesh_reason);
}
new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 403e3cc58b57..46e1809356f6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -20,6 +20,7 @@
#include <linux/timer.h>
#include <linux/rtnetlink.h>
+#include <net/codel.h>
#include <net/mac80211.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -425,6 +426,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA;
+ sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD;
+ sta->cparams.target = MS2TIME(20);
+ sta->cparams.interval = MS2TIME(100);
+ sta->cparams.ecn = true;
+
sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);
return sta;
@@ -2310,3 +2316,27 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta)
return stats->last_rx;
return sta->status_stats.last_ack;
}
+
+static void sta_update_codel_params(struct sta_info *sta, u32 thr)
+{
+ if (!sta->sdata->local->ops->wake_tx_queue)
+ return;
+
+ if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) {
+ sta->cparams.target = MS2TIME(50);
+ sta->cparams.interval = MS2TIME(300);
+ sta->cparams.ecn = false;
+ } else {
+ sta->cparams.target = MS2TIME(20);
+ sta->cparams.interval = MS2TIME(100);
+ sta->cparams.ecn = true;
+ }
+}
+
+void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
+ u32 thr)
+{
+ struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+
+ sta_update_codel_params(sta, thr);
+}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index ea0747d6a6da..3acbdfa9f649 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -233,6 +233,8 @@ struct tid_ampdu_rx {
* RX timer expired until the work for it runs
* @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the
* driver requested to close until the work for it runs
+ * @tid_rx_manage_offl: bitmap indicating which BA sessions were requested
+ * to be treated as started/stopped due to offloading
* @agg_session_valid: bitmap indicating which TID has a rx BA session open on
* @unexpected_agg: bitmap indicating which TID already sent a delBA due to
* unexpected aggregation related frames outside a session
@@ -250,6 +252,7 @@ struct sta_ampdu_mlme {
u8 tid_rx_token[IEEE80211_NUM_TIDS];
unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
+ unsigned long tid_rx_manage_offl[BITS_TO_LONGS(2 * IEEE80211_NUM_TIDS)];
unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
/* tx */
@@ -396,6 +399,14 @@ struct ieee80211_sta_rx_stats {
};
/**
+ * The bandwidth threshold below which the per-station CoDel parameters will be
+ * scaled to be more lenient (to prevent starvation of slow stations). This
+ * value will be scaled by the number of active stations when it is being
+ * applied.
+ */
+#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */
+
+/**
* struct sta_info - STA information
*
* This structure collects information about a station that
@@ -448,6 +459,7 @@ struct ieee80211_sta_rx_stats {
* @known_smps_mode: the smps_mode the client thinks we are in. Relevant for
* AP only.
* @cipher_scheme: optional cipher scheme for this station
+ * @cparams: CoDel parameters for this station.
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
* @fast_tx: TX fastpath information
* @fast_rx: RX fastpath information
@@ -551,6 +563,8 @@ struct sta_info {
enum ieee80211_smps_mode known_smps_mode;
const struct ieee80211_cipher_scheme *cipher_scheme;
+ struct codel_params cparams;
+
u8 reserved_tid;
struct cfg80211_chan_def tdls_chandef;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index be47ac5cd8c8..a9fa6ee57e8f 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -546,6 +546,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
skb->wifi_acked_valid = 1;
skb->wifi_acked = acked;
}
+
+ ieee80211_led_tx(local);
}
/*
@@ -823,8 +825,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
}
}
- ieee80211_led_tx(local);
-
/* SNMP counters
* Fragments are passed to low-level drivers as separate skbs, so these
* are actually fragments, not frames. Update frame counters only for
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 0d645bc148d0..3d9ac17af407 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -92,16 +92,19 @@
__field(u16, ssn) \
__field(u8, buf_size) \
__field(bool, amsdu) \
- __field(u16, timeout)
+ __field(u16, timeout) \
+ __field(u16, action)
#define AMPDU_ACTION_ASSIGN STA_NAMED_ASSIGN(params->sta); \
__entry->tid = params->tid; \
__entry->ssn = params->ssn; \
__entry->buf_size = params->buf_size; \
__entry->amsdu = params->amsdu; \
- __entry->timeout = params->timeout;
-#define AMPDU_ACTION_PR_FMT STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d"
+ __entry->timeout = params->timeout; \
+ __entry->action = params->action;
+#define AMPDU_ACTION_PR_FMT STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d action %d"
#define AMPDU_ACTION_PR_ARG STA_PR_ARG, __entry->tid, __entry->ssn, \
- __entry->buf_size, __entry->amsdu, __entry->timeout
+ __entry->buf_size, __entry->amsdu, __entry->timeout, \
+ __entry->action
/*
* Tracing for driver callbacks.
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 04b22f8982fe..b8dc41191835 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1340,9 +1340,16 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
local = container_of(fq, struct ieee80211_local, fq);
txqi = container_of(tin, struct txq_info, tin);
- cparams = &local->cparams;
cstats = &txqi->cstats;
+ if (txqi->txq.sta) {
+ struct sta_info *sta = container_of(txqi->txq.sta,
+ struct sta_info, sta);
+ cparams = &sta->cparams;
+ } else {
+ cparams = &local->cparams;
+ }
+
if (flow == &txqi->def_flow)
cvars = &txqi->def_cvars;
else
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ac9ac6c35594..de0f1cdb64d4 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2414,6 +2414,35 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
return pos + sizeof(struct ieee80211_ht_operation);
}
+void ieee80211_ie_build_wide_bw_cs(u8 *pos,
+ const struct cfg80211_chan_def *chandef)
+{
+ *pos++ = WLAN_EID_WIDE_BW_CHANNEL_SWITCH; /* EID */
+ *pos++ = 3; /* IE length */
+ /* New channel width */
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_80:
+ *pos++ = IEEE80211_VHT_CHANWIDTH_80MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ *pos++ = IEEE80211_VHT_CHANWIDTH_160MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_80P80:
+ *pos++ = IEEE80211_VHT_CHANWIDTH_80P80MHZ;
+ break;
+ default:
+ *pos++ = IEEE80211_VHT_CHANWIDTH_USE_HT;
+ }
+
+ /* new center frequency segment 0 */
+ *pos++ = ieee80211_frequency_to_channel(chandef->center_freq1);
+ /* new center frequency segment 1 */
+ if (chandef->center_freq2)
+ *pos++ = ieee80211_frequency_to_channel(chandef->center_freq2);
+ else
+ *pos++ = 0;
+}
+
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef)
{
@@ -2964,6 +2993,7 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
skb = dev_alloc_skb(local->tx_headroom + hdr_len +
5 + /* channel switch announcement element */
3 + /* secondary channel offset element */
+ 5 + /* wide bandwidth channel switch announcement */
8); /* mesh channel switch parameters element */
if (!skb)
return -ENOMEM;
@@ -3022,6 +3052,13 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
pos += 2;
}
+ if (csa_settings->chandef.width == NL80211_CHAN_WIDTH_80 ||
+ csa_settings->chandef.width == NL80211_CHAN_WIDTH_80P80 ||
+ csa_settings->chandef.width == NL80211_CHAN_WIDTH_160) {
+ skb_put(skb, 5);
+ ieee80211_ie_build_wide_bw_cs(pos, &csa_settings->chandef);
+ }
+
ieee80211_tx_skb(sdata, skb);
return 0;
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7b05fd1497ce..b51582d92740 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -684,6 +684,54 @@ errout:
return err;
}
+static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
+ u8 via_addr[], struct netlink_ext_ack *extack)
+{
+ struct rtvia *via = nla_data(nla);
+ int err = -EINVAL;
+ int alen;
+
+ if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Invalid attribute length for RTA_VIA");
+ goto errout;
+ }
+ alen = nla_len(nla) -
+ offsetof(struct rtvia, rtvia_addr);
+ if (alen > MAX_VIA_ALEN) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Invalid address length for RTA_VIA");
+ goto errout;
+ }
+
+ /* Validate the address family */
+ switch (via->rtvia_family) {
+ case AF_PACKET:
+ *via_table = NEIGH_LINK_TABLE;
+ break;
+ case AF_INET:
+ *via_table = NEIGH_ARP_TABLE;
+ if (alen != 4)
+ goto errout;
+ break;
+ case AF_INET6:
+ *via_table = NEIGH_ND_TABLE;
+ if (alen != 16)
+ goto errout;
+ break;
+ default:
+ /* Unsupported address family */
+ goto errout;
+ }
+
+ memcpy(via_addr, via->rtvia_addr, alen);
+ *via_alen = alen;
+ err = 0;
+
+errout:
+ return err;
+}
+
static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
struct mpls_route *rt)
{
@@ -695,8 +743,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
if (!nh)
return -ENOMEM;
- err = -EINVAL;
-
nh->nh_labels = cfg->rc_output_labels;
for (i = 0; i < nh->nh_labels; i++)
nh->nh_label[i] = cfg->rc_output_label[i];
@@ -720,7 +766,8 @@ errout:
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
struct mpls_nh *nh, int oif, struct nlattr *via,
- struct nlattr *newdst, u8 max_labels)
+ struct nlattr *newdst, u8 max_labels,
+ struct netlink_ext_ack *extack)
{
int err = -ENOMEM;
@@ -728,15 +775,15 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
goto errout;
if (newdst) {
- err = nla_get_labels(newdst, max_labels,
- &nh->nh_labels, nh->nh_label);
+ err = nla_get_labels(newdst, max_labels, &nh->nh_labels,
+ nh->nh_label, extack);
if (err)
goto errout;
}
if (via) {
err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
- __mpls_nh_via(rt, nh));
+ __mpls_nh_via(rt, nh), extack);
if (err)
goto errout;
} else {
@@ -782,7 +829,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
nla = nla_find(attrs, attrlen, RTA_NEWDST);
if (nla &&
- nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0)
+ nla_get_labels(nla, MAX_NEW_LABELS, &n_labels,
+ NULL, NULL) != 0)
return 0;
*max_labels = max_t(u8, *max_labels, n_labels);
@@ -802,7 +850,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
}
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
- struct mpls_route *rt, u8 max_labels)
+ struct mpls_route *rt, u8 max_labels,
+ struct netlink_ext_ack *extack)
{
struct rtnexthop *rtnh = cfg->rc_mp;
struct nlattr *nla_via, *nla_newdst;
@@ -836,7 +885,7 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
rtnh->rtnh_ifindex, nla_via, nla_newdst,
- max_labels);
+ max_labels, extack);
if (err)
goto errout;
@@ -855,7 +904,28 @@ errout:
return err;
}
-static int mpls_route_add(struct mpls_route_config *cfg)
+static bool mpls_label_ok(struct net *net, unsigned int index,
+ struct netlink_ext_ack *extack)
+{
+ /* Reserved labels may not be set */
+ if (index < MPLS_LABEL_FIRST_UNRESERVED) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
+ return false;
+ }
+
+ /* The full 20 bit range may not be supported. */
+ if (index >= net->mpls.platform_labels) {
+ NL_SET_ERR_MSG(extack,
+ "Label >= configured maximum in platform_labels");
+ return false;
+ }
+
+ return true;
+}
+
+static int mpls_route_add(struct mpls_route_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct mpls_route __rcu **platform_label;
struct net *net = cfg->rc_nlinfo.nl_net;
@@ -874,18 +944,15 @@ static int mpls_route_add(struct mpls_route_config *cfg)
index = find_free_label(net);
}
- /* Reserved labels may not be set */
- if (index < MPLS_LABEL_FIRST_UNRESERVED)
- goto errout;
-
- /* The full 20 bit range may not be supported. */
- if (index >= net->mpls.platform_labels)
+ if (!mpls_label_ok(net, index, extack))
goto errout;
/* Append makes no sense with mpls */
err = -EOPNOTSUPP;
- if (cfg->rc_nlflags & NLM_F_APPEND)
+ if (cfg->rc_nlflags & NLM_F_APPEND) {
+ NL_SET_ERR_MSG(extack, "MPLS does not support route append");
goto errout;
+ }
err = -EEXIST;
platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -912,8 +979,10 @@ static int mpls_route_add(struct mpls_route_config *cfg)
nhs = 1;
}
- if (nhs == 0)
+ if (nhs == 0) {
+ NL_SET_ERR_MSG(extack, "Route does not contain a nexthop");
goto errout;
+ }
err = -ENOMEM;
rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
@@ -927,7 +996,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
if (cfg->rc_mp)
- err = mpls_nh_build_multi(cfg, rt, max_labels);
+ err = mpls_nh_build_multi(cfg, rt, max_labels, extack);
else
err = mpls_nh_build_from_cfg(cfg, rt);
if (err)
@@ -943,7 +1012,8 @@ errout:
return err;
}
-static int mpls_route_del(struct mpls_route_config *cfg)
+static int mpls_route_del(struct mpls_route_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct net *net = cfg->rc_nlinfo.nl_net;
unsigned index;
@@ -951,12 +1021,7 @@ static int mpls_route_del(struct mpls_route_config *cfg)
index = cfg->rc_label;
- /* Reserved labels may not be removed */
- if (index < MPLS_LABEL_FIRST_UNRESERVED)
- goto errout;
-
- /* The full 20 bit range may not be supported */
- if (index >= net->mpls.platform_labels)
+ if (!mpls_label_ok(net, index, extack))
goto errout;
mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
@@ -1541,8 +1606,8 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
}
EXPORT_SYMBOL_GPL(nla_put_labels);
-int nla_get_labels(const struct nlattr *nla,
- u8 max_labels, u8 *labels, u32 label[])
+int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
+ u32 label[], struct netlink_ext_ack *extack)
{
unsigned len = nla_len(nla);
struct mpls_shim_hdr *nla_label;
@@ -1553,13 +1618,18 @@ int nla_get_labels(const struct nlattr *nla,
/* len needs to be an even multiple of 4 (the label size). Number
* of labels is a u8 so check for overflow.
*/
- if (len & 3 || len / 4 > 255)
+ if (len & 3 || len / 4 > 255) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Invalid length for labels attribute");
return -EINVAL;
+ }
/* Limit the number of new labels allowed */
nla_labels = len/4;
- if (nla_labels > max_labels)
+ if (nla_labels > max_labels) {
+ NL_SET_ERR_MSG(extack, "Too many labels");
return -EINVAL;
+ }
/* when label == NULL, caller wants number of labels */
if (!label)
@@ -1574,8 +1644,29 @@ int nla_get_labels(const struct nlattr *nla,
/* Ensure the bottom of stack flag is properly set
* and ttl and tc are both clear.
*/
- if ((dec.bos != bos) || dec.ttl || dec.tc)
+ if (dec.ttl) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "TTL in label must be 0");
+ return -EINVAL;
+ }
+
+ if (dec.tc) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Traffic class in label must be 0");
return -EINVAL;
+ }
+
+ if (dec.bos != bos) {
+ NL_SET_BAD_ATTR(extack, nla);
+ if (bos) {
+ NL_SET_ERR_MSG(extack,
+ "BOS bit must be set in first label");
+ } else {
+ NL_SET_ERR_MSG(extack,
+ "BOS bit can only be set in first label");
+ }
+ return -EINVAL;
+ }
switch (dec.label) {
case MPLS_LABEL_IMPLNULL:
@@ -1583,6 +1674,8 @@ int nla_get_labels(const struct nlattr *nla,
* assign and distribute, but which never
* actually appears in the encapsulation.
*/
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Implicit NULL Label (3) can not be used in encapsulation");
return -EINVAL;
}
@@ -1594,50 +1687,10 @@ out:
}
EXPORT_SYMBOL_GPL(nla_get_labels);
-int nla_get_via(const struct nlattr *nla, u8 *via_alen,
- u8 *via_table, u8 via_addr[])
-{
- struct rtvia *via = nla_data(nla);
- int err = -EINVAL;
- int alen;
-
- if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
- goto errout;
- alen = nla_len(nla) -
- offsetof(struct rtvia, rtvia_addr);
- if (alen > MAX_VIA_ALEN)
- goto errout;
-
- /* Validate the address family */
- switch (via->rtvia_family) {
- case AF_PACKET:
- *via_table = NEIGH_LINK_TABLE;
- break;
- case AF_INET:
- *via_table = NEIGH_ARP_TABLE;
- if (alen != 4)
- goto errout;
- break;
- case AF_INET6:
- *via_table = NEIGH_ND_TABLE;
- if (alen != 16)
- goto errout;
- break;
- default:
- /* Unsupported address family */
- goto errout;
- }
-
- memcpy(via_addr, via->rtvia_addr, alen);
- *via_alen = alen;
- err = 0;
-
-errout:
- return err;
-}
-
-static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct mpls_route_config *cfg)
+static int rtm_to_route_config(struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct mpls_route_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
@@ -1645,35 +1698,54 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy,
- NULL);
+ extack);
if (err < 0)
goto errout;
err = -EINVAL;
rtm = nlmsg_data(nlh);
- if (rtm->rtm_family != AF_MPLS)
+ if (rtm->rtm_family != AF_MPLS) {
+ NL_SET_ERR_MSG(extack, "Invalid address family in rtmsg");
goto errout;
- if (rtm->rtm_dst_len != 20)
+ }
+ if (rtm->rtm_dst_len != 20) {
+ NL_SET_ERR_MSG(extack, "rtm_dst_len must be 20 for MPLS");
goto errout;
- if (rtm->rtm_src_len != 0)
+ }
+ if (rtm->rtm_src_len != 0) {
+ NL_SET_ERR_MSG(extack, "rtm_src_len must be 0 for MPLS");
goto errout;
- if (rtm->rtm_tos != 0)
+ }
+ if (rtm->rtm_tos != 0) {
+ NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS");
goto errout;
- if (rtm->rtm_table != RT_TABLE_MAIN)
+ }
+ if (rtm->rtm_table != RT_TABLE_MAIN) {
+ NL_SET_ERR_MSG(extack,
+ "MPLS only supports the main route table");
goto errout;
+ }
/* Any value is acceptable for rtm_protocol */
/* As mpls uses destination specific addresses
* (or source specific address in the case of multicast)
* all addresses have universal scope.
*/
- if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
+ if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid route scope - MPLS only supports UNIVERSE");
goto errout;
- if (rtm->rtm_type != RTN_UNICAST)
+ }
+ if (rtm->rtm_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid route type - MPLS only supports UNICAST");
goto errout;
- if (rtm->rtm_flags != 0)
+ }
+ if (rtm->rtm_flags != 0) {
+ NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS");
goto errout;
+ }
cfg->rc_label = LABEL_NOT_SPECIFIED;
cfg->rc_protocol = rtm->rtm_protocol;
@@ -1696,26 +1768,26 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
case RTA_NEWDST:
if (nla_get_labels(nla, MAX_NEW_LABELS,
&cfg->rc_output_labels,
- cfg->rc_output_label))
+ cfg->rc_output_label, extack))
goto errout;
break;
case RTA_DST:
{
u8 label_count;
if (nla_get_labels(nla, 1, &label_count,
- &cfg->rc_label))
+ &cfg->rc_label, extack))
goto errout;
- /* Reserved labels may not be set */
- if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
+ if (!mpls_label_ok(cfg->rc_nlinfo.nl_net,
+ cfg->rc_label, extack))
goto errout;
-
break;
}
case RTA_VIA:
{
if (nla_get_via(nla, &cfg->rc_via_alen,
- &cfg->rc_via_table, cfg->rc_via))
+ &cfg->rc_via_table, cfg->rc_via,
+ extack))
goto errout;
break;
}
@@ -1729,14 +1801,18 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
{
u8 ttl_propagate = nla_get_u8(nla);
- if (ttl_propagate > 1)
+ if (ttl_propagate > 1) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "RTA_TTL_PROPAGATE can only be 0 or 1");
goto errout;
+ }
cfg->rc_ttl_propagate = ttl_propagate ?
MPLS_TTL_PROP_ENABLED :
MPLS_TTL_PROP_DISABLED;
break;
}
default:
+ NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute");
/* Unsupported attribute */
goto errout;
}
@@ -1757,11 +1833,11 @@ static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!cfg)
return -ENOMEM;
- err = rtm_to_route_config(skb, nlh, cfg);
+ err = rtm_to_route_config(skb, nlh, cfg, extack);
if (err < 0)
goto out;
- err = mpls_route_del(cfg);
+ err = mpls_route_del(cfg, extack);
out:
kfree(cfg);
@@ -1779,11 +1855,11 @@ static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!cfg)
return -ENOMEM;
- err = rtm_to_route_config(skb, nlh, cfg);
+ err = rtm_to_route_config(skb, nlh, cfg, extack);
if (err < 0)
goto out;
- err = mpls_route_add(cfg);
+ err = mpls_route_add(cfg, extack);
out:
kfree(cfg);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 4db6a5971322..cf65aec2e551 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -203,9 +203,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
const u32 label[]);
int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
- u32 label[]);
-int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
- u8 via[]);
+ u32 label[], struct netlink_ext_ack *extack);
bool mpls_output_possible(const struct net_device *dev);
unsigned int mpls_dev_mtu(const struct net_device *dev);
bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 369c7a23c86c..6e558a419f60 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -159,7 +159,8 @@ drop:
static int mpls_build_state(struct nlattr *nla,
unsigned int family, const void *cfg,
- struct lwtunnel_state **ts)
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
{
struct mpls_iptunnel_encap *tun_encap_info;
struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
@@ -168,17 +169,18 @@ static int mpls_build_state(struct nlattr *nla,
int ret;
ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
- mpls_iptunnel_policy, NULL);
+ mpls_iptunnel_policy, extack);
if (ret < 0)
return ret;
- if (!tb[MPLS_IPTUNNEL_DST])
+ if (!tb[MPLS_IPTUNNEL_DST]) {
+ NL_SET_ERR_MSG(extack, "MPLS_IPTUNNEL_DST attribute is missing");
return -EINVAL;
-
+ }
/* determine number of labels */
- if (nla_get_labels(tb[MPLS_IPTUNNEL_DST],
- MAX_NEW_LABELS, &n_labels, NULL))
+ if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+ &n_labels, NULL, extack))
return -EINVAL;
newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
@@ -188,7 +190,8 @@ static int mpls_build_state(struct nlattr *nla,
tun_encap_info = mpls_lwtunnel_encap(newts);
ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
- &tun_encap_info->labels, tun_encap_info->label);
+ &tun_encap_info->labels, tun_encap_info->label,
+ extack);
if (ret)
goto errout;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index a504e87c6ddf..49bd8bb16b18 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -152,7 +152,7 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
struct synproxy_options *opts)
{
opts->tsecr = opts->tsval;
- opts->tsval = tcp_time_stamp & ~0x3f;
+ opts->tsval = tcp_time_stamp_raw() & ~0x3f;
if (opts->options & XT_SYNPROXY_OPT_WSCALE) {
opts->tsval |= opts->wscale;
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 54e40fa47822..d3e594eb36d0 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -48,7 +48,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto,
return rc;
}
-static struct net_proto_family nfc_sock_family_ops = {
+static const struct net_proto_family nfc_sock_family_ops = {
.owner = THIS_MODULE,
.family = PF_NFC,
.create = nfc_sock_create,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7b17da9a94a0..9ddc9f8412a2 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -453,7 +453,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
/* Complete checksum if needed */
if (skb->ip_summed == CHECKSUM_PARTIAL &&
- (err = skb_checksum_help(skb)))
+ (err = skb_csum_hwoffload_help(skb, 0)))
goto out;
/* Older versions of OVS user space enforce alignment of the last
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e3eeed19cc7a..f9349a495caf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -188,7 +188,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
struct packet_sock;
-static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev);
@@ -196,8 +195,7 @@ static void *packet_previous_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
-static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
- struct tpacket_block_desc *);
+static int prb_curr_blk_in_use(struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
@@ -721,7 +719,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
/* Case 1. Queue was frozen because user-space was
* lagging behind.
*/
- if (prb_curr_blk_in_use(pkc, pbd)) {
+ if (prb_curr_blk_in_use(pbd)) {
/*
* Ok, user-space is still behind.
* So just refresh the timer.
@@ -972,8 +970,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
}
}
-static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
- struct tpacket_block_desc *pbd)
+static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
{
return TP_STATUS_USER & BLOCK_STATUS(pbd);
}
@@ -1064,7 +1061,7 @@ static void *__packet_lookup_frame_in_block(struct packet_sock *po,
* Check if that last block which caused the queue to freeze,
* is still in_use by user-space.
*/
- if (prb_curr_blk_in_use(pkc, pbd)) {
+ if (prb_curr_blk_in_use(pbd)) {
/* Can't record this packet */
return NULL;
} else {
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index a9a8c7d5a4a9..c7a5d861906b 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -111,6 +111,9 @@ struct qrtr_node {
struct list_head item;
};
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb);
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb);
+
/* Release node resources and free the node.
*
* Do not call directly, use qrtr_node_release. To be used with
@@ -245,14 +248,11 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
}
EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
-/* Allocate and construct a resume-tx packet. */
-static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
- u32 dst_node, u32 port)
+static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
+ u32 src_node, u32 dst_node)
{
- const int pkt_len = 20;
struct qrtr_hdr *hdr;
struct sk_buff *skb;
- __le32 *buf;
skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
if (!skb)
@@ -261,7 +261,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE);
hdr->version = cpu_to_le32(QRTR_PROTO_VER);
- hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+ hdr->type = cpu_to_le32(type);
hdr->src_node_id = cpu_to_le32(src_node);
hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
hdr->confirm_rx = cpu_to_le32(0);
@@ -269,6 +269,22 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
hdr->dst_node_id = cpu_to_le32(dst_node);
hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+ return skb;
+}
+
+/* Allocate and construct a resume-tx packet. */
+static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
+ u32 dst_node, u32 port)
+{
+ const int pkt_len = 20;
+ struct sk_buff *skb;
+ __le32 *buf;
+
+ skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len,
+ src_node, dst_node);
+ if (!skb)
+ return NULL;
+
buf = (__le32 *)skb_put(skb, pkt_len);
memset(buf, 0, pkt_len);
buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
@@ -278,6 +294,45 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
return skb;
}
+/* Allocate and construct a BYE message to signal remote termination */
+static struct sk_buff *qrtr_alloc_local_bye(u32 src_node)
+{
+ const int pkt_len = 20;
+ struct sk_buff *skb;
+ __le32 *buf;
+
+ skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len,
+ src_node, qrtr_local_nid);
+ if (!skb)
+ return NULL;
+
+ buf = (__le32 *)skb_put(skb, pkt_len);
+ memset(buf, 0, pkt_len);
+ buf[0] = cpu_to_le32(QRTR_TYPE_BYE);
+
+ return skb;
+}
+
+static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq)
+{
+ const int pkt_len = 20;
+ struct sk_buff *skb;
+ __le32 *buf;
+
+ skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len,
+ sq->sq_node, QRTR_NODE_BCAST);
+ if (!skb)
+ return NULL;
+
+ buf = (__le32 *)skb_put(skb, pkt_len);
+ memset(buf, 0, pkt_len);
+ buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
+ buf[1] = cpu_to_le32(sq->sq_node);
+ buf[2] = cpu_to_le32(sq->sq_port);
+
+ return skb;
+}
+
static struct qrtr_sock *qrtr_port_lookup(int port);
static void qrtr_port_put(struct qrtr_sock *ipc);
@@ -369,11 +424,17 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
{
struct qrtr_node *node = ep->node;
+ struct sk_buff *skb;
mutex_lock(&node->ep_lock);
node->ep = NULL;
mutex_unlock(&node->ep_lock);
+ /* Notify the local controller about the event */
+ skb = qrtr_alloc_local_bye(node->nid);
+ if (skb)
+ qrtr_local_enqueue(NULL, skb);
+
qrtr_node_release(node);
ep->node = NULL;
}
@@ -408,8 +469,15 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
/* Remove port assignment. */
static void qrtr_port_remove(struct qrtr_sock *ipc)
{
+ struct sk_buff *skb;
int port = ipc->us.sq_port;
+ skb = qrtr_alloc_del_client(&ipc->us);
+ if (skb) {
+ skb_set_owner_w(skb, &ipc->sk);
+ qrtr_bcast_enqueue(NULL, skb);
+ }
+
if (port == QRTR_PORT_CTRL)
port = 0;
@@ -462,6 +530,26 @@ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
return 0;
}
+/* Reset all non-control ports */
+static void qrtr_reset_ports(void)
+{
+ struct qrtr_sock *ipc;
+ int id;
+
+ mutex_lock(&qrtr_port_lock);
+ idr_for_each_entry(&qrtr_ports, ipc, id) {
+ /* Don't reset control port */
+ if (id == 0)
+ continue;
+
+ sock_hold(&ipc->sk);
+ ipc->sk.sk_err = ENETRESET;
+ wake_up_interruptible(sk_sleep(&ipc->sk));
+ sock_put(&ipc->sk);
+ }
+ mutex_unlock(&qrtr_port_lock);
+}
+
/* Bind socket to address.
*
* Socket should be locked upon call.
@@ -490,6 +578,10 @@ static int __qrtr_bind(struct socket *sock,
sock_reset_flag(sk, SOCK_ZAPPED);
+ /* Notify all open ports about the new controller */
+ if (port == QRTR_PORT_CTRL)
+ qrtr_reset_ports();
+
return 0;
}
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 76c01cbd56e3..41bd496531d4 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -81,8 +81,7 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
rfkill->type = (unsigned)id->driver_data;
- return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev),
- acpi_rfkill_default_gpios);
+ return devm_acpi_dev_add_driver_gpios(dev, acpi_rfkill_default_gpios);
}
static int rfkill_gpio_probe(struct platform_device *pdev)
@@ -154,8 +153,6 @@ static int rfkill_gpio_remove(struct platform_device *pdev)
rfkill_unregister(rfkill->rfkill_dev);
rfkill_destroy(rfkill->rfkill_dev);
- acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev));
-
return 0;
}
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index b9da4d6b914f..9c68d2f8ba39 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -19,6 +19,7 @@ rxrpc-y := \
local_event.o \
local_object.o \
misc.o \
+ net_ns.o \
output.o \
peer_event.o \
peer_object.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7fb59c3f1542..58ae0db52ea1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -38,9 +38,6 @@ MODULE_PARM_DESC(debug, "RxRPC debugging mask");
static struct proto rxrpc_proto;
static const struct proto_ops rxrpc_rpc_ops;
-/* local epoch for detecting local-end reset */
-u32 rxrpc_epoch;
-
/* current debugging ID */
atomic_t rxrpc_debug_id;
@@ -134,9 +131,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
- struct sock *sk = sock->sk;
struct rxrpc_local *local;
- struct rxrpc_sock *rx = rxrpc_sk(sk);
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
u16 service_id = srx->srx_service;
int ret;
@@ -148,31 +144,48 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
lock_sock(&rx->sk);
- if (rx->sk.sk_state != RXRPC_UNBOUND) {
- ret = -EINVAL;
- goto error_unlock;
- }
-
- memcpy(&rx->srx, srx, sizeof(rx->srx));
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNBOUND:
+ rx->srx = *srx;
+ local = rxrpc_lookup_local(sock_net(&rx->sk), &rx->srx);
+ if (IS_ERR(local)) {
+ ret = PTR_ERR(local);
+ goto error_unlock;
+ }
- local = rxrpc_lookup_local(&rx->srx);
- if (IS_ERR(local)) {
- ret = PTR_ERR(local);
- goto error_unlock;
- }
+ if (service_id) {
+ write_lock(&local->services_lock);
+ if (rcu_access_pointer(local->service))
+ goto service_in_use;
+ rx->local = local;
+ rcu_assign_pointer(local->service, rx);
+ write_unlock(&local->services_lock);
+
+ rx->sk.sk_state = RXRPC_SERVER_BOUND;
+ } else {
+ rx->local = local;
+ rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ }
+ break;
- if (service_id) {
- write_lock(&local->services_lock);
- if (rcu_access_pointer(local->service))
- goto service_in_use;
- rx->local = local;
- rcu_assign_pointer(local->service, rx);
- write_unlock(&local->services_lock);
+ case RXRPC_SERVER_BOUND:
+ ret = -EINVAL;
+ if (service_id == 0)
+ goto error_unlock;
+ ret = -EADDRINUSE;
+ if (service_id == rx->srx.srx_service)
+ goto error_unlock;
+ ret = -EINVAL;
+ srx->srx_service = rx->srx.srx_service;
+ if (memcmp(srx, &rx->srx, sizeof(*srx)) != 0)
+ goto error_unlock;
+ rx->second_service = service_id;
+ rx->sk.sk_state = RXRPC_SERVER_BOUND2;
+ break;
- rx->sk.sk_state = RXRPC_SERVER_BOUND;
- } else {
- rx->local = local;
- rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ default:
+ ret = -EINVAL;
+ goto error_unlock;
}
release_sock(&rx->sk);
@@ -209,6 +222,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
ret = -EADDRNOTAVAIL;
break;
case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_BOUND2:
ASSERT(rx->local != NULL);
max = READ_ONCE(rxrpc_max_backlog);
ret = -EINVAL;
@@ -248,6 +262,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @srx: The address of the peer to contact
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
+ * @tx_total_len: Total length of data to transmit during the call (or -1)
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
*
@@ -262,6 +277,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct sockaddr_rxrpc *srx,
struct key *key,
unsigned long user_call_ID,
+ s64 tx_total_len,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx)
{
@@ -289,7 +305,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.security_level = 0;
cp.exclusive = false;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
+ call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
+ gfp);
/* The socket has been unlocked. */
if (!IS_ERR(call))
call->notify_rx = notify_rx;
@@ -434,7 +451,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
ret = -EAFNOSUPPORT;
goto error_unlock;
}
- local = rxrpc_lookup_local(&rx->srx);
+ local = rxrpc_lookup_local(sock_net(sock->sk), &rx->srx);
if (IS_ERR(local)) {
ret = PTR_ERR(local);
goto error_unlock;
@@ -476,6 +493,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
{
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
unsigned int min_sec_level;
+ u16 service_upgrade[2];
int ret;
_enter(",%d,%d,,%d", level, optname, optlen);
@@ -532,6 +550,28 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
rx->min_sec_level = min_sec_level;
goto success;
+ case RXRPC_UPGRADEABLE_SERVICE:
+ ret = -EINVAL;
+ if (optlen != sizeof(service_upgrade) ||
+ rx->service_upgrade.from != 0)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_SERVER_BOUND2)
+ goto error;
+ ret = -EFAULT;
+ if (copy_from_user(service_upgrade, optval,
+ sizeof(service_upgrade)) != 0)
+ goto error;
+ ret = -EINVAL;
+ if ((service_upgrade[0] != rx->srx.srx_service ||
+ service_upgrade[1] != rx->second_service) &&
+ (service_upgrade[0] != rx->second_service ||
+ service_upgrade[1] != rx->srx.srx_service))
+ goto error;
+ rx->service_upgrade.from = service_upgrade[0];
+ rx->service_upgrade.to = service_upgrade[1];
+ goto success;
+
default:
break;
}
@@ -545,6 +585,34 @@ error:
}
/*
+ * Get socket options.
+ */
+static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *_optlen)
+{
+ int optlen;
+
+ if (level != SOL_RXRPC)
+ return -EOPNOTSUPP;
+
+ if (get_user(optlen, _optlen))
+ return -EFAULT;
+
+ switch (optname) {
+ case RXRPC_SUPPORTED_CMSG:
+ if (optlen < sizeof(int))
+ return -ETOOSMALL;
+ if (put_user(RXRPC__SUPPORTED - 1, (int __user *)optval) ||
+ put_user(sizeof(int), _optlen))
+ return -EFAULT;
+ return 0;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/*
* permit an RxRPC socket to be polled
*/
static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
@@ -582,9 +650,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
_enter("%p,%d", sock, protocol);
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
/* we support transport protocol UDP/UDP6 only */
if (protocol != PF_INET &&
IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6)
@@ -750,7 +815,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
.listen = rxrpc_listen,
.shutdown = rxrpc_shutdown,
.setsockopt = rxrpc_setsockopt,
- .getsockopt = sock_no_getsockopt,
+ .getsockopt = rxrpc_getsockopt,
.sendmsg = rxrpc_sendmsg,
.recvmsg = rxrpc_recvmsg,
.mmap = sock_no_mmap,
@@ -780,8 +845,6 @@ static int __init af_rxrpc_init(void)
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
- get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
- rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
get_random_bytes(&tmp, sizeof(tmp));
tmp &= 0x3fffffff;
if (tmp == 0)
@@ -809,6 +872,10 @@ static int __init af_rxrpc_init(void)
goto error_security;
}
+ ret = register_pernet_subsys(&rxrpc_net_ops);
+ if (ret)
+ goto error_pernet;
+
ret = proto_register(&rxrpc_proto, 1);
if (ret < 0) {
pr_crit("Cannot register protocol\n");
@@ -839,11 +906,6 @@ static int __init af_rxrpc_init(void)
goto error_sysctls;
}
-#ifdef CONFIG_PROC_FS
- proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops);
- proc_create("rxrpc_conns", 0, init_net.proc_net,
- &rxrpc_connection_seq_fops);
-#endif
return 0;
error_sysctls:
@@ -855,6 +917,8 @@ error_key_type:
error_sock:
proto_unregister(&rxrpc_proto);
error_proto:
+ unregister_pernet_subsys(&rxrpc_net_ops);
+error_pernet:
rxrpc_exit_security();
error_security:
destroy_workqueue(rxrpc_workqueue);
@@ -875,14 +939,16 @@ static void __exit af_rxrpc_exit(void)
unregister_key_type(&key_type_rxrpc);
sock_unregister(PF_RXRPC);
proto_unregister(&rxrpc_proto);
- rxrpc_destroy_all_calls();
- rxrpc_destroy_all_connections();
+ unregister_pernet_subsys(&rxrpc_net_ops);
ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0);
ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
- rxrpc_destroy_all_locals();
- remove_proc_entry("rxrpc_conns", init_net.proc_net);
- remove_proc_entry("rxrpc_calls", init_net.proc_net);
+ /* Make sure the local and peer records pinned by any dying connections
+ * are released.
+ */
+ rcu_barrier();
+ rxrpc_destroy_client_conn_ids();
+
destroy_workqueue(rxrpc_workqueue);
rxrpc_exit_security();
kmem_cache_destroy(rxrpc_call_jar);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7486926e60a8..69b97339ff9d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -11,6 +11,8 @@
#include <linux/atomic.h>
#include <linux/seqlock.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <rxrpc/packet.h>
@@ -59,12 +61,44 @@ enum {
RXRPC_CLIENT_UNBOUND, /* Unbound socket used as client */
RXRPC_CLIENT_BOUND, /* client local address bound */
RXRPC_SERVER_BOUND, /* server local address bound */
+ RXRPC_SERVER_BOUND2, /* second server local address bound */
RXRPC_SERVER_LISTENING, /* server listening for connections */
RXRPC_SERVER_LISTEN_DISABLED, /* server listening disabled */
RXRPC_CLOSE, /* socket is being closed */
};
/*
+ * Per-network namespace data.
+ */
+struct rxrpc_net {
+ struct proc_dir_entry *proc_net; /* Subdir in /proc/net */
+ u32 epoch; /* Local epoch for detecting local-end reset */
+ struct list_head calls; /* List of calls active in this namespace */
+ rwlock_t call_lock; /* Lock for ->calls */
+
+ struct list_head conn_proc_list; /* List of conns in this namespace for proc */
+ struct list_head service_conns; /* Service conns in this namespace */
+ rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
+ struct delayed_work service_conn_reaper;
+
+ unsigned int nr_client_conns;
+ unsigned int nr_active_client_conns;
+ bool kill_all_client_conns;
+ spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
+ spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */
+ struct list_head waiting_client_conns;
+ struct list_head active_client_conns;
+ struct list_head idle_client_conns;
+ struct delayed_work client_conn_reaper;
+
+ struct list_head local_endpoints;
+ struct mutex local_mutex; /* Lock for ->local_endpoints */
+
+ spinlock_t peer_hash_lock; /* Lock for ->peer_hash */
+ DECLARE_HASHTABLE (peer_hash, 10);
+};
+
+/*
* Service backlog preallocation.
*
* This contains circular buffers of preallocated peers, connections and calls
@@ -109,8 +143,14 @@ struct rxrpc_sock {
u32 min_sec_level; /* minimum security level */
#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
bool exclusive; /* Exclusive connection for a client socket */
+ u16 second_service; /* Additional service bound to the endpoint */
+ struct {
+ /* Service upgrade information */
+ u16 from; /* Service ID to upgrade (if not 0) */
+ u16 to; /* service ID to upgrade to */
+ } service_upgrade;
sa_family_t family; /* Protocol family created with */
- struct sockaddr_rxrpc srx; /* local address */
+ struct sockaddr_rxrpc srx; /* Primary Service/local addresses */
struct sockaddr_rxrpc connect_srx; /* Default client address from connect() */
};
@@ -211,6 +251,7 @@ struct rxrpc_security {
struct rxrpc_local {
struct rcu_head rcu;
atomic_t usage;
+ struct rxrpc_net *rxnet; /* The network ns in which this resides */
struct list_head link;
struct socket *socket; /* my UDP socket */
struct work_struct processor;
@@ -259,6 +300,8 @@ struct rxrpc_peer {
u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */
u8 rtt_cursor; /* next entry at which to insert */
u8 rtt_usage; /* amount of cache actually used */
+
+ u8 cong_cwnd; /* Congestion window size */
};
/*
@@ -279,6 +322,7 @@ struct rxrpc_conn_parameters {
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
u16 service_id; /* Service ID for this connection */
u32 security_level; /* Security level selected */
};
@@ -293,6 +337,7 @@ enum rxrpc_conn_flag {
RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */
RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */
+ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
};
/*
@@ -309,6 +354,7 @@ enum rxrpc_conn_cache_state {
RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */
RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */
RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */
+ RXRPC_CONN_CLIENT_UPGRADE, /* Conn is on active list, probing for upgrade */
RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */
RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */
RXRPC_CONN__NR_CACHE_STATES
@@ -352,7 +398,6 @@ struct rxrpc_connection {
u32 call_counter; /* Call ID counter */
u32 last_call; /* ID of last call */
u8 last_type; /* Type of last packet */
- u16 last_service_id;
union {
u32 last_seq;
u32 last_abort;
@@ -383,6 +428,7 @@ struct rxrpc_connection {
atomic_t serial; /* packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 security_nonce; /* response re-use preventer */
+ u16 service_id; /* Service ID, possibly upgraded */
u8 size_align; /* data size alignment (for security) */
u8 security_size; /* security header size */
u8 security_ix; /* security type */
@@ -484,6 +530,7 @@ struct rxrpc_call {
struct rb_node sock_node; /* Node in rx->calls */
struct sk_buff *tx_pending; /* Tx socket buffer being filled */
wait_queue_head_t waitq; /* Wait queue for channel or Tx */
+ s64 tx_total_len; /* Total length left to be transmitted (or -1) */
__be32 crypto_buf[2]; /* Temporary packet crypto buffer */
unsigned long user_call_ID; /* user-defined call ID */
unsigned long flags;
@@ -601,7 +648,6 @@ struct rxrpc_ack_summary {
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
-extern u32 rxrpc_epoch;
extern atomic_t rxrpc_debug_id;
extern struct workqueue_struct *rxrpc_workqueue;
@@ -634,15 +680,13 @@ extern const char *const rxrpc_call_states[];
extern const char *const rxrpc_call_completions[];
extern unsigned int rxrpc_max_call_lifetime;
extern struct kmem_cache *rxrpc_call_jar;
-extern struct list_head rxrpc_calls;
-extern rwlock_t rxrpc_call_lock;
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
struct rxrpc_call *rxrpc_alloc_call(gfp_t);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *,
- unsigned long, gfp_t);
+ unsigned long, s64, gfp_t);
void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
@@ -653,7 +697,7 @@ void rxrpc_see_call(struct rxrpc_call *);
void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_cleanup_call(struct rxrpc_call *);
-void __exit rxrpc_destroy_all_calls(void);
+void rxrpc_destroy_all_calls(struct rxrpc_net *);
static inline bool rxrpc_is_service_call(const struct rxrpc_call *call)
{
@@ -773,7 +817,8 @@ int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_call *);
void rxrpc_put_client_conn(struct rxrpc_connection *);
-void __exit rxrpc_destroy_all_client_connections(void);
+void rxrpc_discard_expired_client_conns(struct work_struct *);
+void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
/*
* conn_event.c
@@ -784,9 +829,6 @@ void rxrpc_process_connection(struct work_struct *);
* conn_object.c
*/
extern unsigned int rxrpc_connection_expiry;
-extern struct list_head rxrpc_connections;
-extern struct list_head rxrpc_connection_proc_list;
-extern rwlock_t rxrpc_connection_lock;
int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
@@ -800,7 +842,8 @@ void rxrpc_see_connection(struct rxrpc_connection *);
void rxrpc_get_connection(struct rxrpc_connection *);
struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *);
void rxrpc_put_service_conn(struct rxrpc_connection *);
-void __exit rxrpc_destroy_all_connections(void);
+void rxrpc_service_connection_reaper(struct work_struct *);
+void rxrpc_destroy_all_connections(struct rxrpc_net *);
static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
{
@@ -828,8 +871,9 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
*/
struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
struct sk_buff *);
-struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t);
-void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *);
+struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t);
+void rxrpc_new_incoming_connection(struct rxrpc_sock *,
+ struct rxrpc_connection *, struct sk_buff *);
void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
/*
@@ -861,9 +905,9 @@ extern void rxrpc_process_local_events(struct rxrpc_local *);
/*
* local_object.c
*/
-struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *);
+struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
void __rxrpc_put_local(struct rxrpc_local *);
-void __exit rxrpc_destroy_all_locals(void);
+void rxrpc_destroy_all_locals(struct rxrpc_net *);
static inline void rxrpc_get_local(struct rxrpc_local *local)
{
@@ -902,6 +946,17 @@ extern unsigned int rxrpc_resend_timeout;
extern const s8 rxrpc_ack_priority[];
/*
+ * net_ns.c
+ */
+extern unsigned int rxrpc_net_id;
+extern struct pernet_operations rxrpc_net_ops;
+
+static inline struct rxrpc_net *rxrpc_net(struct net *net)
+{
+ return net_generic(net, rxrpc_net_id);
+}
+
+/*
* output.c
*/
int rxrpc_send_ack_packet(struct rxrpc_call *, bool);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 1752fcf8e8f1..dd30d74824b0 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -38,6 +38,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
{
const void *here = __builtin_return_address(0);
struct rxrpc_call *call;
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
int max, tmp;
unsigned int size = RXRPC_BACKLOG_MAX;
unsigned int head, tail, call_head, call_tail;
@@ -79,7 +80,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
if (CIRC_CNT(head, tail, size) < max) {
struct rxrpc_connection *conn;
- conn = rxrpc_prealloc_service_connection(gfp);
+ conn = rxrpc_prealloc_service_connection(rxnet, gfp);
if (!conn)
return -ENOMEM;
b->conn_backlog[head] = conn;
@@ -136,9 +137,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
- write_lock(&rxrpc_call_lock);
- list_add_tail(&call->link, &rxrpc_calls);
- write_unlock(&rxrpc_call_lock);
+ write_lock(&rxnet->call_lock);
+ list_add_tail(&call->link, &rxnet->calls);
+ write_unlock(&rxnet->call_lock);
b->call_backlog[call_head] = call;
smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
@@ -185,6 +186,7 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
{
struct rxrpc_backlog *b = rx->backlog;
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
unsigned int size = RXRPC_BACKLOG_MAX, head, tail;
if (!b)
@@ -209,10 +211,10 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->conn_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_connection *conn = b->conn_backlog[tail];
- write_lock(&rxrpc_connection_lock);
+ write_lock(&rxnet->conn_lock);
list_del(&conn->link);
list_del(&conn->proc_link);
- write_unlock(&rxrpc_connection_lock);
+ write_unlock(&rxnet->conn_lock);
kfree(conn);
tail = (tail + 1) & (size - 1);
}
@@ -294,7 +296,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
conn->params.local = local;
conn->params.peer = peer;
rxrpc_see_connection(conn);
- rxrpc_new_incoming_connection(conn, skb);
+ rxrpc_new_incoming_connection(rx, conn, skb);
} else {
rxrpc_get_connection(conn);
}
@@ -308,6 +310,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
rxrpc_see_call(call);
call->conn = conn;
call->peer = rxrpc_get_peer(conn->params.peer);
+ call->cong_cwnd = call->peer->cong_cwnd;
return call;
}
@@ -339,7 +342,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
/* Get the socket providing the service */
rx = rcu_dereference(local->service);
- if (rx && service_id == rx->srx.srx_service)
+ if (rx && (service_id == rx->srx.srx_service ||
+ service_id == rx->second_service))
goto found_service;
trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 47f7f4205653..d7809a0620b4 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -44,8 +44,6 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
};
struct kmem_cache *rxrpc_call_jar;
-LIST_HEAD(rxrpc_calls);
-DEFINE_RWLOCK(rxrpc_call_lock);
static void rxrpc_call_timer_expired(unsigned long _call)
{
@@ -129,6 +127,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
rwlock_init(&call->state_lock);
atomic_set(&call->usage, 1);
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ call->tx_total_len = -1;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -137,12 +136,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
call->tx_winsize = 16;
call->rx_expect_next = 1;
- if (RXRPC_TX_SMSS > 2190)
- call->cong_cwnd = 2;
- else if (RXRPC_TX_SMSS > 1095)
- call->cong_cwnd = 3;
- else
- call->cong_cwnd = 4;
+ call->cong_cwnd = 2;
call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
return call;
@@ -203,10 +197,12 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
struct sockaddr_rxrpc *srx,
unsigned long user_call_ID,
+ s64 tx_total_len,
gfp_t gfp)
__releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_call *call, *xcall;
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
struct rb_node *parent, **pp;
const void *here = __builtin_return_address(0);
int ret;
@@ -220,6 +216,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return call;
}
+ call->tx_total_len = tx_total_len;
trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
here, (const void *)user_call_ID);
@@ -255,9 +252,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
- write_lock(&rxrpc_call_lock);
- list_add_tail(&call->link, &rxrpc_calls);
- write_unlock(&rxrpc_call_lock);
+ write_lock(&rxnet->call_lock);
+ list_add_tail(&call->link, &rxnet->calls);
+ write_unlock(&rxnet->call_lock);
/* From this point on, the call is protected by its own lock. */
release_sock(&rx->sk);
@@ -508,6 +505,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
*/
void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
+ struct rxrpc_net *rxnet;
const void *here = __builtin_return_address(0);
int n;
@@ -520,9 +518,12 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
_debug("call %d dead", call->debug_id);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
- write_lock(&rxrpc_call_lock);
- list_del_init(&call->link);
- write_unlock(&rxrpc_call_lock);
+ if (!list_empty(&call->link)) {
+ rxnet = rxrpc_net(sock_net(&call->socket->sk));
+ write_lock(&rxnet->call_lock);
+ list_del_init(&call->link);
+ write_unlock(&rxnet->call_lock);
+ }
rxrpc_cleanup_call(call);
}
@@ -570,21 +571,23 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
}
/*
- * Make sure that all calls are gone.
+ * Make sure that all calls are gone from a network namespace. To reach this
+ * point, any open UDP sockets in that namespace must have been closed, so any
+ * outstanding calls cannot be doing I/O.
*/
-void __exit rxrpc_destroy_all_calls(void)
+void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
{
struct rxrpc_call *call;
_enter("");
- if (list_empty(&rxrpc_calls))
+ if (list_empty(&rxnet->calls))
return;
- write_lock(&rxrpc_call_lock);
+ write_lock(&rxnet->call_lock);
- while (!list_empty(&rxrpc_calls)) {
- call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
+ while (!list_empty(&rxnet->calls)) {
+ call = list_entry(rxnet->calls.next, struct rxrpc_call, link);
_debug("Zapping call %p", call);
rxrpc_see_call(call);
@@ -595,10 +598,10 @@ void __exit rxrpc_destroy_all_calls(void)
rxrpc_call_states[call->state],
call->flags, call->events);
- write_unlock(&rxrpc_call_lock);
+ write_unlock(&rxnet->call_lock);
cond_resched();
- write_lock(&rxrpc_call_lock);
+ write_lock(&rxnet->call_lock);
}
- write_unlock(&rxrpc_call_lock);
+ write_unlock(&rxnet->call_lock);
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index e8dea0d49e7f..eb2157680399 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -31,22 +31,25 @@
* may freely grant available channels to new calls and calls may be
* waiting on it for channels to become available.
*
- * The connection is on the rxrpc_active_client_conns list which is kept
+ * The connection is on the rxnet->active_client_conns list which is kept
* in activation order for culling purposes.
*
* rxrpc_nr_active_client_conns is held incremented also.
*
- * (4) CULLED - The connection got summarily culled to try and free up
+ * (4) UPGRADE - As for ACTIVE, but only one call may be in progress and is
+ * being used to probe for service upgrade.
+ *
+ * (5) CULLED - The connection got summarily culled to try and free up
* capacity. Calls currently in progress on the connection are allowed to
* continue, but new calls will have to wait. There can be no waiters in
* this state - the conn would have to go to the WAITING state instead.
*
- * (5) IDLE - The connection has no calls in progress upon it and must have
+ * (6) IDLE - The connection has no calls in progress upon it and must have
* been exposed to the world (ie. the EXPOSED flag must be set). When it
* expires, the EXPOSED flag is cleared and the connection transitions to
* the INACTIVE state.
*
- * The connection is on the rxrpc_idle_client_conns list which is kept in
+ * The connection is on the rxnet->idle_client_conns list which is kept in
* order of how soon they'll expire.
*
* There are flags of relevance to the cache:
@@ -85,27 +88,13 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900;
__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
-static unsigned int rxrpc_nr_client_conns;
-static unsigned int rxrpc_nr_active_client_conns;
-static __read_mostly bool rxrpc_kill_all_client_conns;
-
-static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock);
-static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex);
-static LIST_HEAD(rxrpc_waiting_client_conns);
-static LIST_HEAD(rxrpc_active_client_conns);
-static LIST_HEAD(rxrpc_idle_client_conns);
-
/*
* We use machine-unique IDs for our client connections.
*/
DEFINE_IDR(rxrpc_client_conn_ids);
static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
-static void rxrpc_cull_active_client_conns(void);
-static void rxrpc_discard_expired_client_conns(struct work_struct *);
-
-static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
- rxrpc_discard_expired_client_conns);
+static void rxrpc_cull_active_client_conns(struct rxrpc_net *);
/*
* Get a connection ID and epoch for a client connection from the global pool.
@@ -116,6 +105,7 @@ static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
gfp_t gfp)
{
+ struct rxrpc_net *rxnet = conn->params.local->rxnet;
int id;
_enter("");
@@ -131,7 +121,7 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
spin_unlock(&rxrpc_conn_id_lock);
idr_preload_end();
- conn->proto.epoch = rxrpc_epoch;
+ conn->proto.epoch = rxnet->epoch;
conn->proto.cid = id << RXRPC_CIDSHIFT;
set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
_leave(" [CID %x]", conn->proto.cid);
@@ -183,6 +173,7 @@ static struct rxrpc_connection *
rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
{
struct rxrpc_connection *conn;
+ struct rxrpc_net *rxnet = cp->local->rxnet;
int ret;
_enter("");
@@ -196,10 +187,13 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
atomic_set(&conn->usage, 1);
if (cp->exclusive)
__set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ if (cp->upgrade)
+ __set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
conn->params = *cp;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
conn->state = RXRPC_CONN_CLIENT;
+ conn->service_id = cp->service_id;
ret = rxrpc_get_client_connection_id(conn, gfp);
if (ret < 0)
@@ -213,9 +207,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
if (ret < 0)
goto error_2;
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
- write_unlock(&rxrpc_connection_lock);
+ write_lock(&rxnet->conn_lock);
+ list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
+ write_unlock(&rxnet->conn_lock);
/* We steal the caller's peer ref. */
cp->peer = NULL;
@@ -243,12 +237,13 @@ error_0:
*/
static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
{
+ struct rxrpc_net *rxnet = conn->params.local->rxnet;
int id_cursor, id, distance, limit;
if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
goto dont_reuse;
- if (conn->proto.epoch != rxrpc_epoch)
+ if (conn->proto.epoch != rxnet->epoch)
goto mark_dont_reuse;
/* The IDR tree gets very expensive on memory if the connection IDs are
@@ -297,6 +292,12 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
if (!cp->peer)
goto error;
+ call->cong_cwnd = cp->peer->cong_cwnd;
+ if (call->cong_cwnd >= call->cong_ssthresh)
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ else
+ call->cong_mode = RXRPC_CALL_SLOW_START;
+
/* If the connection is not meant to be exclusive, search the available
* connections to see if the connection we want to use already exists.
*/
@@ -310,7 +311,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
#define cmp(X) ((long)conn->params.X - (long)cp->X)
diff = (cmp(peer) ?:
cmp(key) ?:
- cmp(security_level));
+ cmp(security_level) ?:
+ cmp(upgrade));
#undef cmp
if (diff < 0) {
p = p->rb_left;
@@ -354,6 +356,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
if (cp->exclusive) {
call->conn = candidate;
call->security_ix = candidate->security_ix;
+ call->service_id = candidate->service_id;
_leave(" = 0 [exclusive %d]", candidate->debug_id);
return 0;
}
@@ -374,7 +377,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
#define cmp(X) ((long)conn->params.X - (long)candidate->params.X)
diff = (cmp(peer) ?:
cmp(key) ?:
- cmp(security_level));
+ cmp(security_level) ?:
+ cmp(upgrade));
#undef cmp
if (diff < 0) {
pp = &(*pp)->rb_left;
@@ -403,6 +407,7 @@ candidate_published:
set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
call->conn = candidate;
call->security_ix = candidate->security_ix;
+ call->service_id = candidate->service_id;
spin_unlock(&local->client_conns_lock);
_leave(" = 0 [new %d]", candidate->debug_id);
return 0;
@@ -424,6 +429,7 @@ found_extant_conn:
spin_lock(&conn->channel_lock);
call->conn = conn;
call->security_ix = conn->security_ix;
+ call->service_id = conn->service_id;
list_add(&call->chan_wait_link, &conn->waiting_calls);
spin_unlock(&conn->channel_lock);
_leave(" = 0 [extant %d]", conn->debug_id);
@@ -440,12 +446,18 @@ error:
/*
* Activate a connection.
*/
-static void rxrpc_activate_conn(struct rxrpc_connection *conn)
+static void rxrpc_activate_conn(struct rxrpc_net *rxnet,
+ struct rxrpc_connection *conn)
{
- trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
- conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
- rxrpc_nr_active_client_conns++;
- list_move_tail(&conn->cache_link, &rxrpc_active_client_conns);
+ if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) {
+ trace_rxrpc_client(conn, -1, rxrpc_client_to_upgrade);
+ conn->cache_state = RXRPC_CONN_CLIENT_UPGRADE;
+ } else {
+ trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
+ conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+ }
+ rxnet->nr_active_client_conns++;
+ list_move_tail(&conn->cache_link, &rxnet->active_client_conns);
}
/*
@@ -460,25 +472,28 @@ static void rxrpc_activate_conn(struct rxrpc_connection *conn)
* channels if it has been culled to make space and then re-requested by a new
* call.
*/
-static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
+static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet,
+ struct rxrpc_connection *conn)
{
unsigned int nr_conns;
_enter("%d,%d", conn->debug_id, conn->cache_state);
- if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
+ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE ||
+ conn->cache_state == RXRPC_CONN_CLIENT_UPGRADE)
goto out;
- spin_lock(&rxrpc_client_conn_cache_lock);
+ spin_lock(&rxnet->client_conn_cache_lock);
- nr_conns = rxrpc_nr_client_conns;
+ nr_conns = rxnet->nr_client_conns;
if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
trace_rxrpc_client(conn, -1, rxrpc_client_count);
- rxrpc_nr_client_conns = nr_conns + 1;
+ rxnet->nr_client_conns = nr_conns + 1;
}
switch (conn->cache_state) {
case RXRPC_CONN_CLIENT_ACTIVE:
+ case RXRPC_CONN_CLIENT_UPGRADE:
case RXRPC_CONN_CLIENT_WAITING:
break;
@@ -494,21 +509,21 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
}
out_unlock:
- spin_unlock(&rxrpc_client_conn_cache_lock);
+ spin_unlock(&rxnet->client_conn_cache_lock);
out:
_leave(" [%d]", conn->cache_state);
return;
activate_conn:
_debug("activate");
- rxrpc_activate_conn(conn);
+ rxrpc_activate_conn(rxnet, conn);
goto out_unlock;
wait_for_capacity:
_debug("wait");
trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
- list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns);
+ list_move_tail(&conn->cache_link, &rxnet->waiting_client_conns);
goto out_unlock;
}
@@ -582,6 +597,9 @@ static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn)
case RXRPC_CONN_CLIENT_ACTIVE:
mask = RXRPC_ACTIVE_CHANS_MASK;
break;
+ case RXRPC_CONN_CLIENT_UPGRADE:
+ mask = 0x01;
+ break;
default:
return;
}
@@ -660,18 +678,19 @@ int rxrpc_connect_call(struct rxrpc_call *call,
struct sockaddr_rxrpc *srx,
gfp_t gfp)
{
+ struct rxrpc_net *rxnet = cp->local->rxnet;
int ret;
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
- rxrpc_discard_expired_client_conns(NULL);
- rxrpc_cull_active_client_conns();
+ rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work);
+ rxrpc_cull_active_client_conns(rxnet);
ret = rxrpc_get_client_conn(call, cp, srx, gfp);
if (ret < 0)
return ret;
- rxrpc_animate_client_conn(call->conn);
+ rxrpc_animate_client_conn(rxnet, call->conn);
rxrpc_activate_channels(call->conn);
ret = rxrpc_wait_for_channel(call, gfp);
@@ -729,6 +748,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
unsigned int channel = call->cid & RXRPC_CHANNELMASK;
struct rxrpc_connection *conn = call->conn;
struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk));
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
call->conn = NULL;
@@ -750,7 +770,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
/* We must deactivate or idle the connection if it's now
* waiting for nothing.
*/
- spin_lock(&rxrpc_client_conn_cache_lock);
+ spin_lock(&rxnet->client_conn_cache_lock);
if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING &&
list_empty(&conn->waiting_calls) &&
!conn->active_chans)
@@ -787,14 +807,23 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
* list. It might even get moved back to the active list whilst we're
* waiting for the lock.
*/
- spin_lock(&rxrpc_client_conn_cache_lock);
+ spin_lock(&rxnet->client_conn_cache_lock);
switch (conn->cache_state) {
+ case RXRPC_CONN_CLIENT_UPGRADE:
+ /* Deal with termination of a service upgrade probe. */
+ if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+ clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
+ trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
+ conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+ rxrpc_activate_channels_locked(conn);
+ }
+ /* fall through */
case RXRPC_CONN_CLIENT_ACTIVE:
if (list_empty(&conn->waiting_calls)) {
rxrpc_deactivate_one_channel(conn, channel);
if (!conn->active_chans) {
- rxrpc_nr_active_client_conns--;
+ rxnet->nr_active_client_conns--;
goto idle_connection;
}
goto out;
@@ -820,7 +849,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
}
out:
- spin_unlock(&rxrpc_client_conn_cache_lock);
+ spin_unlock(&rxnet->client_conn_cache_lock);
out_2:
spin_unlock(&conn->channel_lock);
rxrpc_put_connection(conn);
@@ -835,11 +864,11 @@ idle_connection:
trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
conn->idle_timestamp = jiffies;
conn->cache_state = RXRPC_CONN_CLIENT_IDLE;
- list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns);
- if (rxrpc_idle_client_conns.next == &conn->cache_link &&
- !rxrpc_kill_all_client_conns)
+ list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
+ if (rxnet->idle_client_conns.next == &conn->cache_link &&
+ !rxnet->kill_all_client_conns)
queue_delayed_work(rxrpc_workqueue,
- &rxrpc_client_conn_reap,
+ &rxnet->client_conn_reaper,
rxrpc_conn_idle_client_expiry);
} else {
trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
@@ -857,6 +886,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
{
struct rxrpc_connection *next = NULL;
struct rxrpc_local *local = conn->params.local;
+ struct rxrpc_net *rxnet = local->rxnet;
unsigned int nr_conns;
trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
@@ -875,18 +905,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
trace_rxrpc_client(conn, -1, rxrpc_client_uncount);
- spin_lock(&rxrpc_client_conn_cache_lock);
- nr_conns = --rxrpc_nr_client_conns;
+ spin_lock(&rxnet->client_conn_cache_lock);
+ nr_conns = --rxnet->nr_client_conns;
if (nr_conns < rxrpc_max_client_connections &&
- !list_empty(&rxrpc_waiting_client_conns)) {
- next = list_entry(rxrpc_waiting_client_conns.next,
+ !list_empty(&rxnet->waiting_client_conns)) {
+ next = list_entry(rxnet->waiting_client_conns.next,
struct rxrpc_connection, cache_link);
rxrpc_get_connection(next);
- rxrpc_activate_conn(next);
+ rxrpc_activate_conn(rxnet, next);
}
- spin_unlock(&rxrpc_client_conn_cache_lock);
+ spin_unlock(&rxnet->client_conn_cache_lock);
}
rxrpc_kill_connection(conn);
@@ -921,10 +951,10 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn)
/*
* Kill the longest-active client connections to make room for new ones.
*/
-static void rxrpc_cull_active_client_conns(void)
+static void rxrpc_cull_active_client_conns(struct rxrpc_net *rxnet)
{
struct rxrpc_connection *conn;
- unsigned int nr_conns = rxrpc_nr_client_conns;
+ unsigned int nr_conns = rxnet->nr_client_conns;
unsigned int nr_active, limit;
_enter("");
@@ -936,14 +966,15 @@ static void rxrpc_cull_active_client_conns(void)
}
limit = rxrpc_reap_client_connections;
- spin_lock(&rxrpc_client_conn_cache_lock);
- nr_active = rxrpc_nr_active_client_conns;
+ spin_lock(&rxnet->client_conn_cache_lock);
+ nr_active = rxnet->nr_active_client_conns;
while (nr_active > limit) {
- ASSERT(!list_empty(&rxrpc_active_client_conns));
- conn = list_entry(rxrpc_active_client_conns.next,
+ ASSERT(!list_empty(&rxnet->active_client_conns));
+ conn = list_entry(rxnet->active_client_conns.next,
struct rxrpc_connection, cache_link);
- ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
+ ASSERTIFCMP(conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE,
+ conn->cache_state, ==, RXRPC_CONN_CLIENT_UPGRADE);
if (list_empty(&conn->waiting_calls)) {
trace_rxrpc_client(conn, -1, rxrpc_client_to_culled);
@@ -953,14 +984,14 @@ static void rxrpc_cull_active_client_conns(void)
trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
list_move_tail(&conn->cache_link,
- &rxrpc_waiting_client_conns);
+ &rxnet->waiting_client_conns);
}
nr_active--;
}
- rxrpc_nr_active_client_conns = nr_active;
- spin_unlock(&rxrpc_client_conn_cache_lock);
+ rxnet->nr_active_client_conns = nr_active;
+ spin_unlock(&rxnet->client_conn_cache_lock);
ASSERTCMP(nr_active, >=, 0);
_leave(" [culled]");
}
@@ -972,22 +1003,25 @@ static void rxrpc_cull_active_client_conns(void)
* This may be called from conn setup or from a work item so cannot be
* considered non-reentrant.
*/
-static void rxrpc_discard_expired_client_conns(struct work_struct *work)
+void rxrpc_discard_expired_client_conns(struct work_struct *work)
{
struct rxrpc_connection *conn;
+ struct rxrpc_net *rxnet =
+ container_of(to_delayed_work(work),
+ struct rxrpc_net, client_conn_reaper);
unsigned long expiry, conn_expires_at, now;
unsigned int nr_conns;
bool did_discard = false;
- _enter("%c", work ? 'w' : 'n');
+ _enter("");
- if (list_empty(&rxrpc_idle_client_conns)) {
+ if (list_empty(&rxnet->idle_client_conns)) {
_leave(" [empty]");
return;
}
/* Don't double up on the discarding */
- if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) {
+ if (!spin_trylock(&rxnet->client_conn_discard_lock)) {
_leave(" [already]");
return;
}
@@ -995,19 +1029,19 @@ static void rxrpc_discard_expired_client_conns(struct work_struct *work)
/* We keep an estimate of what the number of conns ought to be after
* we've discarded some so that we don't overdo the discarding.
*/
- nr_conns = rxrpc_nr_client_conns;
+ nr_conns = rxnet->nr_client_conns;
next:
- spin_lock(&rxrpc_client_conn_cache_lock);
+ spin_lock(&rxnet->client_conn_cache_lock);
- if (list_empty(&rxrpc_idle_client_conns))
+ if (list_empty(&rxnet->idle_client_conns))
goto out;
- conn = list_entry(rxrpc_idle_client_conns.next,
+ conn = list_entry(rxnet->idle_client_conns.next,
struct rxrpc_connection, cache_link);
ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags));
- if (!rxrpc_kill_all_client_conns) {
+ if (!rxnet->kill_all_client_conns) {
/* If the number of connections is over the reap limit, we
* expedite discard by reducing the expiry timeout. We must,
* however, have at least a short grace period to be able to do
@@ -1030,7 +1064,7 @@ next:
conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
list_del_init(&conn->cache_link);
- spin_unlock(&rxrpc_client_conn_cache_lock);
+ spin_unlock(&rxnet->client_conn_cache_lock);
/* When we cleared the EXPOSED flag, we took on responsibility for the
* reference that that had on the usage count. We deal with that here.
@@ -1050,14 +1084,14 @@ not_yet_expired:
* then things get messier.
*/
_debug("not yet");
- if (!rxrpc_kill_all_client_conns)
+ if (!rxnet->kill_all_client_conns)
queue_delayed_work(rxrpc_workqueue,
- &rxrpc_client_conn_reap,
+ &rxnet->client_conn_reaper,
conn_expires_at - now);
out:
- spin_unlock(&rxrpc_client_conn_cache_lock);
- spin_unlock(&rxrpc_client_conn_discard_mutex);
+ spin_unlock(&rxnet->client_conn_cache_lock);
+ spin_unlock(&rxnet->client_conn_discard_lock);
_leave("");
}
@@ -1065,17 +1099,17 @@ out:
* Preemptively destroy all the client connection records rather than waiting
* for them to time out
*/
-void __exit rxrpc_destroy_all_client_connections(void)
+void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
{
_enter("");
- spin_lock(&rxrpc_client_conn_cache_lock);
- rxrpc_kill_all_client_conns = true;
- spin_unlock(&rxrpc_client_conn_cache_lock);
+ spin_lock(&rxnet->client_conn_cache_lock);
+ rxnet->kill_all_client_conns = true;
+ spin_unlock(&rxnet->client_conn_cache_lock);
- cancel_delayed_work(&rxrpc_client_conn_reap);
+ cancel_delayed_work(&rxnet->client_conn_reaper);
- if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0))
+ if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0))
_debug("destroy: queue failed");
_leave("");
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 46babcf82ce8..59a51a56e7c8 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -74,7 +74,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
pkt.whdr.userStatus = 0;
pkt.whdr.securityIndex = conn->security_ix;
pkt.whdr._rsvd = 0;
- pkt.whdr.serviceId = htons(chan->last_service_id);
+ pkt.whdr.serviceId = htons(conn->service_id);
len = sizeof(pkt.whdr);
switch (chan->last_type) {
@@ -208,7 +208,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
whdr.userStatus = 0;
whdr.securityIndex = conn->security_ix;
whdr._rsvd = 0;
- whdr.serviceId = htons(conn->params.service_id);
+ whdr.serviceId = htons(conn->service_id);
word = htonl(conn->local_abort);
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index b0ecb770fdce..929b50d5afe8 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -22,13 +22,6 @@
*/
unsigned int rxrpc_connection_expiry = 10 * 60;
-static void rxrpc_connection_reaper(struct work_struct *work);
-
-LIST_HEAD(rxrpc_connections);
-LIST_HEAD(rxrpc_connection_proc_list);
-DEFINE_RWLOCK(rxrpc_connection_lock);
-static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
-
static void rxrpc_destroy_connection(struct rcu_head *);
/*
@@ -174,7 +167,6 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
* through the channel, whilst disposing of the actual call record.
*/
trace_rxrpc_disconnect_call(call);
- chan->last_service_id = call->service_id;
if (call->abort_code) {
chan->last_abort = call->abort_code;
chan->last_type = RXRPC_PACKET_TYPE_ABORT;
@@ -201,6 +193,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
+ call->peer->cong_cwnd = call->cong_cwnd;
+
spin_lock_bh(&conn->params.peer->lock);
hlist_del_init(&call->error_link);
spin_unlock_bh(&conn->params.peer->lock);
@@ -222,15 +216,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
*/
void rxrpc_kill_connection(struct rxrpc_connection *conn)
{
+ struct rxrpc_net *rxnet = conn->params.local->rxnet;
+
ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
!rcu_access_pointer(conn->channels[1].call) &&
!rcu_access_pointer(conn->channels[2].call) &&
!rcu_access_pointer(conn->channels[3].call));
ASSERT(list_empty(&conn->cache_link));
- write_lock(&rxrpc_connection_lock);
+ write_lock(&rxnet->conn_lock);
list_del_init(&conn->proc_link);
- write_unlock(&rxrpc_connection_lock);
+ write_unlock(&rxnet->conn_lock);
/* Drain the Rx queue. Note that even though we've unpublished, an
* incoming packet could still be being added to our Rx queue, so we
@@ -309,14 +305,17 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
*/
void rxrpc_put_service_conn(struct rxrpc_connection *conn)
{
+ struct rxrpc_net *rxnet;
const void *here = __builtin_return_address(0);
int n;
n = atomic_dec_return(&conn->usage);
trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
ASSERTCMP(n, >=, 0);
- if (n == 0)
- rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ if (n == 0) {
+ rxnet = conn->params.local->rxnet;
+ rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0);
+ }
}
/*
@@ -348,9 +347,12 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
/*
* reap dead service connections
*/
-static void rxrpc_connection_reaper(struct work_struct *work)
+void rxrpc_service_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
+ struct rxrpc_net *rxnet =
+ container_of(to_delayed_work(work),
+ struct rxrpc_net, service_conn_reaper);
unsigned long reap_older_than, earliest, idle_timestamp, now;
LIST_HEAD(graveyard);
@@ -361,8 +363,8 @@ static void rxrpc_connection_reaper(struct work_struct *work)
reap_older_than = now - rxrpc_connection_expiry * HZ;
earliest = ULONG_MAX;
- write_lock(&rxrpc_connection_lock);
- list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ write_lock(&rxnet->conn_lock);
+ list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
ASSERTCMP(atomic_read(&conn->usage), >, 0);
if (likely(atomic_read(&conn->usage) > 1))
continue;
@@ -393,12 +395,12 @@ static void rxrpc_connection_reaper(struct work_struct *work)
list_move_tail(&conn->link, &graveyard);
}
- write_unlock(&rxrpc_connection_lock);
+ write_unlock(&rxnet->conn_lock);
if (earliest != ULONG_MAX) {
_debug("reschedule reaper %ld", (long) earliest - now);
ASSERT(time_after(earliest, now));
- rxrpc_queue_delayed_work(&rxrpc_connection_reap,
+ rxrpc_queue_delayed_work(&rxnet->service_conn_reaper,
earliest - now);
}
@@ -418,36 +420,30 @@ static void rxrpc_connection_reaper(struct work_struct *work)
* preemptively destroy all the service connection records rather than
* waiting for them to time out
*/
-void __exit rxrpc_destroy_all_connections(void)
+void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
{
struct rxrpc_connection *conn, *_p;
bool leak = false;
_enter("");
- rxrpc_destroy_all_client_connections();
+ rxrpc_destroy_all_client_connections(rxnet);
rxrpc_connection_expiry = 0;
- cancel_delayed_work(&rxrpc_connection_reap);
- rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ cancel_delayed_work(&rxnet->service_conn_reaper);
+ rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0);
flush_workqueue(rxrpc_workqueue);
- write_lock(&rxrpc_connection_lock);
- list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ write_lock(&rxnet->conn_lock);
+ list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
conn, atomic_read(&conn->usage));
leak = true;
}
- write_unlock(&rxrpc_connection_lock);
+ write_unlock(&rxnet->conn_lock);
BUG_ON(leak);
- ASSERT(list_empty(&rxrpc_connection_proc_list));
-
- /* Make sure the local and peer records pinned by any dying connections
- * are released.
- */
- rcu_barrier();
- rxrpc_destroy_client_conn_ids();
+ ASSERT(list_empty(&rxnet->conn_proc_list));
_leave("");
}
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index eef551f40dc2..e60fcd2a4a02 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -121,7 +121,8 @@ replace_old_connection:
* Preallocate a service connection. The connection is placed on the proc and
* reap lists so that we don't have to get the lock from BH context.
*/
-struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet,
+ gfp_t gfp)
{
struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp);
@@ -132,10 +133,10 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
atomic_set(&conn->usage, 2);
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
- write_unlock(&rxrpc_connection_lock);
+ write_lock(&rxnet->conn_lock);
+ list_add_tail(&conn->link, &rxnet->service_conns);
+ list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
+ write_unlock(&rxnet->conn_lock);
trace_rxrpc_conn(conn, rxrpc_conn_new_service,
atomic_read(&conn->usage),
@@ -149,7 +150,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
* Set up an incoming connection. This is called in BH context with the RCU
* read lock held.
*/
-void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
+void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
+ struct rxrpc_connection *conn,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -159,6 +161,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
conn->proto.epoch = sp->hdr.epoch;
conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
conn->params.service_id = sp->hdr.serviceId;
+ conn->service_id = sp->hdr.serviceId;
conn->security_ix = sp->hdr.securityIndex;
conn->out_clientflag = 0;
if (conn->security_ix)
@@ -166,6 +169,14 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
else
conn->state = RXRPC_CONN_SERVICE;
+ /* See if we should upgrade the service. This can only happen on the
+ * first packet on a new connection. Once done, it applies to all
+ * subsequent calls on that connection.
+ */
+ if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE &&
+ conn->service_id == rx->service_upgrade.from)
+ conn->service_id = rx->service_upgrade.to;
+
/* Make the connection a target for incoming packets. */
rxrpc_publish_service_conn(conn->params.peer, conn);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 45dba732a3b4..e56e23ed2229 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1142,6 +1142,13 @@ void rxrpc_data_ready(struct sock *udp_sk)
if (sp->hdr.securityIndex != conn->security_ix)
goto wrong_security;
+ if (sp->hdr.serviceId != conn->service_id) {
+ if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) ||
+ conn->service_id != conn->params.service_id)
+ goto reupgrade;
+ conn->service_id = sp->hdr.serviceId;
+ }
+
if (sp->hdr.callNumber == 0) {
/* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
@@ -1194,6 +1201,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
rxrpc_input_implicit_end_call(conn, call);
call = NULL;
}
+
+ if (call && sp->hdr.serviceId != call->service_id)
+ call->service_id = sp->hdr.serviceId;
} else {
skew = 0;
call = NULL;
@@ -1237,11 +1247,18 @@ wrong_security:
skb->priority = RXKADINCONSISTENCY;
goto post_abort;
+reupgrade:
+ rcu_read_unlock();
+ trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
+ goto protocol_error;
+
bad_message_unlock:
rcu_read_unlock();
bad_message:
trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_PROTOCOL_ERROR, EBADMSG);
+protocol_error:
skb->priority = RX_PROTOCOL_ERROR;
post_abort:
skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index ff4864d550b8..38b99db30e54 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -25,9 +25,6 @@
static void rxrpc_local_processor(struct work_struct *);
static void rxrpc_local_rcu(struct rcu_head *);
-static DEFINE_MUTEX(rxrpc_local_mutex);
-static LIST_HEAD(rxrpc_local_endpoints);
-
/*
* Compare a local to an address. Return -ve, 0 or +ve to indicate less than,
* same or greater than.
@@ -77,13 +74,15 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
/*
* Allocate a new local endpoint.
*/
-static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
+static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
+ const struct sockaddr_rxrpc *srx)
{
struct rxrpc_local *local;
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
atomic_set(&local->usage, 1);
+ local->rxnet = rxnet;
INIT_LIST_HEAD(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
init_rwsem(&local->defrag_sem);
@@ -95,6 +94,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
rwlock_init(&local->services_lock);
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
+ local->srx.srx_service = 0;
}
_leave(" = %p", local);
@@ -105,7 +105,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
* create the local socket
* - must be called with rxrpc_local_mutex locked
*/
-static int rxrpc_open_socket(struct rxrpc_local *local)
+static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
{
struct sock *sock;
int ret, opt;
@@ -114,7 +114,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local)
local, local->srx.transport_type, local->srx.transport.family);
/* create a socket to represent the local endpoint */
- ret = sock_create_kern(&init_net, local->srx.transport.family,
+ ret = sock_create_kern(net, local->srx.transport.family,
local->srx.transport_type, 0, &local->socket);
if (ret < 0) {
_leave(" = %d [socket]", ret);
@@ -172,9 +172,11 @@ error:
/*
* Look up or create a new local endpoint using the specified local address.
*/
-struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
+struct rxrpc_local *rxrpc_lookup_local(struct net *net,
+ const struct sockaddr_rxrpc *srx)
{
struct rxrpc_local *local;
+ struct rxrpc_net *rxnet = rxrpc_net(net);
struct list_head *cursor;
const char *age;
long diff;
@@ -183,10 +185,10 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
_enter("{%d,%d,%pISp}",
srx->transport_type, srx->transport.family, &srx->transport);
- mutex_lock(&rxrpc_local_mutex);
+ mutex_lock(&rxnet->local_mutex);
- for (cursor = rxrpc_local_endpoints.next;
- cursor != &rxrpc_local_endpoints;
+ for (cursor = rxnet->local_endpoints.next;
+ cursor != &rxnet->local_endpoints;
cursor = cursor->next) {
local = list_entry(cursor, struct rxrpc_local, link);
@@ -220,11 +222,11 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
goto found;
}
- local = rxrpc_alloc_local(srx);
+ local = rxrpc_alloc_local(rxnet, srx);
if (!local)
goto nomem;
- ret = rxrpc_open_socket(local);
+ ret = rxrpc_open_socket(local, net);
if (ret < 0)
goto sock_error;
@@ -232,7 +234,7 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
age = "new";
found:
- mutex_unlock(&rxrpc_local_mutex);
+ mutex_unlock(&rxnet->local_mutex);
_net("LOCAL %s %d {%pISp}",
age, local->debug_id, &local->srx.transport);
@@ -243,13 +245,13 @@ found:
nomem:
ret = -ENOMEM;
sock_error:
- mutex_unlock(&rxrpc_local_mutex);
+ mutex_unlock(&rxnet->local_mutex);
kfree(local);
_leave(" = %d", ret);
return ERR_PTR(ret);
addr_in_use:
- mutex_unlock(&rxrpc_local_mutex);
+ mutex_unlock(&rxnet->local_mutex);
_leave(" = -EADDRINUSE");
return ERR_PTR(-EADDRINUSE);
}
@@ -273,6 +275,7 @@ void __rxrpc_put_local(struct rxrpc_local *local)
static void rxrpc_local_destroyer(struct rxrpc_local *local)
{
struct socket *socket = local->socket;
+ struct rxrpc_net *rxnet = local->rxnet;
_enter("%d", local->debug_id);
@@ -286,9 +289,9 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
}
local->dead = true;
- mutex_lock(&rxrpc_local_mutex);
+ mutex_lock(&rxnet->local_mutex);
list_del_init(&local->link);
- mutex_unlock(&rxrpc_local_mutex);
+ mutex_unlock(&rxnet->local_mutex);
ASSERT(RB_EMPTY_ROOT(&local->client_conns));
ASSERT(!local->service);
@@ -357,7 +360,7 @@ static void rxrpc_local_rcu(struct rcu_head *rcu)
/*
* Verify the local endpoint list is empty by this point.
*/
-void __exit rxrpc_destroy_all_locals(void)
+void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
{
struct rxrpc_local *local;
@@ -365,15 +368,13 @@ void __exit rxrpc_destroy_all_locals(void)
flush_workqueue(rxrpc_workqueue);
- if (!list_empty(&rxrpc_local_endpoints)) {
- mutex_lock(&rxrpc_local_mutex);
- list_for_each_entry(local, &rxrpc_local_endpoints, link) {
+ if (!list_empty(&rxnet->local_endpoints)) {
+ mutex_lock(&rxnet->local_mutex);
+ list_for_each_entry(local, &rxnet->local_endpoints, link) {
pr_err("AF_RXRPC: Leaked local %p {%d}\n",
local, atomic_read(&local->usage));
}
- mutex_unlock(&rxrpc_local_mutex);
+ mutex_unlock(&rxnet->local_mutex);
BUG();
}
-
- rcu_barrier();
}
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
new file mode 100644
index 000000000000..7edceb8522f5
--- /dev/null
+++ b/net/rxrpc/net_ns.c
@@ -0,0 +1,84 @@
+/* rxrpc network namespace handling.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/proc_fs.h>
+#include "ar-internal.h"
+
+unsigned int rxrpc_net_id;
+
+/*
+ * Initialise a per-network namespace record.
+ */
+static __net_init int rxrpc_init_net(struct net *net)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(net);
+ int ret;
+
+ get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
+ rxnet->epoch |= RXRPC_RANDOM_EPOCH;
+
+ INIT_LIST_HEAD(&rxnet->calls);
+ rwlock_init(&rxnet->call_lock);
+
+ INIT_LIST_HEAD(&rxnet->conn_proc_list);
+ INIT_LIST_HEAD(&rxnet->service_conns);
+ rwlock_init(&rxnet->conn_lock);
+ INIT_DELAYED_WORK(&rxnet->service_conn_reaper,
+ rxrpc_service_connection_reaper);
+
+ rxnet->nr_client_conns = 0;
+ rxnet->nr_active_client_conns = 0;
+ rxnet->kill_all_client_conns = false;
+ spin_lock_init(&rxnet->client_conn_cache_lock);
+ spin_lock_init(&rxnet->client_conn_discard_lock);
+ INIT_LIST_HEAD(&rxnet->waiting_client_conns);
+ INIT_LIST_HEAD(&rxnet->active_client_conns);
+ INIT_LIST_HEAD(&rxnet->idle_client_conns);
+ INIT_DELAYED_WORK(&rxnet->client_conn_reaper,
+ rxrpc_discard_expired_client_conns);
+
+ INIT_LIST_HEAD(&rxnet->local_endpoints);
+ mutex_init(&rxnet->local_mutex);
+ hash_init(rxnet->peer_hash);
+ spin_lock_init(&rxnet->peer_hash_lock);
+
+ ret = -ENOMEM;
+ rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
+ if (!rxnet->proc_net)
+ goto err_proc;
+
+ proc_create("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_fops);
+ proc_create("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_fops);
+ return 0;
+
+err_proc:
+ return ret;
+}
+
+/*
+ * Clean up a per-network namespace record.
+ */
+static __net_exit void rxrpc_exit_net(struct net *net)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(net);
+
+ rxrpc_destroy_all_calls(rxnet);
+ rxrpc_destroy_all_connections(rxnet);
+ rxrpc_destroy_all_locals(rxnet);
+ proc_remove(rxnet->proc_net);
+}
+
+struct pernet_operations rxrpc_net_ops = {
+ .init = rxrpc_init_net,
+ .exit = rxrpc_exit_net,
+ .id = &rxrpc_net_id,
+ .size = sizeof(struct rxrpc_net),
+};
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5dab1ff3a6c2..5bd2d0fa4a03 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -292,6 +292,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
whdr._rsvd = htons(sp->hdr._rsvd);
whdr.serviceId = htons(call->service_id);
+ if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
+ sp->hdr.seq == 1)
+ whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
+
iov[0].iov_base = &whdr;
iov[0].iov_len = sizeof(whdr);
iov[1].iov_base = skb->head;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 862eea6b266c..5787f97f5330 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -26,9 +26,6 @@
#include <net/ip6_route.h>
#include "ar-internal.h"
-static DEFINE_HASHTABLE(rxrpc_peer_hash, 10);
-static DEFINE_SPINLOCK(rxrpc_peer_hash_lock);
-
/*
* Hash a peer key.
*/
@@ -124,8 +121,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu(
unsigned long hash_key)
{
struct rxrpc_peer *peer;
+ struct rxrpc_net *rxnet = local->rxnet;
- hash_for_each_possible_rcu(rxrpc_peer_hash, peer, hash_link, hash_key) {
+ hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) {
if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) {
if (atomic_read(&peer->usage) == 0)
return NULL;
@@ -230,6 +228,13 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
seqlock_init(&peer->service_conn_lock);
spin_lock_init(&peer->lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+
+ if (RXRPC_TX_SMSS > 2190)
+ peer->cong_cwnd = 2;
+ else if (RXRPC_TX_SMSS > 1095)
+ peer->cong_cwnd = 3;
+ else
+ peer->cong_cwnd = 4;
}
_leave(" = %p", peer);
@@ -301,13 +306,14 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
struct rxrpc_peer *prealloc)
{
struct rxrpc_peer *peer;
+ struct rxrpc_net *rxnet = local->rxnet;
unsigned long hash_key;
hash_key = rxrpc_peer_hash_key(local, &prealloc->srx);
prealloc->local = local;
rxrpc_init_peer(prealloc, hash_key);
- spin_lock(&rxrpc_peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
/* Need to check that we aren't racing with someone else */
peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key);
@@ -315,10 +321,10 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
peer = NULL;
if (!peer) {
peer = prealloc;
- hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key);
+ hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
}
- spin_unlock(&rxrpc_peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
return peer;
}
@@ -329,6 +335,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx, gfp_t gfp)
{
struct rxrpc_peer *peer, *candidate;
+ struct rxrpc_net *rxnet = local->rxnet;
unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
_enter("{%pISp}", &srx->transport);
@@ -350,17 +357,17 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
return NULL;
}
- spin_lock_bh(&rxrpc_peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
/* Need to check that we aren't racing with someone else */
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
if (!peer)
- hash_add_rcu(rxrpc_peer_hash,
+ hash_add_rcu(rxnet->peer_hash,
&candidate->hash_link, hash_key);
- spin_unlock_bh(&rxrpc_peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
if (peer)
kfree(candidate);
@@ -379,11 +386,13 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
*/
void __rxrpc_put_peer(struct rxrpc_peer *peer)
{
+ struct rxrpc_net *rxnet = peer->local->rxnet;
+
ASSERT(hlist_empty(&peer->error_targets));
- spin_lock_bh(&rxrpc_peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
hash_del_rcu(&peer->hash_link);
- spin_unlock_bh(&rxrpc_peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
kfree_rcu(peer, rcu);
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index b9bcfbfb095c..7421656963a9 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -30,19 +30,25 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
*/
static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
rcu_read_lock();
- read_lock(&rxrpc_call_lock);
- return seq_list_start_head(&rxrpc_calls, *_pos);
+ read_lock(&rxnet->call_lock);
+ return seq_list_start_head(&rxnet->calls, *_pos);
}
static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- return seq_list_next(v, &rxrpc_calls, pos);
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ return seq_list_next(v, &rxnet->calls, pos);
}
static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
{
- read_unlock(&rxrpc_call_lock);
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_unlock(&rxnet->call_lock);
rcu_read_unlock();
}
@@ -52,10 +58,11 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_sock *rx;
struct rxrpc_peer *peer;
struct rxrpc_call *call;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
rxrpc_seq_t tx_hard_ack, rx_hard_ack;
char lbuff[50], rbuff[50];
- if (v == &rxrpc_calls) {
+ if (v == &rxnet->calls) {
seq_puts(seq,
"Proto Local "
" Remote "
@@ -113,7 +120,8 @@ static const struct seq_operations rxrpc_call_seq_ops = {
static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &rxrpc_call_seq_ops);
+ return seq_open_net(inode, file, &rxrpc_call_seq_ops,
+ sizeof(struct seq_net_private));
}
const struct file_operations rxrpc_call_seq_fops = {
@@ -129,27 +137,34 @@ const struct file_operations rxrpc_call_seq_fops = {
*/
static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
{
- read_lock(&rxrpc_connection_lock);
- return seq_list_start_head(&rxrpc_connection_proc_list, *_pos);
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_lock(&rxnet->conn_lock);
+ return seq_list_start_head(&rxnet->conn_proc_list, *_pos);
}
static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
loff_t *pos)
{
- return seq_list_next(v, &rxrpc_connection_proc_list, pos);
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ return seq_list_next(v, &rxnet->conn_proc_list, pos);
}
static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
{
- read_unlock(&rxrpc_connection_lock);
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_unlock(&rxnet->conn_lock);
}
static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
char lbuff[50], rbuff[50];
- if (v == &rxrpc_connection_proc_list) {
+ if (v == &rxnet->conn_proc_list) {
seq_puts(seq,
"Proto Local "
" Remote "
@@ -175,7 +190,7 @@ print:
" %s %08x %08x %08x\n",
lbuff,
rbuff,
- conn->params.service_id,
+ conn->service_id,
conn->proto.cid,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
atomic_read(&conn->usage),
@@ -197,7 +212,8 @@ static const struct seq_operations rxrpc_connection_seq_ops = {
static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &rxrpc_connection_seq_ops);
+ return seq_open_net(inode, file, &rxrpc_connection_seq_ops,
+ sizeof(struct seq_net_private));
}
const struct file_operations rxrpc_connection_seq_fops = {
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index f9caf3b77509..bdece21f313d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -522,8 +522,11 @@ try_again:
}
if (msg->msg_name) {
- size_t len = sizeof(call->conn->params.peer->srx);
- memcpy(msg->msg_name, &call->conn->params.peer->srx, len);
+ struct sockaddr_rxrpc *srx = msg->msg_name;
+ size_t len = sizeof(call->peer->srx);
+
+ memcpy(msg->msg_name, &call->peer->srx, len);
+ srx->srx_service = call->service_id;
msg->msg_namelen = len;
}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 1bb9b2ccc267..46d1a1f0b55b 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -227,7 +227,9 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
len &= ~(call->conn->size_align - 1);
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, 0, len);
+ err = skb_to_sgvec(skb, sg, 0, len);
+ if (unlikely(err < 0))
+ goto out;
skcipher_request_set_crypt(req, sg, sg, len, iv.x);
crypto_skcipher_encrypt(req);
@@ -324,7 +326,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
bool aborted;
u32 data_size, buf;
u16 check;
- int nsg;
+ int nsg, ret;
_enter("");
@@ -342,7 +344,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
goto nomem;
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, offset, 8);
+ ret = skb_to_sgvec(skb, sg, offset, 8);
+ if (unlikely(ret < 0))
+ return ret;
/* start the decryption afresh */
memset(&iv, 0, sizeof(iv));
@@ -409,7 +413,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
bool aborted;
u32 data_size, buf;
u16 check;
- int nsg;
+ int nsg, ret;
_enter(",{%d}", skb->len);
@@ -434,7 +438,12 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
}
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, offset, len);
+ ret = skb_to_sgvec(skb, sg, offset, len);
+ if (unlikely(ret < 0)) {
+ if (sg != _sg)
+ kfree(sg);
+ return ret;
+ }
/* decrypt from the session key */
token = call->conn->params.key->payload.data[0];
@@ -640,7 +649,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
whdr.userStatus = 0;
whdr.securityIndex = conn->security_ix;
whdr._rsvd = 0;
- whdr.serviceId = htons(conn->params.service_id);
+ whdr.serviceId = htons(conn->service_id);
iov[0].iov_base = &whdr;
iov[0].iov_len = sizeof(whdr);
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 7d921e56e715..e9f428351293 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -121,7 +121,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
_enter("");
- sprintf(kdesc, "%u:%u", conn->params.service_id, conn->security_ix);
+ sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix);
sec = rxrpc_security_lookup(conn->security_ix);
if (!sec) {
@@ -133,7 +133,8 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
read_lock(&local->services_lock);
rx = rcu_dereference_protected(local->service,
lockdep_is_held(&local->services_lock));
- if (rx && rx->srx.srx_service == conn->params.service_id)
+ if (rx && (rx->srx.srx_service == conn->service_id ||
+ rx->second_service == conn->service_id))
goto found_service;
/* the service appears to have died */
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 96ffa5d5733b..2e636a525a65 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -28,6 +28,15 @@ enum rxrpc_command {
RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
};
+struct rxrpc_send_params {
+ s64 tx_total_len; /* Total Tx data length (if send data) */
+ unsigned long user_call_ID; /* User's call ID */
+ u32 abort_code; /* Abort code to Tx (if abort) */
+ enum rxrpc_command command : 8; /* The command to implement */
+ bool exclusive; /* Shared or exclusive call */
+ bool upgrade; /* If the connection is upgradeable */
+};
+
/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
@@ -199,6 +208,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
more = msg->msg_flags & MSG_MORE;
+ if (call->tx_total_len != -1) {
+ if (len > call->tx_total_len)
+ return -EMSGSIZE;
+ if (!more && len != call->tx_total_len)
+ return -EMSGSIZE;
+ }
+
skb = call->tx_pending;
call->tx_pending = NULL;
rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
@@ -291,6 +307,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
sp->remain -= copy;
skb->mark += copy;
copied += copy;
+ if (call->tx_total_len != -1)
+ call->tx_total_len -= copy;
}
/* check for the far side aborting the call or a network error
@@ -362,18 +380,12 @@ efault:
/*
* extract control messages from the sendmsg() control buffer
*/
-static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
- unsigned long *user_call_ID,
- enum rxrpc_command *command,
- u32 *abort_code,
- bool *_exclusive)
+static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
{
struct cmsghdr *cmsg;
bool got_user_ID = false;
int len;
- *command = RXRPC_CMD_SEND_DATA;
-
if (msg->msg_controllen == 0)
return -EINVAL;
@@ -393,42 +405,55 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
if (msg->msg_flags & MSG_CMSG_COMPAT) {
if (len != sizeof(u32))
return -EINVAL;
- *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+ p->user_call_ID = *(u32 *)CMSG_DATA(cmsg);
} else {
if (len != sizeof(unsigned long))
return -EINVAL;
- *user_call_ID = *(unsigned long *)
+ p->user_call_ID = *(unsigned long *)
CMSG_DATA(cmsg);
}
- _debug("User Call ID %lx", *user_call_ID);
got_user_ID = true;
break;
case RXRPC_ABORT:
- if (*command != RXRPC_CMD_SEND_DATA)
+ if (p->command != RXRPC_CMD_SEND_DATA)
return -EINVAL;
- *command = RXRPC_CMD_SEND_ABORT;
- if (len != sizeof(*abort_code))
+ p->command = RXRPC_CMD_SEND_ABORT;
+ if (len != sizeof(p->abort_code))
return -EINVAL;
- *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
- _debug("Abort %x", *abort_code);
- if (*abort_code == 0)
+ p->abort_code = *(unsigned int *)CMSG_DATA(cmsg);
+ if (p->abort_code == 0)
return -EINVAL;
break;
case RXRPC_ACCEPT:
- if (*command != RXRPC_CMD_SEND_DATA)
+ if (p->command != RXRPC_CMD_SEND_DATA)
return -EINVAL;
- *command = RXRPC_CMD_ACCEPT;
+ p->command = RXRPC_CMD_ACCEPT;
if (len != 0)
return -EINVAL;
break;
case RXRPC_EXCLUSIVE_CALL:
- *_exclusive = true;
+ p->exclusive = true;
+ if (len != 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_UPGRADE_SERVICE:
+ p->upgrade = true;
if (len != 0)
return -EINVAL;
break;
+
+ case RXRPC_TX_LENGTH:
+ if (p->tx_total_len != -1 || len != sizeof(__s64))
+ return -EINVAL;
+ p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
+ if (p->tx_total_len < 0)
+ return -EINVAL;
+ break;
+
default:
return -EINVAL;
}
@@ -436,6 +461,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
if (!got_user_ID)
return -EINVAL;
+ if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
_leave(" = 0");
return 0;
}
@@ -447,7 +474,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
*/
static struct rxrpc_call *
rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
- unsigned long user_call_ID, bool exclusive)
+ struct rxrpc_send_params *p)
__releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_conn_parameters cp;
@@ -471,9 +498,11 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
cp.local = rx->local;
cp.key = rx->key;
cp.security_level = rx->min_sec_level;
- cp.exclusive = rx->exclusive | exclusive;
+ cp.exclusive = rx->exclusive | p->exclusive;
+ cp.upgrade = p->upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+ call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID,
+ p->tx_total_len, GFP_KERNEL);
/* The socket is now unlocked */
_leave(" = %p\n", call);
@@ -489,25 +518,29 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
enum rxrpc_call_state state;
- enum rxrpc_command cmd;
struct rxrpc_call *call;
- unsigned long user_call_ID = 0;
- bool exclusive = false;
- u32 abort_code = 0;
int ret;
+ struct rxrpc_send_params p = {
+ .tx_total_len = -1,
+ .user_call_ID = 0,
+ .abort_code = 0,
+ .command = RXRPC_CMD_SEND_DATA,
+ .exclusive = false,
+ .upgrade = false, /* opt-in: set only by an RXRPC_UPGRADE_SERVICE cmsg */
+ };
+
_enter("");
- ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
- &exclusive);
+ ret = rxrpc_sendmsg_cmsg(msg, &p);
if (ret < 0)
goto error_release_sock;
- if (cmd == RXRPC_CMD_ACCEPT) {
+ if (p.command == RXRPC_CMD_ACCEPT) {
ret = -EINVAL;
if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
goto error_release_sock;
- call = rxrpc_accept_call(rx, user_call_ID, NULL);
+ call = rxrpc_accept_call(rx, p.user_call_ID, NULL);
/* The socket is now unlocked. */
if (IS_ERR(call))
return PTR_ERR(call);
@@ -515,13 +548,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
return 0;
}
- call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
+ call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID);
if (!call) {
ret = -EBADSLT;
- if (cmd != RXRPC_CMD_SEND_DATA)
+ if (p.command != RXRPC_CMD_SEND_DATA)
goto error_release_sock;
- call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
- exclusive);
+ call = rxrpc_new_client_call_for_sendmsg(rx, msg, &p);
/* The socket is now unlocked... */
if (IS_ERR(call))
return PTR_ERR(call);
@@ -545,6 +577,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = -ERESTARTSYS;
goto error_put;
}
+
+ if (p.tx_total_len != -1) {
+ ret = -EINVAL;
+ if (call->tx_total_len != -1 ||
+ call->tx_pending ||
+ call->tx_top != 0)
+ goto error_put;
+ call->tx_total_len = p.tx_total_len;
+ }
}
state = READ_ONCE(call->state);
@@ -554,11 +595,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
if (state >= RXRPC_CALL_COMPLETE) {
/* it's too late for this call */
ret = -ESHUTDOWN;
- } else if (cmd == RXRPC_CMD_SEND_ABORT) {
+ } else if (p.command == RXRPC_CMD_SEND_ABORT) {
ret = 0;
- if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED))
+ if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED))
ret = rxrpc_send_abort_packet(call);
- } else if (cmd != RXRPC_CMD_SEND_DATA) {
+ } else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else if (rxrpc_is_client_call(call) &&
state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
@@ -662,5 +703,24 @@ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
mutex_unlock(&call->user_mutex);
return aborted;
}
-
EXPORT_SYMBOL(rxrpc_kernel_abort_call);
+
+/**
+ * rxrpc_kernel_set_tx_length - Set the total Tx length on a call
+ * @sock: The socket the call is on
+ * @call: The call to be informed
+ * @tx_total_len: The amount of data to be transmitted for this call
+ *
+ * Allow a kernel service to set the total transmit length on a call. This
+ * allows buffer-to-packet encrypt-and-copy to be performed.
+ *
+ * This function is primarily for use for setting the reply length since the
+ * request length can be set when beginning the call.
+ */
+void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call,
+ s64 tx_total_len)
+{
+ WARN_ON(call->tx_total_len != -1);
+ call->tx_total_len = tx_total_len;
+}
+EXPORT_SYMBOL(rxrpc_kernel_set_tx_length);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9fb84f0de6af..e70ed26485a2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -649,6 +649,7 @@ config NET_EMATCH_IPSET
config NET_CLS_ACT
bool "Actions"
+ select NET_CLS
---help---
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a90e8f355c00..aed6cf2e9fd8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -28,6 +28,31 @@
#include <net/act_api.h>
#include <net/netlink.h>
+static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
+{
+ u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
+
+ if (!tp)
+ return -EINVAL;
+ a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true);
+ if (!a->goto_chain)
+ return -ENOMEM;
+ return 0;
+}
+
+static void tcf_action_goto_chain_fini(struct tc_action *a)
+{
+ tcf_chain_put(a->goto_chain);
+}
+
+static void tcf_action_goto_chain_exec(const struct tc_action *a,
+ struct tcf_result *res)
+{
+ const struct tcf_chain *chain = a->goto_chain;
+
+ res->goto_tp = rcu_dereference_bh(chain->filter_chain);
+}
+
static void free_tcf(struct rcu_head *head)
{
struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
@@ -39,6 +64,8 @@ static void free_tcf(struct rcu_head *head)
kfree(p->act_cookie->data);
kfree(p->act_cookie);
}
+ if (p->goto_chain)
+ tcf_action_goto_chain_fini(p);
kfree(p);
}
@@ -465,6 +492,8 @@ repeat:
else /* faulty graph, stop pipeline */
return TC_ACT_OK;
}
+ } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+ tcf_action_goto_chain_exec(a, res);
}
if (ret != TC_ACT_PIPE)
@@ -570,9 +599,9 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
-struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr,
- int bind)
+struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
+ struct nlattr *nla, struct nlattr *est,
+ char *name, int ovr, int bind)
{
struct tc_action *a;
struct tc_action_ops *a_o;
@@ -657,6 +686,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
if (err != ACT_P_CREATED)
module_put(a_o->owner);
+ if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
+ err = tcf_action_goto_chain_init(a, tp);
+ if (err) {
+ LIST_HEAD(actions);
+
+ list_add_tail(&a->list, &actions);
+ tcf_action_destroy(&actions, bind);
+ return ERR_PTR(err);
+ }
+ }
+
return a;
err_mod:
@@ -680,8 +720,9 @@ static void cleanup_a(struct list_head *actions, int ovr)
a->tcfa_refcnt--;
}
-int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind, struct list_head *actions)
+int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
+ struct nlattr *est, char *name, int ovr, int bind,
+ struct list_head *actions)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
@@ -693,7 +734,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
+ act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
@@ -1020,7 +1061,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
int ret = 0;
LIST_HEAD(actions);
- ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
if (ret)
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index ab6fdbd34db7..3317a2f579da 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
sctph->checksum = sctp_compute_cksum(skb,
skb_network_offset(skb) + ihl);
skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_not_inet = 0;
return 1;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 22f88b35a546..39da0c5801c9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -106,13 +106,12 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n,
- struct tcf_proto __rcu **chain, int event)
+ struct tcf_chain *chain, int event)
{
- struct tcf_proto __rcu **it_chain;
struct tcf_proto *tp;
- for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL;
- it_chain = &tp->next)
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next))
tfilter_notify(net, oskb, n, tp, 0, event, false);
}
@@ -125,11 +124,12 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
if (tp)
first = tp->prio - 1;
- return first;
+ return TC_H_MAJ(first);
}
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
- u32 prio, u32 parent, struct Qdisc *q)
+ u32 prio, u32 parent, struct Qdisc *q,
+ struct tcf_chain *chain)
{
struct tcf_proto *tp;
int err;
@@ -165,6 +165,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->prio = prio;
tp->classid = parent;
tp->q = q;
+ tp->chain = chain;
err = tp->ops->init(tp);
if (err) {
@@ -185,16 +186,226 @@ static void tcf_proto_destroy(struct tcf_proto *tp)
kfree_rcu(tp, rcu);
}
-void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
+ u32 chain_index)
+{
+ struct tcf_chain *chain;
+
+ chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+ if (!chain)
+ return NULL;
+ list_add_tail(&chain->list, &block->chain_list);
+ chain->block = block;
+ chain->index = chain_index;
+ chain->refcnt = 1;
+ return chain;
+}
+
+static void tcf_chain_flush(struct tcf_chain *chain)
{
struct tcf_proto *tp;
- while ((tp = rtnl_dereference(*fl)) != NULL) {
- RCU_INIT_POINTER(*fl, tp->next);
+ if (chain->p_filter_chain) /* only set on chain 0 (tcf_block_get); NULL elsewhere */
+ RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
+ while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
+ RCU_INIT_POINTER(chain->filter_chain, tp->next);
tcf_proto_destroy(tp);
}
}
-EXPORT_SYMBOL(tcf_destroy_chain);
+
+static void tcf_chain_destroy(struct tcf_chain *chain)
+{
+ list_del(&chain->list);
+ tcf_chain_flush(chain);
+ kfree(chain);
+}
+
+struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
+ bool create)
+{
+ struct tcf_chain *chain;
+
+ list_for_each_entry(chain, &block->chain_list, list) {
+ if (chain->index == chain_index) {
+ chain->refcnt++;
+ return chain;
+ }
+ }
+ if (create)
+ return tcf_chain_create(block, chain_index);
+ else
+ return NULL;
+}
+EXPORT_SYMBOL(tcf_chain_get);
+
+void tcf_chain_put(struct tcf_chain *chain)
+{
+ /* Destroy unused chain, with exception of chain 0, which is the
+ * default one and has to be always present.
+ */
+ if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0)
+ tcf_chain_destroy(chain);
+}
+EXPORT_SYMBOL(tcf_chain_put);
+
+static void
+tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
+ struct tcf_proto __rcu **p_filter_chain)
+{
+ chain->p_filter_chain = p_filter_chain;
+}
+
+int tcf_block_get(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain)
+{
+ struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
+ struct tcf_chain *chain;
+ int err;
+
+ if (!block)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&block->chain_list);
+ /* Create chain 0 by default, it has to be always present. */
+ chain = tcf_chain_create(block, 0);
+ if (!chain) {
+ err = -ENOMEM;
+ goto err_chain_create;
+ }
+ tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+ *p_block = block;
+ return 0;
+
+err_chain_create:
+ kfree(block);
+ return err;
+}
+EXPORT_SYMBOL(tcf_block_get);
+
+void tcf_block_put(struct tcf_block *block)
+{
+ struct tcf_chain *chain, *tmp;
+
+ if (!block)
+ return;
+
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_destroy(chain);
+ kfree(block);
+}
+EXPORT_SYMBOL(tcf_block_put);
+
+/* Main classifier routine: scans classifier chain attached
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
+ */
+int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res, bool compat_mode)
+{
+ __be16 protocol = tc_skb_protocol(skb);
+#ifdef CONFIG_NET_CLS_ACT
+ const int max_reclassify_loop = 4;
+ const struct tcf_proto *orig_tp = tp;
+ const struct tcf_proto *first_tp;
+ int limit = 0;
+
+reclassify:
+#endif
+ for (; tp; tp = rcu_dereference_bh(tp->next)) {
+ int err;
+
+ if (tp->protocol != protocol &&
+ tp->protocol != htons(ETH_P_ALL))
+ continue;
+
+ err = tp->classify(skb, tp, res);
+#ifdef CONFIG_NET_CLS_ACT
+ if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
+ first_tp = orig_tp;
+ goto reset;
+ } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
+ first_tp = res->goto_tp;
+ goto reset;
+ }
+#endif
+ if (err >= 0)
+ return err;
+ }
+
+ return TC_ACT_UNSPEC; /* signal: continue lookup */
+#ifdef CONFIG_NET_CLS_ACT
+reset:
+ if (unlikely(limit++ >= max_reclassify_loop)) {
+ net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
+ tp->q->ops->id, tp->prio & 0xffff,
+ ntohs(tp->protocol));
+ return TC_ACT_SHOT;
+ }
+
+ tp = first_tp;
+ protocol = tc_skb_protocol(skb);
+ goto reclassify;
+#endif
+}
+EXPORT_SYMBOL(tcf_classify);
+
+struct tcf_chain_info {
+ struct tcf_proto __rcu **pprev;
+ struct tcf_proto __rcu *next;
+};
+
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+{
+ return rtnl_dereference(*chain_info->pprev);
+}
+
+static void tcf_chain_tp_insert(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ struct tcf_proto *tp)
+{
+ if (chain->p_filter_chain &&
+ *chain_info->pprev == chain->filter_chain)
+ rcu_assign_pointer(*chain->p_filter_chain, tp);
+ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+ rcu_assign_pointer(*chain_info->pprev, tp);
+}
+
+static void tcf_chain_tp_remove(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ struct tcf_proto *tp)
+{
+ struct tcf_proto *next = rtnl_dereference(chain_info->next);
+
+ if (chain->p_filter_chain && tp == chain->filter_chain)
+ RCU_INIT_POINTER(*chain->p_filter_chain, next);
+ RCU_INIT_POINTER(*chain_info->pprev, next);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ u32 protocol, u32 prio,
+ bool prio_allocate)
+{
+ struct tcf_proto **pprev;
+ struct tcf_proto *tp;
+
+ /* Check the chain for existence of proto-tcf with this priority */
+ for (pprev = &chain->filter_chain;
+ (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+ if (tp->prio >= prio) {
+ if (tp->prio == prio) {
+ if (prio_allocate ||
+ (tp->protocol != protocol && protocol))
+ return ERR_PTR(-EINVAL);
+ } else {
+ tp = NULL;
+ }
+ break;
+ }
+ }
+ chain_info->pprev = pprev;
+ chain_info->next = tp ? tp->next : NULL;
+ return tp;
+}
/* Add/change/delete/get a filter node */
@@ -206,13 +417,14 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct tcmsg *t;
u32 protocol;
u32 prio;
- u32 nprio;
+ bool prio_allocate;
u32 parent;
+ u32 chain_index;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto __rcu **back;
- struct tcf_proto __rcu **chain;
- struct tcf_proto *next;
+ struct tcf_chain_info chain_info;
+ struct tcf_chain *chain = NULL;
+ struct tcf_block *block;
struct tcf_proto *tp;
const struct Qdisc_class_ops *cops;
unsigned long cl;
@@ -234,7 +446,7 @@ replay:
t = nlmsg_data(n);
protocol = TC_H_MIN(t->tcm_info);
prio = TC_H_MAJ(t->tcm_info);
- nprio = prio;
+ prio_allocate = false;
parent = t->tcm_parent;
cl = 0;
@@ -250,6 +462,7 @@ replay:
*/
if (n->nlmsg_flags & NLM_F_CREATE) {
prio = TC_H_MAKE(0x80000000U, 0U);
+ prio_allocate = true;
break;
}
/* fall-through */
@@ -280,7 +493,7 @@ replay:
if (!cops)
return -EINVAL;
- if (cops->tcf_chain == NULL)
+ if (!cops->tcf_block)
return -EOPNOTSUPP;
/* Do we search for filter, attached to class? */
@@ -291,34 +504,36 @@ replay:
}
/* And the last stroke */
- chain = cops->tcf_chain(q, cl);
- if (chain == NULL) {
+ block = cops->tcf_block(q, cl);
+ if (!block) {
err = -EINVAL;
goto errout;
}
+
+ chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ if (chain_index > TC_ACT_EXT_VAL_MASK) {
+ err = -EINVAL;
+ goto errout;
+ }
+ chain = tcf_chain_get(block, chain_index,
+ n->nlmsg_type == RTM_NEWTFILTER);
+ if (!chain) {
+ err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
+ goto errout;
+ }
+
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
- tcf_destroy_chain(chain);
+ tcf_chain_flush(chain);
err = 0;
goto errout;
}
- /* Check the chain for existence of proto-tcf with this priority */
- for (back = chain;
- (tp = rtnl_dereference(*back)) != NULL;
- back = &tp->next) {
- if (tp->prio >= prio) {
- if (tp->prio == prio) {
- if (!nprio ||
- (tp->protocol != protocol && protocol)) {
- err = -EINVAL;
- goto errout;
- }
- } else {
- tp = NULL;
- }
- break;
- }
+ tp = tcf_chain_tp_find(chain, &chain_info, protocol,
+ prio, prio_allocate);
+ if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
+ goto errout;
}
if (tp == NULL) {
@@ -335,11 +550,11 @@ replay:
goto errout;
}
- if (!nprio)
- nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
+ if (prio_allocate)
+ prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, nprio, parent, q);
+ protocol, prio, parent, q, chain);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
goto errout;
@@ -354,8 +569,7 @@ replay:
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- next = rtnl_dereference(tp->next);
- RCU_INIT_POINTER(*back, next);
+ tcf_chain_tp_remove(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, fh,
RTM_DELTFILTER, false);
tcf_proto_destroy(tp);
@@ -384,11 +598,10 @@ replay:
err = tp->ops->delete(tp, fh, &last);
if (err)
goto errout;
- next = rtnl_dereference(tp->next);
tfilter_notify(net, skb, n, tp, t->tcm_handle,
RTM_DELTFILTER, false);
if (last) {
- RCU_INIT_POINTER(*back, next);
+ tcf_chain_tp_remove(chain, &chain_info, tp);
tcf_proto_destroy(tp);
}
goto errout;
@@ -405,10 +618,8 @@ replay:
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
if (err == 0) {
- if (tp_created) {
- RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
- rcu_assign_pointer(*back, tp);
- }
+ if (tp_created)
+ tcf_chain_tp_insert(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
@@ -416,6 +627,8 @@ replay:
}
errout:
+ if (chain)
+ tcf_chain_put(chain);
if (cl)
cops->put(q, cl);
if (err == -EAGAIN)
@@ -444,6 +657,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
+ goto nla_put_failure;
tcm->tcm_handle = fh;
if (RTM_DELTFILTER != event) {
tcm->tcm_handle = 0;
@@ -500,22 +715,76 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
RTM_NEWTFILTER);
}
+static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ long index_start, long *p_index)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_dump_args arg;
+ struct tcf_proto *tp;
+
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+ if (*p_index < index_start)
+ continue;
+ if (TC_H_MAJ(tcm->tcm_info) &&
+ TC_H_MAJ(tcm->tcm_info) != tp->prio)
+ continue;
+ if (TC_H_MIN(tcm->tcm_info) &&
+ TC_H_MIN(tcm->tcm_info) != tp->protocol)
+ continue;
+ if (*p_index > index_start)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (cb->args[1] == 0) {
+ if (tcf_fill_node(net, skb, tp, 0,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWTFILTER) <= 0)
+ return false;
+
+ cb->args[1] = 1;
+ }
+ if (!tp->ops->walk)
+ continue;
+ arg.w.fn = tcf_node_dump;
+ arg.skb = skb;
+ arg.cb = cb;
+ arg.w.stop = 0;
+ arg.w.skip = cb->args[1] - 1;
+ arg.w.count = 0;
+ tp->ops->walk(tp, &arg.w);
+ cb->args[1] = arg.w.count + 1;
+ if (arg.w.stop)
+ return false;
+ }
+ return true;
+}
+
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- int t;
- int s_t;
+ struct nlattr *tca[TCA_MAX + 1];
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto *tp, __rcu **chain;
+ struct tcf_block *block;
+ struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
unsigned long cl = 0;
const struct Qdisc_class_ops *cops;
- struct tcf_dump_args arg;
+ long index_start;
+ long index;
+ int err;
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return skb->len;
+
+ err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ if (err)
+ return err;
+
dev = __dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
return skb->len;
@@ -529,56 +798,29 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
cops = q->ops->cl_ops;
if (!cops)
goto errout;
- if (cops->tcf_chain == NULL)
+ if (!cops->tcf_block)
goto errout;
if (TC_H_MIN(tcm->tcm_parent)) {
cl = cops->get(q, tcm->tcm_parent);
if (cl == 0)
goto errout;
}
- chain = cops->tcf_chain(q, cl);
- if (chain == NULL)
+ block = cops->tcf_block(q, cl);
+ if (!block)
goto errout;
- s_t = cb->args[0];
-
- for (tp = rtnl_dereference(*chain), t = 0;
- tp; tp = rtnl_dereference(tp->next), t++) {
- if (t < s_t)
- continue;
- if (TC_H_MAJ(tcm->tcm_info) &&
- TC_H_MAJ(tcm->tcm_info) != tp->prio)
- continue;
- if (TC_H_MIN(tcm->tcm_info) &&
- TC_H_MIN(tcm->tcm_info) != tp->protocol)
- continue;
- if (t > s_t)
- memset(&cb->args[1], 0,
- sizeof(cb->args)-sizeof(cb->args[0]));
- if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, 0,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER) <= 0)
- break;
+ index_start = cb->args[0];
+ index = 0;
- cb->args[1] = 1;
- }
- if (tp->ops->walk == NULL)
+ list_for_each_entry(chain, &block->chain_list, list) {
+ if (tca[TCA_CHAIN] &&
+ nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
- arg.w.fn = tcf_node_dump;
- arg.skb = skb;
- arg.cb = cb;
- arg.w.stop = 0;
- arg.w.skip = cb->args[1] - 1;
- arg.w.count = 0;
- tp->ops->walk(tp, &arg.w);
- cb->args[1] = arg.w.count + 1;
- if (arg.w.stop)
+ if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
break;
}
- cb->args[0] = t;
+ cb->args[0] = index;
errout:
if (cl)
@@ -608,8 +850,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
struct tc_action *act;
if (exts->police && tb[exts->police]) {
- act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
- "police", ovr, TCA_ACT_BIND);
+ act = tcf_action_init_1(net, tp, tb[exts->police],
+ rate_tlv, "police", ovr,
+ TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -620,8 +863,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
LIST_HEAD(actions);
int err, i = 0;
- err = tcf_action_init(net, tb[exts->action], rate_tlv,
- NULL, ovr, TCA_ACT_BIND,
+ err = tcf_action_init(net, tp, tb[exts->action],
+ rate_tlv, NULL, ovr, TCA_ACT_BIND,
&actions);
if (err)
return err;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5ebeae996e63..be0cfdf48976 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code)
case TC_ACT_OK:
case TC_ACT_SHOT:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
case TC_ACT_REDIRECT:
case TC_ACT_UNSPEC:
return code;
@@ -161,6 +162,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
bpf_offload.gen_flags = prog->gen_flags;
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->chain->index,
tp->protocol, &offload);
if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index ca526c0881bd..7832eb93379b 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -49,6 +49,8 @@ struct fl_flow_key {
};
struct flow_dissector_key_ports enc_tp;
struct flow_dissector_key_mpls mpls;
+ struct flow_dissector_key_tcp tcp;
+ struct flow_dissector_key_ip ip;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -237,7 +239,8 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
tc->type = TC_SETUP_CLSFLOWER;
tc->cls_flower = &offload;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
+ tp->protocol, tc);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -273,8 +276,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
tc->type = TC_SETUP_CLSFLOWER;
tc->cls_flower = &offload;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
- tc);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->chain->index, tp->protocol, tc);
if (!err)
f->flags |= TCA_CLS_FLAGS_IN_HW;
@@ -300,7 +303,8 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
tc->type = TC_SETUP_CLSFLOWER;
tc->cls_flower = &offload;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->chain->index, tp->protocol, tc);
}
static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
@@ -424,6 +428,12 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_IP_TOS] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -525,6 +535,19 @@ static int fl_set_key_flags(struct nlattr **tb,
return 0;
}
+static void fl_set_key_ip(struct nlattr **tb,
+ struct flow_dissector_key_ip *key,
+ struct flow_dissector_key_ip *mask)
+{
+ fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
+ &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
+ sizeof(key->tos));
+
+ fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
+ &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
+ sizeof(key->ttl));
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
@@ -567,6 +590,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
sizeof(key->basic.ip_proto));
+ fl_set_key_ip(tb, &key->ip, &mask->ip);
}
if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
@@ -596,6 +620,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
&mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
sizeof(key->tp.dst));
+ fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
+ &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+ sizeof(key->tcp.flags));
} else if (key->basic.ip_proto == IPPROTO_UDP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
&mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
@@ -767,6 +794,10 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_PORTS, tp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_IP, ip);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_TCP, tcp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ICMP, icmp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ARP, arp);
@@ -1074,6 +1105,19 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
return 0;
}
+static int fl_dump_key_ip(struct sk_buff *skb,
+ struct flow_dissector_key_ip *key,
+ struct flow_dissector_key_ip *mask)
+{
+ if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
+ TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
+ fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
+ TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
+ return -1;
+
+ return 0;
+}
+
static int fl_dump_key_vlan(struct sk_buff *skb,
struct flow_dissector_key_vlan *vlan_key,
struct flow_dissector_key_vlan *vlan_mask)
@@ -1187,9 +1231,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if ((key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) &&
- fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
&mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
- sizeof(key->basic.ip_proto)))
+ sizeof(key->basic.ip_proto)) ||
+ fl_dump_key_ip(skb, &key->ip, &mask->ip)))
goto nla_put_failure;
if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
@@ -1215,7 +1260,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
sizeof(key->tp.src)) ||
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
&mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
- sizeof(key->tp.dst))))
+ sizeof(key->tp.dst)) ||
+ fl_dump_key_val(skb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
+ &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+ sizeof(key->tcp.flags))))
goto nla_put_failure;
else if (key->basic.ip_proto == IPPROTO_UDP &&
(fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 51859b8edd7e..9dc26c32cf32 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -64,8 +64,9 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
offload.cls_mall->exts = &head->exts;
offload.cls_mall->cookie = cookie;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
- &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->chain->index,
+ tp->protocol, &offload);
if (!err)
head->flags |= TCA_CLS_FLAGS_IN_HW;
@@ -86,8 +87,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
offload.cls_mall->exts = NULL;
offload.cls_mall->cookie = cookie;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
- &offload);
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
+ tp->protocol, &offload);
}
static void mall_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d20e72a095d5..2d01195153e6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -441,7 +441,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
offload.cls_u32->knode.handle = handle;
dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->protocol, &offload);
+ tp->chain->index, tp->protocol,
+ &offload);
}
}
@@ -465,7 +466,8 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
offload.cls_u32->hnode.prio = h->prio;
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->protocol, &offload);
+ tp->chain->index, tp->protocol,
+ &offload);
if (tc_skip_sw(flags))
return err;
@@ -488,7 +490,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
offload.cls_u32->hnode.prio = h->prio;
dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->protocol, &offload);
+ tp->chain->index, tp->protocol,
+ &offload);
}
}
@@ -522,7 +525,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
offload.cls_u32->knode.link_handle = n->ht_down->handle;
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->protocol, &offload);
+ tp->chain->index, tp->protocol,
+ &offload);
if (!err)
n->flags |= TCA_CLS_FLAGS_IN_HW;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e88342fde1bc..5d95401bbc02 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -163,7 +163,7 @@ int register_qdisc(struct Qdisc_ops *qops)
if (!(cops->get && cops->put && cops->walk && cops->leaf))
goto out_einval;
- if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
+ if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
goto out_einval;
}
@@ -1878,54 +1878,6 @@ done:
return skb->len;
}
-/* Main classifier routine: scans classifier chain attached
- * to this qdisc, (optionally) tests for protocol and asks
- * specific classifiers.
- */
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res, bool compat_mode)
-{
- __be16 protocol = tc_skb_protocol(skb);
-#ifdef CONFIG_NET_CLS_ACT
- const int max_reclassify_loop = 4;
- const struct tcf_proto *old_tp = tp;
- int limit = 0;
-
-reclassify:
-#endif
- for (; tp; tp = rcu_dereference_bh(tp->next)) {
- int err;
-
- if (tp->protocol != protocol &&
- tp->protocol != htons(ETH_P_ALL))
- continue;
-
- err = tp->classify(skb, tp, res);
-#ifdef CONFIG_NET_CLS_ACT
- if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
- goto reset;
-#endif
- if (err >= 0)
- return err;
- }
-
- return TC_ACT_UNSPEC; /* signal: continue lookup */
-#ifdef CONFIG_NET_CLS_ACT
-reset:
- if (unlikely(limit++ >= max_reclassify_loop)) {
- net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
- tp->q->ops->id, tp->prio & 0xffff,
- ntohs(tp->protocol));
- return TC_ACT_SHOT;
- }
-
- tp = old_tp;
- protocol = tc_skb_protocol(skb);
- goto reclassify;
-#endif
-}
-EXPORT_SYMBOL(tc_classify);
-
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 40cbceed4de8..de162592eee0 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -43,6 +43,7 @@
struct atm_flow_data {
struct Qdisc *q; /* FIFO, TBF, etc. */
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
void (*old_pop)(struct atm_vcc *vcc,
struct sk_buff *skb); /* chaining */
@@ -143,7 +144,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
list_del_init(&flow->list);
pr_debug("atm_tc_put: qdisc %p\n", flow->q);
qdisc_destroy(flow->q);
- tcf_destroy_chain(&flow->filter_list);
+ tcf_block_put(flow->block);
if (flow->sock) {
pr_debug("atm_tc_put: f_count %ld\n",
file_count(flow->sock->file));
@@ -274,7 +275,13 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
error = -ENOBUFS;
goto err_out;
}
- RCU_INIT_POINTER(flow->filter_list, NULL);
+
+ error = tcf_block_get(&flow->block, &flow->filter_list);
+ if (error) {
+ kfree(flow);
+ goto err_out;
+ }
+
flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!flow->q)
flow->q = &noop_qdisc;
@@ -346,14 +353,13 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- return flow ? &flow->filter_list : &p->link.filter_list;
+ return flow ? flow->block : p->link.block;
}
/* --------------------------- Qdisc operations ---------------------------- */
@@ -377,7 +383,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
- result = tc_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
@@ -400,6 +406,7 @@ done:
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
__qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
@@ -524,6 +531,7 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
+ int err;
pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
INIT_LIST_HEAD(&p->flows);
@@ -534,7 +542,11 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- RCU_INIT_POINTER(p->link.filter_list, NULL);
+
+ err = tcf_block_get(&p->link.block, &p->link.filter_list);
+ if (err)
+ return err;
+
p->link.vcc = NULL;
p->link.sock = NULL;
p->link.classid = sch->handle;
@@ -561,7 +573,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list)
- tcf_destroy_chain(&flow->filter_list);
+ tcf_block_put(flow->block);
list_for_each_entry_safe(flow, tmp, &p->flows, list) {
if (flow->ref > 1)
@@ -646,7 +658,7 @@ static const struct Qdisc_class_ops atm_class_ops = {
.change = atm_tc_change,
.delete = atm_tc_delete,
.walk = atm_tc_walk,
- .tcf_chain = atm_tc_find_tcf,
+ .tcf_block = atm_tc_tcf_block,
.bind_tcf = atm_tc_bind_filter,
.unbind_tcf = atm_tc_put,
.dump = atm_tc_dump_class,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 7415859fd4c3..481036f6b54e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -127,6 +127,7 @@ struct cbq_class {
struct tc_cbq_xstats xstats;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
int refcnt;
int filters;
@@ -233,7 +234,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 2+n. Apply classifier.
*/
- result = tc_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, fl, &res, true);
if (!fl || result < 0)
goto fallback;
@@ -253,6 +254,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
@@ -1405,7 +1407,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
WARN_ON(cl->filters);
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
gen_kill_estimator(&cl->rate_est);
@@ -1430,7 +1432,7 @@ static void cbq_destroy(struct Qdisc *sch)
*/
for (h = 0; h < q->clhash.hashsize; h++) {
hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
}
for (h = 0; h < q->clhash.hashsize; h++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
@@ -1585,12 +1587,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
+ err = tcf_block_get(&cl->block, &cl->filter_list);
+ if (err) {
+ kfree(cl);
+ return err;
+ }
+
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
+ tcf_block_put(cl->block);
kfree(cl);
goto failure;
}
@@ -1688,8 +1697,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
return 0;
}
-static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
- unsigned long arg)
+static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
@@ -1697,7 +1705,7 @@ static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
if (cl == NULL)
cl = &q->link;
- return &cl->filter_list;
+ return cl->block;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1756,7 +1764,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
.change = cbq_change_class,
.delete = cbq_delete,
.walk = cbq_walk,
- .tcf_chain = cbq_find_tcf,
+ .tcf_block = cbq_tcf_block,
.bind_tcf = cbq_bind_filter,
.unbind_tcf = cbq_unbind_filter,
.dump = cbq_dump_class,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 58a8c32eab23..a413dc1c2098 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -36,6 +36,7 @@ struct drr_class {
struct drr_sched {
struct list_head active;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct Qdisc_class_hash clhash;
};
@@ -190,15 +191,14 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg)
drr_destroy_class(sch, cl);
}
-static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct drr_sched *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -333,12 +333,13 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
@@ -431,6 +432,9 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
struct drr_sched *q = qdisc_priv(sch);
int err;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
return err;
@@ -462,7 +466,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
struct hlist_node *next;
unsigned int i;
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -477,7 +481,7 @@ static const struct Qdisc_class_ops drr_class_ops = {
.delete = drr_delete_class,
.get = drr_get_class,
.put = drr_put_class,
- .tcf_chain = drr_tcf_chain,
+ .tcf_block = drr_tcf_block,
.bind_tcf = drr_bind_tcf,
.unbind_tcf = drr_unbind_tcf,
.graft = drr_graft_class,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1c0f877f673a..6d94fcc3592a 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -44,6 +44,7 @@ struct mask_value {
struct dsmark_qdisc_data {
struct Qdisc *q;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct mask_value *mv;
u16 indices;
u8 set_tc_index;
@@ -183,11 +184,11 @@ ignore:
}
}
-static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- return &p->filter_list;
+
+ return p->block;
}
/* --------------------------- Qdisc operations ---------------------------- */
@@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
else {
struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tc_classify(skb, fl, &res, false);
+ int result = tcf_classify(skb, fl, &res, false);
pr_debug("result %d class 0x%04x\n", result, res.classid);
@@ -242,6 +243,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
#ifdef CONFIG_NET_CLS_ACT
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
__qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
@@ -342,6 +344,10 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
+ err = tcf_block_get(&p->block, &p->filter_list);
+ if (err)
+ return err;
+
err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
if (err < 0)
goto errout;
@@ -400,7 +406,7 @@ static void dsmark_destroy(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- tcf_destroy_chain(&p->filter_list);
+ tcf_block_put(p->block);
qdisc_destroy(p->q);
if (p->mv != p->embedded)
kfree(p->mv);
@@ -468,7 +474,7 @@ static const struct Qdisc_class_ops dsmark_class_ops = {
.change = dsmark_change,
.delete = dsmark_delete,
.walk = dsmark_walk,
- .tcf_chain = dsmark_find_tcf,
+ .tcf_block = dsmark_tcf_block,
.bind_tcf = dsmark_bind_filter,
.unbind_tcf = dsmark_put,
.dump = dsmark_dump_class,
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index b488721a0059..147fde73a0f5 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -390,9 +390,17 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
q->stat_tcp_retrans++;
qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
+ struct sock *sk = skb->sk;
+
fq_flow_add_tail(&q->new_flows, f);
if (time_after(jiffies, f->age + q->flow_refill_delay))
f->credit = max_t(u32, f->credit, q->quantum);
+ if (sk && q->rate_enable) {
+ if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
+ SK_PACING_FQ))
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
+ }
q->inactive_flows--;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9201abce928c..337f2d6d81e4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -55,6 +55,7 @@ struct fq_codel_flow {
struct fq_codel_sched_data {
struct tcf_proto __rcu *filter_list; /* optional external classifier */
+ struct tcf_block *block;
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
@@ -96,12 +97,13 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return 0;
@@ -450,7 +452,7 @@ static void fq_codel_destroy(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
kvfree(q->backlogs);
kvfree(q->flows);
}
@@ -459,6 +461,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
int i;
+ int err;
sch->limit = 10*1024;
q->flows_cnt = 1024;
@@ -478,6 +481,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
return err;
}
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
if (!q->flows) {
q->flows = kvzalloc(q->flows_cnt *
sizeof(struct fq_codel_flow), GFP_KERNEL);
@@ -589,14 +596,13 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -679,7 +685,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = {
.leaf = fq_codel_leaf,
.get = fq_codel_get,
.put = fq_codel_put,
- .tcf_chain = fq_codel_find_tcf,
+ .tcf_block = fq_codel_tcf_block,
.bind_tcf = fq_codel_bind,
.unbind_tcf = fq_codel_put,
.dump = fq_codel_dump_class,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5cb82f6c1b06..b52f74610dc7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,6 +116,7 @@ struct hfsc_class {
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
+ struct tcf_block *block;
unsigned int filter_cnt; /* filter count */
unsigned int level; /* class level in hierarchy */
@@ -1040,12 +1041,19 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ err = tcf_block_get(&cl->block, &cl->filter_list);
+ if (err) {
+ kfree(cl);
+ return err;
+ }
+
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
+ tcf_block_put(cl->block);
kfree(cl);
return err;
}
@@ -1091,7 +1099,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
{
struct hfsc_sched *q = qdisc_priv(sch);
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
qdisc_destroy(cl->qdisc);
gen_kill_estimator(&cl->rate_est);
if (cl != &q->root)
@@ -1142,11 +1150,12 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
- while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
@@ -1261,8 +1270,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
cl->filter_cnt--;
}
-static struct tcf_proto __rcu **
-hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
+static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
@@ -1270,7 +1278,7 @@ hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
if (cl == NULL)
cl = &q->root;
- return &cl->filter_list;
+ return cl->block;
}
static int
@@ -1515,7 +1523,7 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
}
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1662,7 +1670,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
.put = hfsc_put_class,
.bind_tcf = hfsc_bind_tcf,
.unbind_tcf = hfsc_unbind_tcf,
- .tcf_chain = hfsc_tcf_chain,
+ .tcf_block = hfsc_tcf_block,
.dump = hfsc_dump_class,
.dump_stats = hfsc_dump_class_stats,
.walk = hfsc_walk
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 570ef3b0c09b..203286ab4427 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -105,6 +105,7 @@ struct htb_class {
int quantum; /* but stored for parent-to-leaf return */
struct tcf_proto __rcu *filter_list; /* class attached filters */
+ struct tcf_block *block;
int filter_cnt;
int refcnt; /* usage count of this class */
@@ -156,6 +157,7 @@ struct htb_sched {
/* filters for qdisc itself */
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
#define HTB_WARN_TOOMANYEVENTS 0x1
unsigned int warned; /* only one warning */
@@ -231,11 +233,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
@@ -1017,6 +1020,10 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
if (err < 0)
return err;
@@ -1230,7 +1237,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
qdisc_destroy(cl->un.leaf.q);
}
gen_kill_estimator(&cl->rate_est);
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
kfree(cl);
}
@@ -1248,11 +1255,11 @@ static void htb_destroy(struct Qdisc *sch)
* because filter need its target class alive to be able to call
* unbind_filter on it (without Oops).
*/
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
}
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1396,6 +1403,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
+ err = tcf_block_get(&cl->block, &cl->filter_list);
+ if (err) {
+ kfree(cl);
+ goto failure;
+ }
if (htb_rate_est || tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
@@ -1403,6 +1415,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE] ? : &est.nla);
if (err) {
+ tcf_block_put(cl->block);
kfree(cl);
goto failure;
}
@@ -1521,14 +1534,12 @@ failure:
return err;
}
-static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
- unsigned long arg)
+static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
- return fl;
+ return cl ? cl->block : q->block;
}
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1591,7 +1602,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
.change = htb_change_class,
.delete = htb_delete,
.walk = htb_walk,
- .tcf_chain = htb_find_tcf,
+ .tcf_block = htb_tcf_block,
.bind_tcf = htb_bind_filter,
.unbind_tcf = htb_unbind_filter,
.dump = htb_dump_class,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 3bab5f66c392..d8a9bebcab90 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -18,6 +18,10 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+struct ingress_sched_data {
+ struct tcf_block *block;
+};
+
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
return NULL;
@@ -47,16 +51,23 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
}
-static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct ingress_sched_data *q = qdisc_priv(sch);
- return &dev->ingress_cl_list;
+ return q->block;
}
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
+ struct ingress_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err;
+
+ err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+ if (err)
+ return err;
+
net_inc_ingress_queue();
sch->flags |= TCQ_F_CPUSTATS;
@@ -65,9 +76,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
static void ingress_destroy(struct Qdisc *sch)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct ingress_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&dev->ingress_cl_list);
+ tcf_block_put(q->block);
net_dec_ingress_queue();
}
@@ -91,7 +102,7 @@ static const struct Qdisc_class_ops ingress_class_ops = {
.get = ingress_get,
.put = ingress_put,
.walk = ingress_walk,
- .tcf_chain = ingress_find_tcf,
+ .tcf_block = ingress_tcf_block,
.tcf_cl_offload = ingress_cl_offload,
.bind_tcf = ingress_bind_filter,
.unbind_tcf = ingress_put,
@@ -100,12 +111,18 @@ static const struct Qdisc_class_ops ingress_class_ops = {
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
+ .priv_size = sizeof(struct ingress_sched_data),
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
};
+struct clsact_sched_data {
+ struct tcf_block *ingress_block;
+ struct tcf_block *egress_block;
+};
+
static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
{
switch (TC_H_MIN(classid)) {
@@ -128,16 +145,15 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch,
return clsact_get(sch, classid);
}
-static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct clsact_sched_data *q = qdisc_priv(sch);
switch (cl) {
case TC_H_MIN(TC_H_MIN_INGRESS):
- return &dev->ingress_cl_list;
+ return q->ingress_block;
case TC_H_MIN(TC_H_MIN_EGRESS):
- return &dev->egress_cl_list;
+ return q->egress_block;
default:
return NULL;
}
@@ -145,6 +161,18 @@ static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
{
+ struct clsact_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err;
+
+ err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+ if (err)
+ return err;
+
+ err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+ if (err)
+ return err;
+
net_inc_ingress_queue();
net_inc_egress_queue();
@@ -155,10 +183,10 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
static void clsact_destroy(struct Qdisc *sch)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct clsact_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&dev->ingress_cl_list);
- tcf_destroy_chain(&dev->egress_cl_list);
+ tcf_block_put(q->egress_block);
+ tcf_block_put(q->ingress_block);
net_dec_ingress_queue();
net_dec_egress_queue();
@@ -169,7 +197,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
.get = clsact_get,
.put = ingress_put,
.walk = ingress_walk,
- .tcf_chain = clsact_find_tcf,
+ .tcf_block = clsact_tcf_block,
.tcf_cl_offload = clsact_cl_offload,
.bind_tcf = clsact_bind_filter,
.unbind_tcf = ingress_put,
@@ -178,6 +206,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.cl_ops = &clsact_class_ops,
.id = "clsact",
+ .priv_size = sizeof(struct clsact_sched_data),
.init = clsact_init,
.destroy = clsact_destroy,
.dump = ingress_dump,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 0a4cf27ea54b..e0c02725cd48 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -43,7 +43,7 @@ static void mqprio_destroy(struct Qdisc *sch)
struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
{ .mqprio = &offload } };
- dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
+ dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc);
} else {
netdev_set_num_tc(dev, 0);
}
@@ -152,7 +152,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
{ .mqprio = &offload } };
- err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
+ err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle,
+ 0, 0, &tc);
if (err)
return err;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 43a3a10b3c81..f143b7bbaa0d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -32,6 +32,7 @@ struct multiq_sched_data {
u16 max_bands;
u16 curband;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct Qdisc **queues;
};
@@ -46,11 +47,12 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
@@ -170,7 +172,7 @@ multiq_destroy(struct Qdisc *sch)
int band;
struct multiq_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (band = 0; band < q->bands; band++)
qdisc_destroy(q->queues[band]);
@@ -243,6 +245,10 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
q->max_bands = qdisc_dev(sch)->num_tx_queues;
q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
@@ -367,14 +373,13 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct multiq_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static const struct Qdisc_class_ops multiq_class_ops = {
@@ -383,7 +388,7 @@ static const struct Qdisc_class_ops multiq_class_ops = {
.get = multiq_get,
.put = multiq_put,
.walk = multiq_walk,
- .tcf_chain = multiq_find_tcf,
+ .tcf_block = multiq_tcf_block,
.bind_tcf = multiq_bind,
.unbind_tcf = multiq_put,
.dump = multiq_dump_class,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 92c2e6d448d7..e3e364cc9a70 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -25,6 +25,7 @@
struct prio_sched_data {
int bands;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
};
@@ -42,11 +43,12 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tc_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
@@ -145,7 +147,7 @@ prio_destroy(struct Qdisc *sch)
int prio;
struct prio_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -204,9 +206,16 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
static int prio_init(struct Qdisc *sch, struct nlattr *opt)
{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ int err;
+
if (!opt)
return -EINVAL;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
return prio_tune(sch, opt);
}
@@ -317,14 +326,13 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct prio_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static const struct Qdisc_class_ops prio_class_ops = {
@@ -333,7 +341,7 @@ static const struct Qdisc_class_ops prio_class_ops = {
.get = prio_get,
.put = prio_put,
.walk = prio_walk,
- .tcf_chain = prio_find_tcf,
+ .tcf_block = prio_tcf_block,
.bind_tcf = prio_bind,
.unbind_tcf = prio_put,
.dump = prio_dump_class,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 041eba3006cc..0e16dfda0bd7 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -182,6 +182,7 @@ struct qfq_group {
struct qfq_sched {
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct Qdisc_class_hash clhash;
u64 oldV, V; /* Precise virtual times. */
@@ -582,15 +583,14 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
qfq_destroy_class(sch, cl);
}
-static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct qfq_sched *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -720,12 +720,13 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
@@ -1438,6 +1439,10 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
int i, j, err;
u32 max_cl_shift, maxbudg_shift, max_classes;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
return err;
@@ -1492,7 +1497,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
struct hlist_node *next;
unsigned int i;
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1508,7 +1513,7 @@ static const struct Qdisc_class_ops qfq_class_ops = {
.delete = qfq_delete_class,
.get = qfq_get_class,
.put = qfq_put_class,
- .tcf_chain = qfq_tcf_chain,
+ .tcf_block = qfq_tcf_block,
.bind_tcf = qfq_bind_tcf,
.unbind_tcf = qfq_unbind_tcf,
.graft = qfq_graft_class,
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0f777273ba29..11fb6ec878d6 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -56,6 +56,7 @@ struct sfb_bins {
struct sfb_sched_data {
struct Qdisc *qdisc;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
unsigned long rehash_interval;
unsigned long warmup_time; /* double buffering warmup time in jiffies */
u32 max;
@@ -259,12 +260,13 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
struct tcf_result res;
int result;
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return false;
@@ -465,7 +467,7 @@ static void sfb_destroy(struct Qdisc *sch)
{
struct sfb_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
qdisc_destroy(q->qdisc);
}
@@ -549,6 +551,11 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
{
struct sfb_sched_data *q = qdisc_priv(sch);
+ int err;
+
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
q->qdisc = &noop_qdisc;
return sfb_change(sch, opt);
@@ -657,14 +664,13 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct sfb_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
@@ -682,7 +688,7 @@ static const struct Qdisc_class_ops sfb_class_ops = {
.change = sfb_change_class,
.delete = sfb_delete,
.walk = sfb_walk,
- .tcf_chain = sfb_find_tcf,
+ .tcf_block = sfb_tcf_block,
.bind_tcf = sfb_bind,
.unbind_tcf = sfb_put,
.dump = sfb_dump_class,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 332d94be6e1c..f80ea2cc5f1f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -126,6 +126,7 @@ struct sfq_sched_data {
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
sfq_index *ht; /* Hash table ('divisor' slots) */
struct sfq_slot *slots; /* Flows table ('maxflows' entries) */
@@ -180,12 +181,13 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return sfq_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return 0;
@@ -697,7 +699,7 @@ static void sfq_destroy(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
q->perturb_period = 0;
del_timer_sync(&q->perturb_timer);
sfq_free(q->ht);
@@ -709,6 +711,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
int i;
+ int err;
+
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
(unsigned long)sch);
@@ -815,14 +822,13 @@ static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct sfq_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -878,7 +884,7 @@ static const struct Qdisc_class_ops sfq_class_ops = {
.leaf = sfq_leaf,
.get = sfq_get,
.put = sfq_put,
- .tcf_chain = sfq_find_tcf,
+ .tcf_block = sfq_tcf_block,
.bind_tcf = sfq_bind,
.unbind_tcf = sfq_put,
.dump = sfq_dump_class,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 95238284c422..72b07dd9b959 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -246,7 +246,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
if (!sctp_ulpq_init(&asoc->ulpq, asoc))
goto fail_init;
- if (sctp_stream_new(asoc, gfp))
+ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+ 0, gfp))
goto fail_init;
/* Assume that peer would support both address types unless we are
@@ -291,7 +292,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
return asoc;
stream_free:
- sctp_stream_free(asoc->stream);
+ sctp_stream_free(&asoc->stream);
fail_init:
sock_put(asoc->base.sk);
sctp_endpoint_put(asoc->ep);
@@ -365,7 +366,7 @@ void sctp_association_free(struct sctp_association *asoc)
sctp_tsnmap_free(&asoc->peer.tsn_map);
/* Free stream information. */
- sctp_stream_free(asoc->stream);
+ sctp_stream_free(&asoc->stream);
if (asoc->strreset_chunk)
sctp_chunk_free(asoc->strreset_chunk);
@@ -1151,7 +1152,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
/* Reinitialize SSN for both local streams
* and peer's streams.
*/
- sctp_stream_clear(asoc->stream);
+ sctp_stream_clear(&asoc->stream);
/* Flush the ULP reassembly and ordered queue.
* Any data there will now be stale and will
@@ -1177,18 +1178,11 @@ void sctp_assoc_update(struct sctp_association *asoc,
asoc->ctsn_ack_point = asoc->next_tsn - 1;
asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
- if (sctp_state(asoc, COOKIE_WAIT)) {
- sctp_stream_free(asoc->stream);
- asoc->stream = new->stream;
- new->stream = NULL;
- }
+ if (sctp_state(asoc, COOKIE_WAIT))
+ sctp_stream_update(&asoc->stream, &new->stream);
- if (!asoc->assoc_id) {
- /* get a new association id since we don't have one
- * yet.
- */
- sctp_assoc_set_id(asoc, GFP_ATOMIC);
- }
+ /* get a new assoc id if we don't have one yet. */
+ sctp_assoc_set_id(asoc, GFP_ATOMIC);
}
/* SCTP-AUTH: Save the peer parameters from the new associations
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 697721a7a3f1..81466f6442e8 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -307,7 +307,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
time_after(jiffies, chunk->msg->expires_at)) {
struct sctp_stream_out *streamout =
- &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+ &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
if (chunk->sent_count) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
@@ -320,7 +320,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
struct sctp_stream_out *streamout =
- &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+ &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 4f5a2b580aa5..275925b93b29 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -35,6 +35,7 @@
static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_not_inet = 0;
return sctp_compute_cksum(skb, skb_transport_offset(skb));
}
@@ -98,6 +99,11 @@ static const struct net_offload sctp6_offload = {
},
};
+static const struct skb_checksum_ops crc32c_csum_ops = {
+ .update = sctp_csum_update,
+ .combine = sctp_csum_combine,
+};
+
int __init sctp_offload_init(void)
{
int ret;
@@ -110,6 +116,7 @@ int __init sctp_offload_init(void)
if (ret)
goto ipv4;
+ crc32c_csum_stub = &crc32c_csum_ops;
return ret;
ipv4:
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1409a875ad8e..e2edf2ebbade 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -538,6 +538,7 @@ merge:
} else {
chksum:
head->ip_summed = CHECKSUM_PARTIAL;
+ head->csum_not_inet = 1;
head->csum_start = skb_transport_header(head) - head->head;
head->csum_offset = offsetof(struct sctphdr, checksum);
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index fe4c3d462f6e..20299df163b9 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -363,7 +363,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
sctp_insert_list(&asoc->outqueue.abandoned,
&chk->transmitted_list);
- streamout = &asoc->stream->out[chk->sinfo.sinfo_stream];
+ streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
@@ -400,9 +400,9 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
q->out_qlen -= chk->skb->len;
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
- if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) {
+ if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
struct sctp_stream_out *streamout =
- &asoc->stream->out[chk->sinfo.sinfo_stream];
+ &asoc->stream.out[chk->sinfo.sinfo_stream];
streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
}
@@ -1036,7 +1036,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
* stream identifier.
*/
- if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) {
+ if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
@@ -1054,7 +1054,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
continue;
}
- if (asoc->stream->out[sid].state == SCTP_STREAM_CLOSED) {
+ if (asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) {
sctp_outq_head_data(q, chunk);
goto sctp_flush_out;
}
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index a0b29d43627f..8e34db56bc1d 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -218,8 +218,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
return -ENOMEM;
head = &sctp_ep_hashtable[hash];
- local_bh_disable();
- read_lock(&head->lock);
+ read_lock_bh(&head->lock);
sctp_for_each_hentry(epb, &head->chain) {
ep = sctp_ep(epb);
sk = epb->sk;
@@ -234,8 +233,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
sctp_seq_dump_local_addrs(seq, epb);
seq_printf(seq, "\n");
}
- read_unlock(&head->lock);
- local_bh_enable();
+ read_unlock_bh(&head->lock);
return 0;
}
@@ -361,8 +359,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sctp_seq_dump_remote_addrs(seq, assoc);
seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
"%8d %8d %8d %8d",
- assoc->hbinterval, assoc->stream->incnt,
- assoc->stream->outcnt, assoc->max_retrans,
+ assoc->hbinterval, assoc->stream.incnt,
+ assoc->stream.outcnt, assoc->max_retrans,
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
atomic_read(&sk->sk_wmem_alloc),
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 92e332e17391..ea2601501654 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1296,8 +1296,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t),
&auth_hdr);
- hmac = skb_put(retval->skb, hmac_desc->hmac_len);
- memset(hmac, 0, hmac_desc->hmac_len);
+ hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len);
/* Adjust the chunk header to include the empty MAC */
retval->chunk_hdr->length =
@@ -1544,7 +1543,7 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
/* All fragments will be on the same stream */
sid = ntohs(chunk->subh.data_hdr->stream);
- stream = chunk->asoc->stream;
+ stream = &chunk->asoc->stream;
/* Now assign the sequence number to the entire message.
* All fragments must have the same stream sequence number.
@@ -2454,7 +2453,8 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
* stream sequence number shall be set to 0.
*/
- if (sctp_stream_init(asoc, gfp))
+ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+ asoc->c.sinit_max_instreams, gfp))
goto clean_up;
if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f863b5573e42..df73190da761 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3958,7 +3958,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
/* Silently discard the chunk if stream-id is not valid */
sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->stream->incnt)
+ if (ntohs(skip->stream) >= asoc->stream.incnt)
goto discard_noforce;
}
@@ -4029,7 +4029,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
/* Silently discard the chunk if stream-id is not valid */
sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->stream->incnt)
+ if (ntohs(skip->stream) >= asoc->stream.incnt)
goto gen_shutdown;
}
@@ -6365,7 +6365,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* and discard the DATA chunk.
*/
sid = ntohs(data_hdr->stream);
- if (sid >= asoc->stream->incnt) {
+ if (sid >= asoc->stream.incnt) {
/* Mark tsn as received even though we drop it */
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
@@ -6387,7 +6387,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* and is invalid.
*/
ssn = ntohs(data_hdr->ssn);
- if (ordered && SSN_lt(ssn, sctp_ssn_peek(asoc->stream, in, sid)))
+ if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid)))
return SCTP_IERROR_PROTO_VIOLATION;
/* Send the data up to the user. Note: Schedule the
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 30aa0a529215..039a93175adf 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -103,7 +103,7 @@ static int sctp_autobind(struct sock *sk);
static void sctp_sock_migrate(struct sock *, struct sock *,
struct sctp_association *, sctp_socket_type_t);
-static int sctp_memory_pressure;
+static unsigned long sctp_memory_pressure;
static atomic_long_t sctp_memory_allocated;
struct percpu_counter sctp_sockets_allocated;
@@ -1494,7 +1494,7 @@ static void sctp_close(struct sock *sk, long timeout)
pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout);
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
sk->sk_shutdown = SHUTDOWN_MASK;
sk->sk_state = SCTP_SS_CLOSING;
@@ -1544,7 +1544,7 @@ static void sctp_close(struct sock *sk, long timeout)
* held and that should be grabbed before socket lock.
*/
spin_lock_bh(&net->sctp.addr_wq_lock);
- bh_lock_sock(sk);
+ bh_lock_sock_nested(sk);
/* Hold the sock, since sk_common_release() will put sock_put()
* and we have just a little more cleanup.
@@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
}
/* Check for invalid stream. */
- if (sinfo->sinfo_stream >= asoc->stream->outcnt) {
+ if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
err = -EINVAL;
goto out_free;
}
@@ -4497,8 +4497,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
info->sctpi_rwnd = asoc->a_rwnd;
info->sctpi_unackdata = asoc->unack_data;
info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
- info->sctpi_instrms = asoc->stream->incnt;
- info->sctpi_outstrms = asoc->stream->outcnt;
+ info->sctpi_instrms = asoc->stream.incnt;
+ info->sctpi_outstrms = asoc->stream.outcnt;
list_for_each(pos, &asoc->base.inqueue.in_chunk_list)
info->sctpi_inqueue++;
list_for_each(pos, &asoc->outqueue.out_chunk_list)
@@ -4727,8 +4727,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
status.sstat_unackdata = asoc->unack_data;
status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
- status.sstat_instrms = asoc->stream->incnt;
- status.sstat_outstrms = asoc->stream->outcnt;
+ status.sstat_instrms = asoc->stream.incnt;
+ status.sstat_outstrms = asoc->stream.outcnt;
status.sstat_fragmentation_point = asoc->frag_point;
status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,
@@ -6600,10 +6600,10 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
- if (!asoc || params.sprstat_sid >= asoc->stream->outcnt)
+ if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
goto out;
- streamout = &asoc->stream->out[params.sprstat_sid];
+ streamout = &asoc->stream.out[params.sprstat_sid];
if (policy == SCTP_PR_SCTP_NONE) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index dda53a293986..82e6d40052a8 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -35,70 +35,43 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
-int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp)
+int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ gfp_t gfp)
{
- struct sctp_stream *stream;
- int i;
-
- stream = kzalloc(sizeof(*stream), gfp);
- if (!stream)
- return -ENOMEM;
-
- stream->outcnt = asoc->c.sinit_num_ostreams;
- stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
- if (!stream->out) {
- kfree(stream);
- return -ENOMEM;
- }
- for (i = 0; i < stream->outcnt; i++)
- stream->out[i].state = SCTP_STREAM_OPEN;
-
- asoc->stream = stream;
-
- return 0;
-}
-
-int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp)
-{
- struct sctp_stream *stream = asoc->stream;
int i;
/* Initial stream->out size may be very big, so free it and alloc
* a new one with new outcnt to save memory.
*/
kfree(stream->out);
- stream->outcnt = asoc->c.sinit_num_ostreams;
- stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
+
+ stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
if (!stream->out)
- goto nomem;
+ return -ENOMEM;
+ stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
stream->out[i].state = SCTP_STREAM_OPEN;
- stream->incnt = asoc->c.sinit_max_instreams;
- stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp);
+ if (!incnt)
+ return 0;
+
+ stream->in = kcalloc(incnt, sizeof(*stream->in), gfp);
if (!stream->in) {
kfree(stream->out);
- goto nomem;
+ stream->out = NULL;
+ return -ENOMEM;
}
- return 0;
-
-nomem:
- asoc->stream = NULL;
- kfree(stream);
+ stream->incnt = incnt;
- return -ENOMEM;
+ return 0;
}
void sctp_stream_free(struct sctp_stream *stream)
{
- if (unlikely(!stream))
- return;
-
kfree(stream->out);
kfree(stream->in);
- kfree(stream);
}
void sctp_stream_clear(struct sctp_stream *stream)
@@ -112,6 +85,19 @@ void sctp_stream_clear(struct sctp_stream *stream)
stream->in[i].ssn = 0;
}
+void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
+{
+ sctp_stream_free(stream);
+
+ stream->out = new->out;
+ stream->in = new->in;
+ stream->outcnt = new->outcnt;
+ stream->incnt = new->incnt;
+
+ new->out = NULL;
+ new->in = NULL;
+}
+
static int sctp_send_reconf(struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
@@ -128,7 +114,7 @@ static int sctp_send_reconf(struct sctp_association *asoc,
int sctp_send_reset_streams(struct sctp_association *asoc,
struct sctp_reset_streams *params)
{
- struct sctp_stream *stream = asoc->stream;
+ struct sctp_stream *stream = &asoc->stream;
__u16 i, str_nums, *str_list;
struct sctp_chunk *chunk;
int retval = -EINVAL;
@@ -214,6 +200,7 @@ out:
int sctp_send_reset_assoc(struct sctp_association *asoc)
{
+ struct sctp_stream *stream = &asoc->stream;
struct sctp_chunk *chunk = NULL;
int retval;
__u16 i;
@@ -230,8 +217,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
return -ENOMEM;
/* Block further xmit of data until this request is completed */
- for (i = 0; i < asoc->stream->outcnt; i++)
- asoc->stream->out[i].state = SCTP_STREAM_CLOSED;
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_CLOSED;
asoc->strreset_chunk = chunk;
sctp_chunk_hold(asoc->strreset_chunk);
@@ -241,8 +228,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
sctp_chunk_put(asoc->strreset_chunk);
asoc->strreset_chunk = NULL;
- for (i = 0; i < asoc->stream->outcnt; i++)
- asoc->stream->out[i].state = SCTP_STREAM_OPEN;
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
return retval;
}
@@ -255,7 +242,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
int sctp_send_add_streams(struct sctp_association *asoc,
struct sctp_add_streams *params)
{
- struct sctp_stream *stream = asoc->stream;
+ struct sctp_stream *stream = &asoc->stream;
struct sctp_chunk *chunk = NULL;
int retval = -ENOMEM;
__u32 outcnt, incnt;
@@ -357,7 +344,7 @@ struct sctp_chunk *sctp_process_strreset_outreq(
struct sctp_ulpevent **evp)
{
struct sctp_strreset_outreq *outreq = param.v;
- struct sctp_stream *stream = asoc->stream;
+ struct sctp_stream *stream = &asoc->stream;
__u16 i, nums, flags = 0, *str_p = NULL;
__u32 result = SCTP_STRRESET_DENIED;
__u32 request_seq;
@@ -449,7 +436,7 @@ struct sctp_chunk *sctp_process_strreset_inreq(
struct sctp_ulpevent **evp)
{
struct sctp_strreset_inreq *inreq = param.v;
- struct sctp_stream *stream = asoc->stream;
+ struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
struct sctp_chunk *chunk = NULL;
__u16 i, nums, *str_p;
@@ -523,7 +510,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
{
__u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
struct sctp_strreset_tsnreq *tsnreq = param.v;
- struct sctp_stream *stream = asoc->stream;
+ struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
__u32 request_seq;
__u16 i;
@@ -612,7 +599,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
struct sctp_ulpevent **evp)
{
struct sctp_strreset_addstrm *addstrm = param.v;
- struct sctp_stream *stream = asoc->stream;
+ struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
struct sctp_stream_in *streamin;
__u32 request_seq, incnt;
@@ -687,7 +674,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
struct sctp_ulpevent **evp)
{
struct sctp_strreset_addstrm *addstrm = param.v;
- struct sctp_stream *stream = asoc->stream;
+ struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
struct sctp_stream_out *streamout;
struct sctp_chunk *chunk = NULL;
@@ -758,8 +745,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
union sctp_params param,
struct sctp_ulpevent **evp)
{
+ struct sctp_stream *stream = &asoc->stream;
struct sctp_strreset_resp *resp = param.v;
- struct sctp_stream *stream = asoc->stream;
struct sctp_transport *t;
__u16 i, nums, flags = 0;
sctp_paramhdr_t *req;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index aa3624d50278..25f7e4140566 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -764,7 +764,7 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
__u16 sid, csid, cssn;
sid = event->stream;
- stream = ulpq->asoc->stream;
+ stream = &ulpq->asoc->stream;
event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
@@ -858,7 +858,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
/* Note: The stream ID must be verified before this routine. */
sid = event->stream;
ssn = event->ssn;
- stream = ulpq->asoc->stream;
+ stream = &ulpq->asoc->stream;
/* Is this the expected SSN for this stream ID? */
if (ssn != sctp_ssn_peek(stream, in, sid)) {
@@ -893,7 +893,7 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
struct sk_buff_head *lobby = &ulpq->lobby;
__u16 csid, cssn;
- stream = ulpq->asoc->stream;
+ stream = &ulpq->asoc->stream;
/* We are holding the chunks by stream, by SSN. */
skb_queue_head_init(&temp);
@@ -958,7 +958,7 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
struct sctp_stream *stream;
/* Note: The stream ID must be verified before this routine. */
- stream = ulpq->asoc->stream;
+ stream = &ulpq->asoc->stream;
/* Is this an old SSN? If so ignore. */
if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid)))
diff --git a/net/socket.c b/net/socket.c
index c2564eb25c6b..8f9dab330d57 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -461,7 +461,7 @@ EXPORT_SYMBOL(sock_from_file);
* @err: pointer to an error code return
*
* The file handle passed in is locked and the socket it is bound
- * too is returned. If an error occurs the err pointer is overwritten
+ * to is returned. If an error occurs the err pointer is overwritten
* with a negative errno code and NULL is returned. The function checks
* for both invalid handles and passing a handle which is not a socket.
*
@@ -662,6 +662,40 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
}
+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. Ignore false software
+ * timestamps, which may be made in the __sock_recv_timestamp() call when the
+ * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
+ * hardware timestamp.
+ */
+static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
+{
+ return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
+}
+
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+ struct scm_ts_pktinfo ts_pktinfo;
+ struct net_device *orig_dev;
+
+ if (!skb_mac_header_was_set(skb))
+ return;
+
+ memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
+
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev)
+ ts_pktinfo.if_index = orig_dev->ifindex;
+ rcu_read_unlock();
+
+ ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+ sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
/*
* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
*/
@@ -670,14 +704,16 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
{
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
struct scm_timestamping tss;
- int empty = 1;
+ int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
- if (need_software_tstamp && skb->tstamp == 0)
+ if (need_software_tstamp && skb->tstamp == 0) {
__net_timestamp(skb);
+ false_tstamp = 1;
+ }
if (need_software_tstamp) {
if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
@@ -699,8 +735,13 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
empty = 0;
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
+ !skb_is_swtx_tstamp(skb, false_tstamp) &&
+ ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
empty = 0;
+ if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ !skb_is_err_queue(skb))
+ put_ts_pktinfo(msg, skb);
+ }
if (!empty) {
put_cmsg(msg, SOL_SOCKET,
SCM_TIMESTAMPING, sizeof(tss), &tss);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8d40a7d31c99..25dc67ef9d37 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -571,24 +571,17 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
+static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
/**
* register_switchdev_notifier - Register notifier
* @nb: notifier_block
*
- * Register switch device notifier. This should be used by code
- * which needs to monitor events happening in particular device.
- * Return values are same as for atomic_notifier_chain_register().
+ * Register switch device notifier.
*/
int register_switchdev_notifier(struct notifier_block *nb)
{
- int err;
-
- rtnl_lock();
- err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
- rtnl_unlock();
- return err;
+ return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -597,16 +590,10 @@ EXPORT_SYMBOL_GPL(register_switchdev_notifier);
* @nb: notifier_block
*
* Unregister switch device notifier.
- * Return values are same as for atomic_notifier_chain_unregister().
*/
int unregister_switchdev_notifier(struct notifier_block *nb)
{
- int err;
-
- rtnl_lock();
- err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
- rtnl_unlock();
- return err;
+ return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -616,18 +603,13 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
* @dev: port device
* @info: notifier information data
*
- * Call all network notifier blocks. This should be called by driver
- * when it needs to propagate hardware event.
- * Return values are same as for atomic_notifier_call_chain().
- * rtnl_lock must be held.
+ * Call all network notifier blocks.
*/
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info)
{
- ASSERT_RTNL();
-
info->dev = dev;
- return raw_notifier_call_chain(&switchdev_notif_chain, val, info);
+ return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 83ea164f16b3..7b33e8c366bc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -711,6 +711,11 @@ int wiphy_register(struct wiphy *wiphy)
(wiphy->bss_select_support & ~(BIT(__NL80211_BSS_SELECT_ATTR_AFTER_LAST) - 2))))
return -EINVAL;
+ if (WARN_ON(wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X) &&
+ (!rdev->ops->set_pmk || !rdev->ops->del_pmk)))
+ return -EINVAL;
+
if (wiphy->addresses)
memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN);
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index ec0b1c20ac99..421a6b80ec62 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -174,6 +174,14 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
scan_width);
}
+ err = cfg80211_chandef_dfs_required(&rdev->wiphy,
+ &setup->chandef,
+ NL80211_IFTYPE_MESH_POINT);
+ if (err < 0)
+ return err;
+ if (err > 0 && !setup->userspace_handles_dfs)
+ return -EINVAL;
+
if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef,
NL80211_IFTYPE_MESH_POINT))
return -EINVAL;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c3bc9da30cff..5487cd775b6f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7501,6 +7501,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1];
int err;
bool need_new_beacon = false;
+ bool need_handle_dfs_flag = true;
int len, i;
u32 cs_count;
@@ -7512,6 +7513,12 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
need_new_beacon = true;
+ /* For all modes except AP the handle_dfs flag needs to be
+ * supplied to tell the kernel that userspace will handle radar
+ * events when they happen. Otherwise a switch to a channel
+ * requiring DFS will be rejected.
+ */
+ need_handle_dfs_flag = false;
/* useless if AP is not running */
if (!wdev->beacon_interval)
@@ -7634,8 +7641,13 @@ skip_beacons:
if (err < 0)
return err;
- if (err > 0)
+ if (err > 0) {
params.radar_required = true;
+ if (need_handle_dfs_flag &&
+ !nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS])) {
+ return -EINVAL;
+ }
+ }
if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
params.block_tx = true;
@@ -8156,6 +8168,15 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
memcpy(settings->akm_suites, data, len);
}
+ if (info->attrs[NL80211_ATTR_PMK]) {
+ if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN)
+ return -EINVAL;
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK))
+ return -EINVAL;
+ settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+ }
+
return 0;
}
@@ -8860,6 +8881,12 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];
+ if (info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS] &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+ return -EINVAL;
+ connect.want_1x = info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS];
+
err = nl80211_crypto_settings(rdev, info, &connect.crypto,
NL80211_MAX_NR_CIPHER_SUITES);
if (err)
@@ -9962,6 +9989,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ setup.userspace_handles_dfs =
+ nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
+
return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
}
@@ -12241,6 +12271,90 @@ static int nl80211_set_multicast_to_unicast(struct sk_buff *skb,
return rdev_set_multicast_to_unicast(rdev, dev, enabled);
}
+static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_pmk_conf pmk_conf = {};
+ int ret;
+
+ if (wdev->iftype != NL80211_IFTYPE_STATION &&
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+ return -EOPNOTSUPP;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK])
+ return -EINVAL;
+
+ wdev_lock(wdev);
+ if (!wdev->current_bss) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ if (memcmp(pmk_conf.aa, wdev->current_bss->pub.bssid, ETH_ALEN)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
+ pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
+ if (pmk_conf.pmk_len != WLAN_PMK_LEN &&
+ pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (info->attrs[NL80211_ATTR_PMKR0_NAME]) {
+ int r0_name_len = nla_len(info->attrs[NL80211_ATTR_PMKR0_NAME]);
+
+ if (r0_name_len != WLAN_PMK_NAME_LEN) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ pmk_conf.pmk_r0_name =
+ nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]);
+ }
+
+ ret = rdev_set_pmk(rdev, dev, &pmk_conf);
+out:
+ wdev_unlock(wdev);
+ return ret;
+}
+
+static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ const u8 *aa;
+ int ret;
+
+ if (wdev->iftype != NL80211_IFTYPE_STATION &&
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+ return -EOPNOTSUPP;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_MAC])
+ return -EINVAL;
+
+ wdev_lock(wdev);
+ aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ ret = rdev_del_pmk(rdev, dev, aa);
+ wdev_unlock(wdev);
+
+ return ret;
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -13116,6 +13230,21 @@ static const struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_SET_PMK,
+ .doit = nl80211_set_pmk,
+ .policy = nl80211_policy,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_DEL_PMK,
+ .doit = nl80211_del_pmk,
+ .policy = nl80211_policy,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+
};
static struct genl_family nl80211_fam __ro_after_init = {
@@ -13671,7 +13800,9 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
info->req_ie)) ||
(info->resp_ie &&
nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
- info->resp_ie)))
+ info->resp_ie)) ||
+ (info->authorized &&
+ nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 0598c1e5d0ad..ce23d7d49960 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1164,4 +1164,29 @@ rdev_set_coalesce(struct cfg80211_registered_device *rdev,
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
+
+static inline int rdev_set_pmk(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_pmk_conf *pmk_conf)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_set_pmk(&rdev->wiphy, dev, pmk_conf);
+ if (rdev->ops->set_pmk)
+ ret = rdev->ops->set_pmk(&rdev->wiphy, dev, pmk_conf);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, const u8 *aa)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_del_pmk(&rdev->wiphy, dev, aa);
+ if (rdev->ops->del_pmk)
+ ret = rdev->ops->del_pmk(&rdev->wiphy, dev, aa);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 532a0007ce82..0a49b88070d0 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -960,6 +960,7 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
ev->rm.resp_ie_len = info->resp_ie_len;
memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
ev->rm.bss = info->bss;
+ ev->rm.authorized = info->authorized;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ca8b2059f92c..0f8db41eaddb 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2258,6 +2258,66 @@ TRACE_EVENT(rdev_tdls_cancel_channel_switch,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr))
);
+TRACE_EVENT(rdev_set_pmk,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_pmk_conf *pmk_conf),
+
+ TP_ARGS(wiphy, netdev, pmk_conf),
+
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(aa)
+ __field(u8, pmk_len)
+ __field(u8, pmk_r0_name_len)
+ __dynamic_array(u8, pmk, pmk_conf->pmk_len)
+ __dynamic_array(u8, pmk_r0_name, WLAN_PMK_NAME_LEN)
+ ),
+
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(aa, pmk_conf->aa);
+ __entry->pmk_len = pmk_conf->pmk_len;
+ __entry->pmk_r0_name_len =
+ pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0;
+ memcpy(__get_dynamic_array(pmk), pmk_conf->pmk,
+ pmk_conf->pmk_len);
+ memcpy(__get_dynamic_array(pmk_r0_name), pmk_conf->pmk_r0_name,
+ pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0);
+ ),
+
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT
+ "pmk_len=%u, pmk: %s pmk_r0_name: %s", WIPHY_PR_ARG,
+ NETDEV_PR_ARG, MAC_PR_ARG(aa), __entry->pmk_len,
+ __print_array(__get_dynamic_array(pmk),
+ __get_dynamic_array_len(pmk), 1),
+ __entry->pmk_r0_name_len ?
+ __print_array(__get_dynamic_array(pmk_r0_name),
+ __get_dynamic_array_len(pmk_r0_name), 1) : "")
+);
+
+TRACE_EVENT(rdev_del_pmk,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *aa),
+
+ TP_ARGS(wiphy, netdev, aa),
+
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(aa)
+ ),
+
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(aa, aa);
+ ),
+
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4992f1025c9d..96613fe2c6b1 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1219,8 +1219,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
u32 bitrate;
int idx;
- if (WARN_ON_ONCE(rate->mcs > 9))
- return 0;
+ if (rate->mcs > 9)
+ goto warn;
switch (rate->bw) {
case RATE_INFO_BW_160:
@@ -1235,8 +1235,7 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
case RATE_INFO_BW_5:
case RATE_INFO_BW_10:
default:
- WARN_ON(1);
- /* fall through */
+ goto warn;
case RATE_INFO_BW_20:
idx = 0;
}
@@ -1249,6 +1248,10 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
/* do NOT round down here */
return (bitrate + 50000) / 100000;
+ warn:
+ WARN_ONCE(1, "invalid rate bw=%d, mcs=%d, nss=%d\n",
+ rate->bw, rate->mcs, rate->nss);
+ return 0;
}
u32 cfg80211_calculate_bitrate(struct rate_info *rate)